From 0e97c8e23e2572c14dd0f4f4fbfca77ee8a48be2 Mon Sep 17 00:00:00 2001
From: Nick Mathewson <nickm@torproject.org>
Date: Fri, 7 Feb 2014 17:38:16 -0500
Subject: Siphash-2-4 is now our hash in nearly all cases.

I've made an exception for cases where I'm sure that users can't
influence the inputs.  This is likely to cause a slowdown somewhere,
but it's safer to siphash everything and *then* look for cases to
optimize.

This patch doesn't actually get us any _benefit_ from siphash yet,
since we don't really randomize the key at any point.
---
 src/common/address.c   | 38 +++++++++++++++++++++++++++++++-------
 src/common/address.h   |  3 ++-
 src/common/container.c | 10 ++--------
 src/common/container.h | 21 +++++++++++----------
 4 files changed, 46 insertions(+), 26 deletions(-)

(limited to 'src/common')

diff --git a/src/common/address.c b/src/common/address.c
index b9f2d9315..69049fa0a 100644
--- a/src/common/address.c
+++ b/src/common/address.c
@@ -874,6 +874,32 @@ tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src)
   memcpy(dest, src, sizeof(tor_addr_t));
 }
 
+/** Copy a tor_addr_t from <b>src</b> to <b>dest</b>, taking extra care to
+ * copy only the well-defined portions: <b>dest</b> is zeroed, then only the
+ * family and, for AF_INET/AF_INET6, the address bytes are copied. Both
+ * pointers must be non-NULL and distinct. Used when hashing addresses. */
+void
+tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src)
+{
+  tor_assert(src != dest);
+  tor_assert(src);
+  tor_assert(dest);
+  memset(dest, 0, sizeof(tor_addr_t)); /* clear everything not copied below */
+  dest->family = src->family;
+  switch (tor_addr_family(src))
+    {
+    case AF_INET:
+      dest->addr.in_addr.s_addr = src->addr.in_addr.s_addr;
+      break;
+    case AF_INET6:
+      memcpy(dest->addr.in6_addr.s6_addr, src->addr.in6_addr.s6_addr, 16);
+    case AF_UNSPEC: /* AF_INET6 falls through to this break; nothing to copy */
+      break;
+    default:
+      tor_fragile_assert();
+    }
+}
+
 /** Given two addresses <b>addr1</b> and <b>addr2</b>, return 0 if the two
  * addresses are equivalent under the mask mbits, less than 0 if addr1
  * precedes addr2, and greater than 0 otherwise.
@@ -995,19 +1021,17 @@ tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
   }
 }
 
-/** Return a hash code based on the address addr */
-unsigned int
+/** Return a 64-bit hash of addr (siphash24g over IPv4/IPv6 address bytes). */
+uint64_t
 tor_addr_hash(const tor_addr_t *addr)
 {
   switch (tor_addr_family(addr)) {
   case AF_INET:
-    return tor_addr_to_ipv4h(addr);
+    return siphash24g(&addr->addr.in_addr.s_addr, 4);
   case AF_UNSPEC:
     return 0x4e4d5342;
-  case AF_INET6: {
-    const uint32_t *u = tor_addr_to_in6_addr32(addr);
-    return u[0] + u[1] + u[2] + u[3];
-    }
+  case AF_INET6:
+    return siphash24g(&addr->addr.in6_addr.s6_addr, 16);
   default:
     tor_fragile_assert();
     return 0;
diff --git a/src/common/address.h b/src/common/address.h
index 77e585534..d41c2f570 100644
--- a/src/common/address.h
+++ b/src/common/address.h
@@ -167,7 +167,7 @@ int tor_addr_compare_masked(const tor_addr_t *addr1, const tor_addr_t *addr2,
  * "exactly". */
 #define tor_addr_eq(a,b) (0==tor_addr_compare((a),(b),CMP_EXACT))
 
-unsigned int tor_addr_hash(const tor_addr_t *addr);
+uint64_t tor_addr_hash(const tor_addr_t *addr);
 int tor_addr_is_v4(const tor_addr_t *addr);
 int tor_addr_is_internal_(const tor_addr_t *ip, int for_listening,
                           const char *filename, int lineno);
@@ -192,6 +192,7 @@ const char * tor_addr_to_str(char *dest, const tor_addr_t *addr, size_t len,
                              int decorate);
 int tor_addr_parse(tor_addr_t *addr, const char *src);
 void tor_addr_copy(tor_addr_t *dest, const tor_addr_t *src);
+void tor_addr_copy_tight(tor_addr_t *dest, const tor_addr_t *src);
 void tor_addr_from_ipv4n(tor_addr_t *dest, uint32_t v4addr);
 /** Set <b>dest</b> to the IPv4 address encoded in <b>v4addr</b> in host
  * order. */
diff --git a/src/common/container.c b/src/common/container.c
index 476dc8291..f489430ca 100644
--- a/src/common/container.c
+++ b/src/common/container.c
@@ -1004,7 +1004,7 @@ strmap_entries_eq(const strmap_entry_t *a, const strmap_entry_t *b)
 static INLINE unsigned int
 strmap_entry_hash(const strmap_entry_t *a)
 {
-  return ht_string_hash(a->key);
+  return (unsigned) siphash24g(a->key, strlen(a->key));
 }
 
 /** Helper: compare digestmap_entry_t objects by key value. */
@@ -1018,13 +1018,7 @@ digestmap_entries_eq(const digestmap_entry_t *a, const digestmap_entry_t *b)
 static INLINE unsigned int
 digestmap_entry_hash(const digestmap_entry_t *a)
 {
-#if SIZEOF_INT != 8
-  const uint32_t *p = (const uint32_t*)a->key;
-  return p[0] ^ p[1] ^ p[2] ^ p[3] ^ p[4];
-#else
-  const uint64_t *p = (const uint64_t*)a->key;
-  return p[0] ^ p[1];
-#endif
+  return (unsigned) siphash24g(a->key, DIGEST_LEN);
 }
 
 HT_PROTOTYPE(strmap_impl, strmap_entry_t, node, strmap_entry_hash,
diff --git a/src/common/container.h b/src/common/container.h
index 1bcc54066..a4691a76c 100644
--- a/src/common/container.h
+++ b/src/common/container.h
@@ -7,6 +7,7 @@
 #define TOR_CONTAINER_H
 
 #include "util.h"
+#include "siphash.h"
 
 /** A resizeable list of pointers, with associated helpful functionality.
  *
@@ -610,11 +611,11 @@ typedef struct {
 static INLINE void
 digestset_add(digestset_t *set, const char *digest)
 {
-  const uint32_t *p = (const uint32_t *)digest;
-  const uint32_t d1 = p[0] + (p[1]>>16);
-  const uint32_t d2 = p[1] + (p[2]>>16);
-  const uint32_t d3 = p[2] + (p[3]>>16);
-  const uint32_t d4 = p[3] + (p[0]>>16);
+  const uint64_t x = siphash24g(digest, 20);
+  const uint32_t d1 = (uint32_t) x;
+  const uint32_t d2 = (uint32_t)( (x>>16) + x);
+  const uint32_t d3 = (uint32_t)( (x>>32) + x);
+  const uint32_t d4 = (uint32_t)( (x>>48) + x);
   bitarray_set(set->ba, BIT(d1));
   bitarray_set(set->ba, BIT(d2));
   bitarray_set(set->ba, BIT(d3));
@@ -626,11 +627,11 @@ digestset_add(digestset_t *set, const char *digest)
 static INLINE int
 digestset_contains(const digestset_t *set, const char *digest)
 {
-  const uint32_t *p = (const uint32_t *)digest;
-  const uint32_t d1 = p[0] + (p[1]>>16);
-  const uint32_t d2 = p[1] + (p[2]>>16);
-  const uint32_t d3 = p[2] + (p[3]>>16);
-  const uint32_t d4 = p[3] + (p[0]>>16);
+  const uint64_t x = siphash24g(digest, 20);
+  const uint32_t d1 = (uint32_t) x;
+  const uint32_t d2 = (uint32_t)( (x>>16) + x);
+  const uint32_t d3 = (uint32_t)( (x>>32) + x);
+  const uint32_t d4 = (uint32_t)( (x>>48) + x);
   return bitarray_is_set(set->ba, BIT(d1)) &&
          bitarray_is_set(set->ba, BIT(d2)) &&
          bitarray_is_set(set->ba, BIT(d3)) &&
-- 
cgit v1.2.3