[Mesa-dev] [PATCH 8/8] util: Add header for hardware crc32c

Thomas Helland thomashelland90 at gmail.com
Sat Feb 28 04:53:54 PST 2015


There are probably better ways to do this.

Results from oprofile on a shader-db run (before ---> after):
mesa_hash_data          3.11 ---> 3.12
hash_table_insert       2.52 ---> 2.50
hash_table_search       2.64 ---> 2.59
set_add                 1.74 ---> 1.72
set_search              2.08 ---> 2.09
runtime                 160  ---> 164
---
 src/mesa/x86/common_x86.c          |  4 +++
 src/mesa/x86/common_x86_features.h |  8 +++++
 src/util/crc32c_hw.h               | 67 ++++++++++++++++++++++++++++++++++++++
 src/util/hash_table.c              | 17 +++++++++-
 4 files changed, 95 insertions(+), 1 deletion(-)
 create mode 100644 src/util/crc32c_hw.h

diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c
index 25f5c40..de4defa 100644
--- a/src/mesa/x86/common_x86.c
+++ b/src/mesa/x86/common_x86.c
@@ -266,6 +266,8 @@ _mesa_get_x86_features(void)
 	   _mesa_x86_cpu_features |= X86_FEATURE_XMM2;
        if (cpu_features_ecx & X86_CPU_SSE4_1)
 	   _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
+       if (cpu_features_ecx & X86_CPU_SSE4_2)
+      _mesa_x86_cpu_features |= X86_FEATURE_SSE4_2;
 #endif
 
        /* query extended cpu features */
@@ -354,6 +356,8 @@ _mesa_get_x86_features(void)
 
       if (ecx & bit_SSE4_1)
          _mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
+      if (ecx & X86_CPU_SSE4_2)
+         _mesa_x86_cpu_features |= X86_FEATURE_SSE4_2;
    }
 #endif /* USE_X86_64_ASM */
 
diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h
index 65634aa..c205844 100644
--- a/src/mesa/x86/common_x86_features.h
+++ b/src/mesa/x86/common_x86_features.h
@@ -44,6 +44,7 @@
 #define X86_FEATURE_3DNOWEXT	(1<<7)
 #define X86_FEATURE_3DNOW	(1<<8)
 #define X86_FEATURE_SSE4_1	(1<<9)
+#define X86_FEATURE_SSE4_2 (1<<10)
 
 /* standard X86 CPU features */
 #define X86_CPU_FPU		(1<<0)
@@ -53,6 +54,7 @@
 #define X86_CPU_XMM2		(1<<26)
 /* ECX. */
 #define X86_CPU_SSE4_1		(1<<19)
+#define X86_CPU_SSE4_2     (1<<20)
 
 /* extended X86 CPU features */
 #define X86_CPUEXT_MMX_EXT	(1<<22)
@@ -93,5 +95,11 @@
 #define cpu_has_sse4_1		(_mesa_x86_cpu_features & X86_FEATURE_SSE4_1)
 #endif
 
+#ifdef __SSE4_2__ /* compiler already targets SSE4.2: the answer is a compile-time constant */
+#define cpu_has_sse4_2     1
+#else /* otherwise test the feature mask filled in by _mesa_get_x86_features() */
+#define cpu_has_sse4_2     (_mesa_x86_cpu_features & X86_FEATURE_SSE4_2)
+#endif /* __SSE4_2__ */
+
 #endif
 
diff --git a/src/util/crc32c_hw.h b/src/util/crc32c_hw.h
new file mode 100644
index 0000000..ef8c903
--- /dev/null
+++ b/src/util/crc32c_hw.h
@@ -0,0 +1,67 @@
+/* Compile with gcc -O3 -msse4.2 ... */
+
+#include <stdint.h>
+#ifdef __SSE4_2__
+#include <smmintrin.h>
+
+/* Alignment (in bytes) that callers establish before CALC_CRC's multi-byte loads. */
+#define ALIGN_SIZE      0x08UL
+#define ALIGN_MASK      (ALIGN_SIZE - 1)
+#define CALC_CRC(op, crc, type, buf, len)                               \
+  do { /* fold each whole `type` chunk of buf into crc; (int) casts keep a negative len out */ \
+    for (; (len) >= (int)sizeof(type); (len) -= (int)sizeof(type), (buf) += sizeof(type)) { \
+      (crc) = op((crc), *(const type *)(buf));                          \
+    }                                                                   \
+  } while(0)
+
+
+/* Compute CRC-32C over len bytes at input using the hardware crc32 intrinsics.
+ * crc is the running value; it is inverted on entry and again before return.
+ * For parallelizing larger buffers see http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411 */
+static inline uint32_t crc32c_hw(const void *input, int len, uint32_t crc)
+{
+    const char* buf = (const char*)input;
+
+    // Invert the incoming CRC (XOR with all ones)
+    crc ^= 0xFFFFFFFF;
+
+    // Consume leading bytes one at a time until buf is ALIGN_SIZE-aligned
+    for (; (len > 0) && ((uintptr_t)buf & ALIGN_MASK); len--, buf++) {
+        crc = _mm_crc32_u8(crc, *buf);
+    }
+
+    // Widest chunks first; each narrower pass handles what the previous one left
+#ifdef __x86_64__
+    CALC_CRC(_mm_crc32_u64, crc, uint64_t, buf, len);
+#endif
+    CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, len);
+    CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, len);
+    CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, len);
+
+    // Final inversion of the accumulated value
+    return (crc ^ 0xFFFFFFFF);
+}
+
+/* Variant of crc32c_hw with no initial/final inversion: seed starts the accumulator.
+ * NOTE(review): only crc32 intrinsics are used; result presumably fits in 32 bits - confirm. */
+static inline uint64_t crc64c_hw(const void *input, int len, uint32_t seed)
+{
+    const char* buf = (const char*)input;
+    uint64_t crc = (uint64_t)seed;
+
+    // Consume leading bytes one at a time until buf is ALIGN_SIZE-aligned
+    for (; (len > 0) && ((uintptr_t)buf & ALIGN_MASK); len--, buf++) {
+        crc = _mm_crc32_u8(crc, *buf);
+    }
+
+    // Widest chunks first; each narrower pass handles what the previous one left
+#ifdef __x86_64__
+    CALC_CRC(_mm_crc32_u64, crc, uint64_t, buf, len);
+#endif
+    CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, len);
+    CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, len);
+    CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, len);
+    return crc;
+}
+
+#endif
diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index f2b8cf6..755b6a5 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -47,6 +47,8 @@
 #include "hash_table.h"
 #include "ralloc.h"
 #include "macros.h"
+#include "x86/common_x86_asm.h"
+#include "crc32c_hw.h"
 
 static const uint32_t deleted_key_value;
 
@@ -423,7 +425,6 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
    return NULL;
 }
 
-
 /**
  * Quick FNV-1a hash implementation based on:
  * http://www.isthe.com/chongo/tech/comp/fnv/
@@ -436,7 +437,12 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
 uint32_t
 _mesa_hash_data(const void *data, size_t size)
 {
+#ifdef _SSE4_2_
+   if (cpu_has_sse4_2)
+      return crc32c_hw(data, size, _mesa_fnv32_1a_offset_bias);
+#endif
    return murmur3_32(data, size, _mesa_fnv32_1a_offset_bias);
+
 }
 
 /** FNV-1a string hash implementation */
@@ -446,6 +452,15 @@ _mesa_hash_string(const char *key)
    uint32_t hash = _mesa_fnv32_1a_offset_bias;
 
    while (*key != 0) {
+#ifdef _SSE4_2_
+   if (cpu_has_sse4_2) {
+      while (*key != 0) {
+         hash = crc32c_hw(key, sizeof(*key), hash);
+         key++;
+      }
+      return hash;
+   }
+#endif
       hash = _mesa_murmur3_32(hash, *key);
       key++;
    }
-- 
2.2.1



More information about the mesa-dev mailing list