[Mesa-dev] [PATCH 8/8] util: Add header for hardware crc32c
Thomas Helland
thomashelland90 at gmail.com
Sat Feb 28 04:53:54 PST 2015
There are probably better ways to do this.
Results from oprofile on a shader-db run:
mesa_hash_data 3.11 ---> 3.12
hash_table_insert 2.52 ---> 2.50
hash_table_search 2.64 ---> 2.59
set_add 1.74 ---> 1.72
set_search 2.08 ---> 2.09
runtime 160 ---> 164
---
src/mesa/x86/common_x86.c | 4 +++
src/mesa/x86/common_x86_features.h | 8 +++++
src/util/crc32c_hw.h | 67 ++++++++++++++++++++++++++++++++++++++
src/util/hash_table.c | 17 +++++++++-
4 files changed, 95 insertions(+), 1 deletion(-)
create mode 100644 src/util/crc32c_hw.h
diff --git a/src/mesa/x86/common_x86.c b/src/mesa/x86/common_x86.c
index 25f5c40..de4defa 100644
--- a/src/mesa/x86/common_x86.c
+++ b/src/mesa/x86/common_x86.c
@@ -266,6 +266,8 @@ _mesa_get_x86_features(void)
_mesa_x86_cpu_features |= X86_FEATURE_XMM2;
if (cpu_features_ecx & X86_CPU_SSE4_1)
_mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
+ if (cpu_features_ecx & X86_CPU_SSE4_2)
+ _mesa_x86_cpu_features |= X86_FEATURE_SSE4_2;
#endif
/* query extended cpu features */
@@ -354,6 +356,8 @@ _mesa_get_x86_features(void)
if (ecx & bit_SSE4_1)
_mesa_x86_cpu_features |= X86_FEATURE_SSE4_1;
+ if (ecx & X86_CPU_SSE4_2)
+ _mesa_x86_cpu_features |= X86_FEATURE_SSE4_2;
}
#endif /* USE_X86_64_ASM */
diff --git a/src/mesa/x86/common_x86_features.h b/src/mesa/x86/common_x86_features.h
index 65634aa..c205844 100644
--- a/src/mesa/x86/common_x86_features.h
+++ b/src/mesa/x86/common_x86_features.h
@@ -44,6 +44,7 @@
#define X86_FEATURE_3DNOWEXT (1<<7)
#define X86_FEATURE_3DNOW (1<<8)
#define X86_FEATURE_SSE4_1 (1<<9)
+#define X86_FEATURE_SSE4_2 (1<<10)
/* standard X86 CPU features */
#define X86_CPU_FPU (1<<0)
@@ -53,6 +54,7 @@
#define X86_CPU_XMM2 (1<<26)
/* ECX. */
#define X86_CPU_SSE4_1 (1<<19)
+#define X86_CPU_SSE4_2 (1<<20)
/* extended X86 CPU features */
#define X86_CPUEXT_MMX_EXT (1<<22)
@@ -93,5 +95,11 @@
#define cpu_has_sse4_1 (_mesa_x86_cpu_features & X86_FEATURE_SSE4_1)
#endif
+/* If the compiler is already targeting SSE4.2 (__SSE4_2__ defined) the check
+ * is the constant 1; otherwise it tests the X86_FEATURE_SSE4_2 bit in
+ * _mesa_x86_cpu_features.
+ */
+#ifdef __SSE4_2__
+#define cpu_has_sse4_2 1
+#else
+#define cpu_has_sse4_2 (_mesa_x86_cpu_features & X86_FEATURE_SSE4_2)
+#endif
+
#endif
diff --git a/src/util/crc32c_hw.h b/src/util/crc32c_hw.h
new file mode 100644
index 0000000..ef8c903
--- /dev/null
+++ b/src/util/crc32c_hw.h
@@ -0,0 +1,67 @@
+/* Compile with gcc -O3 -msse4.2 ... */
+
+#include <stdint.h>
+#ifdef __SSE4_2__
+#include <smmintrin.h>
+
+// Input is consumed byte-by-byte until it is aligned to ALIGN_SIZE bytes
+#define ALIGN_SIZE 0x08UL
+#define ALIGN_MASK (ALIGN_SIZE - 1)
+/*
+ * Fold every whole `type`-sized chunk that still fits in `len` bytes of `buf`
+ * into `crc` using the intrinsic `op`.  `buf` is advanced and `len` is
+ * decremented in place, so both must be modifiable lvalues, and `buf` must be
+ * a byte pointer because it is advanced by sizeof(type).
+ */
+#define CALC_CRC(op, crc, type, buf, len) \
+   do { \
+      for (; (len) >= sizeof(type); (len) -= sizeof(type), (buf) += sizeof(type)) { \
+         (crc) = op((crc), *(const type *)(buf)); \
+      } \
+   } while (0)
+
+
+/**
+ * Compute CRC-32C using the Intel hardware crc32 instruction.
+ *
+ * \param input  start of the data to checksum
+ * \param len    number of bytes at \p input; values <= 0 are treated as empty
+ * \param crc    CRC value to continue from; it is bit-inverted on entry and
+ *               the result is bit-inverted again before being returned
+ * \return       the updated CRC
+ *
+ * For better parallelization with bigger buffers see
+ * http://www.drdobbs.com/parallel/fast-parallelized-crc-computation-using/229401411
+ */
+static inline uint32_t crc32c_hw(const void *input, int len, uint32_t crc)
+{
+   const char *buf = (const char *)input;
+
+   /* CALC_CRC compares the count against sizeof(), i.e. as an unsigned
+    * size_t.  A negative int would become a huge count there and read far
+    * past the buffer, so clamp it to zero up front.
+    */
+   size_t remaining = len > 0 ? (size_t)len : 0;
+
+   /* Invert all bits of the incoming CRC */
+   crc ^= 0xFFFFFFFF;
+
+   /* Consume single bytes until buf is ALIGN_SIZE-aligned */
+   for (; remaining > 0 && ((uintptr_t)buf & ALIGN_MASK); remaining--, buf++) {
+      crc = _mm_crc32_u8(crc, *buf);
+   }
+
+   /* Widest chunks first, then progressively narrower ones for the tail */
+#ifdef __x86_64__
+   CALC_CRC(_mm_crc32_u64, crc, uint64_t, buf, remaining);
+#endif
+   CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, remaining);
+   CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, remaining);
+   CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, remaining);
+
+   /* Undo the initial inversion */
+   return (crc ^ 0xFFFFFFFF);
+}
+
+/**
+ * Variant of the hardware CRC that keeps the running value in a 64-bit
+ * variable and returns it as uint64_t.
+ *
+ * It uses the same _mm_crc32_* intrinsics as crc32c_hw(), but the seed is used
+ * as-is: no bit inversion is applied on entry or on exit.
+ *
+ * \param input  start of the data to checksum
+ * \param len    number of bytes at \p input; values <= 0 are treated as empty
+ * \param seed   initial value of the running CRC
+ * \return       the running CRC after all bytes have been folded in
+ */
+static inline uint64_t crc64c_hw(const void *input, int len, uint32_t seed)
+{
+   const char *buf = (const char *)input;
+   uint64_t crc = (uint64_t)seed;
+
+   /* CALC_CRC compares the count against sizeof(), i.e. as an unsigned
+    * size_t.  A negative int would become a huge count there and read far
+    * past the buffer, so clamp it to zero up front.
+    */
+   size_t remaining = len > 0 ? (size_t)len : 0;
+
+   /* Consume single bytes until buf is ALIGN_SIZE-aligned */
+   for (; remaining > 0 && ((uintptr_t)buf & ALIGN_MASK); remaining--, buf++) {
+      crc = _mm_crc32_u8(crc, *buf);
+   }
+
+   /* Widest chunks first, then progressively narrower ones for the tail */
+#ifdef __x86_64__
+   CALC_CRC(_mm_crc32_u64, crc, uint64_t, buf, remaining);
+#endif
+   CALC_CRC(_mm_crc32_u32, crc, uint32_t, buf, remaining);
+   CALC_CRC(_mm_crc32_u16, crc, uint16_t, buf, remaining);
+   CALC_CRC(_mm_crc32_u8, crc, uint8_t, buf, remaining);
+
+   /* Returned without any final inversion */
+   return crc;
+}
+
+#endif
diff --git a/src/util/hash_table.c b/src/util/hash_table.c
index f2b8cf6..755b6a5 100644
--- a/src/util/hash_table.c
+++ b/src/util/hash_table.c
@@ -47,6 +47,8 @@
#include "hash_table.h"
#include "ralloc.h"
#include "macros.h"
+#include "x86/common_x86_asm.h"
+#include "crc32c_hw.h"
static const uint32_t deleted_key_value;
@@ -423,7 +425,6 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
return NULL;
}
-
/**
* Quick FNV-1a hash implementation based on:
* http://www.isthe.com/chongo/tech/comp/fnv/
@@ -436,7 +437,12 @@ _mesa_hash_table_random_entry(struct hash_table *ht,
uint32_t
_mesa_hash_data(const void *data, size_t size)
{
+#ifdef _SSE4_2_
+ if (cpu_has_sse4_2)
+ return crc32c_hw(data, size, _mesa_fnv32_1a_offset_bias);
+#endif
return murmur3_32(data, size, _mesa_fnv32_1a_offset_bias);
+
}
/** FNV-1a string hash implementation */
@@ -446,6 +452,15 @@ _mesa_hash_string(const char *key)
uint32_t hash = _mesa_fnv32_1a_offset_bias;
while (*key != 0) {
+#ifdef _SSE4_2_
+ if (cpu_has_sse4_2) {
+ while (*key != 0) {
+ hash = crc32c_hw(key, sizeof(*key), hash);
+ key++;
+ }
+ return hash;
+ }
+#endif
hash = _mesa_murmur3_32(hash, *key);
key++;
}
--
2.2.1
More information about the mesa-dev
mailing list