Mesa (master): gallivm, llvmpipe: Use 4-wide vectors on AMD Bulldozer.
Jose Fonseca
jrfonseca at kemper.freedesktop.org
Tue Sep 4 08:01:52 UTC 2012
Module: Mesa
Branch: master
Commit: 7eb504019731368fd55f01e0264b195d4f99ae93
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7eb504019731368fd55f01e0264b195d4f99ae93
Author: José Fonseca <jfonseca at vmware.com>
Date: Fri Aug 31 17:01:50 2012 +0100
gallivm,llvmpipe: Use 4-wide vectors on AMD Bulldozer.
8-wide vectors are slower.
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
---
src/gallium/auxiliary/gallivm/lp_bld_init.c | 10 +++++++++-
src/gallium/auxiliary/util/u_cpu_detect.c | 5 +++++
src/gallium/auxiliary/util/u_cpu_detect.h | 1 +
3 files changed, 15 insertions(+), 1 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 068a2cd..ffbe3ea 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -434,8 +434,16 @@ lp_build_init(void)
util_cpu_detect();
+ /* AMD Bulldozer AVX's throughput is the same as SSE2; and because using
+ * 8-wide vector needs more floating ops than 4-wide (due to padding), it is
+ * actually more efficient to use 4-wide vectors on this processor.
+ *
+ * See also:
+ * - http://www.anandtech.com/show/4955/the-bulldozer-review-amd-fx8150-tested/2
+ */
if (HAVE_AVX &&
- util_cpu_caps.has_avx) {
+ util_cpu_caps.has_avx &&
+ util_cpu_caps.has_intel) {
lp_native_vector_width = 256;
} else {
/* Leave it at 128, even when no SIMD extensions are available.
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index 945f0b0..d7f0be4 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -286,6 +286,11 @@ util_cpu_detect(void)
util_cpu_caps.cacheline = cacheline;
}
+ if (regs[1] == 0x756e6547 && regs[2] == 0x6c65746e && regs[3] == 0x49656e69) {
+ /* GenuineIntel */
+ util_cpu_caps.has_intel = 1;
+ }
+
cpuid(0x80000000, regs);
if (regs[0] >= 0x80000001) {
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h
index b44d9d9..acac686 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.h
+++ b/src/gallium/auxiliary/util/u_cpu_detect.h
@@ -52,6 +52,7 @@ struct util_cpu_caps {
int x86_cpu_type;
unsigned cacheline;
+ unsigned has_intel:1;
unsigned has_tsc:1;
unsigned has_mmx:1;
unsigned has_mmx2:1;
More information about the mesa-commit mailing list