[Beignet] [V2 PATCH 6/6] Fix double bugs for hsw
junyan.he at inbox.com
junyan.he at inbox.com
Wed May 7 03:03:18 PDT 2014
From: Junyan He <junyan.he at linux.intel.com>
According to the bspec, IVB should use SIMD8 for double ops, but HSW should use SIMD4.
TODO: The long ops may also need to change.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
Signed-off-by: Junyan He <junyan.he at linux.intel.com>
---
backend/src/backend/gen75_encoder.cpp | 63 +++++++++++++++++++++++++++++++++++
backend/src/backend/gen75_encoder.hpp | 2 ++
backend/src/backend/gen_encoder.hpp | 4 +--
3 files changed, 67 insertions(+), 2 deletions(-)
diff --git a/backend/src/backend/gen75_encoder.cpp b/backend/src/backend/gen75_encoder.cpp
index d1a8542..7fc2d1d 100644
--- a/backend/src/backend/gen75_encoder.cpp
+++ b/backend/src/backend/gen75_encoder.cpp
@@ -177,4 +177,67 @@ namespace gbe
msg_length,
response_length);
}
+ void Gen75Encoder::LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value) { // Load the double immediate `value` into dest, staging its raw bits in tmp first.
+ union { double d; unsigned u[2]; } u; // lets the double's storage be read back as two unsigned words
+ u.d = value;
+ GenRegister r = GenRegister::retype(tmp, GEN_TYPE_UD); // view tmp as UD so each word can be written on its own
+ push(); // the matching pop() below appears to restore curr (width is read from curr afterwards)
+ curr.predicate = GEN_PREDICATE_NONE;
+ curr.execWidth = 1; // one element per MOV while the two words are written
+ MOV(r, GenRegister::immud(u.u[0])); // first word of the double goes to element 0 of r
+ MOV(GenRegister::suboffset(r, 1), GenRegister::immud(u.u[1])); // second word goes to element 1 of r
+ pop();
+ r.type = GEN_TYPE_DF; // from here on the two words are read back as one DF element
+ r.vstride = GEN_VERTICAL_STRIDE_0; // zero vertical stride, width 1, zero horizontal stride:
+ r.width = GEN_WIDTH_1; // NOTE(review): looks like a scalar-broadcast source region,
+ r.hstride = GEN_HORIZONTAL_STRIDE_0; // i.e. every channel reads the same DF value; confirm against the ISA docs
+ push();
+ uint32_t width = curr.execWidth; // remember the caller's execution width before it is overridden
+ curr.execWidth = 8; // NOTE(review): commit message says HSW needs SIMD4 for double ops, yet this stays 8; confirm
+ curr.predicate = GEN_PREDICATE_NONE;
+ curr.noMask = 1;
+ curr.quarterControl = GEN_COMPRESSION_Q1;
+ MOV(dest, r); // first MOV into dest, issued with quarter control Q1
+ if (width == 16) { // a 16-wide caller gets a second MOV for the remaining channels
+ curr.quarterControl = GEN_COMPRESSION_Q2;
+ MOV(GenRegister::offset(dest, 2), r); // second MOV targets dest advanced by offset 2, with Q2
+ }
+ pop();
+ }
+
+ void Gen75Encoder::MOV_DF(GenRegister dest, GenRegister src0, GenRegister r) { // Move a non-DF src0 into dest via staging register r (declared as `tmp` in the header), 4 channels per MOV.
+ int w = curr.execWidth; // caller's execution width; decides below whether elements 8..15 are handled too
+ if (src0.isdf()) {
+ GBE_ASSERT(0); // MOV DF is called from convert instruction,
+ // We should never convert a df to a df.
+ } else {
+ GenRegister r0 = GenRegister::h2(r); // NOTE(review): h2 presumably yields a horizontal-stride-2 view of r; confirm
+ push();
+ curr.execWidth = 4; // every MOV in this section handles 4 channels
+ curr.predicate = GEN_PREDICATE_NONE; // staging copies into r0 are unpredicated
+ MOV(r0, src0); // stage source elements 0..3
+ MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 4)); // stage source elements 4..7
+ curr.predicate = GEN_PREDICATE_NORMAL; // writes into dest go back to normal predication
+ curr.quarterControl = 0;
+ curr.nibControl = 0; // nibControl 0 is used for the first 4-channel write, 1 for the second
+ MOV(dest, r0);
+ curr.nibControl = 1;
+ MOV(GenRegister::suboffset(dest, 4), GenRegister::suboffset(r0, 4));
+ pop();
+ if (w == 16) { // 16-wide caller: repeat the sequence for source/dest elements 8..15
+ push();
+ curr.execWidth = 4;
+ curr.predicate = GEN_PREDICATE_NONE;
+ MOV(r0, GenRegister::suboffset(src0, 8)); // r0 is reused as staging for source elements 8..11
+ MOV(GenRegister::suboffset(r0, 4), GenRegister::suboffset(src0, 12)); // and for source elements 12..15
+ curr.predicate = GEN_PREDICATE_NORMAL;
+ curr.quarterControl = 1; // second pass uses quarterControl 1 where the first used 0
+ curr.nibControl = 0;
+ MOV(GenRegister::suboffset(dest, 8), r0);
+ curr.nibControl = 1;
+ MOV(GenRegister::suboffset(dest, 12), GenRegister::suboffset(r0, 4));
+ pop();
+ }
+ }
+ }
} /* End of the name space. */
diff --git a/backend/src/backend/gen75_encoder.hpp b/backend/src/backend/gen75_encoder.hpp
index a107202..dc2dc76 100644
--- a/backend/src/backend/gen75_encoder.hpp
+++ b/backend/src/backend/gen75_encoder.hpp
@@ -34,6 +34,8 @@ namespace gbe
Gen75Encoder(uint32_t simdWidth, uint32_t gen, uint32_t deviceID)
: GenEncoder(simdWidth, gen, deviceID, 8) { };
+ virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp = GenRegister::null());
+ virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
virtual void ATOMIC(GenRegister dst, uint32_t function, GenRegister src, uint32_t bti, uint32_t srcNum);
virtual void UNTYPED_READ(GenRegister dst, GenRegister src, uint32_t bti, uint32_t elemNum);
virtual void UNTYPED_WRITE(GenRegister src, uint32_t bti, uint32_t elemNum);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index d44d323..5129d99 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -133,8 +133,8 @@ namespace gbe
#undef ALU2
#undef ALU2_MOD
#undef ALU3
- void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp = GenRegister::null());
- void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
+ virtual void MOV_DF(GenRegister dest, GenRegister src0, GenRegister tmp = GenRegister::null());
+ virtual void LOAD_DF_IMM(GenRegister dest, GenRegister tmp, double value);
void LOAD_INT64_IMM(GenRegister dest, int64_t value);
/*! Barrier message (to synchronize threads of a workgroup) */
void BARRIER(GenRegister src);
--
1.8.3.2
More information about the Beignet
mailing list