[Beignet] [PATCH V2 2/2] GBE: Optimize constant load with sampler.
Yang Rong
rong.r.yang at intel.com
Mon Jul 14 02:24:38 PDT 2014
From: Ruiling Song <ruiling.song at intel.com>
Signed-off-by: Ruiling Song <ruiling.song at intel.com>
---
backend/src/backend/gen_context.cpp | 2 +-
backend/src/backend/gen_defs.hpp | 2 +-
backend/src/backend/gen_encoder.cpp | 9 ++++++++-
backend/src/backend/gen_encoder.hpp | 3 ++-
backend/src/backend/gen_insn_selection.cpp | 22 ++++++++++++++++------
backend/src/backend/gen_insn_selection.hpp | 1 +
src/intel/intel_gpgpu.c | 2 +-
7 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 7b7dec3..c7222a5 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -1720,7 +1720,7 @@ namespace gbe
const unsigned char sampler = insn.extra.sampler;
const unsigned int msgLen = insn.extra.rdmsglen;
uint32_t simdWidth = p->curr.execWidth;
- p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, insn.extra.isLD);
+ p->SAMPLE(dst, msgPayload, msgLen, false, bti, sampler, simdWidth, -1, 0, insn.extra.isLD, insn.extra.isUniform);
}
void GenContext::scratchWrite(const GenRegister header, uint32_t offset, uint32_t reg_num, uint32_t reg_type, uint32_t channel_mode) {
diff --git a/backend/src/backend/gen_defs.hpp b/backend/src/backend/gen_defs.hpp
index 1b43864..f0da50a 100644
--- a/backend/src/backend/gen_defs.hpp
+++ b/backend/src/backend/gen_defs.hpp
@@ -396,7 +396,7 @@ enum GenMessageTarget {
#define GEN_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE 1
#define GEN_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2
#define GEN_SAMPLER_MESSAGE_SIMD16_RESINFO 2
-#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD 3
+#define GEN_SAMPLER_MESSAGE_SIMD4X2_LD 7
#define GEN_SAMPLER_MESSAGE_SIMD8_LD 7
#define GEN_SAMPLER_MESSAGE_SIMD16_LD 7
diff --git a/backend/src/backend/gen_encoder.cpp b/backend/src/backend/gen_encoder.cpp
index 26337e9..182752a 100644
--- a/backend/src/backend/gen_encoder.cpp
+++ b/backend/src/backend/gen_encoder.cpp
@@ -1214,7 +1214,8 @@ namespace gbe
uint32_t simdWidth,
uint32_t writemask,
uint32_t return_format,
- bool isLD)
+ bool isLD,
+ bool isUniform)
{
if (writemask == 0) return;
uint32_t msg_type = isLD ? GEN_SAMPLER_MESSAGE_SIMD8_LD :
@@ -1225,6 +1226,12 @@ namespace gbe
msg_length++;
uint32_t simd_mode = (simdWidth == 16) ?
GEN_SAMPLER_SIMD_MODE_SIMD16 : GEN_SAMPLER_SIMD_MODE_SIMD8;
+ if(isUniform) {
+ response_length = 1;
+ msg_type = GEN_SAMPLER_MESSAGE_SIMD4X2_LD;
+ msg_length = 1;
+ simd_mode = GEN_SAMPLER_SIMD_MODE_SIMD4X2;
+ }
GenNativeInstruction *insn = this->next(GEN_OPCODE_SEND);
this->setHeader(insn);
this->setDst(insn, dest);
diff --git a/backend/src/backend/gen_encoder.hpp b/backend/src/backend/gen_encoder.hpp
index eb2d3d7..d6e2b97 100644
--- a/backend/src/backend/gen_encoder.hpp
+++ b/backend/src/backend/gen_encoder.hpp
@@ -192,7 +192,8 @@ namespace gbe
unsigned int simdWidth,
uint32_t writemask,
uint32_t return_format,
- bool isLD);
+ bool isLD,
+ bool isUniform);
/*! TypedWrite instruction for texture */
virtual void TYPED_WRITE(GenRegister header,
diff --git a/backend/src/backend/gen_insn_selection.cpp b/backend/src/backend/gen_insn_selection.cpp
index cd2240d..d70fd8f 100644
--- a/backend/src/backend/gen_insn_selection.cpp
+++ b/backend/src/backend/gen_insn_selection.cpp
@@ -592,7 +592,7 @@ namespace gbe
/*! Encode ternary instructions */
void ALU3(SelectionOpcode opcode, Reg dst, Reg src0, Reg src1, Reg src2);
/*! Encode sample instructions */
- void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD);
+ void SAMPLE(GenRegister *dst, uint32_t dstNum, GenRegister *msgPayloads, uint32_t msgNum, uint32_t bti, uint32_t sampler, bool isLD, bool isUniform);
/*! Encode typed write instructions */
void TYPED_WRITE(GenRegister *msgs, uint32_t msgNum, uint32_t bti, bool is3D);
/*! Get image information */
@@ -1598,7 +1598,7 @@ namespace gbe
void Selection::Opaque::SAMPLE(GenRegister *dst, uint32_t dstNum,
GenRegister *msgPayloads, uint32_t msgNum,
- uint32_t bti, uint32_t sampler, bool isLD) {
+ uint32_t bti, uint32_t sampler, bool isLD, bool isUniform) {
SelectionInstruction *insn = this->appendInsn(SEL_OP_SAMPLE, dstNum, msgNum);
SelectionVector *dstVector = this->appendVector();
SelectionVector *msgVector = this->appendVector();
@@ -1623,6 +1623,7 @@ namespace gbe
insn->extra.sampler = sampler;
insn->extra.rdmsglen = msgNum;
insn->extra.isLD = isLD;
+ insn->extra.isUniform = isUniform;
}
///////////////////////////////////////////////////////////////////////////
@@ -2717,14 +2718,23 @@ namespace gbe
using namespace ir;
const uint32_t simdWidth = sel.isScalarReg(insn.getValue(0)) ? 1 : sel.ctx.getSimdWidth();
GBE_ASSERT(insn.getValueNum() == 1);
+
+ if(simdWidth == 1) {
+ GenRegister dst = sel.selReg(insn.getValue(0), ir::TYPE_U32);
+ sel.push();
+ sel.curr.noMask = 1;
+ sel.SAMPLE(&dst, 1, &addr, 1, bti, 0, true, true);
+ sel.pop();
+ return;
+ }
+
GenRegister dst = GenRegister::retype(sel.selReg(insn.getValue(0)), GEN_TYPE_F);
// get dword based address
- GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD, simdWidth == 1));
+ GenRegister addrDW = GenRegister::udxgrf(simdWidth, sel.reg(FAMILY_DWORD));
sel.push();
- if (simdWidth == 1) {
+ if (sel.isScalarReg(addr.reg())) {
sel.curr.noMask = 1;
- sel.curr.execWidth = 1;
}
sel.SHR(addrDW, GenRegister::retype(addr, GEN_TYPE_UD), GenRegister::immud(2));
sel.pop();
@@ -3612,7 +3622,7 @@ namespace gbe
}
uint32_t sampler = insn.getSamplerIndex();
- sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0);
+ sel.SAMPLE(dst, insn.getDstNum(), msgPayloads, msgLen, bti, sampler, insn.getSamplerOffset() != 0, false);
return true;
}
DECL_CTOR(SampleInstruction, 1, 1);
diff --git a/backend/src/backend/gen_insn_selection.hpp b/backend/src/backend/gen_insn_selection.hpp
index 508a37e..9bcce6f 100644
--- a/backend/src/backend/gen_insn_selection.hpp
+++ b/backend/src/backend/gen_insn_selection.hpp
@@ -126,6 +126,7 @@ namespace gbe
uint16_t sampler:5;
uint16_t rdmsglen:3;
bool isLD; // is this a ld message?
+ bool isUniform;
};
uint32_t barrierType;
bool longjmp;
diff --git a/src/intel/intel_gpgpu.c b/src/intel/intel_gpgpu.c
index d00bc83..9531001 100644
--- a/src/intel/intel_gpgpu.c
+++ b/src/intel/intel_gpgpu.c
@@ -688,7 +688,7 @@ intel_gpgpu_alloc_constant_buffer(intel_gpgpu_t *gpgpu, uint32_t size)
gen7_surface_state_t *ss2 = (gen7_surface_state_t *) heap->surface[2];
memset(ss2, 0, sizeof(gen7_surface_state_t));
ss2->ss0.surface_type = I965_SURFACE_BUFFER;
- ss2->ss0.surface_format = I965_SURFACEFORMAT_RAW;
+ ss2->ss0.surface_format = I965_SURFACEFORMAT_R32G32B32A32_UINT;
ss2->ss2.width = s & 0x7f; /* bits 6:0 of sz */
ss2->ss2.height = (s >> 7) & 0x3fff; /* bits 20:7 of sz */
ss2->ss3.depth = (s >> 21) & 0x3ff; /* bits 30:21 of sz */
--
1.7.10.4
More information about the Beignet mailing list