[Beignet] [PATCH 3/5] add third coord in backend
Zhigang Gong
zhigang.gong at linux.intel.com
Tue May 7 22:49:32 PDT 2013
On Mon, May 06, 2013 at 08:45:50AM +0800, Homer Hsing wrote:
> add third coord "wcoord" in backend
>
> Signed-off-by: Homer Hsing <homer.xing at intel.com>
> ---
> backend/src/backend/gen_context.cpp | 12 ++++---
> backend/src/ir/instruction.cpp | 12 ++++---
> backend/src/llvm/llvm_gen_backend.cpp | 60 +++++++++++++++++++++++++++++++++--
> 3 files changed, 73 insertions(+), 11 deletions(-)
>
> diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
> index 1f867b8..d2baf1b 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -291,6 +291,7 @@ namespace gbe
> const GenRegister sampler = ra->genReg(insn.src(5));
> const GenRegister ucoord = ra->genReg(insn.src(6));
> const GenRegister vcoord = ra->genReg(insn.src(7));
> + const GenRegister wcoord = ra->genReg(insn.src(8));
> const GenRegister temp = GenRegister::ud1grf(msgPayload.nr, msgPayload.subnr/sizeof(float) + 4);
> const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
> uint32_t simdWidth = p->curr.execWidth;
> @@ -309,6 +310,7 @@ namespace gbe
> /* Prepare message payload. */
> p->MOV(GenRegister::f8grf(nr , 0), ucoord);
> p->MOV(GenRegister::f8grf(nr + (simdWidth/8), 0), vcoord);
> + p->MOV(GenRegister::f8grf(nr + (simdWidth/4), 0), wcoord);
> p->SAMPLE(dst, msgPayload, a0_0, -1, 0);
>
> p->pop();
> @@ -319,10 +321,11 @@ namespace gbe
> const GenRegister bti = ra->genReg(insn.src(0 + insn.extra.elem));
> const GenRegister ucoord = ra->genReg(insn.src(1 + insn.extra.elem));
> const GenRegister vcoord = ra->genReg(insn.src(2 + insn.extra.elem));
> - const GenRegister R = ra->genReg(insn.src(3 + insn.extra.elem));
> - const GenRegister G = ra->genReg(insn.src(4 + insn.extra.elem));
> - const GenRegister B = ra->genReg(insn.src(5 + insn.extra.elem));
> - const GenRegister A = ra->genReg(insn.src(6 + insn.extra.elem));
> + const GenRegister wcoord = ra->genReg(insn.src(3 + insn.extra.elem));
> + const GenRegister R = ra->genReg(insn.src(4 + insn.extra.elem));
> + const GenRegister G = ra->genReg(insn.src(5 + insn.extra.elem));
> + const GenRegister B = ra->genReg(insn.src(6 + insn.extra.elem));
> + const GenRegister A = ra->genReg(insn.src(7 + insn.extra.elem));
> const GenRegister a0_0 = GenRegister::ud1arf(GEN_ARF_ADDRESS, 0);
>
> p->push();
> @@ -357,6 +360,7 @@ namespace gbe
> GenRegister::retype(GenRegister::QnPhysical(src,quarter), src.type))
> QUARTER_MOV0(nr + 1, ucoord);
> QUARTER_MOV0(nr + 2, vcoord);
> + QUARTER_MOV0(nr + 3, wcoord);
> QUARTER_MOV1(nr + 5, R);
> QUARTER_MOV1(nr + 6, G);
> QUARTER_MOV1(nr + 7, B);
> diff --git a/backend/src/ir/instruction.cpp b/backend/src/ir/instruction.cpp
> index 9fd4247..8980abf 100644
> --- a/backend/src/ir/instruction.cpp
> +++ b/backend/src/ir/instruction.cpp
> @@ -414,6 +414,7 @@ namespace ir {
> << " sampler %" << this->getSrc(fn, 1)
> << " coord u %" << this->getSrc(fn, 2)
> << " coord v %" << this->getSrc(fn, 3)
> + << " coord w %" << this->getSrc(fn, 4)
> << " %" << this->getDst(fn, 0)
> << " %" << this->getDst(fn, 1)
> << " %" << this->getDst(fn, 2)
> @@ -427,7 +428,7 @@ namespace ir {
> INLINE Type getSrcType(void) const { return this->srcType; }
> INLINE Type getDstType(void) const { return this->dstType; }
>
> - static const uint32_t srcNum = 4;
> + static const uint32_t srcNum = 5;
> static const uint32_t dstNum = 4;
> };
>
> @@ -451,10 +452,11 @@ namespace ir {
> << " surface id %" << this->getSrc(fn, 0)
> << " coord u %" << this->getSrc(fn, 1)
> << " coord v %" << this->getSrc(fn, 2)
> - << " %" << this->getSrc(fn, 3)
> + << " coord w %" << this->getSrc(fn, 3)
> << " %" << this->getSrc(fn, 4)
> << " %" << this->getSrc(fn, 5)
> - << " %" << this->getSrc(fn, 6);
> + << " %" << this->getSrc(fn, 6)
> + << " %" << this->getSrc(fn, 7);
> }
>
> Tuple src;
> @@ -463,8 +465,8 @@ namespace ir {
>
> INLINE Type getSrcType(void) const { return this->srcType; }
> INLINE Type getCoordType(void) const { return this->coordType; }
> - // bti, u, v, 4 data elements
> - static const uint32_t srcNum = 7;
> + // bti, u, v, w, 4 data elements
> + static const uint32_t srcNum = 8;
> Register dst[0]; //!< No dest register
> };
>
> diff --git a/backend/src/llvm/llvm_gen_backend.cpp b/backend/src/llvm/llvm_gen_backend.cpp
> index 42265ee..f78bae0 100644
> --- a/backend/src/llvm/llvm_gen_backend.cpp
> +++ b/backend/src/llvm/llvm_gen_backend.cpp
> @@ -1734,6 +1734,12 @@ namespace gbe
> case GEN_OCL_WRITE_IMAGE3:
> case GEN_OCL_WRITE_IMAGE4:
> case GEN_OCL_WRITE_IMAGE5:
> + case GEN_OCL_WRITE_IMAGE10:
> + case GEN_OCL_WRITE_IMAGE11:
> + case GEN_OCL_WRITE_IMAGE12:
> + case GEN_OCL_WRITE_IMAGE13:
> + case GEN_OCL_WRITE_IMAGE14:
> + case GEN_OCL_WRITE_IMAGE15:
> break;
> case GEN_OCL_READ_IMAGE0:
> case GEN_OCL_READ_IMAGE1:
> @@ -1741,6 +1747,12 @@ namespace gbe
> case GEN_OCL_READ_IMAGE3:
> case GEN_OCL_READ_IMAGE4:
> case GEN_OCL_READ_IMAGE5:
> + case GEN_OCL_READ_IMAGE10:
> + case GEN_OCL_READ_IMAGE11:
> + case GEN_OCL_READ_IMAGE12:
> + case GEN_OCL_READ_IMAGE13:
> + case GEN_OCL_READ_IMAGE14:
> + case GEN_OCL_READ_IMAGE15:
> {
> // dst is a 4 elements vector. We allocate all 4 registers here.
> uint32_t elemNum;
> @@ -1876,11 +1888,26 @@ namespace gbe
> case GEN_OCL_READ_IMAGE3:
> case GEN_OCL_READ_IMAGE4:
> case GEN_OCL_READ_IMAGE5:
> + case GEN_OCL_READ_IMAGE10:
> + case GEN_OCL_READ_IMAGE11:
> + case GEN_OCL_READ_IMAGE12:
> + case GEN_OCL_READ_IMAGE13:
> + case GEN_OCL_READ_IMAGE14:
> + case GEN_OCL_READ_IMAGE15:
> {
> GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
> GBE_ASSERT(AI != AE); const ir::Register sampler = this->getRegister(*AI); ++AI;
> GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
> GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
> + ir::Register wcoord;
> + if (it->second == GEN_OCL_READ_IMAGE10 ||
> + it->second == GEN_OCL_READ_IMAGE11 ||
> + it->second == GEN_OCL_READ_IMAGE12 ||
> + it->second == GEN_OCL_READ_IMAGE13 ||
> + it->second == GEN_OCL_READ_IMAGE14) {
> + GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
> + } else
> + wcoord = ir::Register(0);
If this is a 2D surface, we had better find another way to identify it rather than allocating
a register for the nonexistent coord. That wastes one register and generates useless instructions
in both Sampler and TypedWrite. Any thoughts?
The other four patches LGTM. Thanks.
>
> vector<ir::Register> dstTupleData, srcTupleData;
> const uint32_t elemNum = 4;
> @@ -1892,26 +1919,33 @@ namespace gbe
> srcTupleData.push_back(sampler);
> srcTupleData.push_back(ucoord);
> srcTupleData.push_back(vcoord);
> + srcTupleData.push_back(wcoord);
> const ir::Tuple dstTuple = ctx.arrayTuple(&dstTupleData[0], elemNum);
> - const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 4);
> + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 5);
>
> ir::Type srcType = ir::TYPE_U32, dstType = ir::TYPE_U32;
>
> switch(it->second) {
> case GEN_OCL_READ_IMAGE0:
> case GEN_OCL_READ_IMAGE2:
> + case GEN_OCL_READ_IMAGE10:
> + case GEN_OCL_READ_IMAGE12:
> srcType = dstType = ir::TYPE_U32;
> break;
> case GEN_OCL_READ_IMAGE1:
> case GEN_OCL_READ_IMAGE3:
> + case GEN_OCL_READ_IMAGE11:
> + case GEN_OCL_READ_IMAGE13:
> dstType = ir::TYPE_U32;
> srcType = ir::TYPE_FLOAT;
> break;
> case GEN_OCL_READ_IMAGE4:
> + case GEN_OCL_READ_IMAGE14:
> dstType = ir::TYPE_FLOAT;
> srcType = ir::TYPE_U32;
> break;
> case GEN_OCL_READ_IMAGE5:
> + case GEN_OCL_READ_IMAGE15:
> srcType = dstType = ir::TYPE_FLOAT;
> break;
> default:
> @@ -1927,41 +1961,63 @@ namespace gbe
> case GEN_OCL_WRITE_IMAGE3:
> case GEN_OCL_WRITE_IMAGE4:
> case GEN_OCL_WRITE_IMAGE5:
> + case GEN_OCL_WRITE_IMAGE10:
> + case GEN_OCL_WRITE_IMAGE11:
> + case GEN_OCL_WRITE_IMAGE12:
> + case GEN_OCL_WRITE_IMAGE13:
> + case GEN_OCL_WRITE_IMAGE14:
> + case GEN_OCL_WRITE_IMAGE15:
> {
> GBE_ASSERT(AI != AE); const ir::Register surface_id = this->getRegister(*AI); ++AI;
> GBE_ASSERT(AI != AE); const ir::Register ucoord = this->getRegister(*AI); ++AI;
> GBE_ASSERT(AI != AE); const ir::Register vcoord = this->getRegister(*AI); ++AI;
> + ir::Register wcoord;
> + if(it->second == GEN_OCL_WRITE_IMAGE10 ||
> + it->second == GEN_OCL_WRITE_IMAGE11 ||
> + it->second == GEN_OCL_WRITE_IMAGE12 ||
> + it->second == GEN_OCL_WRITE_IMAGE13 ||
> + it->second == GEN_OCL_WRITE_IMAGE14) {
> + GBE_ASSERT(AI != AE); wcoord = this->getRegister(*AI); ++AI;
> + } else
> + wcoord = ir::Register(0);
> GBE_ASSERT(AI != AE);
> vector<ir::Register> srcTupleData;
>
> srcTupleData.push_back(surface_id);
> srcTupleData.push_back(ucoord);
> srcTupleData.push_back(vcoord);
> + srcTupleData.push_back(wcoord);
>
> const uint32_t elemNum = 4;
> for (uint32_t elemID = 0; elemID < elemNum; ++elemID) {
> const ir::Register reg = this->getRegister(*AI, elemID);
> srcTupleData.push_back(reg);
> }
> - const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 7);
> + const ir::Tuple srcTuple = ctx.arrayTuple(&srcTupleData[0], 8);
>
> ir::Type srcType = ir::TYPE_U32, coordType = ir::TYPE_U32;
>
> switch(it->second) {
> case GEN_OCL_WRITE_IMAGE0:
> case GEN_OCL_WRITE_IMAGE2:
> + case GEN_OCL_WRITE_IMAGE10:
> + case GEN_OCL_WRITE_IMAGE12:
> srcType = coordType = ir::TYPE_U32;
> break;
> case GEN_OCL_WRITE_IMAGE1:
> case GEN_OCL_WRITE_IMAGE3:
> + case GEN_OCL_WRITE_IMAGE11:
> + case GEN_OCL_WRITE_IMAGE13:
> coordType = ir::TYPE_FLOAT;
> srcType = ir::TYPE_U32;
> break;
> case GEN_OCL_WRITE_IMAGE4:
> + case GEN_OCL_WRITE_IMAGE14:
> srcType = ir::TYPE_FLOAT;
> coordType = ir::TYPE_U32;
> break;
> case GEN_OCL_WRITE_IMAGE5:
> + case GEN_OCL_WRITE_IMAGE15:
> srcType = coordType = ir::TYPE_FLOAT;
> break;
> default:
> --
> 1.8.1.2
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list