[Beignet] [PATCH] Kill the A0 cache in GenContext.
Song, Ruiling
ruiling.song at intel.com
Tue Apr 14 01:50:23 PDT 2015
The patch LGTM
> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Tuesday, April 14, 2015 4:17 PM
> To: beignet at lists.freedesktop.org
> Cc: Junyan He
> Subject: [Beignet] [PATCH] Kill the A0 cache in GenContext.
>
> From: Junyan He <junyan.he at linux.intel.com>
>
> The a0 value cache in GenContext can only hold the value known at compile time,
> which may differ from the true offset value at run time when the code
> generates a backward jump. So just kill the a0 cache; we will use a
> load vector instruction to optimize it later.
>
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
> backend/src/backend/gen8_context.cpp | 54
> ++++++++--------------------------
> backend/src/backend/gen_context.cpp | 51
> +++++++-------------------------
> backend/src/backend/gen_context.hpp | 1 -
> 3 files changed, 24 insertions(+), 82 deletions(-)
>
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index 920eb3e..2cdb248 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -98,8 +98,7 @@ namespace gbe
> p->curr.execWidth = 4;
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.noMask = 1;
> - GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> - a0[0], new_a0[0] - a0[0]);
> + GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
> GenRegister dst_ = dst;
> dst_.type = GEN_TYPE_UB;
> dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -159,8
> +158,7 @@ namespace gbe
> p->curr.execWidth = 16;
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.noMask = 1;
> - GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> - a0[0], new_a0[0] - a0[0]);
> + GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
> p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB),
> ind_src);
> ind_src.addr_imm += 16;
> p->MOV(GenRegister::offset(GenRegister::retype(tmp,
> GEN_TYPE_UB), 0, 16), ind_src); @@ -218,8 +216,7 @@ namespace gbe
> p->curr.execWidth = 16;
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.noMask = 1;
> - GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> - a0[0], new_a0[0] - a0[0]);
> + GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
> p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB),
> ind_src);
> if (simd == 16) {
> ind_src.addr_imm += 16; @@ -862,46 +859,21 @@
> namespace gbe
> }
>
> void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t
> max_offset, int sz) {
> - int16_t diff = new_a0[0] - this->a0[0];
> if (sz == 0)
> sz = 16;
> GBE_ASSERT(sz%4 == 0);
> GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096);
> - bool need_reset = false;
> - for (int i = 1; i < sz; i++) {
> - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096);
> - int16_t d = new_a0[i] - this->a0[i];
> - if (diff != d) {
> - need_reset = true;
> - break;
> - }
> - }
>
> - GBE_ASSERT(this->a0[0] + diff < 4096 && this->a0[0] + diff >= 0);
> - if (!need_reset && diff >= -512 && diff + max_offset <= 511) {
> - return;
> - } else if (!need_reset && sz == 16) {
> - p->push();
> - p->curr.execWidth = 16;
> - p->curr.predicate = GEN_PREDICATE_NONE;
> - p->curr.noMask = 1;
> - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> GenRegister::immw(diff));
> - p->pop();
> - } else {
> - p->push();
> - p->curr.execWidth = 1;
> - p->curr.predicate = GEN_PREDICATE_NONE;
> - p->curr.noMask = 1;
> - for (int i = 0; i < sz/4; i++) {
> - uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]);
> - addr = addr << 32;
> - addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]);
> - p->MOV(GenRegister::retype(GenRegister::addr1(i*4),
> GEN_TYPE_UL), GenRegister::immuint64(addr));
> - }
> - p->pop();
> + p->push();
> + p->curr.execWidth = 1;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + for (int i = 0; i < sz/4; i++) {
> + uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]);
> + addr = addr << 32;
> + addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]);
> + p->MOV(GenRegister::retype(GenRegister::addr1(i*4),
> GEN_TYPE_UL),
> + GenRegister::immuint64(addr));
> }
> - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz);
> + p->pop();
> }
> -
> }
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 094e6b4..684ecaf 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -51,7 +51,6 @@ namespace gbe
> this->ra = NULL;
> this->ifEndifFix = false;
> this->regSpillTick = 0;
> - memset(a0, 0, sizeof(a0));
> }
>
> GenContext::~GenContext(void) {
> @@ -340,8 +339,7 @@ namespace gbe
> p->curr.execWidth = 4;
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.noMask = 1;
> - GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> - a0[0], new_a0[0] - a0[0]);
> + GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
> GenRegister dst_ = dst;
> dst_.type = GEN_TYPE_UB;
> dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -385,8
> +383,7 @@ namespace gbe
> p->curr.execWidth = 8;
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.noMask = 1;
> - GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> - a0[0], new_a0[0] - a0[0]);
> + GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
> p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
> for (int i = 1; i < 4; i++) {
> ind_src.addr_imm += 8;
> @@ -430,8 +427,7 @@ namespace gbe
> p->curr.execWidth = 8;
> p->curr.predicate = GEN_PREDICATE_NONE;
> p->curr.noMask = 1;
> - GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> - a0[0], new_a0[0] - a0[0]);
> + GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
> p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
> for (int i = 1; i < (simd == 8 ? 2 : 4); i++) {
> ind_src.addr_imm += 8;
> @@ -1951,45 +1947,20 @@ namespace gbe
> }
>
> void GenContext::setA0Content(uint16_t new_a0[16], uint16_t
> max_offset, int sz) {
> - int16_t diff = new_a0[0] - this->a0[0];
> -
> if (sz == 0)
> sz = 8;
> GBE_ASSERT(sz%4 == 0);
> GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096);
> - bool need_reset = false;
> - for (int i = 1; i < sz; i++) {
> - GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096);
> - int16_t d = new_a0[i] - this->a0[i];
> - if (diff != d) {
> - need_reset = true;
> - break;
> - }
> - }
>
> - GBE_ASSERT(a0[0] + diff < 4096 && a0[0] + diff >= 0);
> - if (!need_reset && diff >= -512 && diff + max_offset <= 511) {
> - return;
> - } else if (!need_reset && sz == 8) {
> - p->push();
> - p->curr.execWidth = 8;
> - p->curr.predicate = GEN_PREDICATE_NONE;
> - p->curr.noMask = 1;
> - p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> - GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> GenRegister::immw(diff));
> - p->pop();
> - } else {
> - p->push();
> - p->curr.execWidth = 1;
> - p->curr.predicate = GEN_PREDICATE_NONE;
> - p->curr.noMask = 1;
> - for (int i = 0; i < sz/2; i++) {
> - p->MOV(GenRegister::retype(GenRegister::addr1(i*2),
> GEN_TYPE_UD),
> - GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2]));
> - }
> - p->pop();
> + p->push();
> + p->curr.execWidth = 1;
> + p->curr.predicate = GEN_PREDICATE_NONE;
> + p->curr.noMask = 1;
> + for (int i = 0; i < sz/2; i++) {
> + p->MOV(GenRegister::retype(GenRegister::addr1(i*2),
> GEN_TYPE_UD),
> + GenRegister::immud(new_a0[i*2 + 1] << 16 |
> new_a0[i*2]));
> }
> - memcpy(this->a0, new_a0, sizeof(uint16_t)*sz);
> + p->pop();
> }
>
> BVAR(OCL_OUTPUT_REG_ALLOC, false);
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 6ca88db..560248a 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -208,7 +208,6 @@ namespace gbe
> /*! allocate a new curbe register and insert to curbe pool. */
> void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t
> subValue = 0);
>
> - uint16_t a0[16];
> virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset =
> 0, int sz = 0);
>
> private:
> --
> 1.7.9.5
>
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list