[Beignet] [PATCH] Kill the A0 cache in GenContext.

Song, Ruiling ruiling.song at intel.com
Tue Apr 14 01:50:23 PDT 2015


The patch LGTM

> -----Original Message-----
> From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf Of
> junyan.he at inbox.com
> Sent: Tuesday, April 14, 2015 4:17 PM
> To: beignet at lists.freedesktop.org
> Cc: Junyan He
> Subject: [Beignet] [PATCH] Kill the A0 cache in GenContext.
> 
> From: Junyan He <junyan.he at linux.intel.com>
> 
> The a0 value cache in GenContext can only hold the value known at compile
> time, which may differ from the true offset value at run time when the code
> generates a backward jump. So just kill the a0 cache; we will use the
> load vector instruction to optimize it later.
> 
> Signed-off-by: Junyan He <junyan.he at linux.intel.com>
> ---
>  backend/src/backend/gen8_context.cpp |   54
> ++++++++--------------------------
>  backend/src/backend/gen_context.cpp  |   51
> +++++++-------------------------
>  backend/src/backend/gen_context.hpp  |    1 -
>  3 files changed, 24 insertions(+), 82 deletions(-)
> 
> diff --git a/backend/src/backend/gen8_context.cpp
> b/backend/src/backend/gen8_context.cpp
> index 920eb3e..2cdb248 100644
> --- a/backend/src/backend/gen8_context.cpp
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -98,8 +98,7 @@ namespace gbe
>                p->curr.execWidth = 4;
>                p->curr.predicate = GEN_PREDICATE_NONE;
>                p->curr.noMask = 1;
> -              GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> -                  a0[0], new_a0[0] - a0[0]);
> +              GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
>                GenRegister dst_ = dst;
>                dst_.type = GEN_TYPE_UB;
>                dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -159,8
> +158,7 @@ namespace gbe
>                p->curr.execWidth = 16;
>                p->curr.predicate = GEN_PREDICATE_NONE;
>                p->curr.noMask = 1;
> -              GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> -                  a0[0], new_a0[0] - a0[0]);
> +              GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
>                p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB),
> ind_src);
>                ind_src.addr_imm += 16;
>                p->MOV(GenRegister::offset(GenRegister::retype(tmp,
> GEN_TYPE_UB), 0, 16), ind_src); @@ -218,8 +216,7 @@ namespace gbe
>                p->curr.execWidth = 16;
>                p->curr.predicate = GEN_PREDICATE_NONE;
>                p->curr.noMask = 1;
> -              GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> -                  a0[0], new_a0[0] - a0[0]);
> +              GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
>                p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB),
> ind_src);
>                if (simd == 16) {
>                  ind_src.addr_imm += 16; @@ -862,46 +859,21 @@
> namespace gbe
>    }
> 
>    void Gen8Context::setA0Content(uint16_t new_a0[16], uint16_t
> max_offset, int sz) {
> -    int16_t diff = new_a0[0] - this->a0[0];
>      if (sz == 0)
>        sz = 16;
>      GBE_ASSERT(sz%4 == 0);
>      GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096);
> -    bool need_reset = false;
> -    for (int i = 1; i < sz; i++) {
> -      GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096);
> -      int16_t d = new_a0[i] - this->a0[i];
> -      if (diff != d) {
> -        need_reset = true;
> -        break;
> -      }
> -    }
> 
> -    GBE_ASSERT(this->a0[0] + diff < 4096 && this->a0[0] + diff >= 0);
> -    if (!need_reset && diff >= -512 && diff + max_offset <= 511) {
> -      return;
> -    } else if (!need_reset && sz == 16) {
> -      p->push();
> -      p->curr.execWidth = 16;
> -      p->curr.predicate = GEN_PREDICATE_NONE;
> -      p->curr.noMask = 1;
> -      p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> -          GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> GenRegister::immw(diff));
> -      p->pop();
> -    } else {
> -      p->push();
> -      p->curr.execWidth = 1;
> -      p->curr.predicate = GEN_PREDICATE_NONE;
> -      p->curr.noMask = 1;
> -      for (int i = 0; i < sz/4; i++) {
> -        uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]);
> -        addr = addr << 32;
> -        addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]);
> -        p->MOV(GenRegister::retype(GenRegister::addr1(i*4),
> GEN_TYPE_UL), GenRegister::immuint64(addr));
> -      }
> -      p->pop();
> +    p->push();
> +    p->curr.execWidth = 1;
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.noMask = 1;
> +    for (int i = 0; i < sz/4; i++) {
> +      uint64_t addr = (new_a0[i*4 + 3] << 16) | (new_a0[i*4 + 2]);
> +      addr = addr << 32;
> +      addr = addr | (new_a0[i*4 + 1] << 16) | (new_a0[i*4]);
> +      p->MOV(GenRegister::retype(GenRegister::addr1(i*4),
> GEN_TYPE_UL),
> + GenRegister::immuint64(addr));
>      }
> -    memcpy(this->a0, new_a0, sizeof(uint16_t)*sz);
> +    p->pop();
>    }
> -
>  }
> diff --git a/backend/src/backend/gen_context.cpp
> b/backend/src/backend/gen_context.cpp
> index 094e6b4..684ecaf 100644
> --- a/backend/src/backend/gen_context.cpp
> +++ b/backend/src/backend/gen_context.cpp
> @@ -51,7 +51,6 @@ namespace gbe
>      this->ra = NULL;
>      this->ifEndifFix = false;
>      this->regSpillTick = 0;
> -    memset(a0, 0, sizeof(a0));
>    }
> 
>    GenContext::~GenContext(void) {
> @@ -340,8 +339,7 @@ namespace gbe
>              p->curr.execWidth = 4;
>              p->curr.predicate = GEN_PREDICATE_NONE;
>              p->curr.noMask = 1;
> -            GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> -                a0[0], new_a0[0] - a0[0]);
> +            GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
>              GenRegister dst_ = dst;
>              dst_.type = GEN_TYPE_UB;
>              dst_.hstride = GEN_HORIZONTAL_STRIDE_1; @@ -385,8
> +383,7 @@ namespace gbe
>              p->curr.execWidth = 8;
>              p->curr.predicate = GEN_PREDICATE_NONE;
>              p->curr.noMask = 1;
> -            GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> -                a0[0], new_a0[0] - a0[0]);
> +            GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
>              p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
>              for (int i = 1; i < 4; i++) {
>                ind_src.addr_imm += 8;
> @@ -430,8 +427,7 @@ namespace gbe
>              p->curr.execWidth = 8;
>              p->curr.predicate = GEN_PREDICATE_NONE;
>              p->curr.noMask = 1;
> -            GenRegister ind_src =
> GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> -                a0[0], new_a0[0] - a0[0]);
> +            GenRegister ind_src =
> + GenRegister::to_indirect1xN(GenRegister::retype(src, GEN_TYPE_UB),
> + new_a0[0], 0);
>              p->MOV(GenRegister::retype(tmp, GEN_TYPE_UB), ind_src);
>              for (int i = 1; i < (simd == 8 ? 2 : 4); i++) {
>                ind_src.addr_imm += 8;
> @@ -1951,45 +1947,20 @@ namespace gbe
>    }
> 
>    void GenContext::setA0Content(uint16_t new_a0[16], uint16_t
> max_offset, int sz) {
> -    int16_t diff = new_a0[0] - this->a0[0];
> -
>      if (sz == 0)
>        sz = 8;
>      GBE_ASSERT(sz%4 == 0);
>      GBE_ASSERT(new_a0[0] >= 0 && new_a0[0] < 4096);
> -    bool need_reset = false;
> -    for (int i = 1; i < sz; i++) {
> -      GBE_ASSERT(new_a0[i] >= 0 && new_a0[0] < 4096);
> -      int16_t d = new_a0[i] - this->a0[i];
> -      if (diff != d) {
> -        need_reset = true;
> -        break;
> -      }
> -    }
> 
> -    GBE_ASSERT(a0[0] + diff < 4096 && a0[0] + diff >= 0);
> -    if (!need_reset && diff >= -512 && diff + max_offset <= 511) {
> -      return;
> -    } else if (!need_reset && sz == 8) {
> -      p->push();
> -      p->curr.execWidth = 8;
> -      p->curr.predicate = GEN_PREDICATE_NONE;
> -      p->curr.noMask = 1;
> -      p->ADD(GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> -          GenRegister::retype(GenRegister::addr8(0), GEN_TYPE_W),
> GenRegister::immw(diff));
> -      p->pop();
> -    } else {
> -      p->push();
> -      p->curr.execWidth = 1;
> -      p->curr.predicate = GEN_PREDICATE_NONE;
> -      p->curr.noMask = 1;
> -      for (int i = 0; i < sz/2; i++) {
> -        p->MOV(GenRegister::retype(GenRegister::addr1(i*2),
> GEN_TYPE_UD),
> -            GenRegister::immud(new_a0[i*2 + 1] << 16 | new_a0[i*2]));
> -      }
> -      p->pop();
> +    p->push();
> +    p->curr.execWidth = 1;
> +    p->curr.predicate = GEN_PREDICATE_NONE;
> +    p->curr.noMask = 1;
> +    for (int i = 0; i < sz/2; i++) {
> +      p->MOV(GenRegister::retype(GenRegister::addr1(i*2),
> GEN_TYPE_UD),
> +             GenRegister::immud(new_a0[i*2 + 1] << 16 |
> new_a0[i*2]));
>      }
> -    memcpy(this->a0, new_a0, sizeof(uint16_t)*sz);
> +    p->pop();
>    }
> 
>    BVAR(OCL_OUTPUT_REG_ALLOC, false);
> diff --git a/backend/src/backend/gen_context.hpp
> b/backend/src/backend/gen_context.hpp
> index 6ca88db..560248a 100644
> --- a/backend/src/backend/gen_context.hpp
> +++ b/backend/src/backend/gen_context.hpp
> @@ -208,7 +208,6 @@ namespace gbe
>      /*! allocate a new curbe register and insert to curbe pool. */
>      void allocCurbeReg(ir::Register reg, gbe_curbe_type value, uint32_t
> subValue = 0);
> 
> -    uint16_t a0[16];
>      virtual void setA0Content(uint16_t new_a0[16], uint16_t max_offset =
> 0, int sz = 0);
> 
>    private:
> --
> 1.7.9.5
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list