[Beignet] [PATCH 5/5] BDW: Add class Gen8Context.

Wed Oct 8 22:58:38 PDT 2014

Yes, I also plan to change GenContext to a pure virtual class, and I think it is better to do this when optimizing the long operations in BDW.

-----Original Message-----
From: He Junyan [mailto:junyan.he at inbox.com] 
Sent: Thursday, October 9, 2014 13:09
To: Yang, Rong R
Cc: beignet at lists.freedesktop.org
Subject: Re: [Beignet] [PATCH 5/5] BDW: Add class Gen8Context.

This patch set is OK and will not cause regressions on previous platforms.
In this patch set, GenEncoder becomes a pure virtual class and all platform encoders derive from it.
But GenContext still represents the Gen7 context. I think it would be better to take the same approach as the encoder to make the architecture clearer.

On 一, 2014-09-29 at 13:37 +0800, Yang Rong wrote:
> Gen8Context is currently almost the same as Gen75Context, but Gen8Context is still derived from GenContext for clarity.
> 
> Signed-off-by: Yang Rong <rong.r.yang at intel.com>
> ---
>  backend/src/CMakeLists.txt           |   2 +
>  backend/src/backend/gen8_context.cpp | 113 
> +++++++++++++++++++++++++++++++++++
>  backend/src/backend/gen8_context.hpp |  63 +++++++++++++++++++
>  backend/src/backend/gen_program.cpp  |   3 +
>  4 files changed, 181 insertions(+)
>  create mode 100644 backend/src/backend/gen8_context.cpp
>  create mode 100644 backend/src/backend/gen8_context.hpp
> 
> diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt 
> index 2daa630..c5d388e 100644
> --- a/backend/src/CMakeLists.txt
> +++ b/backend/src/CMakeLists.txt
> @@ -96,6 +96,8 @@ set (GBE_SRC
>      backend/gen_context.cpp
>      backend/gen75_context.hpp
>      backend/gen75_context.cpp
> +    backend/gen8_context.hpp
> +    backend/gen8_context.cpp
>      backend/gen_program.cpp
>      backend/gen_program.hpp
>      backend/gen_program.h
> diff --git a/backend/src/backend/gen8_context.cpp 
> b/backend/src/backend/gen8_context.cpp
> new file mode 100644
> index 0000000..a9914f6
> --- /dev/null
> +++ b/backend/src/backend/gen8_context.cpp
> @@ -0,0 +1,113 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +/**
> + * \file gen8_context.cpp
> + */
> +
> +#include "backend/gen8_context.hpp"
> +#include "backend/gen8_encoder.hpp"
> +#include "backend/gen_program.hpp"
> +#include "backend/gen_defs.hpp"
> +#include "backend/gen_encoder.hpp"
> +#include "backend/gen_insn_selection.hpp"
> +#include "backend/gen_insn_scheduling.hpp"
> +#include "backend/gen_reg_allocation.hpp"
> +#include "sys/cvar.hpp"
> +#include "ir/function.hpp"
> +#include "ir/value.hpp"
> +#include <cstring>
> +
> +namespace gbe
> +{
> +  void Gen8Context::emitSLMOffset(void) {
> +    if(kernel->getUseSLM() == false)
> +      return;
> +
> +    const GenRegister slm_offset = ra->genReg(GenRegister::ud1grf(ir::ocl::slmoffset));
> +    const GenRegister slm_index = GenRegister::ud1grf(0, 0);
> +    //the slm index is hold in r0.0 24-27 bit, in 4K unit, shift left 12 to get byte unit
> +    p->push();
> +      p->curr.execWidth = 1;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      p->SHR(slm_offset, slm_index, GenRegister::immud(12));
> +    p->pop();
> +  }
> +
> +  void Gen8Context::allocSLMOffsetCurbe(void) {
> +    if(fn.getUseSLM())
> +      allocCurbeReg(ir::ocl::slmoffset, GBE_CURBE_SLM_OFFSET);  }
> +
> +  uint32_t Gen8Context::alignScratchSize(uint32_t size){
> +    if(size == 0)
> +      return 0;
> +    uint32_t i = 2048;
> +    while(i < size) i *= 2;
> +    return i;
> +  }
> +
> +  void Gen8Context::emitStackPointer(void) {
> +    using namespace ir;
> +
> +    // Only emit stack pointer computation if we use a stack
> +    if (kernel->getCurbeOffset(GBE_CURBE_STACK_POINTER, 0) <= 0)
> +      return;
> +
> +    // Check that everything is consistent in the kernel code
> +    const uint32_t perLaneSize = kernel->getStackSize();
> +    const uint32_t perThreadSize = perLaneSize * this->simdWidth;
> +    GBE_ASSERT(perLaneSize > 0);
> +    GBE_ASSERT(isPowerOf<2>(perLaneSize) == true);
> +    GBE_ASSERT(isPowerOf<2>(perThreadSize) == true);
> +
> +    // Use shifts rather than muls which are limited to 32x16 bit sources
> +    const uint32_t perLaneShift = logi2(perLaneSize);
> +    const uint32_t perThreadShift = logi2(perThreadSize);
> +    const GenRegister selStatckPtr = this->simdWidth == 8 ?
> +      GenRegister::ud8grf(ir::ocl::stackptr) :
> +      GenRegister::ud16grf(ir::ocl::stackptr);
> +    const GenRegister stackptr = ra->genReg(selStatckPtr);
> +    const GenRegister selStackBuffer = GenRegister::ud1grf(ir::ocl::stackbuffer);
> +    const GenRegister bufferptr = ra->genReg(selStackBuffer);
> +
> +    // We compute the per-lane stack pointer here
> +    p->push();
> +      p->curr.execWidth = 1;
> +      p->curr.predicate = GEN_PREDICATE_NONE;
> +      //p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x1ff));
> +      p->AND(GenRegister::ud1grf(126,0), GenRegister::ud1grf(0,5), GenRegister::immud(0x7f));
> +      p->AND(GenRegister::ud1grf(126,4), GenRegister::ud1grf(0,5), GenRegister::immud(0x180));
> +      p->SHR(GenRegister::ud1grf(126,4), GenRegister::ud1grf(126, 4), GenRegister::immud(7));
> +      p->curr.execWidth = this->simdWidth;
> +      p->SHL(stackptr, stackptr, GenRegister::immud(perLaneShift));
> +      p->curr.execWidth = 1;
> +      p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(2));
> +      p->ADD(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::ud1grf(126, 4));
> +      p->SHL(GenRegister::ud1grf(126,0), GenRegister::ud1grf(126,0), GenRegister::immud(perThreadShift));
> +      p->curr.execWidth = this->simdWidth;
> +      p->ADD(stackptr, stackptr, bufferptr);
> +      p->ADD(stackptr, stackptr, GenRegister::ud1grf(126,0));
> +    p->pop();
> +  }
> +
> +  void Gen8Context::newSelection(void) {
> +    this->sel = GBE_NEW(Selection75, *this);
> +  }
> +}
> +
> diff --git a/backend/src/backend/gen8_context.hpp 
> b/backend/src/backend/gen8_context.hpp
> new file mode 100644
> index 0000000..3269eeb
> --- /dev/null
> +++ b/backend/src/backend/gen8_context.hpp
> @@ -0,0 +1,63 @@
> +/*
> + * Copyright © 2012 Intel Corporation
> + *
> + * This library is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU Lesser General Public
> + * License as published by the Free Software Foundation; either
> + * version 2 of the License, or (at your option) any later version.
> + *
> + * This library is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * Lesser General Public License for more details.
> + *
> + * You should have received a copy of the GNU Lesser General Public
> + * License along with this library. If not, see <http://www.gnu.org/licenses/>.
> + *
> + */
> +
> +/**
> + * \file gen8_context.hpp
> + */
> +#ifndef __GBE_GEN8_CONTEXT_HPP__
> +#define __GBE_GEN8_CONTEXT_HPP__
> +
> +#include "backend/gen_context.hpp"
> +#include "backend/gen8_encoder.hpp"
> +
> +namespace gbe
> +{
> +  /* This class is used to implement the BDW
> +     specific logic for context. */
> +  class Gen8Context : public GenContext
> +  {
> +  public:
> +    virtual ~Gen8Context(void) { }
> +    Gen8Context(const ir::Unit &unit, const std::string &name, uint32_t deviceID, bool relaxMath = false)
> +            : GenContext(unit, name, deviceID, relaxMath) {
> +    };
> +    /*! device's max srcatch buffer size */
> +    #define GEN8_SCRATCH_SIZE  (2 * KB * KB)
> +    /*! Emit the per-lane stack pointer computation */
> +    virtual void emitStackPointer(void);
> +    /*! Align the scratch size to the device's scratch unit size */
> +    virtual uint32_t alignScratchSize(uint32_t size);
> +    /*! Get the device's max srcatch size */
> +    virtual uint32_t getScratchSize(void) {
> +      //Because the allocate is use uint16_t, so clamp it, need refine
> +      return std::min(GEN8_SCRATCH_SIZE, 0x7fff);
> +    }
> +
> +  protected:
> +    virtual GenEncoder* generateEncoder(void) {
> +      return GBE_NEW(Gen8Encoder, this->simdWidth, 8, deviceID);
> +    }
> +
> +  private:
> +    virtual void emitSLMOffset(void);
> +    virtual void allocSLMOffsetCurbe(void);
> +    virtual void newSelection(void);
> +  };
> +}
> +#endif /* __GBE_GEN8_CONTEXT_HPP__ */
> +
> diff --git a/backend/src/backend/gen_program.cpp 
> b/backend/src/backend/gen_program.cpp
> index 44f7e33..71a69dd 100644
> --- a/backend/src/backend/gen_program.cpp
> +++ b/backend/src/backend/gen_program.cpp
> @@ -51,6 +51,7 @@
>  #include "backend/gen_program.hpp"
>  #include "backend/gen_context.hpp"
>  #include "backend/gen75_context.hpp"
> +#include "backend/gen8_context.hpp"
>  #include "backend/gen_defs.hpp"
>  #include "backend/gen/gen_mesa_disasm.h"
>  #include "backend/gen_reg_allocation.hpp"
> @@ -164,6 +165,8 @@ namespace gbe {
>        ctx = GBE_NEW(GenContext, unit, name, deviceID, relaxMath);
>      } else if (IS_HASWELL(deviceID)) {
>        ctx = GBE_NEW(Gen75Context, unit, name, deviceID, relaxMath);
> +    } else if (IS_BROADWELL(deviceID)) {
> +      ctx = GBE_NEW(Gen8Context, unit, name, deviceID, relaxMath);
>      }
>      GBE_ASSERTM(ctx != NULL, "Fail to create the gen context\n");
>