[Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size support

Wed Jun 14 06:05:55 UTC 2017

The spec only required the intel_reqd_sub_group_size in the return value CL_DEVICE_SUB_GROUP_SIZES_INTEL.
If some applications use the hard code sub_group_size, such as 32, then the program will exit in the GBE, I think it does not make sense.

> -----Original Message-----
> From: Pan, Xiuli
> Sent: Tuesday, June 13, 2017 17:04
> To: Yang, Rong R <rong.r.yang at intel.com>; beignet at lists.freedesktop.org
> Subject: RE: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size
> support
> 
> The spec has required the subgroup size to be 8 or 16, and I think we may
> need to fail the build in some other place.
> 
> -----Original Message-----
> From: Yang, Rong R
> Sent: Tuesday, June 13, 2017 16:44
> To: Pan, Xiuli <xiuli.pan at intel.com>; beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: RE: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size
> support
> 
> 
> 
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of Xiuli Pan
> > Sent: Monday, June 5, 2017 16:28
> > To: beignet at lists.freedesktop.org
> > Cc: Pan, Xiuli <xiuli.pan at intel.com>
> > Subject: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size
> > support
> >
> > From: Pan Xiuli <xiuli.pan at intel.com>
> >
> > If we get intel_reqd_sub_group_size attribute from frontend then set
> > it to backend.
> >
> > Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> > ---
> >  backend/src/backend/context.cpp       |  6 +-----
> >  backend/src/backend/gen_program.cpp   | 28 ++++++++++++++++++++--
> --
> > ----
> >  backend/src/llvm/llvm_gen_backend.cpp | 20 ++++++++++++++++++++
> >  3 files changed, 41 insertions(+), 13 deletions(-)
> >
> > diff --git a/backend/src/backend/context.cpp
> > b/backend/src/backend/context.cpp index e9ddd17..c9500c8 100644
> > --- a/backend/src/backend/context.cpp
> > +++ b/backend/src/backend/context.cpp
> > @@ -340,7 +340,6 @@ namespace gbe
> >    ///////////////////////////////////////////////////////////////////////////
> >    // Generic Context (shared by the simulator and the HW context)
> >
> > //////////////////////////////////////////////////////////////////////
> > /////
> > -  IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
> >
> >    Context::Context(const ir::Unit &unit, const std::string &name) :
> >      unit(unit), fn(*unit.getFunction(name)), name(name),
> > liveness(NULL), dag(NULL), useDWLabel(false) @@ -361,10 +360,7 @@
> namespace gbe
> >    }
> >
> >    void Context::startNewCG(uint32_t simdWidth) {
> > -    if (simdWidth == 0 || OCL_SIMD_WIDTH != 15)
> > -      this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
> > -    else
> > -      this->simdWidth = simdWidth;
> > +    this->simdWidth = simdWidth;
> >      GBE_SAFE_DELETE(this->registerAllocator);
> >      GBE_SAFE_DELETE(this->scratchAllocator);
> >      GBE_ASSERT(dag != NULL && liveness != NULL); diff --git
> > a/backend/src/backend/gen_program.cpp
> > b/backend/src/backend/gen_program.cpp
> > index c1827b1..26b646a 100644
> > --- a/backend/src/backend/gen_program.cpp
> > +++ b/backend/src/backend/gen_program.cpp
> > @@ -59,6 +59,7 @@
> >  #include <clang/CodeGen/CodeGenAction.h>  #endif
> >
> > +#include "sys/cvar.hpp"
> >  #include <cstring>
> >  #include <sstream>
> >  #include <memory>
> > @@ -138,17 +139,24 @@ namespace gbe {
> >    }
> >
> >    /*! We must avoid spilling at all cost with Gen */
> > -  static const struct CodeGenStrategy {
> > +  struct CodeGenStrategy {
> >      uint32_t simdWidth;
> >      uint32_t reservedSpillRegs;
> >      bool limitRegisterPressure;
> > -  } codeGenStrategy[] = {
> > +  };
> > +  static const struct CodeGenStrategy codeGenStrategyDefault[] = {
> >      {16, 0, false},
> >      {8, 0, false},
> >      {8, 8, false},
> >      {8, 16, false},
> >    };
> > +  static const struct CodeGenStrategy codeGenStrategySimd16[] = {
> > +    {16, 0, false},
> > +    {16, 8, false},
> > +    {16, 16, false},
> > +  };
> >
> > +  IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
> >    Kernel *GenProgram::compileKernel(const ir::Unit &unit, const
> > std::string &name,
> >                                      bool relaxMath, int profiling) {
> > #ifdef GBE_COMPILER_AVAILABLE @@ -156,19 +164,23 @@ namespace
> gbe {
> >      // when the function already provides the simd width we need to use
> (i.e.
> >      // non zero)
> >      const ir::Function *fn = unit.getFunction(name);
> > +    const struct CodeGenStrategy* codeGenStrategy =
> > + codeGenStrategyDefault;
> >      if(fn == NULL)
> >        GBE_ASSERT(0);
> > -    uint32_t codeGenNum = sizeof(codeGenStrategy) /
> > sizeof(codeGenStrategy[0]);
> > +    uint32_t codeGenNum = sizeof(codeGenStrategyDefault) /
> > + sizeof(codeGenStrategyDefault[0]);
> >      uint32_t codeGen = 0;
> >      GenContext *ctx = NULL;
> > -    if (fn->getSimdWidth() == 8) {
> > +    if ( fn->getSimdWidth() != 0 && OCL_SIMD_WIDTH != 15) {
> > +      GBE_ASSERTM(0, "unsupported SIMD width!");
> > +    }else if (fn->getSimdWidth() == 8 || OCL_SIMD_WIDTH == 8) {
> >        codeGen = 1;
> > -    } else if (fn->getSimdWidth() == 16) {
> > -      codeGenNum = 1;
> > -    } else if (fn->getSimdWidth() == 0) {
> > +    } else if (fn->getSimdWidth() == 16 || OCL_SIMD_WIDTH == 16){
> > +      codeGenStrategy = codeGenStrategySimd16;
> > +      codeGenNum = 3;
> Don't hard the codeGenStrategySimd16's array size here.
> 
> > +    } else if (fn->getSimdWidth() == 0 && OCL_SIMD_WIDTH == 15) {
> >        codeGen = 0;
> >      } else
> > -      GBE_ASSERT(0);
> > +      GBE_ASSERTM(0, "unsupported SIMD width!");
> >      Kernel *kernel = NULL;
> >
> >      // Stop when compilation is successful diff --git
> > a/backend/src/llvm/llvm_gen_backend.cpp
> > b/backend/src/llvm/llvm_gen_backend.cpp
> > index c8e29c5..b2d9b1b 100644
> > --- a/backend/src/llvm/llvm_gen_backend.cpp
> > +++ b/backend/src/llvm/llvm_gen_backend.cpp
> > @@ -2125,6 +2125,7 @@ namespace gbe
> >      // Loop over the kernel metadatas to set the required work group size.
> >      size_t reqd_wg_sz[3] = {0, 0, 0};
> >      size_t hint_wg_sz[3] = {0, 0, 0};
> > +    size_t reqd_sg_sz = 0;
> >      ir::FunctionArgument::InfoFromLLVM llvmInfo;
> >      MDNode *addrSpaceNode = NULL;
> >      MDNode *typeNameNode = NULL;
> > @@ -2220,6 +2221,23 @@ namespace gbe
> >        functionAttributes += buffer;
> >        functionAttributes += " ";
> >      }
> > +    if ((attrNode = F.getMetadata("intel_reqd_sub_group_size"))) {
> > +      GBE_ASSERT(attrNode->getNumOperands() == 1);
> > +      ConstantInt *sz = mdconst::extract<ConstantInt>(attrNode-
> > >getOperand(0));
> > +      GBE_ASSERT(sz);
> > +      reqd_sg_sz = sz->getZExtValue();
> > +      GBE_ASSERT(reqd_sg_sz == 8 || reqd_sg_sz == 16);
> I think fail the program building is better then assert.
> 
> 
> > +      functionAttributes += "intel_reqd_sub_group_size";
> > +      std::stringstream param;
> > +      char buffer[100] = {0};
> > +      param << "(";
> > +      param << reqd_sg_sz;
> > +      param << ")";
> > +      param >> buffer;
> > +      functionAttributes += buffer;
> > +      functionAttributes += " ";
> > +    }
> > +
> >  #else
> >      /* First find the meta data belong to this function. */
> >      MDNode *node = getKernelFunctionMetadata(&F); @@ -2345,6 +2363,8
> > @@ namespace gbe  #endif /* LLVM 3.9 Function metadata */
> >
> >      ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0],
> > reqd_wg_sz[1], reqd_wg_sz[2]);
> > +    if (reqd_sg_sz)
> > +      ctx.setSimdWidth(reqd_sg_sz);
> >
> >      ctx.getFunction().setFunctionAttributes(functionAttributes);
> >      // Loop over the arguments and output registers for them
> > --
> > 2.7.4
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/beignet