[Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size support
Yang, Rong R
rong.r.yang at intel.com
Wed Jun 14 06:05:55 UTC 2017
The spec only required the intel_reqd_sub_group_size in the return value CL_DEVICE_SUB_GROUP_SIZES_INTEL.
If some applications use the hard code sub_group_size, such as 32, then the program will exit in the GBE, I think it does not make sense.
> -----Original Message-----
> From: Pan, Xiuli
> Sent: Tuesday, June 13, 2017 17:04
> To: Yang, Rong R <rong.r.yang at intel.com>; beignet at lists.freedesktop.org
> Subject: RE: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size
> support
>
> The spec has required the subgroup size to be 8 or 16, and I think we may
> need to fail the build in some other place.
>
> -----Original Message-----
> From: Yang, Rong R
> Sent: Tuesday, June 13, 2017 16:44
> To: Pan, Xiuli <xiuli.pan at intel.com>; beignet at lists.freedesktop.org
> Cc: Pan, Xiuli <xiuli.pan at intel.com>
> Subject: RE: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size
> support
>
>
>
> > -----Original Message-----
> > From: Beignet [mailto:beignet-bounces at lists.freedesktop.org] On Behalf
> > Of Xiuli Pan
> > Sent: Monday, June 5, 2017 16:28
> > To: beignet at lists.freedesktop.org
> > Cc: Pan, Xiuli <xiuli.pan at intel.com>
> > Subject: [Beignet] [PATCH 1/3] Backend: Add intel_reqd_sub_group_size
> > support
> >
> > From: Pan Xiuli <xiuli.pan at intel.com>
> >
> > If we get intel_reqd_sub_group_size attribute from frontend then set
> > it to backend.
> >
> > Signed-off-by: Pan Xiuli <xiuli.pan at intel.com>
> > ---
> > backend/src/backend/context.cpp | 6 +-----
> > backend/src/backend/gen_program.cpp | 28 ++++++++++++++++++++--
> --
> > ----
> > backend/src/llvm/llvm_gen_backend.cpp | 20 ++++++++++++++++++++
> > 3 files changed, 41 insertions(+), 13 deletions(-)
> >
> > diff --git a/backend/src/backend/context.cpp
> > b/backend/src/backend/context.cpp index e9ddd17..c9500c8 100644
> > --- a/backend/src/backend/context.cpp
> > +++ b/backend/src/backend/context.cpp
> > @@ -340,7 +340,6 @@ namespace gbe
> > ///////////////////////////////////////////////////////////////////////////
> > // Generic Context (shared by the simulator and the HW context)
> >
> > //////////////////////////////////////////////////////////////////////
> > /////
> > - IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
> >
> > Context::Context(const ir::Unit &unit, const std::string &name) :
> > unit(unit), fn(*unit.getFunction(name)), name(name),
> > liveness(NULL), dag(NULL), useDWLabel(false) @@ -361,10 +360,7 @@
> namespace gbe
> > }
> >
> > void Context::startNewCG(uint32_t simdWidth) {
> > - if (simdWidth == 0 || OCL_SIMD_WIDTH != 15)
> > - this->simdWidth = nextHighestPowerOf2(OCL_SIMD_WIDTH);
> > - else
> > - this->simdWidth = simdWidth;
> > + this->simdWidth = simdWidth;
> > GBE_SAFE_DELETE(this->registerAllocator);
> > GBE_SAFE_DELETE(this->scratchAllocator);
> > GBE_ASSERT(dag != NULL && liveness != NULL); diff --git
> > a/backend/src/backend/gen_program.cpp
> > b/backend/src/backend/gen_program.cpp
> > index c1827b1..26b646a 100644
> > --- a/backend/src/backend/gen_program.cpp
> > +++ b/backend/src/backend/gen_program.cpp
> > @@ -59,6 +59,7 @@
> > #include <clang/CodeGen/CodeGenAction.h> #endif
> >
> > +#include "sys/cvar.hpp"
> > #include <cstring>
> > #include <sstream>
> > #include <memory>
> > @@ -138,17 +139,24 @@ namespace gbe {
> > }
> >
> > /*! We must avoid spilling at all cost with Gen */
> > - static const struct CodeGenStrategy {
> > + struct CodeGenStrategy {
> > uint32_t simdWidth;
> > uint32_t reservedSpillRegs;
> > bool limitRegisterPressure;
> > - } codeGenStrategy[] = {
> > + };
> > + static const struct CodeGenStrategy codeGenStrategyDefault[] = {
> > {16, 0, false},
> > {8, 0, false},
> > {8, 8, false},
> > {8, 16, false},
> > };
> > + static const struct CodeGenStrategy codeGenStrategySimd16[] = {
> > + {16, 0, false},
> > + {16, 8, false},
> > + {16, 16, false},
> > + };
> >
> > + IVAR(OCL_SIMD_WIDTH, 8, 15, 16);
> > Kernel *GenProgram::compileKernel(const ir::Unit &unit, const
> > std::string &name,
> > bool relaxMath, int profiling) {
> > #ifdef GBE_COMPILER_AVAILABLE @@ -156,19 +164,23 @@ namespace
> gbe {
> > // when the function already provides the simd width we need to use
> (i.e.
> > // non zero)
> > const ir::Function *fn = unit.getFunction(name);
> > + const struct CodeGenStrategy* codeGenStrategy =
> > + codeGenStrategyDefault;
> > if(fn == NULL)
> > GBE_ASSERT(0);
> > - uint32_t codeGenNum = sizeof(codeGenStrategy) /
> > sizeof(codeGenStrategy[0]);
> > + uint32_t codeGenNum = sizeof(codeGenStrategyDefault) /
> > + sizeof(codeGenStrategyDefault[0]);
> > uint32_t codeGen = 0;
> > GenContext *ctx = NULL;
> > - if (fn->getSimdWidth() == 8) {
> > + if ( fn->getSimdWidth() != 0 && OCL_SIMD_WIDTH != 15) {
> > + GBE_ASSERTM(0, "unsupported SIMD width!");
> > + }else if (fn->getSimdWidth() == 8 || OCL_SIMD_WIDTH == 8) {
> > codeGen = 1;
> > - } else if (fn->getSimdWidth() == 16) {
> > - codeGenNum = 1;
> > - } else if (fn->getSimdWidth() == 0) {
> > + } else if (fn->getSimdWidth() == 16 || OCL_SIMD_WIDTH == 16){
> > + codeGenStrategy = codeGenStrategySimd16;
> > + codeGenNum = 3;
> Don't hard the codeGenStrategySimd16's array size here.
>
> > + } else if (fn->getSimdWidth() == 0 && OCL_SIMD_WIDTH == 15) {
> > codeGen = 0;
> > } else
> > - GBE_ASSERT(0);
> > + GBE_ASSERTM(0, "unsupported SIMD width!");
> > Kernel *kernel = NULL;
> >
> > // Stop when compilation is successful diff --git
> > a/backend/src/llvm/llvm_gen_backend.cpp
> > b/backend/src/llvm/llvm_gen_backend.cpp
> > index c8e29c5..b2d9b1b 100644
> > --- a/backend/src/llvm/llvm_gen_backend.cpp
> > +++ b/backend/src/llvm/llvm_gen_backend.cpp
> > @@ -2125,6 +2125,7 @@ namespace gbe
> > // Loop over the kernel metadatas to set the required work group size.
> > size_t reqd_wg_sz[3] = {0, 0, 0};
> > size_t hint_wg_sz[3] = {0, 0, 0};
> > + size_t reqd_sg_sz = 0;
> > ir::FunctionArgument::InfoFromLLVM llvmInfo;
> > MDNode *addrSpaceNode = NULL;
> > MDNode *typeNameNode = NULL;
> > @@ -2220,6 +2221,23 @@ namespace gbe
> > functionAttributes += buffer;
> > functionAttributes += " ";
> > }
> > + if ((attrNode = F.getMetadata("intel_reqd_sub_group_size"))) {
> > + GBE_ASSERT(attrNode->getNumOperands() == 1);
> > + ConstantInt *sz = mdconst::extract<ConstantInt>(attrNode-
> > >getOperand(0));
> > + GBE_ASSERT(sz);
> > + reqd_sg_sz = sz->getZExtValue();
> > + GBE_ASSERT(reqd_sg_sz == 8 || reqd_sg_sz == 16);
> I think fail the program building is better then assert.
>
>
> > + functionAttributes += "intel_reqd_sub_group_size";
> > + std::stringstream param;
> > + char buffer[100] = {0};
> > + param << "(";
> > + param << reqd_sg_sz;
> > + param << ")";
> > + param >> buffer;
> > + functionAttributes += buffer;
> > + functionAttributes += " ";
> > + }
> > +
> > #else
> > /* First find the meta data belong to this function. */
> > MDNode *node = getKernelFunctionMetadata(&F); @@ -2345,6 +2363,8
> > @@ namespace gbe #endif /* LLVM 3.9 Function metadata */
> >
> > ctx.getFunction().setCompileWorkGroupSize(reqd_wg_sz[0],
> > reqd_wg_sz[1], reqd_wg_sz[2]);
> > + if (reqd_sg_sz)
> > + ctx.setSimdWidth(reqd_sg_sz);
> >
> > ctx.getFunction().setFunctionAttributes(functionAttributes);
> > // Loop over the arguments and output registers for them
> > --
> > 2.7.4
> >
> > _______________________________________________
> > Beignet mailing list
> > Beignet at lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/beignet
More information about the Beignet
mailing list