[Mesa-dev] [PATCH] nv50/ir: always return 0 when trying to read thread id along unit dim

Samuel Pitoiset samuel.pitoiset at gmail.com
Thu Jan 26 10:55:03 UTC 2017


That's a cool optimization because RDSV is a costly operation (on Maxwell it
requires a barrier dependency). Thanks!

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 01/26/2017 04:20 AM, Ilia Mirkin wrote:
> Many, many compute shaders define only a 1- or 2-dimensional block,
> but then continue to use system values that take all three dimensions
> into account (like gl_LocalInvocationIndex, etc). So for the special case
> where a dimension is exactly 1, we know that the thread id along that
> axis will always be 0, so return it as such and allow constant folding
> to fix things up.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp           |  6 +++++-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h      |  2 +-
>  src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 10 ++++++++--
>  src/gallium/drivers/nouveau/codegen/nv50_ir_target.h      |  4 +++-
>  4 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 186c9fd..b67a1dd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -1179,7 +1179,11 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
>        info->prop.gp.instanceCount = 1;
>        info->prop.gp.maxVertices = 1;
>     }
> -   info->prop.cp.numThreads = 1;
> +   if (info->type == PIPE_SHADER_COMPUTE) {
> +      info->prop.cp.numThreads[0] =
> +      info->prop.cp.numThreads[1] =
> +      info->prop.cp.numThreads[2] = 1;
> +   }
>     info->io.pointSize = 0xff;
>     info->io.instanceId = 0xff;
>     info->io.vertexId = 0xff;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 65d0904..e7d840d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -152,7 +152,7 @@ struct nv50_ir_prog_info
>           uint32_t inputOffset; /* base address for user args */
>           uint32_t sharedOffset; /* reserved space in s[] */
>           uint32_t gridInfoBase;  /* base address for NTID,NCTAID */
> -         uint32_t numThreads; /* max number of threads */
> +         uint16_t numThreads[3]; /* max number of threads */
>        } cp;
>     } prop;
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 6320e52..51f8b29 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1047,7 +1047,6 @@ bool Source::scanSource()
>     }
>
>     info->io.viewportId = -1;
> -   info->prop.cp.numThreads = 1;
>
>     info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
>     info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
> @@ -1150,9 +1149,13 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
>           info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
>        break;
>     case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
> +      info->prop.cp.numThreads[0] = prop->u[0].Data;
> +      break;
>     case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
> +      info->prop.cp.numThreads[1] = prop->u[0].Data;
> +      break;
>     case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
> -      info->prop.cp.numThreads *= prop->u[0].Data;
> +      info->prop.cp.numThreads[2] = prop->u[0].Data;
>        break;
>     case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
>        info->io.clipDistances = prop->u[0].Data;
> @@ -1941,6 +1944,9 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
>        return ld->getDef(0);
>     case TGSI_FILE_SYSTEM_VALUE:
>        assert(!ptr);
> +      if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
> +          info->prop.cp.numThreads[swz] == 1)
> +         return zero;
>        ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
>        ld->perPatch = info->sv[idx].patch;
>        return ld->getDef(0);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> index eaf50cc..e9d1057 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> @@ -174,7 +174,9 @@ public:
>     virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
>
>     virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
> -      threads = info->prop.cp.numThreads;
> +      threads = info->prop.cp.numThreads[0] *
> +         info->prop.cp.numThreads[1] *
> +         info->prop.cp.numThreads[2];
>        if (threads == 0)
>           threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
>     }
>


More information about the mesa-dev mailing list