[Mesa-dev] [PATCH] nv50/ir: always return 0 when trying to read thread id along unit dim
Samuel Pitoiset
samuel.pitoiset at gmail.com
Thu Jan 26 10:55:03 UTC 2017
That's a nice optimization, because RDSV is a costly operation (on Maxwell
it requires a barrier dependency). Thanks!
Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
On 01/26/2017 04:20 AM, Ilia Mirkin wrote:
> A great many compute shaders only define a 1- or 2-dimensional block,
> but still use system values that take all three dimensions into
> account (such as gl_LocalInvocationIndex). In the special case where
> a dimension is exactly 1, we know that the thread id along that axis
> will always be 0, so return a constant 0 and let constant folding
> simplify the computations that depend on it.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
> src/gallium/drivers/nouveau/codegen/nv50_ir.cpp | 6 +++++-
> src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h | 2 +-
> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 10 ++++++++--
> src/gallium/drivers/nouveau/codegen/nv50_ir_target.h | 4 +++-
> 4 files changed, 17 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 186c9fd..b67a1dd 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -1179,7 +1179,11 @@ nv50_ir_init_prog_info(struct nv50_ir_prog_info *info)
> info->prop.gp.instanceCount = 1;
> info->prop.gp.maxVertices = 1;
> }
> - info->prop.cp.numThreads = 1;
> + if (info->type == PIPE_SHADER_COMPUTE) {
> + info->prop.cp.numThreads[0] =
> + info->prop.cp.numThreads[1] =
> + info->prop.cp.numThreads[2] = 1;
> + }
> info->io.pointSize = 0xff;
> info->io.instanceId = 0xff;
> info->io.vertexId = 0xff;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> index 65d0904..e7d840d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
> @@ -152,7 +152,7 @@ struct nv50_ir_prog_info
> uint32_t inputOffset; /* base address for user args */
> uint32_t sharedOffset; /* reserved space in s[] */
> uint32_t gridInfoBase; /* base address for NTID,NCTAID */
> - uint32_t numThreads; /* max number of threads */
> + uint16_t numThreads[3]; /* max number of threads */
> } cp;
> } prop;
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index 6320e52..51f8b29 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -1047,7 +1047,6 @@ bool Source::scanSource()
> }
>
> info->io.viewportId = -1;
> - info->prop.cp.numThreads = 1;
>
> info->immd.data = (uint32_t *)MALLOC(scan.immediate_count * 16);
> info->immd.type = (ubyte *)MALLOC(scan.immediate_count * sizeof(ubyte));
> @@ -1150,9 +1149,13 @@ void Source::scanProperty(const struct tgsi_full_property *prop)
> info->prop.tp.outputPrim = PIPE_PRIM_TRIANGLES; /* anything but points */
> break;
> case TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH:
> + info->prop.cp.numThreads[0] = prop->u[0].Data;
> + break;
> case TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT:
> + info->prop.cp.numThreads[1] = prop->u[0].Data;
> + break;
> case TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH:
> - info->prop.cp.numThreads *= prop->u[0].Data;
> + info->prop.cp.numThreads[2] = prop->u[0].Data;
> break;
> case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED:
> info->io.clipDistances = prop->u[0].Data;
> @@ -1941,6 +1944,9 @@ Converter::fetchSrc(tgsi::Instruction::SrcRegister src, int c, Value *ptr)
> return ld->getDef(0);
> case TGSI_FILE_SYSTEM_VALUE:
> assert(!ptr);
> + if (info->sv[idx].sn == TGSI_SEMANTIC_THREAD_ID &&
> + info->prop.cp.numThreads[swz] == 1)
> + return zero;
> ld = mkOp1(OP_RDSV, TYPE_U32, getSSA(), srcToSym(src, c));
> ld->perPatch = info->sv[idx].patch;
> return ld->getDef(0);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> index eaf50cc..e9d1057 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
> @@ -174,7 +174,9 @@ public:
> virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const = 0;
>
> virtual void parseDriverInfo(const struct nv50_ir_prog_info *info) {
> - threads = info->prop.cp.numThreads;
> + threads = info->prop.cp.numThreads[0] *
> + info->prop.cp.numThreads[1] *
> + info->prop.cp.numThreads[2];
> if (threads == 0)
> threads = info->target >= NVISA_GK104_CHIPSET ? 1024 : 512;
> }
>
More information about the mesa-dev mailing list