[Mesa-dev] [RFC 3/3] nouveau: codegen: Add support for SV_WORK_DIM
Samuel Pitoiset
samuel.pitoiset at gmail.com
Wed Apr 27 15:46:11 UTC 2016
On 04/27/2016 05:41 PM, Samuel Pitoiset wrote:
> Yeah, please do it for Fermi as well.
>
> On 04/27/2016 04:43 PM, Hans de Goede wrote:
>> Add support for SV_WORK_DIM.
>>
>> Note this is only implemented for nve4 for now, hence this patch
>> being RFC.
>>
>> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
>> ---
>> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 +
>> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 1 +
>> src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 2 ++
>> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 +
>> src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 2 +-
>> src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 7 ++++---
>> 6 files changed, 10 insertions(+), 4 deletions(-)
>>
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> index 94e54bb..41804b6 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>> @@ -461,6 +461,7 @@ enum SVSemantic
>> SV_BASEVERTEX,
>> SV_BASEINSTANCE,
>> SV_DRAWID,
>> + SV_WORK_DIM,
>> SV_UNDEFINED,
>> SV_LAST
>> };
>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> index 3708f37..f75f480 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>> @@ -408,6 +408,7 @@ static nv50_ir::SVSemantic translateSysVal(uint
>> sysval)
>> case TGSI_SEMANTIC_BASEVERTEX: return nv50_ir::SV_BASEVERTEX;
>> case TGSI_SEMANTIC_BASEINSTANCE: return nv50_ir::SV_BASEINSTANCE;
>> case TGSI_SEMANTIC_DRAWID: return nv50_ir::SV_DRAWID;
>> + case TGSI_SEMANTIC_WORK_DIM: return nv50_ir::SV_WORK_DIM;
>> default:
>> assert(0);
>> return nv50_ir::SV_CLOCK;
>> diff --git
>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> index 3bce962..1785623 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>> @@ -2178,6 +2178,8 @@ NVC0LoweringPass::handleRDSV(Instruction *i)
>> i->setSrc(0, bld.mkImm(sv == SV_GRIDID ? 0 : 1));
>> return true;
>> }
>> + // Fallthrough
>> + case SV_WORK_DIM:
>> addr += prog->driver->prop.cp.gridInfoBase;
>> bld.mkLoad(TYPE_U32, i->getDef(0),
>> bld.mkSymbol(FILE_MEMORY_CONST,
>> prog->driver->io.auxCBSlot,
>> diff --git
>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> index 9e1e7bf..80cb9fd 100644
>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>> @@ -293,6 +293,7 @@ TargetNVC0::getSVAddress(DataFile shaderFile,
>> const Symbol *sym) const
>> case SV_NTID: return kepler ? (0x00 + idx * 4) : ~0;
>> case SV_NCTAID: return kepler ? (0x0c + idx * 4) : ~0;
>> case SV_GRIDID: return kepler ? 0x18 : ~0;
>> + case SV_WORK_DIM: return 0x1c;
>> case SV_SAMPLE_INDEX: return 0;
>> case SV_SAMPLE_POS: return 0;
>> case SV_SAMPLE_MASK: return 0;
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> index 7fcbf4a..b6c52d5 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
>> @@ -113,7 +113,7 @@
>> /* 8 sets of 32-bits coordinate offsets */
>> #define NVC0_CB_AUX_MS_INFO 0x0a0 /* CP */
>> #define NVC0_CB_AUX_MS_SIZE (8 * 2 * 4)
>> -/* block/grid size, at 3 32-bits integers each and gridid */
>> +/* block/grid size, at 3 32-bits integers each, gridid and work_dim */
>> #define NVC0_CB_AUX_GRID_INFO 0x0e0 /* CP */
>> #define NVC0_CB_AUX_GRID_SIZE (7 * 4)
>
> You are lucky because this fits perfectly. :-)
>
>> /* 8 user clip planes, at 4 32-bits floats each */
>> diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> index 1fe6026..1ecf65c 100644
>> --- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> +++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
>> @@ -496,7 +496,7 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
>> PUSH_DATAh(push, address + NVC0_CB_AUX_GRID_INFO);
>> PUSH_DATA (push, address + NVC0_CB_AUX_GRID_INFO);
>> BEGIN_NVC0(push, NVE4_CP(UPLOAD_LINE_LENGTH_IN), 2);
>> - PUSH_DATA (push, 7 * 4);
>> + PUSH_DATA (push, 8 * 4);
Oh, and please use NVC0_CB_AUX_GRID_SIZE here instead of this magic
value. I forgot to do that when I introduced those constants, but we
should avoid magic numbers like this. :-)
>> PUSH_DATA (push, 0x1);
>>
>> if (unlikely(info->indirect)) {
>> @@ -506,18 +506,19 @@ nve4_compute_upload_input(struct nvc0_context
>> *nvc0,
>> nouveau_pushbuf_space(push, 16, 0, 1);
>> PUSH_REFN(push, res->bo, NOUVEAU_BO_RD | res->domain);
>>
>> - BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
>> + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>> PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>> PUSH_DATAp(push, info->block, 3);
>> nouveau_pushbuf_data(push, res->bo, offset,
>> NVC0_IB_ENTRY_1_NO_PREFETCH | 3 * 4);
>> } else {
>> - BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 7);
>> + BEGIN_1IC0(push, NVE4_CP(UPLOAD_EXEC), 1 + 8);
>> PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
>> PUSH_DATAp(push, info->block, 3);
>> PUSH_DATAp(push, info->grid, 3);
>> }
>> PUSH_DATA (push, 0);
>> + PUSH_DATA (push, info->work_dim);
>>
>> BEGIN_NVC0(push, NVE4_CP(FLUSH), 1);
>> PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
>>
>
--
-Samuel
More information about the mesa-dev mailing list.