[Nouveau] [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
Hans de Goede
hdegoede at redhat.com
Tue Apr 12 10:04:41 UTC 2016
Hi,
On 08-04-16 18:14, Samuel Pitoiset wrote:
>
>
> On 04/08/2016 12:17 PM, Hans de Goede wrote:
>> Hi,
>>
>> On 23-03-16 23:10, Samuel Pitoiset wrote:
>>> Are you sure this won't break compute shaders on fermi?
>>> Could you please double-check that?
>>
>> I just checked:
>>
>> lspci:
>> 01:00.0 VGA compatible controller: NVIDIA Corporation GF119 [GeForce GT
>> 610] (rev a1)
>>
>> Before this patch-set:
>>
>> [hans at plank piglit]$ ./piglit run -o shader -t
>> '.*arb_shader_storage_buffer_object.*' results/shader
>> [9/9] pass: 9 /
>>
>> After this patch-set:
>>
>> [hans at plank piglit]$ ./piglit run -o shader -t
>> '.*arb_shader_storage_buffer_object.*' results/shader
>> [9/9] pass: 9 /
>
> And what about compute shaders? (ie. -t arb_compute_shader)
> Sorry to ask you again but I just want to be sure it's fine. :-)
Those tests don't run yet on mesa master on the GF119 card
I've:
[hans at plank piglit]$ ./piglit run -o shader -t '.*arb_compute_shader.*' results/shader
[20/20] skip: 20 |
My patches don't change anything about this, and they only touch
the buffer handling paths and the buffer tests (still) pass...
I've also tried on a:
01:00.0 VGA compatible controller: NVIDIA Corporation GK107 [GeForce GT 740] (rev a1)
Same result.
Regards,
Hans
>
>>
>>> One minor comment below.
>>>
>>> On 03/17/2016 05:07 PM, Hans de Goede wrote:
>>>> Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only
>>>> apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for
>>>> OpenCL global buffers.
>>>>
>>>> This commits changes the buffer code to use FILE_MEMORY_BUFFER at the
>>>> ir_from_tgsi and lowering steps, freeing use of FILE_MEMORY_GLOBAL
>>>> for use with OpenCL global buffers.
>>>>
>>>> Note that after lowering buffer accesses use the FILE_MEMORY_GLOBAL
>>>> register file.
>>>>
>>>> Tested with piglet on a gk107, before this patch:
>>>> ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*'
>>>> results/shader
>>>> [9/9] pass: 9 /
>>>> after:
>>>> ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*'
>>>> results/shader
>>>> [9/9] pass: 9 /
>>>>
>>>> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
>>>> ---
>>>> Changes in v2:
>>>> -New patch in v2 of patch-set to re-enable support for global opencl
>>>> buffers
>>>> ---
>>>> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 +
>>>> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +-
>>>> src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8
>>>> +++++---
>>>> src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 +
>>>> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5
>>>> ++++-
>>>> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 +
>>>> 6 files changed, 13 insertions(+), 5 deletions(-)
>>>>
>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>>> index 7b0eb2f..5141fc6 100644
>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
>>>> @@ -332,6 +332,7 @@ enum DataFile
>>>> FILE_MEMORY_CONST,
>>>> FILE_SHADER_INPUT,
>>>> FILE_SHADER_OUTPUT,
>>>> + FILE_MEMORY_BUFFER,
>>>> FILE_MEMORY_GLOBAL,
>>>> FILE_MEMORY_SHARED,
>>>> FILE_MEMORY_LOCAL,
>>>> diff --git
>>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>>> index baa2e30..7ae0cb2 100644
>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
>>>> @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
>>>> case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
>>>> case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
>>>> case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
>>>> - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
>>>> + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
>>>> case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
>>>> case TGSI_FILE_SAMPLER:
>>>> case TGSI_FILE_NULL:
>>>> diff --git
>>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>>> index d0936d8..628deb7 100644
>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
>>>> @@ -1141,13 +1141,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
>>>> handleSharedATOM(atom);
>>>> return true;
>>>> default:
>>>> - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
>>>> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
>>>> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
>>>> assert(base->reg.size == 8);
>>>> if (ptr)
>>>> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
>>>> assert(base->reg.size == 8);
>>>> atom->setIndirect(0, 0, base);
>>>> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
>>>> return true;
>>>> }
>>>> base =
>>>> @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i)
>>>> } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
>>>> assert(prog->getType() ==
>>>> Program::TYPE_TESSELLATION_CONTROL);
>>>> i->op = OP_VFETCH;
>>>> - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
>>>> + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) {
>>>> Value *ind = i->getIndirect(0, 1);
>>>> Value *ptr = loadResInfo64(ind,
>>>> i->getSrc(0)->reg.fileIndex * 16);
>>>> // XXX come up with a way not to do this for EVERY little
>>>> access but
>>>> @@ -1978,6 +1979,7 @@ NVC0LoweringPass::visit(Instruction *i)
>>>> }
>>>> i->setIndirect(0, 1, NULL);
>>>> i->setIndirect(0, 0, ptr);
>>>> + i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
>>>> bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset,
>>>> length);
>>>> i->setPredicate(CC_NOT_P, pred);
>>>> if (i->defExists(0)) {
>>>> @@ -1987,7 +1989,7 @@ NVC0LoweringPass::visit(Instruction *i)
>>>> break;
>>>> case OP_ATOM:
>>>> {
>>>> - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
>>>> + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER;
>>>> handleATOM(i);
>>>> handleCasExch(i, cctl);
>>>> }
>>>> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>>> index cfa85ec..870b36e 100644
>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
>>>> @@ -455,6 +455,7 @@ int Symbol::print(char *buf, size_t size,
>>>> case FILE_MEMORY_CONST: c = 'c'; break;
>>>> case FILE_SHADER_INPUT: c = 'a'; break;
>>>> case FILE_SHADER_OUTPUT: c = 'o'; break;
>>>> + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before
>>>> lowering
>>>
>>> Could you please show me the output of NV50_PROG_DEBUG=255 with a test
>>> which uses this file type? I'm not sure if using b[] is better than
>>> g[] actually.
>>
>> NV50_PROG_DEBUG=255 bin/arb_shader_storage_buffer_object-rendering
>>
>> ...
>>
>> MAIN:-1 ()
>> BB:0 (52 instructions) - df = { }
>> -> BB:1 (tree)
>> 0: mov u32 %r1 0x00000000 (0)
>> 1: ld u32 %r0 b[0x0] (0)
>> 2: presin f32 %r3 %r0 (0)
>> 3: cos f32 %r3 %r3 (0)
>> 4: mov u32 %r2 %r3 (0)
>> ...
>>
>> And after the first lowering step:
>>
>> MAIN:-1 ()
>> BB:0 (77 instructions) - df = { }
>> -> BB:1 (tree)
>> 0: mov u32 %r56 0x00000000 (0)
>> 1: ld u64 %r57d c15[0x300] (0)
>> 2: mov u32 %r58 0x00000004 (0)
>> 3: ld u32 %r59 c15[0x308] (0)
>> 4: set u8 %p60 gt u32 %r58 %r59 (0)
>> 5: mov u32 %r61 0x00000000 (0)
>> 6: not %p60 ld u32 %r62 g[%r57d+0x0] (0)
>> ...
>>
>> Note how the 'b' printing is only used before the buffer access
>> is lowered to a global access, so this seems to be the right thing todo.
>>
>
> Fine by me.
> Thanks.
>
>> Regards,
>>
>> Hans
>>
>>
>>>
>>>> case FILE_MEMORY_GLOBAL: c = 'g'; break;
>>>> case FILE_MEMORY_SHARED: c = 's'; break;
>>>> case FILE_MEMORY_LOCAL: c = 'l'; break;
>>>> diff --git
>>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>>> index 2c4d7f5..2af1715 100644
>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
>>>> @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const
>>>> case FILE_MEMORY_CONST: return 65536;
>>>> case FILE_SHADER_INPUT: return 0x200;
>>>> case FILE_SHADER_OUTPUT: return 0x200;
>>>> + case FILE_MEMORY_BUFFER: return 0xffffffff;
>>>> case FILE_MEMORY_GLOBAL: return 0xffffffff;
>>>> case FILE_MEMORY_SHARED: return 16 << 10;
>>>> case FILE_MEMORY_LOCAL: return 48 << 10;
>>>> @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file,
>>>> DataType ty) const
>>>> if (ty == TYPE_B96 || ty == TYPE_NONE)
>>>> return false;
>>>> if (typeSizeof(ty) > 4)
>>>> - return (file == FILE_MEMORY_LOCAL) || (file ==
>>>> FILE_MEMORY_GLOBAL);
>>>> + return (file == FILE_MEMORY_LOCAL) || (file ==
>>>> FILE_MEMORY_GLOBAL) ||
>>>> + (file == FILE_MEMORY_BUFFER);
>>>> return true;
>>>> }
>>>>
>>>> @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i)
>>>> const
>>>> switch (i->src(0).getFile()) {
>>>> case FILE_MEMORY_LOCAL:
>>>> case FILE_MEMORY_GLOBAL:
>>>> + case FILE_MEMORY_BUFFER:
>>>> return 100; // really 400 to 800
>>>> default:
>>>> return 22;
>>>> diff --git
>>>> a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>>> b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>>> index a03afa8..9e1e7bf 100644
>>>> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>>> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
>>>> @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const
>>>> case FILE_MEMORY_CONST: return 65536;
>>>> case FILE_SHADER_INPUT: return 0x400;
>>>> case FILE_SHADER_OUTPUT: return 0x400;
>>>> + case FILE_MEMORY_BUFFER: return 0xffffffff;
>>>> case FILE_MEMORY_GLOBAL: return 0xffffffff;
>>>> case FILE_MEMORY_SHARED: return 16 << 10;
>>>> case FILE_MEMORY_LOCAL: return 48 << 10;
>>>>
More information about the Nouveau
mailing list