[Nouveau] [PATCH mesa v2 1/2] nouveau: codegen: Use FILE_MEMORY_BUFFER for buffers
Samuel Pitoiset
samuel.pitoiset at gmail.com
Wed Mar 23 22:10:29 UTC 2016
Are you sure this won't break compute shaders on fermi?
Could you please double-check that?
One minor comment below.
On 03/17/2016 05:07 PM, Hans de Goede wrote:
> Some of the lowering steps we currently do for FILE_MEMORY_GLOBAL only
> apply to buffers, making it impossible to use FILE_MEMORY_GLOBAL for
> OpenCL global buffers.
>
> This commits changes the buffer code to use FILE_MEMORY_BUFFER at the
> ir_from_tgsi and lowering steps, freeing use of FILE_MEMORY_GLOBAL
> for use with OpenCL global buffers.
>
> Note that after lowering buffer accesses use the FILE_MEMORY_GLOBAL
> register file.
>
> Tested with piglet on a gk107, before this patch:
> ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader
> [9/9] pass: 9 /
> after:
> ./piglit run -o shader -t '.*arb_shader_storage_buffer_object.*' results/shader
> [9/9] pass: 9 /
>
> Signed-off-by: Hans de Goede <hdegoede at redhat.com>
> ---
> Changes in v2:
> -New patch in v2 of patch-set to re-enable support for global opencl buffers
> ---
> src/gallium/drivers/nouveau/codegen/nv50_ir.h | 1 +
> src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp | 2 +-
> src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp | 8 +++++---
> src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp | 1 +
> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp | 5 ++++-
> src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp | 1 +
> 6 files changed, 13 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index 7b0eb2f..5141fc6 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -332,6 +332,7 @@ enum DataFile
> FILE_MEMORY_CONST,
> FILE_SHADER_INPUT,
> FILE_SHADER_OUTPUT,
> + FILE_MEMORY_BUFFER,
> FILE_MEMORY_GLOBAL,
> FILE_MEMORY_SHARED,
> FILE_MEMORY_LOCAL,
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> index baa2e30..7ae0cb2 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
> @@ -373,7 +373,7 @@ static nv50_ir::DataFile translateFile(uint file)
> case TGSI_FILE_PREDICATE: return nv50_ir::FILE_PREDICATE;
> case TGSI_FILE_IMMEDIATE: return nv50_ir::FILE_IMMEDIATE;
> case TGSI_FILE_SYSTEM_VALUE: return nv50_ir::FILE_SYSTEM_VALUE;
> - case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_GLOBAL;
> + case TGSI_FILE_BUFFER: return nv50_ir::FILE_MEMORY_BUFFER;
> case TGSI_FILE_MEMORY: return nv50_ir::FILE_MEMORY_GLOBAL;
> case TGSI_FILE_SAMPLER:
> case TGSI_FILE_NULL:
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> index d0936d8..628deb7 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
> @@ -1141,13 +1141,14 @@ NVC0LoweringPass::handleATOM(Instruction *atom)
> handleSharedATOM(atom);
> return true;
> default:
> - assert(atom->src(0).getFile() == FILE_MEMORY_GLOBAL);
> + assert(atom->src(0).getFile() == FILE_MEMORY_BUFFER);
> base = loadResInfo64(ind, atom->getSrc(0)->reg.fileIndex * 16);
> assert(base->reg.size == 8);
> if (ptr)
> base = bld.mkOp2v(OP_ADD, TYPE_U64, base, base, ptr);
> assert(base->reg.size == 8);
> atom->setIndirect(0, 0, base);
> + atom->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
> return true;
> }
> base =
> @@ -1963,7 +1964,7 @@ NVC0LoweringPass::visit(Instruction *i)
> } else if (i->src(0).getFile() == FILE_SHADER_OUTPUT) {
> assert(prog->getType() == Program::TYPE_TESSELLATION_CONTROL);
> i->op = OP_VFETCH;
> - } else if (i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
> + } else if (i->src(0).getFile() == FILE_MEMORY_BUFFER) {
> Value *ind = i->getIndirect(0, 1);
> Value *ptr = loadResInfo64(ind, i->getSrc(0)->reg.fileIndex * 16);
> // XXX come up with a way not to do this for EVERY little access but
> @@ -1978,6 +1979,7 @@ NVC0LoweringPass::visit(Instruction *i)
> }
> i->setIndirect(0, 1, NULL);
> i->setIndirect(0, 0, ptr);
> + i->getSrc(0)->reg.file = FILE_MEMORY_GLOBAL;
> bld.mkCmp(OP_SET, CC_GT, TYPE_U32, pred, TYPE_U32, offset, length);
> i->setPredicate(CC_NOT_P, pred);
> if (i->defExists(0)) {
> @@ -1987,7 +1989,7 @@ NVC0LoweringPass::visit(Instruction *i)
> break;
> case OP_ATOM:
> {
> - const bool cctl = i->src(0).getFile() == FILE_MEMORY_GLOBAL;
> + const bool cctl = i->src(0).getFile() == FILE_MEMORY_BUFFER;
> handleATOM(i);
> handleCasExch(i, cctl);
> }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> index cfa85ec..870b36e 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> @@ -455,6 +455,7 @@ int Symbol::print(char *buf, size_t size,
> case FILE_MEMORY_CONST: c = 'c'; break;
> case FILE_SHADER_INPUT: c = 'a'; break;
> case FILE_SHADER_OUTPUT: c = 'o'; break;
> + case FILE_MEMORY_BUFFER: c = 'b'; break; // Only used before lowering
Could you please show me the output of NV50_PROG_DEBUG=255 with a test
which uses this file type? I'm not sure if using b[] is better than g[]
actually.
> case FILE_MEMORY_GLOBAL: c = 'g'; break;
> case FILE_MEMORY_SHARED: c = 's'; break;
> case FILE_MEMORY_LOCAL: c = 'l'; break;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> index 2c4d7f5..2af1715 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
> @@ -207,6 +207,7 @@ TargetNV50::getFileSize(DataFile file) const
> case FILE_MEMORY_CONST: return 65536;
> case FILE_SHADER_INPUT: return 0x200;
> case FILE_SHADER_OUTPUT: return 0x200;
> + case FILE_MEMORY_BUFFER: return 0xffffffff;
> case FILE_MEMORY_GLOBAL: return 0xffffffff;
> case FILE_MEMORY_SHARED: return 16 << 10;
> case FILE_MEMORY_LOCAL: return 48 << 10;
> @@ -406,7 +407,8 @@ TargetNV50::isAccessSupported(DataFile file, DataType ty) const
> if (ty == TYPE_B96 || ty == TYPE_NONE)
> return false;
> if (typeSizeof(ty) > 4)
> - return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL);
> + return (file == FILE_MEMORY_LOCAL) || (file == FILE_MEMORY_GLOBAL) ||
> + (file == FILE_MEMORY_BUFFER);
> return true;
> }
>
> @@ -509,6 +511,7 @@ int TargetNV50::getLatency(const Instruction *i) const
> switch (i->src(0).getFile()) {
> case FILE_MEMORY_LOCAL:
> case FILE_MEMORY_GLOBAL:
> + case FILE_MEMORY_BUFFER:
> return 100; // really 400 to 800
> default:
> return 22;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> index a03afa8..9e1e7bf 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
> @@ -248,6 +248,7 @@ TargetNVC0::getFileSize(DataFile file) const
> case FILE_MEMORY_CONST: return 65536;
> case FILE_SHADER_INPUT: return 0x400;
> case FILE_SHADER_OUTPUT: return 0x400;
> + case FILE_MEMORY_BUFFER: return 0xffffffff;
> case FILE_MEMORY_GLOBAL: return 0xffffffff;
> case FILE_MEMORY_SHARED: return 16 << 10;
> case FILE_MEMORY_LOCAL: return 48 << 10;
>
More information about the Nouveau
mailing list