[Mesa-dev] [PATCH mesa 5/6] nouveau: codegen: Add support for OpenCL global memory buffers

Hans de Goede hdegoede at redhat.com
Wed Mar 16 09:23:58 UTC 2016


Add support for OpenCL global memory buffers. Note that this has only
been tested with regular loads and stores, and likely needs more work
for e.g. atomic ops.

Signed-off-by: Hans de Goede <hdegoede at redhat.com>
---
 src/gallium/drivers/nouveau/codegen/nv50_ir.h      |  1 +
 .../drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp | 31 +++++++++++++++++-----
 .../drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp |  5 +++-
 .../drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp  | 10 ++++---
 .../drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp  | 26 +++++++++++++-----
 .../drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp  | 14 +++++++---
 .../drivers/nouveau/codegen/nv50_ir_peephole.cpp   |  5 +++-
 .../drivers/nouveau/codegen/nv50_ir_print.cpp      |  1 +
 .../nouveau/codegen/nv50_ir_target_nv50.cpp        |  1 +
 .../nouveau/codegen/nv50_ir_target_nvc0.cpp        |  1 +
 10 files changed, 74 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index fdc2195..5141fc6 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -333,6 +333,7 @@ enum DataFile
    FILE_SHADER_INPUT,
    FILE_SHADER_OUTPUT,
    FILE_MEMORY_BUFFER,
+   FILE_MEMORY_GLOBAL,
    FILE_MEMORY_SHARED,
    FILE_MEMORY_LOCAL,
    FILE_SYSTEM_VALUE,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
index 02a1101..62f1598 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gk110.cpp
@@ -1641,8 +1641,15 @@ CodeEmitterGK110::emitSTORE(const Instruction *i)
    int32_t offset = SDATA(i->src(0)).offset;
 
    switch (i->src(0).getFile()) {
-   case FILE_MEMORY_BUFFER: code[1] = 0xe0000000; code[0] = 0x00000000; break;
-   case FILE_MEMORY_LOCAL:  code[1] = 0x7a800000; code[0] = 0x00000002; break;
+   case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
+      code[0] = 0x00000000;
+      code[1] = 0xe0000000;
+      break;
+   case FILE_MEMORY_LOCAL:
+      code[0] = 0x00000002;
+      code[1] = 0x7a800000;
+      break;
    case FILE_MEMORY_SHARED:
       code[0] = 0x00000002;
       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED)
@@ -1678,7 +1685,8 @@ CodeEmitterGK110::emitSTORE(const Instruction *i)
 
    srcId(i->src(1), 2);
    srcId(i->src(0).getIndirect(0), 10);
-   if (i->src(0).getFile() == FILE_MEMORY_BUFFER &&
+   if ((i->src(0).getFile() == FILE_MEMORY_BUFFER ||
+        i->src(0).getFile() == FILE_MEMORY_GLOBAL) &&
        i->src(0).isIndirect(0) &&
        i->getIndirect(0, 0)->reg.size == 8)
       code[1] |= 1 << 23;
@@ -1690,8 +1698,15 @@ CodeEmitterGK110::emitLOAD(const Instruction *i)
    int32_t offset = SDATA(i->src(0)).offset;
 
    switch (i->src(0).getFile()) {
-   case FILE_MEMORY_BUFFER: code[1] = 0xc0000000; code[0] = 0x00000000; break;
-   case FILE_MEMORY_LOCAL:  code[1] = 0x7a000000; code[0] = 0x00000002; break;
+   case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
+      code[0] = 0x00000000;
+      code[1] = 0xc0000000;
+      break;
+   case FILE_MEMORY_LOCAL:
+      code[0] = 0x00000002;
+      code[1] = 0x7a000000;
+      break;
    case FILE_MEMORY_SHARED:
       code[0] = 0x00000002;
       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
@@ -1800,7 +1815,8 @@ CodeEmitterGK110::emitMOV(const Instruction *i)
 static inline bool
 uses64bitAddress(const Instruction *ldst)
 {
-   return ldst->src(0).getFile() == FILE_MEMORY_BUFFER &&
+   return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER ||
+           ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) &&
       ldst->src(0).isIndirect(0) &&
       ldst->getIndirect(0, 0)->reg.size == 8;
 }
@@ -1862,7 +1878,8 @@ CodeEmitterGK110::emitCCTL(const Instruction *i)
 
    code[0] = 0x00000002 | (i->subOp << 2);
 
-   if (i->src(0).getFile() == FILE_MEMORY_BUFFER) {
+   if (i->src(0).getFile() == FILE_MEMORY_BUFFER ||
+       i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
       code[1] = 0x7b000000;
    } else {
       code[1] = 0x7c000000;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
index 27f287f..3fcdc55 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
@@ -2417,7 +2417,8 @@ void
 CodeEmitterGM107::emitCCTL()
 {
    unsigned width;
-   if (insn->src(0).getFile() == FILE_MEMORY_BUFFER) {
+   if (insn->src(0).getFile() == FILE_MEMORY_BUFFER ||
+       insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
       emitInsn(0xef600000);
       width = 30;
    } else {
@@ -2989,6 +2990,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
       case FILE_MEMORY_LOCAL : emitLDL(); break;
       case FILE_MEMORY_SHARED: emitLDS(); break;
       case FILE_MEMORY_BUFFER: emitLD(); break;
+      case FILE_MEMORY_GLOBAL: emitLD(); break;
       default:
          assert(!"invalid load");
          emitNOP();
@@ -3000,6 +3002,7 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
       case FILE_MEMORY_LOCAL : emitSTL(); break;
       case FILE_MEMORY_SHARED: emitSTS(); break;
       case FILE_MEMORY_BUFFER: emitST(); break;
+      case FILE_MEMORY_GLOBAL: emitST(); break;
       default:
          assert(!"invalid load");
          emitNOP();
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 7476e21..2653c82 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -663,6 +663,7 @@ CodeEmitterNV50::emitLOAD(const Instruction *i)
       code[1] = 0x40000000;
       break;
    case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
       code[1] = 0x80000000;
       break;
@@ -671,7 +672,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i)
       break;
    }
    if (sf == FILE_MEMORY_LOCAL ||
-       sf == FILE_MEMORY_BUFFER)
+       sf == FILE_MEMORY_BUFFER ||
+       sf == FILE_MEMORY_GLOBAL)
       emitLoadStoreSizeLG(i->sType, 21 + 32);
 
    setDst(i, 0);
@@ -679,7 +681,8 @@ CodeEmitterNV50::emitLOAD(const Instruction *i)
    emitFlagsRd(i);
    emitFlagsWr(i);
 
-   if (i->src(0).getFile() == FILE_MEMORY_BUFFER) {
+   if (i->src(0).getFile() == FILE_MEMORY_BUFFER ||
+       i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
       srcId(*i->src(0).getIndirect(0), 9);
    } else {
       setAReg16(i, 0);
@@ -700,6 +703,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i)
       srcId(i->src(1), 32 + 14);
       break;
    case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
       code[0] = 0xd0000001 | (i->getSrc(0)->reg.fileIndex << 16);
       code[1] = 0xa0000000;
       emitLoadStoreSizeLG(i->dType, 21 + 32);
@@ -737,7 +741,7 @@ CodeEmitterNV50::emitSTORE(const Instruction *i)
       break;
    }
 
-   if (f == FILE_MEMORY_BUFFER)
+   if (f == FILE_MEMORY_BUFFER || f == FILE_MEMORY_GLOBAL)
       srcId(*i->src(0).getIndirect(0), 9);
    else
       setAReg16(i, 0);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 6236659..ca475ce 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -281,6 +281,7 @@ CodeEmitterNVC0::setAddressByFile(const ValueRef& src)
 {
    switch (src.getFile()) {
    case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
       srcAddr32(src, 26, 0);
       break;
    case FILE_MEMORY_LOCAL:
@@ -1768,7 +1769,8 @@ CodeEmitterNVC0::emitCachingMode(CacheMode c)
 static inline bool
 uses64bitAddress(const Instruction *ldst)
 {
-   return ldst->src(0).getFile() == FILE_MEMORY_BUFFER &&
+   return (ldst->src(0).getFile() == FILE_MEMORY_BUFFER ||
+           ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) &&
       ldst->src(0).isIndirect(0) &&
       ldst->getIndirect(0, 0)->reg.size == 8;
 }
@@ -1779,8 +1781,13 @@ CodeEmitterNVC0::emitSTORE(const Instruction *i)
    uint32_t opc;
 
    switch (i->src(0).getFile()) {
-   case FILE_MEMORY_BUFFER: opc = 0x90000000; break;
-   case FILE_MEMORY_LOCAL:  opc = 0xc8000000; break;
+   case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
+      opc = 0x90000000;
+      break;
+   case FILE_MEMORY_LOCAL:
+      opc = 0xc8000000;
+      break;
    case FILE_MEMORY_SHARED:
       if (i->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
@@ -1828,8 +1835,13 @@ CodeEmitterNVC0::emitLOAD(const Instruction *i)
    code[0] = 0x00000005;
 
    switch (i->src(0).getFile()) {
-   case FILE_MEMORY_BUFFER: opc = 0x80000000; break;
-   case FILE_MEMORY_LOCAL:  opc = 0xc0000000; break;
+   case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
+      opc = 0x80000000;
+      break;
+   case FILE_MEMORY_LOCAL:
+      opc = 0xc0000000;
+      break;
    case FILE_MEMORY_SHARED:
       if (i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
          if (targ->getChipset() >= NVISA_GK104_CHIPSET)
@@ -2090,7 +2102,8 @@ CodeEmitterNVC0::emitCCTL(const Instruction *i)
 {
    code[0] = 0x00000005 | (i->subOp << 5);
 
-   if (i->src(0).getFile() == FILE_MEMORY_BUFFER) {
+   if (i->src(0).getFile() == FILE_MEMORY_BUFFER ||
+       i->src(0).getFile() == FILE_MEMORY_GLOBAL) {
       code[1] = 0x98000000;
       srcAddr32(i->src(0), 28, 2);
    } else {
@@ -3122,6 +3135,7 @@ SchedDataCalculator::checkRd(const Value *v, int cycle, int& delay) const
    case FILE_MEMORY_CONST:
    case FILE_MEMORY_SHARED:
    case FILE_MEMORY_BUFFER:
+   case FILE_MEMORY_GLOBAL:
    case FILE_SYSTEM_VALUE:
       // TODO: any restrictions here ?
       break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 91879e4..c167c4a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -374,7 +374,7 @@ static nv50_ir::DataFile translateFile(uint file)
    case TGSI_FILE_IMMEDIATE:       return nv50_ir::FILE_IMMEDIATE;
    case TGSI_FILE_SYSTEM_VALUE:    return nv50_ir::FILE_SYSTEM_VALUE;
    case TGSI_FILE_BUFFER:          return nv50_ir::FILE_MEMORY_BUFFER;
-   case TGSI_FILE_MEMORY:          return nv50_ir::FILE_MEMORY_BUFFER;
+   case TGSI_FILE_MEMORY:          return nv50_ir::FILE_MEMORY_GLOBAL;
    case TGSI_FILE_SAMPLER:
    case TGSI_FILE_NULL:
    default:
@@ -1284,7 +1284,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
          if (dst.isIndirect(0))
             indirectTempArrays.insert(dst.getArrayId());
       } else
-      if (dst.getFile() == TGSI_FILE_BUFFER) {
+      if (dst.getFile() == TGSI_FILE_BUFFER ||
+          (dst.getFile() == TGSI_FILE_MEMORY &&
+           memoryFiles[dst.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
          info->io.globalAccess |= 0x2;
       }
    }
@@ -1295,7 +1297,9 @@ bool Source::scanInstruction(const struct tgsi_full_instruction *inst)
          if (src.isIndirect(0))
             indirectTempArrays.insert(src.getArrayId());
       } else
-      if (src.getFile() == TGSI_FILE_BUFFER) {
+      if (src.getFile() == TGSI_FILE_BUFFER ||
+          (src.getFile() == TGSI_FILE_MEMORY &&
+           memoryFiles[src.getIndex(0)].mem_type == TGSI_MEMORY_TYPE_GLOBAL)) {
          info->io.globalAccess |= (insn.getOpcode() == TGSI_OPCODE_LOAD) ?
                0x1 : 0x2;
       } else
@@ -1529,6 +1533,10 @@ Converter::makeSym(uint tgsiFile, int fileIdx, int idx, int c, uint32_t address)
 
    if (tgsiFile == TGSI_FILE_MEMORY) {
       switch (code->memoryFiles[fileIdx].mem_type) {
+      case TGSI_MEMORY_TYPE_GLOBAL:
+         /* No-op: this is already the default for TGSI_FILE_MEMORY */
+         sym->setFile(FILE_MEMORY_GLOBAL);
+         break;
       case TGSI_MEMORY_TYPE_SHARED:
          sym->setFile(FILE_MEMORY_SHARED);
          break;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 4a96d04..84d2944 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -2581,6 +2581,7 @@ MemoryOpt::runOpt(BasicBlock *bb)
              ldst->op == OP_MEMBAR) {
             purgeRecords(NULL, FILE_MEMORY_LOCAL);
             purgeRecords(NULL, FILE_MEMORY_BUFFER);
+            purgeRecords(NULL, FILE_MEMORY_GLOBAL);
             purgeRecords(NULL, FILE_MEMORY_SHARED);
             purgeRecords(NULL, FILE_SHADER_OUTPUT);
          } else
@@ -2588,6 +2589,7 @@ MemoryOpt::runOpt(BasicBlock *bb)
             if (ldst->src(0).getFile() == FILE_MEMORY_BUFFER) {
                purgeRecords(NULL, FILE_MEMORY_LOCAL);
                purgeRecords(NULL, FILE_MEMORY_BUFFER);
+               purgeRecords(NULL, FILE_MEMORY_GLOBAL);
                purgeRecords(NULL, FILE_MEMORY_SHARED);
             } else {
                purgeRecords(NULL, ldst->src(0).getFile());
@@ -2607,7 +2609,8 @@ MemoryOpt::runOpt(BasicBlock *bb)
          DataFile file = ldst->src(0).getFile();
 
          // if ld l[]/g[] look for previous store to eliminate the reload
-         if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL) {
+         if (file == FILE_MEMORY_BUFFER || file == FILE_MEMORY_LOCAL ||
+             file == FILE_MEMORY_GLOBAL) {
             // TODO: shared memory ?
             rec = findRecord(ldst, false, isAdjacent);
             if (rec && !isAdjacent)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index 73ed753..3917768 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -456,6 +456,7 @@ int Symbol::print(char *buf, size_t size,
    case FILE_SHADER_INPUT:  c = 'a'; break;
    case FILE_SHADER_OUTPUT: c = 'o'; break;
    case FILE_MEMORY_BUFFER: c = 'g'; break;
+   case FILE_MEMORY_GLOBAL: c = 'g'; break;
    case FILE_MEMORY_SHARED: c = 's'; break;
    case FILE_MEMORY_LOCAL:  c = 'l'; break;
    default:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
index 1cd45a2..5c60b22 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nv50.cpp
@@ -208,6 +208,7 @@ TargetNV50::getFileSize(DataFile file) const
    case FILE_SHADER_INPUT:  return 0x200;
    case FILE_SHADER_OUTPUT: return 0x200;
    case FILE_MEMORY_BUFFER: return 0xffffffff;
+   case FILE_MEMORY_GLOBAL: return 0xffffffff;
    case FILE_MEMORY_SHARED: return 16 << 10;
    case FILE_MEMORY_LOCAL:  return 48 << 10;
    case FILE_SYSTEM_VALUE:  return 16;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
index bda59a5..9e1e7bf 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
@@ -249,6 +249,7 @@ TargetNVC0::getFileSize(DataFile file) const
    case FILE_SHADER_INPUT:  return 0x400;
    case FILE_SHADER_OUTPUT: return 0x400;
    case FILE_MEMORY_BUFFER: return 0xffffffff;
+   case FILE_MEMORY_GLOBAL: return 0xffffffff;
    case FILE_MEMORY_SHARED: return 16 << 10;
    case FILE_MEMORY_LOCAL:  return 48 << 10;
    case FILE_SYSTEM_VALUE:  return 32;
-- 
2.7.2



More information about the mesa-dev mailing list