[Mesa-dev] [PATCH] nv50/ir, nvc0: add debug options for shader replacement

Rhys Perry pendingchaos02 at gmail.com
Tue May 29 16:17:56 UTC 2018


Changes in v4:
- Move code to nv50_ir_dump.cpp
- Dump headers of nvc0 programs
- Use CRC-32 instead of a truncated SHA1
- Set prog->maxGPR to targ->getFileSize() - 1 and set prog->tlsSize
- Don't compile the program if a replacement is offered
    This has the consequence that a program is not dumped when it's replaced
Changes in v3:
- Fixed messed up patch description and diff
- Use the checksum of the TGSI instead of the binary if possible
Changes in v2:
- move "#ifdef DEBUG" from above dumpProgram to above createDumpFilename

The NV50_PROG_DUMP environment variable specifies an (already created)
directory to dump shader binaries, headers and TGSI code. The
NV50_PROG_REPLACE environment variable specifies an (already created)
directory that is searched to find replacement binaries and headers. This
is all much like MESA_SHADER_DUMP_PATH and MESA_SHADER_READ_PATH except
using CRC-32 checksums instead of program IDs and chip-specific binaries
instead of GLSL.

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
---
 src/gallium/auxiliary/tgsi/tgsi_util.h             |   1 +
 src/gallium/drivers/nouveau/Makefile.sources       |   2 +
 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp    |  40 +++--
 .../drivers/nouveau/codegen/nv50_ir_driver.h       |   1 +
 .../drivers/nouveau/codegen/nv50_ir_dump.cpp       | 171 +++++++++++++++++++++
 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h |  70 +++++++++
 src/gallium/drivers/nouveau/meson.build            |   2 +
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c    | 138 +++++++++++------
 8 files changed, 360 insertions(+), 65 deletions(-)
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp
 create mode 100644 src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h

diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h
index 686b90f467..81cf955d8f 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_util.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_util.h
@@ -28,6 +28,7 @@
 #ifndef TGSI_UTIL_H
 #define TGSI_UTIL_H
 
+#include "pipe/p_compiler.h"
 #include "pipe/p_shader_tokens.h"
 
 #if defined __cplusplus
diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
index 65f08c7d8d..e867221818 100644
--- a/src/gallium/drivers/nouveau/Makefile.sources
+++ b/src/gallium/drivers/nouveau/Makefile.sources
@@ -114,6 +114,8 @@ NV50_CODEGEN_SOURCES := \
 	codegen/nv50_ir_build_util.cpp \
 	codegen/nv50_ir_build_util.h \
 	codegen/nv50_ir_driver.h \
+	codegen/nv50_ir_dump.cpp \
+	codegen/nv50_ir_dump.h \
 	codegen/nv50_ir_emit_nv50.cpp \
 	codegen/nv50_ir_from_tgsi.cpp \
 	codegen/nv50_ir_graph.cpp \
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index c987da9908..b1782bb4f2 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -23,6 +23,7 @@
 #include "codegen/nv50_ir.h"
 #include "codegen/nv50_ir_target.h"
 #include "codegen/nv50_ir_driver.h"
+#include "codegen/nv50_ir_dump.h"
 
 extern "C" {
 #include "nouveau_debug.h"
@@ -1244,30 +1245,35 @@ nv50_ir_generate_code(struct nv50_ir_prog_info *info)
       prog->print();
 
    targ->parseDriverInfo(info);
-   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
-   prog->convertToSSA();
+   if (!nv50_ir::replaceProgramCode(prog)) {
+      prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_PRE_SSA);
 
-   if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
-      prog->print();
+      prog->convertToSSA();
 
-   prog->optimizeSSA(info->optLevel);
-   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
+      if (prog->dbgFlags & NV50_IR_DEBUG_VERBOSE)
+         prog->print();
 
-   if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
-      prog->print();
+      prog->optimizeSSA(info->optLevel);
+      prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_SSA);
 
-   if (!prog->registerAllocation()) {
-      ret = -4;
-      goto out;
-   }
-   prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
+      if (prog->dbgFlags & NV50_IR_DEBUG_BASIC)
+         prog->print();
 
-   prog->optimizePostRA(info->optLevel);
+      if (!prog->registerAllocation()) {
+         ret = -4;
+         goto out;
+      }
+      prog->getTarget()->runLegalizePass(prog, nv50_ir::CG_STAGE_POST_RA);
 
-   if (!prog->emitBinary(info)) {
-      ret = -5;
-      goto out;
+      prog->optimizePostRA(info->optLevel);
+
+      if (!prog->emitBinary(info)) {
+         ret = -5;
+         goto out;
+      }
+
+      nv50_ir::dumpProgramCodeAndIR(prog);
    }
 
 out:
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index 3d0782f86b..9c23c74628 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -93,6 +93,7 @@ struct nv50_ir_prog_info
       uint32_t codeSize;
       uint32_t instructions;
       uint8_t sourceRep;  /* PIPE_SHADER_IR_* */
+      uint32_t sourceHash; /* CRC-32 */
       const void *source;
       void *relocData;
       void *fixupData;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp
new file mode 100644
index 0000000000..2d421e8e03
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.cpp
@@ -0,0 +1,171 @@
+#include "nv50_ir_dump.h"
+
+#include "codegen/nv50_ir_target.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/crc32.h"
+
+#ifdef DEBUG
+/* Build "<dir>/<hash as 8 hex digits><stage suffix><ext>" in a MALLOC'd
+ * buffer; a '/' is added only when dir does not already end in one.
+ * Returns NULL if the allocation fails, otherwise the caller FREE()s it.
+ */
+static char *
+createDumpFilename(const char *dir, const nv50_ir_prog_info *info, const char *ext)
+{
+   const char *stage = "";
+   switch (info->type) {
+   case PIPE_SHADER_VERTEX:    stage = ".vs";  break;
+   case PIPE_SHADER_TESS_CTRL: stage = ".tcs"; break;
+   case PIPE_SHADER_TESS_EVAL: stage = ".tes"; break;
+   case PIPE_SHADER_GEOMETRY:  stage = ".gs";  break;
+   case PIPE_SHADER_FRAGMENT:  stage = ".fs";  break;
+   case PIPE_SHADER_COMPUTE:   stage = ".cs";  break;
+   default:
+      assert(0);
+      break;
+   }
+
+   const size_t dirLen = strlen(dir);
+   const char *sep = (dirLen && dir[dirLen - 1] == '/') ? "" : "/";
+
+   /* Size the buffer from the actual pieces: a fixed "+ 13" is one byte short
+    * for "<dir>/<hash>.tcs" and ".tes" (1 + 8 + 4 + NUL = 14). The constant 9
+    * below is the 8 hex digits of the hash plus the terminating NUL.
+    */
+   const size_t len = dirLen + strlen(sep) + strlen(stage) + strlen(ext) + 9;
+   char *fname = (char *)MALLOC(len);
+   if (!fname)
+      return NULL;
+
+   snprintf(fname, len, "%s%s%.8x%s%s", dir, sep, info->bin.sourceHash,
+            stage, ext);
+
+   return fname;
+}
+
+extern "C" {
+
+/* Store the CRC-32 of the shader source in info->bin.sourceHash. Only TGSI is
+ * handled; the token count is taken from the leading tgsi_header. */
+void
+nv50_ir_create_source_hash(nv50_ir_prog_info *info)
+{
+   if (info->bin.sourceRep != PIPE_SHADER_IR_TGSI) {
+      assert(0);
+      return;
+   }
+
+   const tgsi_header *hdr = (const tgsi_header *)info->bin.source;
+   const unsigned numTokens = hdr->HeaderSize + hdr->BodySize;
+   const unsigned numBytes = numTokens * sizeof(tgsi_token);
+   info->bin.sourceHash = util_hash_crc32(info->bin.source, numBytes);
+}
+
+/* Open "<NV50_PROG_DUMP>/<hash><stage><ext>" for writing. Returns NULL if the
+ * variable is unset or the file can't be opened; the caller fclose()s it. */
+FILE *
+nv50_ir_begin_dump(const nv50_ir_prog_info *info, const char *what,
+                   const char *ext, bool binary)
+{
+   const char *dump_dir = debug_get_option("NV50_PROG_DUMP", NULL);
+   if (!dump_dir)
+      return NULL;
+
+   char *fname = createDumpFilename(dump_dir, info, ext);
+   FILE *fp = fname ? fopen(fname, binary ? "wb" : "w") : NULL;
+   if (fp) {
+      INFO("Dumping %s of a program to %s\n", what, fname);
+   } else {
+      INFO("Failed to dump %s of a program to %s\n", what, fname ? fname : "?");
+   }
+
+   /* The name was only needed for the messages; free it on both paths. */
+   FREE(fname);
+   return fp;
+}
+
+/* Read "<NV50_PROG_REPLACE>/<hash><stage><ext>" into *data (MALLOC'd, freed by
+ * the caller) and its length into *size. Returns false if nothing was loaded. */
+bool
+nv50_ir_get_replacement(const nv50_ir_prog_info *info, const char *what,
+                        const char *ext, size_t *size, void **data)
+{
+   const char *replace_dir = debug_get_option("NV50_PROG_REPLACE", NULL);
+   if (!replace_dir)
+      return false;
+
+   char *fname = createDumpFilename(replace_dir, info, ext);
+   FILE *fp = fname ? fopen(fname, "rb") : NULL;
+   size_t bufSize = 65536, used = 0, got;
+   char *buf = fp ? (char *)MALLOC(bufSize) : NULL;
+
+   /* Append each chunk at buf + used; double the buffer once it is full. */
+   while (buf && (got = fread(buf + used, 1, bufSize - used, fp))) {
+      used += got;
+      if (used == bufSize) {
+         /* NOTE(review): may leak the old block if REALLOC fails - confirm */
+         buf = (char *)REALLOC(buf, bufSize, bufSize * 2);
+         bufSize *= 2;
+      }
+   }
+   if (fp)
+      fclose(fp);
+   if (buf)
+      INFO("Replacing %s of a program with that from %s\n", what, fname);
+   FREE(fname);
+   *size = buf ? used : 0;
+   *data = buf;
+   return buf != NULL;
+}
+
+}
+
+namespace nv50_ir {
+
+/* Dump prog's binary to the ".bin" file and, for TGSI sources, the TGSI text
+ * to the ".tgsi.txt" file; either is skipped if no dump stream is opened. */
+void
+dumpProgramCodeAndIR(const nv50_ir::Program *prog)
+{
+   const nv50_ir_prog_info *info = prog->driver;
+
+   FILE *out = nv50_ir_begin_dump(info, "code", ".bin", true);
+   if (out) {
+      fwrite(prog->code, prog->binSize, 1, out);
+      fclose(out);
+   }
+
+   if (info->bin.sourceRep != PIPE_SHADER_IR_TGSI) {
+      assert(0);
+      return;
+   }
+
+   out = nv50_ir_begin_dump(info, "tgsi", ".tgsi.txt", false);
+   if (out) {
+      tgsi_dump_to_file((const tgsi_token *)info->bin.source, 0, out);
+      fclose(out);
+   }
+}
+
+/* Install the replacement binary, if nv50_ir_get_replacement() finds one, and
+ * set maxGPR/tlsSize from the target's file sizes. Returns true if replaced. */
+bool
+replaceProgramCode(nv50_ir::Program *prog)
+{
+   const nv50_ir::Target *target = prog->getTarget();
+   void *blob;
+   size_t len;
+   if (!nv50_ir_get_replacement(prog->driver, "code", ".bin", &len, &blob))
+      return false;
+
+   FREE(prog->code);
+   prog->code = (uint32_t *)blob;
+   prog->binSize = len;
+   prog->maxGPR = target->getFileSize(nv50_ir::FILE_GPR) - 1;
+   prog->tlsSize = target->getFileSize(nv50_ir::FILE_MEMORY_LOCAL);
+   return true;
+}
+
+} // namespace nv50_ir
+
+#endif
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h
new file mode 100644
index 0000000000..80f4f7e7d5
--- /dev/null
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_dump.h
@@ -0,0 +1,70 @@
+#ifndef __NV50_IR_DUMP__
+#define __NV50_IR_DUMP__
+
+#include <stdio.h>
+#include "util/macros.h" /* For ALWAYS_INLINE */
+#include "nv50_ir_driver.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef DEBUG
+/* Store the CRC-32 of the shader source in info->bin.sourceHash. */
+void
+nv50_ir_create_source_hash(struct nv50_ir_prog_info *info);
+
+/* Open a dump file under $NV50_PROG_DUMP; NULL if unset or on failure. */
+FILE *
+nv50_ir_begin_dump(const struct nv50_ir_prog_info *info, const char *what,
+                   const char *ext, bool binary);
+
+/* Read a replacement file under $NV50_PROG_REPLACE into *data and *size. */
+bool
+nv50_ir_get_replacement(const struct nv50_ir_prog_info *info, const char *what,
+                        const char *ext, size_t *size, void **data);
+#else
+/* Stubs for non-DEBUG builds. They are static so that each file including
+ * this header gets a private copy and no out-of-line definition is needed. */
+static ALWAYS_INLINE void
+nv50_ir_create_source_hash(struct nv50_ir_prog_info *info)
+{ info->bin.sourceHash = 0; }
+
+static ALWAYS_INLINE FILE *
+nv50_ir_begin_dump(const struct nv50_ir_prog_info *info, const char *what,
+                   const char *ext, bool binary)
+{ return NULL; }
+
+static ALWAYS_INLINE bool
+nv50_ir_get_replacement(const struct nv50_ir_prog_info *info, const char *what,
+                        const char *ext, size_t *size, void **data)
+{ return false; }
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#ifdef __cplusplus
+#include "nv50_ir.h"
+
+namespace nv50_ir {
+
+/* Dump / replace hooks used by nv50_ir_generate_code(). Without DEBUG they
+ * compile to no-ops with the same signatures as the real functions. */
+#ifdef DEBUG
+void
+dumpProgramCodeAndIR(const Program *prog);
+bool
+replaceProgramCode(Program *prog);
+#else
+ALWAYS_INLINE void
+dumpProgramCodeAndIR(const Program *prog) {}
+ALWAYS_INLINE bool
+replaceProgramCode(Program *prog) {return false;}
+#endif
+
+} // namespace nv50_ir
+#endif
+
+#endif
diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build
index 242ee0e000..f7774326f2 100644
--- a/src/gallium/drivers/nouveau/meson.build
+++ b/src/gallium/drivers/nouveau/meson.build
@@ -128,6 +128,8 @@ files_libnouveau = files(
   'codegen/nv50_ir_build_util.cpp',
   'codegen/nv50_ir_build_util.h',
   'codegen/nv50_ir_driver.h',
+  'codegen/nv50_ir_dump.cpp',
+  'codegen/nv50_ir_dump.h',
   'codegen/nv50_ir_emit_nv50.cpp',
   'codegen/nv50_ir_from_tgsi.cpp',
   'codegen/nv50_ir_graph.cpp',
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 9520d984bb..06d989a3f8 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -27,6 +27,7 @@
 #include "nvc0/nvc0_context.h"
 
 #include "codegen/nv50_ir_driver.h"
+#include "codegen/nv50_ir_dump.h"
 #include "nvc0/nve4_compute.h"
 
 /* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
@@ -506,6 +507,64 @@ nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
    return 0;
 }
 
+/* Run the header generator for prog->type (compute only takes over info's
+ * symbol table), then OR in the hdr[] bits that depend on TLS space, global
+ * access and fp64. Returns 0, a generator's error or -1 (unknown type). */
+static int
+nvc0_program_create_header(struct nvc0_program *prog,
+                           struct nv50_ir_prog_info *info)
+{
+   int err = 0;
+   switch (prog->type) {
+   case PIPE_SHADER_VERTEX:
+      err = nvc0_vp_gen_header(prog, info);
+      break;
+   case PIPE_SHADER_TESS_CTRL:
+      err = nvc0_tcp_gen_header(prog, info);
+      break;
+   case PIPE_SHADER_TESS_EVAL:
+      err = nvc0_tep_gen_header(prog, info);
+      break;
+   case PIPE_SHADER_GEOMETRY:
+      err = nvc0_gp_gen_header(prog, info);
+      break;
+   case PIPE_SHADER_FRAGMENT:
+      err = nvc0_fp_gen_header(prog, info);
+      break;
+   case PIPE_SHADER_COMPUTE:
+      prog->cp.num_syms = info->bin.numSyms;
+      prog->cp.syms = info->bin.syms;
+      break;
+   default:
+      NOUVEAU_ERR("unknown program type: %u\n", prog->type);
+      return -1;
+   }
+   if (err != 0)
+      return err;
+
+   if (info->bin.tlsSpace != 0) {
+      assert(info->bin.tlsSpace < (1 << 24));
+      prog->need_tls = true;
+      prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
+   }
+   /* TODO: factor 2 only needed where joinat/precont is used,
+    *       and we only have to count non-uniform branches
+    *
+    * if ((info->maxCFDepth * 2) > 16) {
+    *    prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
+    *    prog->need_tls = true;
+    * }
+    */
+   if (info->bin.tlsSpace || info->io.globalAccess)
+      prog->hdr[0] |= 1 << 26;
+   if ((info->io.globalAccess & 0x2) != 0)
+      prog->hdr[0] |= 1 << 16;
+   if (info->io.fp64)
+      prog->hdr[0] |= 1 << 27;
+
+   return 0;
+}
+
 static struct nvc0_transform_feedback_state *
 nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
                               const struct pipe_stream_output_info *pso)
@@ -565,6 +624,30 @@ nvc0_program_dump(struct nvc0_program *prog)
 }
 #endif
 
+/* Write all of prog->hdr to the ".hdr" dump file, if one can be opened. */
+static void
+nvc0_dump_header(struct nvc0_program *prog, struct nv50_ir_prog_info *info)
+{
+   FILE *fp = nv50_ir_begin_dump(info, "header", ".hdr", true);
+   if (fp) {
+      fwrite(prog->hdr, sizeof(prog->hdr), 1, fp);
+      fclose(fp);
+   }
+}
+
+/* Load prog->hdr from the ".hdr" replacement file; true if one was found. */
+static bool
+nvc0_replace_header(struct nvc0_program *prog, struct nv50_ir_prog_info *info)
+{
+   size_t size;
+   void *data;
+   if (!nv50_ir_get_replacement(info, "header", ".hdr", &size, &data))
+      return false;
+   memcpy(prog->hdr, data, MIN2(size, sizeof(prog->hdr))); /* in bytes */
+   FREE(data);
+   return true;
+}
+
 bool
 nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
                        struct pipe_debug_callback *debug)
@@ -618,6 +701,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
 
    info->assignSlots = nvc0_program_assign_varying_slots;
 
+   nv50_ir_create_source_hash(info);
+
    ret = nv50_ir_generate_code(info);
    if (ret) {
       NOUVEAU_ERR("shader translation failed: %i\n", ret);
@@ -641,55 +726,12 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
       info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */
    prog->vp.edgeflag = info->io.edgeFlagIn;
 
-   switch (prog->type) {
-   case PIPE_SHADER_VERTEX:
-      ret = nvc0_vp_gen_header(prog, info);
-      break;
-   case PIPE_SHADER_TESS_CTRL:
-      ret = nvc0_tcp_gen_header(prog, info);
-      break;
-   case PIPE_SHADER_TESS_EVAL:
-      ret = nvc0_tep_gen_header(prog, info);
-      break;
-   case PIPE_SHADER_GEOMETRY:
-      ret = nvc0_gp_gen_header(prog, info);
-      break;
-   case PIPE_SHADER_FRAGMENT:
-      ret = nvc0_fp_gen_header(prog, info);
-      break;
-   case PIPE_SHADER_COMPUTE:
-      prog->cp.syms = info->bin.syms;
-      prog->cp.num_syms = info->bin.numSyms;
-      break;
-   default:
-      ret = -1;
-      NOUVEAU_ERR("unknown program type: %u\n", prog->type);
-      break;
-   }
-   if (ret)
-      goto out;
-
-   if (info->bin.tlsSpace) {
-      assert(info->bin.tlsSpace < (1 << 24));
-      prog->hdr[0] |= 1 << 26;
-      prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
-      prog->need_tls = true;
+   if (!nvc0_replace_header(prog, info)) {
+      ret = nvc0_program_create_header(prog, info);
+      if (ret)
+         goto out;
+      nvc0_dump_header(prog, info);
    }
-   /* TODO: factor 2 only needed where joinat/precont is used,
-    *       and we only have to count non-uniform branches
-    */
-   /*
-   if ((info->maxCFDepth * 2) > 16) {
-      prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
-      prog->need_tls = true;
-   }
-   */
-   if (info->io.globalAccess)
-      prog->hdr[0] |= 1 << 26;
-   if (info->io.globalAccess & 0x2)
-      prog->hdr[0] |= 1 << 16;
-   if (info->io.fp64)
-      prog->hdr[0] |= 1 << 27;
 
    if (prog->pipe.stream_output.num_outputs)
       prog->tfb = nvc0_program_create_tfb_state(info,
-- 
2.14.3



More information about the mesa-dev mailing list