[Mesa-dev] [PATCH v2] i965: Capture INTEL_DEBUG alongside the hanging batch

Chris Wilson chris at chris-wilson.co.uk
Thu Nov 30 11:47:54 UTC 2017


Similar to how we create a growing state buffer to live alongside the
batch buffer, also create a debug buffer that is submitted and recreated
on every batch. This allows us to emit debugging information about this
batch that will be captured alongside the hanging batch to aid in
post-mortem debugging.

For testing purposes, a hang is not required; you can read
/sys/kernel/debug/dri/0/i915_gpu_info to dump the current batch.

v2: Rebase onto brw_growing_bo

Cc: Matt Turner <mattst88 at gmail.com>
Cc: Kenneth Graunke <kenneth at whitecape.org>
---
Doing an aub or api capture may be more useful for post-mortem, but
hooking up INTEL_DEBUG is relatively easy and may be interesting in its
own right...
---
 src/mesa/drivers/dri/i965/brw_context.c         |   6 +-
 src/mesa/drivers/dri/i965/brw_context.h         |   4 +
 src/mesa/drivers/dri/i965/brw_curbe.c           |   6 +-
 src/mesa/drivers/dri/i965/brw_disk_cache.c      |  14 +--
 src/mesa/drivers/dri/i965/brw_draw_upload.c     |   4 +-
 src/mesa/drivers/dri/i965/brw_ff_gs.c           |   6 +-
 src/mesa/drivers/dri/i965/brw_link.cpp          |  26 +++---
 src/mesa/drivers/dri/i965/brw_program.c         |   8 +-
 src/mesa/drivers/dri/i965/brw_program.h         |   4 +-
 src/mesa/drivers/dri/i965/brw_program_cache.c   |   7 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c    |  10 +--
 src/mesa/drivers/dri/i965/brw_urb.c             |   4 +-
 src/mesa/drivers/dri/i965/brw_vs.c              |   5 +-
 src/mesa/drivers/dri/i965/brw_wm.c              |   4 +-
 src/mesa/drivers/dri/i965/gen6_constant_state.c |  15 ++--
 src/mesa/drivers/dri/i965/gen7_l3_state.c       |   5 +-
 src/mesa/drivers/dri/i965/intel_batchbuffer.c   | 114 +++++++++++++++++++++---
 src/mesa/drivers/dri/i965/intel_screen.c        |   2 +-
 src/mesa/drivers/dri/i965/intel_tex_copy.c      |   2 +-
 19 files changed, 178 insertions(+), 68 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index b62852d90c..3202d67012 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -1324,7 +1324,7 @@ intel_update_dri2_buffers(struct brw_context *brw, __DRIdrawable *drawable)
    drawable->lastStamp = drawable->dri2.stamp;
 
    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
-      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+      fprintf(brw->dbg_stream, "enter %s, drawable %p\n", __func__, drawable);
 
    intel_query_dri2_buffers(brw, drawable, &buffers, &count);
 
@@ -1377,7 +1377,7 @@ intel_update_renderbuffers(__DRIcontext *context, __DRIdrawable *drawable)
    drawable->lastStamp = drawable->dri2.stamp;
 
    if (unlikely(INTEL_DEBUG & DEBUG_DRI))
-      fprintf(stderr, "enter %s, drawable %p\n", __func__, drawable);
+      fprintf(brw->dbg_stream, "enter %s, drawable %p\n", __func__, drawable);
 
    if (dri_screen->image.loader)
       intel_update_image_buffers(brw, drawable);
@@ -1549,7 +1549,7 @@ intel_process_dri2_buffer(struct brw_context *brw,
       return;
 
    if (unlikely(INTEL_DEBUG & DEBUG_DRI)) {
-      fprintf(stderr,
+      fprintf(brw->dbg_stream,
               "attaching buffer %d, at %d, cpp %d, pitch %d\n",
               buffer->name, buffer->attachment,
               buffer->cpp, buffer->pitch);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 0670483806..589ee43e50 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -480,6 +480,8 @@ struct intel_batchbuffer {
    struct brw_growing_bo batch;
    /** Current statebuffer being queued up. */
    struct brw_growing_bo state;
+   /** Optional debug messages for the current batch */
+   struct brw_growing_bo dbg;
 
    /** Last batchbuffer submitted to the hardware.  Used for glFinish(). */
    struct brw_bo *last_bo;
@@ -489,6 +491,7 @@ struct intel_batchbuffer {
 #endif
    uint32_t *map_next;
    uint32_t state_used;
+   uint32_t dbg_used;
 
    enum brw_gpu_ring ring;
    bool use_batch_first;
@@ -686,6 +689,7 @@ struct brw_perf_query_info
 struct brw_context
 {
    struct gl_context ctx; /**< base class, must be first field */
+   FILE *dbg_stream;
 
    struct
    {
diff --git a/src/mesa/drivers/dri/i965/brw_curbe.c b/src/mesa/drivers/dri/i965/brw_curbe.c
index c747110e31..2dee241319 100644
--- a/src/mesa/drivers/dri/i965/brw_curbe.c
+++ b/src/mesa/drivers/dri/i965/brw_curbe.c
@@ -127,7 +127,8 @@ static void calculate_curbe_offsets( struct brw_context *brw )
       brw->curbe.total_size = reg;
 
       if (0)
-	 fprintf(stderr, "curbe wm %d+%d clip %d+%d vs %d+%d\n",
+	 fprintf(brw->dbg_stream,
+                 "curbe wm %d+%d clip %d+%d vs %d+%d\n",
                  brw->curbe.wm_start,
                  brw->curbe.wm_size,
                  brw->curbe.clip_start,
@@ -275,7 +276,8 @@ brw_upload_constant_buffer(struct brw_context *brw)
 
    if (0) {
       for (i = 0; i < sz*16; i+=4)
-	 fprintf(stderr, "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
+	 fprintf(brw->dbg_stream,
+                 "curbe %d.%d: %f %f %f %f\n", i/8, i&4,
                  buf[i+0].f, buf[i+1].f, buf[i+2].f, buf[i+3].f);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_disk_cache.c b/src/mesa/drivers/dri/i965/brw_disk_cache.c
index 853ea98af0..a9ccf07267 100644
--- a/src/mesa/drivers/dri/i965/brw_disk_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_disk_cache.c
@@ -67,7 +67,7 @@ restore_serialized_nir_shader(struct brw_context *brw, struct gl_program *prog,
 {
    prog->program_written_to_cache = false;
    if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
-      fprintf(stderr, "falling back to nir %s.\n",
+      fprintf(brw->dbg_stream, "falling back to nir %s.\n",
               _mesa_shader_stage_to_abbrev(prog->info.stage));
    }
 
@@ -178,7 +178,7 @@ read_and_upload(struct brw_context *brw, struct disk_cache *cache,
       if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
          char sha1_buf[41];
          _mesa_sha1_format(sha1_buf, binary_sha1);
-         fprintf(stderr, "No cached %s binary found for: %s\n",
+         fprintf(brw->dbg_stream, "No cached %s binary found for: %s\n",
                  _mesa_shader_stage_to_abbrev(stage), sha1_buf);
       }
       return false;
@@ -187,7 +187,8 @@ read_and_upload(struct brw_context *brw, struct disk_cache *cache,
    if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
       char sha1_buf[41];
       _mesa_sha1_format(sha1_buf, binary_sha1);
-      fprintf(stderr, "attempting to populate bo cache with binary: %s\n",
+      fprintf(brw->dbg_stream,
+              "attempting to populate bo cache with binary: %s\n",
               sha1_buf);
    }
 
@@ -202,7 +203,8 @@ read_and_upload(struct brw_context *brw, struct disk_cache *cache,
        * rebuild from source.
        */
       if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
-         fprintf(stderr, "Error reading program from cache (invalid i965 "
+         fprintf(brw->dbg_stream,
+                 "Error reading program from cache (invalid i965 "
                  "cache item)\n");
       }
 
@@ -292,7 +294,7 @@ brw_disk_cache_upload_program(struct brw_context *brw, gl_shader_stage stage)
       goto fail;
 
    if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
-      fprintf(stderr, "read gen program from cache\n");
+      fprintf(brw->dbg_stream, "read gen program from cache\n");
    }
 
    return true;
@@ -323,7 +325,7 @@ write_program_data(struct brw_context *brw, struct gl_program *prog,
    gen_shader_sha1(brw, prog, stage, key, sha1);
    _mesa_sha1_format(buf, sha1);
    if (brw->ctx._Shader->Flags & GLSL_CACHE_INFO) {
-      fprintf(stderr, "putting binary in cache: %s\n", buf);
+      fprintf(brw->dbg_stream, "putting binary in cache: %s\n", buf);
    }
 
    disk_cache_put(cache, sha1, binary.data, binary.size, NULL);
diff --git a/src/mesa/drivers/dri/i965/brw_draw_upload.c b/src/mesa/drivers/dri/i965/brw_draw_upload.c
index 9b81999ea0..b9fa4ea7bb 100644
--- a/src/mesa/drivers/dri/i965/brw_draw_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_draw_upload.c
@@ -257,7 +257,7 @@ brw_get_vertex_surface_type(struct brw_context *brw,
       devinfo->gen <= 7 && !devinfo->is_baytrail && !devinfo->is_haswell;
 
    if (unlikely(INTEL_DEBUG & DEBUG_VERTS))
-      fprintf(stderr, "type %s size %d normalized %d\n",
+      fprintf(brw->dbg_stream, "type %s size %d normalized %d\n",
               _mesa_enum_to_string(glarray->Type),
               glarray->Size, glarray->Normalized);
 
@@ -482,7 +482,7 @@ brw_prepare_vertices(struct brw_context *brw)
    }
 
    if (0)
-      fprintf(stderr, "%s %d..%d\n", __func__, min_index, max_index);
+      fprintf(brw->dbg_stream, "%s %d..%d\n", __func__, min_index, max_index);
 
    /* Accumulate the list of enabled arrays. */
    brw->vb.nr_enabled = 0;
diff --git a/src/mesa/drivers/dri/i965/brw_ff_gs.c b/src/mesa/drivers/dri/i965/brw_ff_gs.c
index 174418a474..c74d641418 100644
--- a/src/mesa/drivers/dri/i965/brw_ff_gs.c
+++ b/src/mesa/drivers/dri/i965/brw_ff_gs.c
@@ -134,10 +134,10 @@ brw_codegen_ff_gs_prog(struct brw_context *brw,
    program = brw_get_program(&c.func, &program_size);
 
    if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
-      fprintf(stderr, "gs:\n");
+      fprintf(brw->dbg_stream, "gs:\n");
       brw_disassemble(&brw->screen->devinfo, c.func.store,
-                      0, program_size, stderr);
-      fprintf(stderr, "\n");
+                      0, program_size, brw->dbg_stream);
+      fprintf(brw->dbg_stream, "\n");
     }
 
    brw_upload_cache(&brw->cache, BRW_CACHE_FF_GS_PROG,
diff --git a/src/mesa/drivers/dri/i965/brw_link.cpp b/src/mesa/drivers/dri/i965/brw_link.cpp
index d18521e792..a56a08869c 100644
--- a/src/mesa/drivers/dri/i965/brw_link.cpp
+++ b/src/mesa/drivers/dri/i965/brw_link.cpp
@@ -144,18 +144,20 @@ process_glsl_ir(struct brw_context *brw,
    ralloc_free(mem_ctx);
 
    if (ctx->_Shader->Flags & GLSL_DUMP) {
-      fprintf(stderr, "\n");
+      fprintf(brw->dbg_stream, "\n");
       if (shader->ir) {
-         fprintf(stderr, "GLSL IR for linked %s program %d:\n",
+         fprintf(brw->dbg_stream,
+                 "GLSL IR for linked %s program %d:\n",
                  _mesa_shader_stage_to_string(shader->Stage),
                  shader_prog->Name);
-         _mesa_print_ir(stderr, shader->ir, NULL);
+         _mesa_print_ir(brw->dbg_stream, shader->ir, NULL);
       } else {
-         fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be "
+         fprintf(brw->dbg_stream,
+                 "No GLSL IR for linked %s program %d (shader may be "
                  "from cache)\n", _mesa_shader_stage_to_string(shader->Stage),
                  shader_prog->Name);
       }
-      fprintf(stderr, "\n");
+      fprintf(brw->dbg_stream, "\n");
    }
 }
 
@@ -247,10 +249,11 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
          (INTEL_DEBUG & intel_debug_flag_for_shader_stage(shader->Stage));
 
       if (debug_enabled && shader->ir) {
-         fprintf(stderr, "GLSL IR for native %s shader %d:\n",
+         fprintf(brw->dbg_stream,
+                 "GLSL IR for native %s shader %d:\n",
                  _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
-         _mesa_print_ir(stderr, shader->ir, NULL);
-         fprintf(stderr, "\n\n");
+         _mesa_print_ir(brw->dbg_stream, shader->ir, NULL);
+         fprintf(brw->dbg_stream, "\n\n");
       }
 
       prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
@@ -349,11 +352,12 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
          if (!sh)
             continue;
 
-         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
+         fprintf(brw->dbg_stream,
+                 "GLSL %s shader %d source for linked program %d:\n",
                  _mesa_shader_stage_to_string(sh->Stage),
                  i, shProg->Name);
-         fprintf(stderr, "%s", sh->Source);
-         fprintf(stderr, "\n");
+         fprintf(brw->dbg_stream, "%s", sh->Source);
+         fprintf(brw->dbg_stream, "\n");
       }
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c
index 755d4973cc..930c9c033e 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -678,11 +678,13 @@ brw_stage_prog_data_free(const void *p)
 }
 
 void
-brw_dump_arb_asm(const char *stage, struct gl_program *prog)
+brw_dump_arb_asm(struct brw_context *brw,
+                 const char *stage,
+                 struct gl_program *prog)
 {
-   fprintf(stderr, "ARB_%s_program %d ir for native %s shader\n",
+   fprintf(brw->dbg_stream, "ARB_%s_program %d ir for native %s shader\n",
            stage, prog->Id, stage);
-   _mesa_print_program(prog);
+   _mesa_fprint_program_opt(brw->dbg_stream, prog, PROG_PRINT_DEBUG, GL_TRUE);
 }
 
 void
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 701b8da482..ebcbe60871 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -85,7 +85,9 @@ void
 brw_stage_prog_data_free(const void *prog_data);
 
 void
-brw_dump_arb_asm(const char *stage, struct gl_program *prog);
+brw_dump_arb_asm(struct brw_context *brw,
+                 const char *stage,
+                 struct gl_program *prog);
 
 bool brw_vs_precompile(struct gl_context *ctx, struct gl_program *prog);
 bool brw_tcs_precompile(struct gl_context *ctx,
diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
index adb0cd5a23..1a9d178cec 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -139,7 +139,8 @@ search_cache(struct brw_cache *cache, GLuint hash,
    for (c = cache->items[hash % cache->size]; c; c = c->next)
       bucketcount++;
 
-   fprintf(stderr, "bucket %d/%d = %d/%d items\n", hash % cache->size,
+   fprintf(brw->dbg_stream,
+           "bucket %d/%d = %d/%d items\n", hash % cache->size,
            cache->size, bucketcount, cache->n_items);
 #endif
 
@@ -524,9 +525,9 @@ brw_print_program_cache(struct brw_context *brw)
 
    for (unsigned i = 0; i < cache->size; i++) {
       for (item = cache->items[i]; item; item = item->next) {
-         fprintf(stderr, "%s:\n", cache_name(i));
+         fprintf(brw->dbg_stream, "%s:\n", cache_name(i));
          brw_disassemble(&brw->screen->devinfo, cache->map,
-                         item->offset, item->size, stderr);
+                         item->offset, item->size, brw->dbg_stream);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 20c59c6e9d..275733cc00 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -356,11 +356,11 @@ brw_update_dirty_count(struct dirty_bit_map *bit_map, uint64_t bits)
 }
 
 static void
-brw_print_dirty_count(struct dirty_bit_map *bit_map)
+brw_print_dirty_count(struct brw_context *brw, struct dirty_bit_map *bit_map)
 {
    for (int i = 0; bit_map[i].bit != 0; i++) {
       if (bit_map[i].count > 1) {
-         fprintf(stderr, "0x%016"PRIx64": %12d (%s)\n",
+         fprintf(brw->dbg_stream, "0x%016"PRIx64": %12d (%s)\n",
                  bit_map[i].bit, bit_map[i].count, bit_map[i].name);
       }
    }
@@ -582,9 +582,9 @@ brw_upload_pipeline_state(struct brw_context *brw,
       brw_update_dirty_count(mesa_bits, state.mesa);
       brw_update_dirty_count(brw_bits, state.brw);
       if (dirty_count++ % 1000 == 0) {
-	 brw_print_dirty_count(mesa_bits);
-	 brw_print_dirty_count(brw_bits);
-	 fprintf(stderr, "\n");
+	 brw_print_dirty_count(brw, mesa_bits);
+	 brw_print_dirty_count(brw, brw_bits);
+	 fprintf(brw->dbg_stream, "\n");
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/brw_urb.c b/src/mesa/drivers/dri/i965/brw_urb.c
index a86fa78aca..1f0a3ced8a 100644
--- a/src/mesa/drivers/dri/i965/brw_urb.c
+++ b/src/mesa/drivers/dri/i965/brw_urb.c
@@ -190,12 +190,12 @@ brw_calculate_urb_fence(struct brw_context *brw, unsigned csize,
 	 }
 
 	 if (unlikely(INTEL_DEBUG & (DEBUG_URB|DEBUG_PERF)))
-	    fprintf(stderr, "URB CONSTRAINED\n");
+	    fprintf(brw->dbg_stream, "URB CONSTRAINED\n");
       }
 
 done:
       if (unlikely(INTEL_DEBUG & DEBUG_URB))
-	 fprintf(stderr,
+	 fprintf(brw->dbg_stream,
                  "URB fence: %d ..VS.. %d ..GS.. %d ..CLP.. %d ..SF.. %d ..CS.. %d\n",
                  brw->urb.vs_start,
                  brw->urb.gs_start,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a56b256bc3..8c11794fc3 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -196,7 +196,8 @@ brw_codegen_vs_prog(struct brw_context *brw,
                        vp->program.nir->info.separate_shader);
 
    if (0) {
-      _mesa_fprint_program_opt(stderr, &vp->program, PROG_PRINT_DEBUG, true);
+      _mesa_fprint_program_opt(brw->dbg_stream,
+                               &vp->program, PROG_PRINT_DEBUG, true);
    }
 
    if (unlikely(brw->perf_debug)) {
@@ -207,7 +208,7 @@ brw_codegen_vs_prog(struct brw_context *brw,
 
    if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
       if (vp->program.is_arb_asm)
-         brw_dump_arb_asm("vertex", &vp->program);
+         brw_dump_arb_asm(brw, "vertex", &vp->program);
    }
 
    int st_index = -1;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 08bacebd57..3b0b6365cf 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -162,7 +162,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
                                  &prog_data.base);
 
       if (unlikely(INTEL_DEBUG & DEBUG_WM))
-         brw_dump_arb_asm("fragment", &fp->program);
+         brw_dump_arb_asm(brw,"fragment", &fp->program);
    }
 
    if (unlikely(brw->perf_debug)) {
@@ -212,7 +212,7 @@ brw_codegen_wm_prog(struct brw_context *brw,
    brw_alloc_stage_scratch(brw, &brw->wm.base, prog_data.base.total_scratch);
 
    if (unlikely((INTEL_DEBUG & DEBUG_WM) && fp->program.is_arb_asm))
-      fprintf(stderr, "\n");
+      fprintf(brw->dbg_stream, "\n");
 
    /* The param and pull_param arrays will be freed by the shader cache. */
    ralloc_steal(NULL, prog_data.base.param);
diff --git a/src/mesa/drivers/dri/i965/gen6_constant_state.c b/src/mesa/drivers/dri/i965/gen6_constant_state.c
index 89b1202dd6..3043b8255d 100644
--- a/src/mesa/drivers/dri/i965/gen6_constant_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_constant_state.c
@@ -172,19 +172,19 @@ gen6_upload_push_constants(struct brw_context *brw,
                                  prog_data->nr_params);
 
       if (0) {
-         fprintf(stderr, "%s constants:\n",
+         fprintf(brw->dbg_stream, "%s constants:\n",
                  _mesa_shader_stage_to_string(stage_state->stage));
          for (i = 0; i < prog_data->nr_params; i++) {
             if ((i & 7) == 0)
-               fprintf(stderr, "g%d: ",
+               fprintf(brw->dbg_stream, "g%d: ",
                        prog_data->dispatch_grf_start_reg + i / 8);
-            fprintf(stderr, "%8f ", param[i].f);
+            fprintf(brw->dbg_stream, "%8f ", param[i].f);
             if ((i & 7) == 7)
-               fprintf(stderr, "\n");
+               fprintf(brw->dbg_stream, "\n");
          }
          if ((i & 7) != 0)
-            fprintf(stderr, "\n");
-         fprintf(stderr, "\n");
+            fprintf(brw->dbg_stream, "\n");
+         fprintf(brw->dbg_stream, "\n");
       }
 
       stage_state->push_const_size = ALIGN(prog_data->nr_params, 8) / 8;
@@ -261,7 +261,8 @@ brw_upload_pull_constants(struct brw_context *brw,
    if (0) {
       for (i = 0; i < ALIGN(prog_data->nr_pull_params, 4) / 4; i++) {
 	 const gl_constant_value *row = &constants[i * 4];
-	 fprintf(stderr, "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
+	 fprintf(brw->dbg_stream,
+                 "const surface %3d: %4.3f %4.3f %4.3f %4.3f\n",
                  i, row[0].f, row[1].f, row[2].f, row[3].f);
       }
    }
diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c b/src/mesa/drivers/dri/i965/gen7_l3_state.c
index 8c8f4169e7..f175daf946 100644
--- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
@@ -244,8 +244,9 @@ emit_l3_state(struct brw_context *brw)
       brw->l3.config = cfg;
 
       if (unlikely(INTEL_DEBUG & DEBUG_L3)) {
-         fprintf(stderr, "L3 config transition (%f > %f): ", dw, dw_threshold);
-         gen_dump_l3_config(cfg, stderr);
+         fprintf(brw->dbg_stream,
+                 "L3 config transition (%f > %f): ", dw, dw_threshold);
+         gen_dump_l3_config(cfg, brw->dbg_stream);
       }
    }
 }
diff --git a/src/mesa/drivers/dri/i965/intel_batchbuffer.c b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
index 91a6506a89..ed1f908abd 100644
--- a/src/mesa/drivers/dri/i965/intel_batchbuffer.c
+++ b/src/mesa/drivers/dri/i965/intel_batchbuffer.c
@@ -37,6 +37,7 @@
 
 #include <xf86drm.h>
 #include <i915_drm.h>
+#include <libio.h>
 
 #define FILE_DEBUG_FLAG DEBUG_BUFMGR
 
@@ -51,9 +52,12 @@
  */
 #define BATCH_SZ (20 * 1024)
 #define STATE_SZ (16 * 1024)
+#define DBG_SZ   (16 * 1024)
 
 static void
 intel_batchbuffer_reset(struct brw_context *brw);
+static void
+brw_debug_open_stream(struct brw_context *brw);
 
 static bool
 uint_key_compare(const void *a, const void *b)
@@ -89,6 +93,10 @@ intel_batchbuffer_init(struct brw_context *brw)
       batch->map_next = batch->batch.map;
       batch->state.cpu_map = malloc(STATE_SZ);
       batch->state.map = batch->state.cpu_map;
+      if (INTEL_DEBUG) {
+         batch->dbg.cpu_map = malloc(DBG_SZ);
+         batch->dbg.map = batch->dbg.cpu_map;
+      }
    }
 
    init_reloc_list(&batch->batch_relocs, 250);
@@ -115,6 +123,7 @@ intel_batchbuffer_init(struct brw_context *brw)
       batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT;
 
    intel_batchbuffer_reset(brw);
+   brw_debug_open_stream(brw);
 }
 
 #define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x))
@@ -188,6 +197,14 @@ intel_batchbuffer_reset(struct brw_context *brw)
          brw_bo_map(brw, batch->state.bo, MAP_READ | MAP_WRITE);
    }
 
+   batch->dbg_used = 0;
+   if (INTEL_DEBUG && can_do_exec_capture(screen)) {
+      batch->dbg.bo = brw_bo_alloc(bufmgr, "debug", DBG_SZ, 0);
+      batch->dbg.bo->kflags = EXEC_OBJECT_CAPTURE;
+      if (!batch->dbg.cpu_map)
+         batch->dbg.map = brw_bo_map(brw, batch->dbg.bo, MAP_WRITE);
+   }
+
    /* Avoid making 0 a valid state offset - otherwise the decoder will try
     * and decode data when we use offset 0 as a null pointer.
     */
@@ -245,6 +262,7 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
 {
    free(batch->batch.cpu_map);
    free(batch->state.cpu_map);
+   free(batch->dbg.cpu_map);
 
    for (int i = 0; i < batch->exec_count; i++) {
       brw_bo_unreference(batch->exec_bos[i]);
@@ -257,6 +275,8 @@ intel_batchbuffer_free(struct intel_batchbuffer *batch)
    brw_bo_unreference(batch->last_bo);
    brw_bo_unreference(batch->batch.bo);
    brw_bo_unreference(batch->state.bo);
+   brw_bo_unreference(batch->dbg.bo);
+
    if (batch->state_batch_sizes)
       _mesa_hash_table_destroy(batch->state_batch_sizes, NULL);
 }
@@ -399,8 +419,8 @@ decode_struct(struct brw_context *brw, struct gen_spec *spec,
    if (!group)
       return;
 
-   fprintf(stderr, "%s\n", struct_name);
-   gen_print_group(stderr, group, gtt_offset + offset,
+   fprintf(brw->dbg_stream, "%s\n", struct_name);
+   gen_print_group(brw->dbg_stream, group, gtt_offset + offset,
                    &data[offset / 4], color);
 }
 
@@ -416,8 +436,8 @@ decode_structs(struct brw_context *brw, struct gen_spec *spec,
 
    int entries = brw_state_batch_size(brw, offset) / struct_size;
    for (int i = 0; i < entries; i++) {
-      fprintf(stderr, "%s %d\n", struct_name, i);
-      gen_print_group(stderr, group, gtt_offset + offset,
+      fprintf(brw->dbg_stream, "%s %d\n", struct_name, i);
+      gen_print_group(brw->dbg_stream, group, gtt_offset + offset,
                       &data[(offset + i * struct_size) / 4], color);
    }
 }
@@ -454,16 +474,17 @@ do_batch_dump(struct brw_context *brw)
       assert(inst == NULL || length > 0);
       length = MAX2(1, length);
       if (inst == NULL) {
-         fprintf(stderr, "unknown instruction %08x\n", p[0]);
+         fprintf(brw->dbg_stream, "unknown instruction %08x\n", p[0]);
          continue;
       }
 
       uint64_t offset = batch_gtt_offset + 4 * (p - batch_data);
 
-      fprintf(stderr, "%s0x%08"PRIx64":  0x%08x:  %-80s%s\n", header_color,
+      fprintf(brw->dbg_stream,
+              "%s0x%08"PRIx64":  0x%08x:  %-80s%s\n", header_color,
               offset, p[0], gen_group_get_name(inst), reset_color);
 
-      gen_print_group(stderr, inst, offset, p, color);
+      gen_print_group(brw->dbg_stream, inst, offset, p, color);
 
       switch (gen_group_get_opcode(inst) >> 16) {
       case _3DSTATE_PIPELINED_POINTERS:
@@ -503,8 +524,9 @@ do_batch_dump(struct brw_context *brw)
          int bt_entries = brw_state_batch_size(brw, bt_offset) / 4;
          uint32_t *bt_pointers = &state[bt_offset / 4];
          for (int i = 0; i < bt_entries; i++) {
-            fprintf(stderr, "SURFACE_STATE - BTI = %d\n", i);
-            gen_print_group(stderr, group, state_gtt_offset + bt_pointers[i],
+            fprintf(brw->dbg_stream, "SURFACE_STATE - BTI = %d\n", i);
+            gen_print_group(brw->dbg_stream,
+                            group, state_gtt_offset + bt_pointers[i],
                             &state[bt_pointers[i] / 4], color);
          }
          break;
@@ -577,8 +599,9 @@ do_batch_dump(struct brw_context *brw)
          int bt_entries = brw_state_batch_size(brw, bt_offset) / 4;
          uint32_t *bt_pointers = &state[bt_offset / 4];
          for (int i = 0; i < bt_entries; i++) {
-            fprintf(stderr, "SURFACE_STATE - BTI = %d\n", i);
-            gen_print_group(stderr, group, state_gtt_offset + bt_pointers[i],
+            fprintf(brw->dbg_stream, "SURFACE_STATE - BTI = %d\n", i);
+            gen_print_group(brw->dbg_stream,
+                            group, state_gtt_offset + bt_pointers[i],
                             &state[bt_pointers[i] / 4], color);
          }
          break;
@@ -610,6 +633,7 @@ brw_new_batch(struct brw_context *brw)
    brw->batch.aperture_space = 0;
 
    brw_bo_unreference(brw->batch.state.bo);
+   brw_bo_unreference(brw->batch.dbg.bo);
 
    /* Create a new batchbuffer and reset the associated state: */
    intel_batchbuffer_reset_and_clear_render_cache(brw);
@@ -816,6 +840,17 @@ submit_batch(struct brw_context *brw, int in_fence_fd, int *out_fence_fd)
    brw_bo_unmap(batch->batch.bo);
    brw_bo_unmap(batch->state.bo);
 
+   fflush(brw->dbg_stream);
+   if (batch->dbg_used) {
+      batch->dbg.map[batch->dbg_used++] = '\0';
+      if (batch->dbg.cpu_map) {
+         void *map = brw_bo_map(brw, batch->dbg.bo, MAP_WRITE);
+         memcpy(map, batch->dbg.cpu_map, batch->dbg_used);
+      }
+      add_exec_bo(batch, batch->dbg.bo);
+      brw_bo_unmap(batch->dbg.bo);
+   }
+
    if (!brw->screen->no_hw) {
       /* The requirement for using I915_EXEC_NO_RELOC are:
        *
@@ -923,7 +958,8 @@ _intel_batchbuffer_flush_fence(struct brw_context *brw,
    if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
       int bytes_for_commands = 4 * USED_BATCH(brw->batch);
       int bytes_for_state = brw->batch.state_used;
-      fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
+      fprintf(brw->dbg_stream,
+              "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt),"
               " %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture),"
               " %4d batch relocs, %4d state relocs\n", file, line,
               bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ,
@@ -1322,3 +1358,57 @@ brw_store_data_imm64(struct brw_context *brw, struct brw_bo *bo,
    OUT_BATCH(imm >> 32);
    ADVANCE_BATCH();
 }
+
+static void *
+__brw_debug_map(struct brw_context *brw,  size_t len)
+{ /* Reserve len bytes of the batch's debug buffer; returns where to write. */
+   struct intel_batchbuffer *batch = &brw->batch;
+   struct brw_bo *bo = batch->dbg.bo;
+   void *ptr;
+   /* Grow (1.5x + len + 1) when len bytes plus one spare byte would not fit. */
+   if (batch->dbg_used + len + 1>= bo->size) {
+      const unsigned new_size = bo->size + bo->size / 2 + len + 1;
+      struct brw_bo *new_bo = brw_bo_alloc(brw->bufmgr, bo->name, new_size, 0);
+      assert(batch->dbg_used + len + 1 < new_bo->size);
+      /* CPU shadow: realloc (result unchecked). Else copy into new BO's map. */
+      if (batch->dbg.cpu_map) {
+         batch->dbg.cpu_map = realloc(batch->dbg.cpu_map, new_size);
+	 batch->dbg.map = batch->dbg.cpu_map;
+      } else {
+	      void *map = brw_bo_map(brw, new_bo, MAP_WRITE);
+	      memcpy(map, batch->dbg.map, batch->dbg_used);
+
+	      brw_bo_unmap(bo);
+	      batch->dbg.map = map;
+      }
+
+      new_bo->kflags = bo->kflags; /* carry the old BO's kernel flags over */
+      brw_bo_unreference(bo);
+      batch->dbg.bo = new_bo;
+   }
+   /* NOTE(review): offset scales with dbg.map's pointee type - confirm bytes. */
+   ptr = batch->dbg.map + batch->dbg_used;
+   batch->dbg_used += len;
+
+   return ptr;
+}
+
+static ssize_t __brw_debug_write(void *cookie, const char *data, size_t len)
+{ /* Stream write hook: append data to the debug buffer, then echo to stderr. */
+   memcpy(__brw_debug_map(cookie, len), data, len);
+   if (!(INTEL_DEBUG & 0)) /* (x & 0) is always 0, so this is always taken */
+	   fwrite(data, len, 1, stderr);
+   return len; /* report the whole write as consumed */
+}
+
+static cookie_io_functions_t iofuncs = { /* hooks passed to fopencookie() */
+	.write = __brw_debug_write, /* only write is set; other hooks stay NULL */
+};
+
+static void brw_debug_open_stream(struct brw_context *brw)
+{ /* Point dbg_stream at the debug BO writer if a BO exists, else at stderr. */
+	if (brw->batch.dbg.bo)
+		brw->dbg_stream = fopencookie(brw, "w", iofuncs);
+	if (!brw->dbg_stream) /* still unset (presumably zeroed) or fopencookie failed */
+		brw->dbg_stream = stderr;
+}
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index db1552c188..9e6e734657 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -2248,7 +2248,7 @@ shader_perf_log_mesa(void *data, const char *fmt, ...)
    if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
       va_list args_copy;
       va_copy(args_copy, args);
-      vfprintf(stderr, fmt, args_copy);
+      vfprintf(brw->dbg_stream, fmt, args_copy);
       va_end(args_copy);
    }
 
diff --git a/src/mesa/drivers/dri/i965/intel_tex_copy.c b/src/mesa/drivers/dri/i965/intel_tex_copy.c
index 4fe3585296..2a483a7efe 100644
--- a/src/mesa/drivers/dri/i965/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/i965/intel_tex_copy.c
@@ -54,7 +54,7 @@ intel_copy_texsubimage(struct brw_context *brw,
 
    if (!intelImage->mt || !irb || !irb->mt) {
       if (unlikely(INTEL_DEBUG & DEBUG_PERF))
-	 fprintf(stderr, "%s fail %p %p (0x%08x)\n",
+	 fprintf(brw->dbg_stream, "%s fail %p %p (0x%08x)\n",
 		 __func__, intelImage->mt, irb, internalFormat);
       return false;
    }
-- 
2.15.0



More information about the mesa-dev mailing list