[Mesa-dev] [PATCH 09/14] i965: Make perf_debug() output to GL_ARB_debug_output in a debug context.
Eric Anholt
eric at anholt.net
Fri Feb 22 19:52:18 PST 2013
---
src/mesa/drivers/dri/i965/brw_context.c | 6 ++-
src/mesa/drivers/dri/i965/brw_draw.c | 1 +
src/mesa/drivers/dri/i965/brw_fs.cpp | 4 +-
src/mesa/drivers/dri/i965/brw_program.h | 3 +-
src/mesa/drivers/dri/i965/brw_queryobj.c | 2 +-
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_state_cache.c | 2 +
src/mesa/drivers/dri/i965/brw_vec4.cpp | 4 +-
src/mesa/drivers/dri/i965/brw_vs.c | 21 ++++----
src/mesa/drivers/dri/i965/brw_wm.c | 58 +++++++++++++--------
src/mesa/drivers/dri/intel/intel_buffer_objects.c | 2 +-
src/mesa/drivers/dri/intel/intel_context.c | 2 +
src/mesa/drivers/dri/intel/intel_context.h | 17 +++++-
src/mesa/drivers/dri/intel/intel_regions.c | 2 +-
src/mesa/drivers/dri/intel/intel_tex_copy.c | 3 +-
src/mesa/drivers/dri/intel/intel_tex_subimage.c | 2 +-
16 files changed, 83 insertions(+), 48 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 19497e9..b12d71c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -386,9 +386,13 @@ brwCreateContext(int api,
if ((flags & __DRI_CTX_FLAG_FORWARD_COMPATIBLE) != 0)
ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_FORWARD_COMPATIBLE_BIT;
- if ((flags & __DRI_CTX_FLAG_DEBUG) != 0)
+ if ((flags & __DRI_CTX_FLAG_DEBUG) != 0) {
ctx->Const.ContextFlags |= GL_CONTEXT_FLAG_DEBUG_BIT;
+ /* Turn on some extra GL_ARB_debug_output generation. */
+ intel->perf_debug = true;
+ }
+
brw_fs_alloc_reg_sets(brw);
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 2c2b826..9c96f69 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -550,6 +550,7 @@ void brw_draw_prims( struct gl_context *ctx,
GLuint max_index,
struct gl_transform_feedback_object *tfb_vertcount )
{
+ struct intel_context *intel = intel_context(ctx);
const struct gl_client_array **arrays = ctx->Array._DrawArrays;
if (!_mesa_check_conditional_render(ctx))
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index e53de66..0f78396 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -2872,7 +2872,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
bool start_busy = false;
float start_time = 0;
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ if (unlikely(intel->perf_debug)) {
start_busy = (intel->batch.last_bo &&
drm_intel_bo_busy(intel->batch.last_bo));
start_time = get_time();
@@ -2927,7 +2927,7 @@ brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,
simd16_instructions,
final_assembly_size);
- if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) {
+ if (unlikely(intel->perf_debug) && shader) {
if (shader->compiled_once)
brw_wm_debug_recompile(brw, prog, &c->key);
shader->compiled_once = true;
diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h
index 1821775..010a9b8 100644
--- a/src/mesa/drivers/dri/i965/brw_program.h
+++ b/src/mesa/drivers/dri/i965/brw_program.h
@@ -45,7 +45,8 @@ struct brw_sampler_prog_key_data {
void brw_populate_sampler_prog_key_data(struct gl_context *ctx,
const struct gl_program *prog,
struct brw_sampler_prog_key_data *key);
-bool brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key,
+bool brw_debug_recompile_sampler_key(struct intel_context *intel,
+ const struct brw_sampler_prog_key_data *old_key,
const struct brw_sampler_prog_key_data *key);
void brw_add_texrect_params(struct gl_program *prog);
diff --git a/src/mesa/drivers/dri/i965/brw_queryobj.c b/src/mesa/drivers/dri/i965/brw_queryobj.c
index cd9c848..40188fd 100644
--- a/src/mesa/drivers/dri/i965/brw_queryobj.c
+++ b/src/mesa/drivers/dri/i965/brw_queryobj.c
@@ -145,7 +145,7 @@ brw_queryobj_get_results(struct gl_context *ctx,
if (drm_intel_bo_references(intel->batch.bo, query->bo))
intel_batchbuffer_flush(intel);
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ if (unlikely(intel->perf_debug)) {
if (drm_intel_bo_busy(query->bo)) {
perf_debug("Stalling on the GPU waiting for a query object.\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index c71715e..ab0f080 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -180,7 +180,7 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
lower_variable_index_to_cond_assign(shader->ir,
input, output, temp, uniform);
- if (unlikely((INTEL_DEBUG & DEBUG_PERF) && lowered_variable_indexing)) {
+ if (unlikely((intel->perf_debug) && lowered_variable_indexing)) {
perf_debug("Unsupported form of variable indexing in FS; falling "
"back to very inefficient code generation\n");
}
diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c
index d44b2b7..505c7e8 100644
--- a/src/mesa/drivers/dri/i965/brw_state_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_state_cache.c
@@ -389,6 +389,8 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
void
brw_state_cache_check_size(struct brw_context *brw)
{
+ struct intel_context *intel = &brw->intel;
+
/* un-tuned guess. Each object is generally a page, so 2000 of them is 8 MB of
* state cache.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index d5b7cb7..a255bbb 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1349,7 +1349,7 @@ brw_vs_emit(struct brw_context *brw,
bool start_busy = false;
float start_time = 0;
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ if (unlikely(intel->perf_debug)) {
start_busy = (intel->batch.last_bo &&
drm_intel_bo_busy(intel->batch.last_bo));
start_time = get_time();
@@ -1382,7 +1382,7 @@ brw_vs_emit(struct brw_context *brw,
const unsigned *generated =g.generate_assembly(&v.instructions,
final_assembly_size);
- if (unlikely(INTEL_DEBUG & DEBUG_PERF) && shader) {
+ if (unlikely(intel->perf_debug) && shader) {
if (shader->compiled_once) {
brw_vs_debug_recompile(brw, prog, &c->key);
}
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 64659c0..3daed19 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -319,7 +319,7 @@ do_vs_prog(struct brw_context *brw,
}
static bool
-key_debug(const char *name, int a, int b)
+key_debug(struct intel_context *intel, const char *name, int a, int b)
{
if (a != b) {
perf_debug(" %s %d->%d\n", name, a, b);
@@ -333,6 +333,7 @@ brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_cache_item *c = NULL;
const struct brw_vs_prog_key *old_key = NULL;
bool found = false;
@@ -359,31 +360,31 @@ brw_vs_debug_recompile(struct brw_context *brw,
}
for (unsigned int i = 0; i < VERT_ATTRIB_MAX; i++) {
- found |= key_debug("Vertex attrib w/a flags",
+ found |= key_debug(intel, "Vertex attrib w/a flags",
old_key->gl_attrib_wa_flags[i],
key->gl_attrib_wa_flags[i]);
}
- found |= key_debug("user clip flags",
+ found |= key_debug(intel, "user clip flags",
old_key->userclip_active, key->userclip_active);
- found |= key_debug("user clipping planes as push constants",
+ found |= key_debug(intel, "user clipping planes as push constants",
old_key->nr_userclip_plane_consts,
key->nr_userclip_plane_consts);
- found |= key_debug("clip distance enable",
+ found |= key_debug(intel, "clip distance enable",
old_key->uses_clip_distance, key->uses_clip_distance);
- found |= key_debug("clip plane enable bitfield",
+ found |= key_debug(intel, "clip plane enable bitfield",
old_key->userclip_planes_enabled_gen_4_5,
key->userclip_planes_enabled_gen_4_5);
- found |= key_debug("copy edgeflag",
+ found |= key_debug(intel, "copy edgeflag",
old_key->copy_edgeflag, key->copy_edgeflag);
- found |= key_debug("PointCoord replace",
+ found |= key_debug(intel, "PointCoord replace",
old_key->point_coord_replace, key->point_coord_replace);
- found |= key_debug("vertex color clamping",
+ found |= key_debug(intel, "vertex color clamping",
old_key->clamp_vertex_color, key->clamp_vertex_color);
- found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex);
+ found |= brw_debug_recompile_sampler_key(intel, &old_key->tex, &key->tex);
if (!found) {
perf_debug(" Something else\n");
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
index 4b04465..77bede0 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -203,7 +203,7 @@ bool do_wm_prog(struct brw_context *brw,
}
static bool
-key_debug(const char *name, int a, int b)
+key_debug(struct intel_context *intel, const char *name, int a, int b)
{
if (a != b) {
perf_debug(" %s %d->%d\n", name, a, b);
@@ -214,24 +214,25 @@ key_debug(const char *name, int a, int b)
}
bool
-brw_debug_recompile_sampler_key(const struct brw_sampler_prog_key_data *old_key,
+brw_debug_recompile_sampler_key(struct intel_context *intel,
+ const struct brw_sampler_prog_key_data *old_key,
const struct brw_sampler_prog_key_data *key)
{
bool found = false;
for (unsigned int i = 0; i < MAX_SAMPLERS; i++) {
- found |= key_debug("EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
+ found |= key_debug(intel, "EXT_texture_swizzle or DEPTH_TEXTURE_MODE",
old_key->swizzles[i], key->swizzles[i]);
}
- found |= key_debug("GL_CLAMP enabled on any texture unit's 1st coordinate",
+ found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 1st coordinate",
old_key->gl_clamp_mask[0], key->gl_clamp_mask[0]);
- found |= key_debug("GL_CLAMP enabled on any texture unit's 2nd coordinate",
+ found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 2nd coordinate",
old_key->gl_clamp_mask[1], key->gl_clamp_mask[1]);
- found |= key_debug("GL_CLAMP enabled on any texture unit's 3rd coordinate",
+ found |= key_debug(intel, "GL_CLAMP enabled on any texture unit's 3rd coordinate",
old_key->gl_clamp_mask[2], key->gl_clamp_mask[2]);
- found |= key_debug("GL_MESA_ycbcr texturing\n",
+ found |= key_debug(intel, "GL_MESA_ycbcr texturing\n",
old_key->yuvtex_mask, key->yuvtex_mask);
- found |= key_debug("GL_MESA_ycbcr UV swapping\n",
+ found |= key_debug(intel, "GL_MESA_ycbcr UV swapping\n",
old_key->yuvtex_swap_mask, key->yuvtex_swap_mask);
return found;
@@ -242,6 +243,7 @@ brw_wm_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_wm_prog_key *key)
{
+ struct intel_context *intel = &brw->intel;
struct brw_cache_item *c = NULL;
const struct brw_wm_prog_key *old_key = NULL;
bool found = false;
@@ -262,25 +264,35 @@ brw_wm_debug_recompile(struct brw_context *brw,
}
if (!c) {
- perf_debug(" Didn't find previous compile in the shader cache for "
- "debug\n");
+ perf_debug(" Didn't find previous compile in the shader cache for debug\n");
return;
}
- found |= key_debug("alphatest, computed depth, depth test, or depth write",
+ found |= key_debug(intel, "alphatest, computed depth, depth test, or "
+ "depth write",
old_key->iz_lookup, key->iz_lookup);
- found |= key_debug("depth statistics", old_key->stats_wm, key->stats_wm);
- found |= key_debug("flat shading", old_key->flat_shade, key->flat_shade);
- found |= key_debug("number of color buffers", old_key->nr_color_regions, key->nr_color_regions);
- found |= key_debug("sample alpha to coverage", old_key->sample_alpha_to_coverage, key->sample_alpha_to_coverage);
- found |= key_debug("rendering to FBO", old_key->render_to_fbo, key->render_to_fbo);
- found |= key_debug("fragment color clamping", old_key->clamp_fragment_color, key->clamp_fragment_color);
- found |= key_debug("line smoothing", old_key->line_aa, key->line_aa);
- found |= key_debug("proj_attrib_mask", old_key->proj_attrib_mask, key->proj_attrib_mask);
- found |= key_debug("renderbuffer height", old_key->drawable_height, key->drawable_height);
- found |= key_debug("vertex shader outputs", old_key->vp_outputs_written, key->vp_outputs_written);
-
- found |= brw_debug_recompile_sampler_key(&old_key->tex, &key->tex);
+ found |= key_debug(intel, "depth statistics",
+ old_key->stats_wm, key->stats_wm);
+ found |= key_debug(intel, "flat shading",
+ old_key->flat_shade, key->flat_shade);
+ found |= key_debug(intel, "number of color buffers",
+ old_key->nr_color_regions, key->nr_color_regions);
+ found |= key_debug(intel, "sample alpha to coverage",
+ old_key->sample_alpha_to_coverage, key->sample_alpha_to_coverage);
+ found |= key_debug(intel, "rendering to FBO",
+ old_key->render_to_fbo, key->render_to_fbo);
+ found |= key_debug(intel, "fragment color clamping",
+ old_key->clamp_fragment_color, key->clamp_fragment_color);
+ found |= key_debug(intel, "line smoothing",
+ old_key->line_aa, key->line_aa);
+ found |= key_debug(intel, "proj_attrib_mask",
+ old_key->proj_attrib_mask, key->proj_attrib_mask);
+ found |= key_debug(intel, "renderbuffer height",
+ old_key->drawable_height, key->drawable_height);
+ found |= key_debug(intel, "vertex shader outputs",
+ old_key->vp_outputs_written, key->vp_outputs_written);
+
+ found |= brw_debug_recompile_sampler_key(intel, &old_key->tex, &key->tex);
if (!found) {
perf_debug(" Something else\n");
diff --git a/src/mesa/drivers/dri/intel/intel_buffer_objects.c b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
index f94c6f5..34eb7c9 100644
--- a/src/mesa/drivers/dri/intel/intel_buffer_objects.c
+++ b/src/mesa/drivers/dri/intel/intel_buffer_objects.c
@@ -227,7 +227,7 @@ intel_bufferobj_subdata(struct gl_context * ctx,
drm_intel_bo_unreference(temp_bo);
}
} else {
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ if (unlikely(intel->perf_debug)) {
if (drm_intel_bo_busy(intel_obj->buffer)) {
perf_debug("Stalling on the GPU in glBufferSubData().\n");
}
diff --git a/src/mesa/drivers/dri/intel/intel_context.c b/src/mesa/drivers/dri/intel/intel_context.c
index 9bb7156..4e7e688 100644
--- a/src/mesa/drivers/dri/intel/intel_context.c
+++ b/src/mesa/drivers/dri/intel/intel_context.c
@@ -803,6 +803,8 @@ intelInitContext(struct intel_context *intel,
"shader_time debugging requires gen7 (Ivybridge) or better.\n");
INTEL_DEBUG &= ~DEBUG_SHADER_TIME;
}
+ if (INTEL_DEBUG & DEBUG_PERF)
+ intel->perf_debug = true;
if (INTEL_DEBUG & DEBUG_AUB)
drm_intel_bufmgr_gem_set_aub_dump(intel->bufmgr, true);
diff --git a/src/mesa/drivers/dri/intel/intel_context.h b/src/mesa/drivers/dri/intel/intel_context.h
index c72176a..42db5a7 100644
--- a/src/mesa/drivers/dri/intel/intel_context.h
+++ b/src/mesa/drivers/dri/intel/intel_context.h
@@ -239,6 +239,13 @@ struct intel_context
bool no_batch_wrap;
bool tnl_pipeline_running; /**< Set while i915's _tnl_run_pipeline. */
+ /**
+ * Set if we're either a debug context or the INTEL_DEBUG=perf environment
+ * variable is set. This is the flag indicating to do expensive work that
+ * might lead to a perf_debug() call.
+ */
+ bool perf_debug;
+
struct
{
GLuint id;
@@ -462,8 +469,14 @@ extern int INTEL_DEBUG;
} while(0)
#define perf_debug(...) do { \
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \
- dbg_printf(__VA_ARGS__); \
+ static GLuint msg_id = 0; \
+ if (unlikely(INTEL_DEBUG & DEBUG_PERF)) \
+ dbg_printf(__VA_ARGS__); \
+ if (intel->perf_debug) \
+ _mesa_gl_debug(&intel->ctx, &msg_id, \
+ MESA_DEBUG_TYPE_PERFORMANCE, \
+ MESA_DEBUG_SEVERITY_MEDIUM, \
+ __VA_ARGS__); \
} while(0)
#define WARN_ONCE(cond, fmt...) do { \
diff --git a/src/mesa/drivers/dri/intel/intel_regions.c b/src/mesa/drivers/dri/intel/intel_regions.c
index 89d91b0..4ff8872 100644
--- a/src/mesa/drivers/dri/intel/intel_regions.c
+++ b/src/mesa/drivers/dri/intel/intel_regions.c
@@ -123,7 +123,7 @@ intel_region_map(struct intel_context *intel, struct intel_region *region,
* flush is only needed on first map of the buffer.
*/
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ if (unlikely(intel->perf_debug)) {
if (drm_intel_bo_busy(region->bo)) {
perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
}
diff --git a/src/mesa/drivers/dri/intel/intel_tex_copy.c b/src/mesa/drivers/dri/intel/intel_tex_copy.c
index 085b6fd..6043ed2 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_copy.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_copy.c
@@ -114,8 +114,7 @@ intel_copy_texsubimage(struct intel_context *intel,
}
if (!copy_supported && !copy_supported_with_alpha_override) {
- if (unlikely(INTEL_DEBUG & DEBUG_PERF))
- fprintf(stderr, "%s mismatched formats %s, %s\n",
+ perf_debug("%s mismatched formats %s, %s\n",
__FUNCTION__,
_mesa_get_format_name(intelImage->base.Base.TexFormat),
_mesa_get_format_name(intel_rb_format(irb)));
diff --git a/src/mesa/drivers/dri/intel/intel_tex_subimage.c b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
index 7a2f713..b02e5fc 100644
--- a/src/mesa/drivers/dri/intel/intel_tex_subimage.c
+++ b/src/mesa/drivers/dri/intel/intel_tex_subimage.c
@@ -230,7 +230,7 @@ intel_texsubimage_tiled_memcpy(struct gl_context * ctx,
intel_batchbuffer_flush(intel);
}
- if (unlikely(INTEL_DEBUG & DEBUG_PERF)) {
+ if (unlikely(intel->perf_debug)) {
if (drm_intel_bo_busy(bo)) {
perf_debug("Mapping a busy BO, causing a stall on the GPU.\n");
}
--
1.7.10.4
More information about the mesa-dev
mailing list