[Intel-gfx] [PATCH 02/10] i965: copy in system routine, reserve extra scratch

Eric Anholt eric at anholt.net
Mon Jul 18 20:13:26 CEST 2011


On Wed, 13 Jul 2011 13:51:44 -0700, Ben Widawsky <ben at bwidawsk.net> wrote:
> The debugger shared memory needs to be a fixed size. Since this is
> scratch memory that is already used by register spilling, add
> appropriate hooks to do the right thing when debugging.
> 
> v2: Include the bytes for a known good system routine and upload it into
> the instruction state cache
> 
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
>  src/mesa/drivers/dri/i965/Makefile           |    1 +
>  src/mesa/drivers/dri/i965/brw_context.h      |    6 +
>  src/mesa/drivers/dri/i965/brw_state.h        |    6 +-
>  src/mesa/drivers/dri/i965/brw_state_cache.c  |   49 +-
>  src/mesa/drivers/dri/i965/brw_state_upload.c |   18 +-
>  src/mesa/drivers/dri/i965/brw_wm.c           |   24 +-
>  src/mesa/drivers/dri/i965/brw_wm.h           |    2 +
>  src/mesa/drivers/dri/i965/brw_wm_debug.c     |   29 +
>  src/mesa/drivers/dri/i965/gen6_wm_sr.c       |   31 +
>  src/mesa/drivers/dri/i965/gen_wm_sr.g4a      |32826 ++++++++++++++++++++++++++

.g4a is the extension for assembly source, not for a compiled hex dump.
I'd make it a .c file -- seems like you need just a touch more
processing to generate that in place in gen6_wm_sr.c.

> +/**
> + * Upload a debugger to the instruction cache. Currently only 1 debugger at a
> + * time is supported. The debugger will live at the beginning of the state bo.
> + */
> +void
> +brw_upload_debugger_cache(struct brw_context *brw,
> +			  void *system_routine,
> +			  uint32_t length)

s/length/size/ to be consistent with the rest of the driver.

> +{
> +   uint32_t sip_size = 0;
> +   struct brw_cache *cache = &brw->cache;
> +   /* It is useful to have an offset so we can see if the SIP is != 0 */
> +   const uint32_t sip_offset = 64;
> +   struct brw_cache_item *item;
> +
> +   if (cache->debugger) {
> +      cache->persistent_size -= cache->debugger->size;
> +      cache->debugger = NULL;
> +      brw->wm.sip_offset = 0;
> +   }

Why check here?  We're only called if cache->debugger, right?

> +
> +   if (!cache->persistent_size)
> +      sip_size += sip_offset;

We're only called once, so we know if cache->persistent_size, right?

> +   sip_size += length;
> +
> +   item  = CALLOC_STRUCT(brw_cache_item);
> +   if (!item)
> +      return;
> +   item->size = sip_size;
> +   item->key = system_routine;
> +
> +   brw->wm.sip_offset = cache->persistent_size + sip_offset;
> +   drm_intel_bo_subdata(cache->bo, brw->wm.sip_offset, length, system_routine);
> +
> +   cache->persistent_size += sip_size;
> +   cache->debugger = item;
> +   brw_upload_item_data(cache, item, system_routine);
> +}
> +
>  void
>  brw_upload_cache(struct brw_cache *cache,
>  		 enum brw_cache_id cache_id,
> @@ -318,7 +357,7 @@ brw_upload_cache(struct brw_cache *cache,
>  }

>  static void
> @@ -357,8 +396,9 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
>  
>     /* Start putting programs into the start of the BO again, since
>      * we'll never find the old results.
> +    * Save space at beginning for persistent items.
>      */
> -   cache->next_offset = 0;
> +   cache->next_offset = cache->persistent_size;
>  
>     /* We need to make sure that the programs get regenerated, since
>      * any offsets leftover in brw_context will no longer be valid.

You claim to hold on to persistent_size, but we don't actually copy the
data to a new BO...  Oh, my.  It looks like I totally broke this
function, by letting later uploads stomp over old programs if the cache
was cleared mid-batchbuffer.  Wonder if this is one of the P1 bugs.

> @@ -388,7 +428,10 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
>  
>     brw_clear_cache(brw, cache);
>     free(cache->items);
> +   if (cache->debugger)
> +      free(cache->debugger);
>     cache->items = NULL;
> +   cache->debugger = NULL;
>     cache->size = 0;
>  }

You don't need to check non-NULL for free().

> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 76ffa0d..cc78f73 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -33,9 +33,13 @@
>  
>  #include "brw_context.h"
>  #include "brw_state.h"
> +#include "brw_wm.h"
>  #include "intel_batchbuffer.h"
>  #include "intel_buffers.h"
>  
> +extern char * const gen_wm_debugger;
> +extern const int gen_wm_debugger_size;
> +
>  /* This is used to initialize brw->state.atoms[].  We could use this
>   * list directly except for a single atom, brw_constant_buffer, which
>   * has a .dirty value which changes according to the parameters of the
> @@ -239,8 +243,18 @@ void brw_init_state( struct brw_context *brw )
>  {
>     const struct brw_tracked_state **atoms;
>     int num_atoms;
> -
> -   brw_init_caches(brw);
> +   uint32_t rsvd_cache_space = 0;

Set-but-unused variable?

>     if (brw->intel.gen >= 7) {
>        atoms = gen7_atoms;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index b0dfdd5..b97542f 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -194,6 +194,7 @@ bool do_wm_prog(struct brw_context *brw,
>     struct brw_wm_compile *c;
>     const GLuint *program;
>     GLuint program_size;
> +   uint32_t total_scratch = 0;
>  
>     c = brw->wm.compile_data;
>     if (c == NULL) {
> @@ -239,18 +240,17 @@ bool do_wm_prog(struct brw_context *brw,
>        c->prog_data.dispatch_width = 16;
>     }
>  
> -   /* Scratch space is used for register spilling */
> -   if (c->last_scratch) {
> -      uint32_t total_scratch;
> -
> +   /* Scratch space is used for register spilling and debugger shared memory.
> +    * Since the debugger must know the size in advance, we try for a fixed size,
> +    * and hope it's large enough
> +    */
> +   if (c->last_scratch && !brw->wm.debugging) {
>        /* Per-thread scratch space is power-of-two sized. */
>        for (c->prog_data.total_scratch = 1024;
>  	   c->prog_data.total_scratch <= c->last_scratch;
>  	   c->prog_data.total_scratch *= 2) {
>  	 /* empty */
>        }
> -      total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
> -
>        if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
>  	 drm_intel_bo_unreference(brw->wm.scratch_bo);
>  	 brw->wm.scratch_bo = NULL;
> @@ -261,8 +261,12 @@ bool do_wm_prog(struct brw_context *brw,
>  						 total_scratch,
>  						 4096);
>        }
> -   }
> -   else {
> +   } else if (brw->wm.debugging) {
> +      uint32_t thread_scratch = brw->wm.scratch_bo->size / brw->wm_max_threads;
> +      assert(brw->wm.scratch_bo);
> +      assert(thread_scratch / 2 >= c->last_scratch);
> +      c->prog_data.total_scratch = thread_scratch;
> +   } else {

This looks like it breaks the !brw->wm.debugging case by not setting
total_scratch to nonzero.  I'm not sure why the variable got moved out
if you're shadowing it in the new branch in this last hunk.

> diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c
> index 6a91251..cacb24a 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c
> @@ -31,8 +31,10 @@
>                 
>  
>  #include "brw_context.h"
> +#include "brw_state.h"
>  #include "brw_wm.h"
>  
> +extern char gen_wm_debugger[];
>  
>  void brw_wm_print_value( struct brw_wm_compile *c,
>  		       struct brw_wm_value *value )
> @@ -172,3 +174,30 @@ void brw_wm_print_program( struct brw_wm_compile *c,
>     printf("\n");
>  }
>  
> +#define SCRATCH_SIZE (512 * 1024)
> +void brw_wm_init_debug( struct brw_context *brw )
> +{
> +   struct intel_context *intel = &brw->intel;
> +   uint32_t name;
> +   int ret;
> +
> +   /* In the debug case, we allocate the buffer now because we'll need to attach
> +    * with the debugger at a later time when flink will not work (the ring /
> +    * struct_mutex are likely to be frozen). If we make the buffer visible early
> +    * enough, the debugger can map it before the first breakpoint.
> +    */
> +   brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
> +					   "wm scratch",
> +					   SCRATCH_SIZE * brw->wm_max_threads,
> +					   4096);
> +   assert(brw->wm.scratch_bo);
> +
> +   drm_intel_bo_map(brw->wm.scratch_bo, 0);
> +   memset(brw->wm.scratch_bo->virtual, 0xa5, SCRATCH_SIZE * brw->wm_max_threads);
> +   drm_intel_bo_unmap(brw->wm.scratch_bo);

If you're mapping the buffer to write it, you should be passing true to
the second argument of the map.  I'd just subdata this.

> +   ret = drm_intel_bo_flink(brw->wm.scratch_bo, &name);
> +   assert(ret == 0);
> +
> +   brw->wm.debugging = true;
> +}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20110718/45927132/attachment.sig>


More information about the Intel-gfx mailing list