[Intel-gfx] [PATCH 02/10] i965: copy in system routine, reserve extra scratch
Eric Anholt
eric at anholt.net
Mon Jul 18 20:13:26 CEST 2011
On Wed, 13 Jul 2011 13:51:44 -0700, Ben Widawsky <ben at bwidawsk.net> wrote:
> The debugger shared memory needs to be a fixed size. Since this is
> scratch memory that is already used by register spilling, add
> appropriate hooks to do the right thing when debugging.
>
> v2: Include the bytes for a known good system routine and upload it into
> the instruction state cache
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> ---
> src/mesa/drivers/dri/i965/Makefile | 1 +
> src/mesa/drivers/dri/i965/brw_context.h | 6 +
> src/mesa/drivers/dri/i965/brw_state.h | 6 +-
> src/mesa/drivers/dri/i965/brw_state_cache.c | 49 +-
> src/mesa/drivers/dri/i965/brw_state_upload.c | 18 +-
> src/mesa/drivers/dri/i965/brw_wm.c | 24 +-
> src/mesa/drivers/dri/i965/brw_wm.h | 2 +
> src/mesa/drivers/dri/i965/brw_wm_debug.c | 29 +
> src/mesa/drivers/dri/i965/gen6_wm_sr.c | 31 +
> src/mesa/drivers/dri/i965/gen_wm_sr.g4a |32826 ++++++++++++++++++++++++++
.g4a is the extension for assembly source, not for a compiled hex dump.
I'd make it a .c file -- seems like you need just a touch more
processing to generate that in place in gen6_wm_sr.c.
> +/**
> + * Upload a debugger to the instruction cache. Currently only 1 debugger at a
> + * time is supported. The debugger will live at the beginning of the state bo.
> + */
> +void
> +brw_upload_debugger_cache(struct brw_context *brw,
> + void *system_routine,
> + uint32_t length)
s/length/size/ to be consistent with the rest of the driver.
> +{
> + uint32_t sip_size = 0;
> + struct brw_cache *cache = &brw->cache;
> + /* It is useful to have an offset so we can see if the SIP is != 0 */
> + const uint32_t sip_offset = 64;
> + struct brw_cache_item *item;
> +
> + if (cache->debugger) {
> + cache->persistent_size -= cache->debugger->size;
> + cache->debugger = NULL;
> + brw->wm.sip_offset = 0;
> + }
Why check here? We're only called if cache->debugger, right?
> +
> + if (!cache->persistent_size)
> + sip_size += sip_offset;
We're only called once, so we know if cache->persistent_size, right?
> + sip_size += length;
> +
> + item = CALLOC_STRUCT(brw_cache_item);
> + if (!item)
> + return;
> + item->size = sip_size;
> + item->key = system_routine;
> +
> + brw->wm.sip_offset = cache->persistent_size + sip_offset;
> + drm_intel_bo_subdata(cache->bo, brw->wm.sip_offset, length, system_routine);
> +
> + cache->persistent_size += sip_size;
> + cache->debugger = item;
> + brw_upload_item_data(cache, item, system_routine);
> +}
> +
> void
> brw_upload_cache(struct brw_cache *cache,
> enum brw_cache_id cache_id,
> @@ -318,7 +357,7 @@ brw_upload_cache(struct brw_cache *cache,
> }
> static void
> @@ -357,8 +396,9 @@ brw_clear_cache(struct brw_context *brw, struct brw_cache *cache)
>
> /* Start putting programs into the start of the BO again, since
> * we'll never find the old results.
> + * Save space at beginning for persistent items.
> */
> - cache->next_offset = 0;
> + cache->next_offset = cache->persistent_size;
>
> /* We need to make sure that the programs get regenerated, since
> * any offsets leftover in brw_context will no longer be valid.
You claim to hold on to persistent_size, but we don't actually copy the
data to a new BO... Oh, my. It looks like I totally broke this
function, by letting later uploads stomp over old programs if the cache
was cleared mid-batchbuffer. Wonder if this is one of the P1 bugs.
> @@ -388,7 +428,10 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
>
> brw_clear_cache(brw, cache);
> free(cache->items);
> + if (cache->debugger)
> + free(cache->debugger);
> cache->items = NULL;
> + cache->debugger = NULL;
> cache->size = 0;
> }
You don't need to check non-NULL for free().
> diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
> index 76ffa0d..cc78f73 100644
> --- a/src/mesa/drivers/dri/i965/brw_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
> @@ -33,9 +33,13 @@
>
> #include "brw_context.h"
> #include "brw_state.h"
> +#include "brw_wm.h"
> #include "intel_batchbuffer.h"
> #include "intel_buffers.h"
>
> +extern char * const gen_wm_debugger;
> +extern const int gen_wm_debugger_size;
> +
> /* This is used to initialize brw->state.atoms[]. We could use this
> * list directly except for a single atom, brw_constant_buffer, which
> * has a .dirty value which changes according to the parameters of the
> @@ -239,8 +243,18 @@ void brw_init_state( struct brw_context *brw )
> {
> const struct brw_tracked_state **atoms;
> int num_atoms;
> -
> - brw_init_caches(brw);
> + uint32_t rsvd_cache_space = 0;
Set-but-unused variable?
> if (brw->intel.gen >= 7) {
> atoms = gen7_atoms;
> diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c
> index b0dfdd5..b97542f 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm.c
> @@ -194,6 +194,7 @@ bool do_wm_prog(struct brw_context *brw,
> struct brw_wm_compile *c;
> const GLuint *program;
> GLuint program_size;
> + uint32_t total_scratch = 0;
>
> c = brw->wm.compile_data;
> if (c == NULL) {
> @@ -239,18 +240,17 @@ bool do_wm_prog(struct brw_context *brw,
> c->prog_data.dispatch_width = 16;
> }
>
> - /* Scratch space is used for register spilling */
> - if (c->last_scratch) {
> - uint32_t total_scratch;
> -
> + /* Scratch space is used for register spilling and debugger shared memory.
> + * Since the debugger must know the size in advance, we try for a fixed size,
> + * and hope it's large enough
> + */
> + if (c->last_scratch && !brw->wm.debugging) {
> /* Per-thread scratch space is power-of-two sized. */
> for (c->prog_data.total_scratch = 1024;
> c->prog_data.total_scratch <= c->last_scratch;
> c->prog_data.total_scratch *= 2) {
> /* empty */
> }
> - total_scratch = c->prog_data.total_scratch * brw->wm_max_threads;
> -
> if (brw->wm.scratch_bo && total_scratch > brw->wm.scratch_bo->size) {
> drm_intel_bo_unreference(brw->wm.scratch_bo);
> brw->wm.scratch_bo = NULL;
> @@ -261,8 +261,12 @@ bool do_wm_prog(struct brw_context *brw,
> total_scratch,
> 4096);
> }
> - }
> - else {
> + } else if (brw->wm.debugging) {
> + uint32_t thread_scratch = brw->wm.scratch_bo->size / brw->wm_max_threads;
> + assert(brw->wm.scratch_bo);
> + assert(thread_scratch / 2 >= c->last_scratch);
> + c->prog_data.total_scratch = thread_scratch;
> + } else {
This looks like it breaks the !brw->wm.debugging case by not setting
total_scratch to nonzero. I'm not sure why the variable got moved out
if you're shadowing it in the new branch in this last hunk.
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_debug.c b/src/mesa/drivers/dri/i965/brw_wm_debug.c
> index 6a91251..cacb24a 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_debug.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_debug.c
> @@ -31,8 +31,10 @@
>
>
> #include "brw_context.h"
> +#include "brw_state.h"
> #include "brw_wm.h"
>
> +extern char gen_wm_debugger[];
>
> void brw_wm_print_value( struct brw_wm_compile *c,
> struct brw_wm_value *value )
> @@ -172,3 +174,30 @@ void brw_wm_print_program( struct brw_wm_compile *c,
> printf("\n");
> }
>
> +#define SCRATCH_SIZE (512 * 1024)
> +void brw_wm_init_debug( struct brw_context *brw )
> +{
> + struct intel_context *intel = &brw->intel;
> + uint32_t name;
> + int ret;
> +
> + /* In the debug case, we allocate the buffer now because we'll need to attach
> + * with the debugger at a later time when flink will not work (the ring /
> + * struct_mutex are likely to be frozen). If we make the buffer visible early
> + * enough, the debugger can map it before the first breakpoint.
> + */
> + brw->wm.scratch_bo = drm_intel_bo_alloc(intel->bufmgr,
> + "wm scratch",
> + SCRATCH_SIZE * brw->wm_max_threads,
> + 4096);
> + assert(brw->wm.scratch_bo);
> +
> + drm_intel_bo_map(brw->wm.scratch_bo, 0);
> + memset(brw->wm.scratch_bo->virtual, 0xa5, SCRATCH_SIZE * brw->wm_max_threads);
> + drm_intel_bo_unmap(brw->wm.scratch_bo);
If you're mapping the buffer to write it, you should be passing true to
the second argument of the map. I'd just subdata this.
> + ret = drm_intel_bo_flink(brw->wm.scratch_bo, &name);
> + assert(ret == 0);
> +
> + brw->wm.debugging = true;
> +}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 197 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20110718/45927132/attachment.sig>
More information about the Intel-gfx
mailing list