[Mesa-dev] [PATCH v2 4/5] gallium/radeon: emit relocations for query fences

Dieter Nützel Dieter at nuetzel-hh.de
Fri Sep 30 21:23:19 UTC 2016


Fixed all Blender issues on r600g/NI/Turks XT.

Apart from the fact that it's already committed, you have my

Tested-by

Thank you Nicolai!

Dieter.

Am 30.09.2016 11:52, schrieb Nicolai Hähnle:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> This is only needed for r600 which doesn't have ARB_query_buffer_object and
> therefore wouldn't really need the fences, but let's be optimistic about
> filling in this feature gap eventually.
> 
> Cc: Dieter Nützel <Dieter at nuetzel-hh.de>
> ---
>  src/gallium/drivers/radeon/r600_pipe_common.c |  7 ++++++-
>  src/gallium/drivers/radeon/r600_pipe_common.h |  2 +-
>  src/gallium/drivers/radeon/r600_query.c       | 13 +++++++------
>  src/gallium/drivers/radeonsi/si_perfcounter.c |  2 +-
>  4 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
> b/src/gallium/drivers/radeon/r600_pipe_common.c
> index e9377b2..6dc92cb 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -73,21 +73,21 @@ void radeon_shader_binary_clean(struct
> radeon_shader_binary *b)
>  	FREE(b->global_symbol_offsets);
>  	FREE(b->relocs);
>  	FREE(b->disasm_string);
>  	FREE(b->llvm_ir_string);
>  }
> 
>  /*
>   * pipe_context
>   */
> 
> -void r600_gfx_write_fence(struct r600_common_context *ctx,
> +void r600_gfx_write_fence(struct r600_common_context *ctx, struct
> r600_resource *buf,
>  			  uint64_t va, uint32_t old_value, uint32_t new_value)
>  {
>  	struct radeon_winsys_cs *cs = ctx->gfx.cs;
> 
>  	if (ctx->chip_class == CIK) {
>  		/* Two EOP events are required to make all engines go idle
>  		 * (and optional cache flushes executed) before the timestamp
>  		 * is written.
>  		 */
>  		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
> @@ -99,29 +99,34 @@ void r600_gfx_write_fence(struct 
> r600_common_context *ctx,
>  		radeon_emit(cs, 0); /* unused */
>  	}
> 
>  	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
>  	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) |
>  			EVENT_INDEX(5));
>  	radeon_emit(cs, va);
>  	radeon_emit(cs, (va >> 32) | EOP_DATA_SEL(1));
>  	radeon_emit(cs, new_value); /* immediate data */
>  	radeon_emit(cs, 0); /* unused */
> +
> +	r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, 
> RADEON_PRIO_QUERY);
>  }
> 
>  unsigned r600_gfx_write_fence_dwords(struct r600_common_screen 
> *screen)
>  {
>  	unsigned dwords = 6;
> 
>  	if (screen->chip_class == CIK)
>  		dwords *= 2;
> 
> +	if (!screen->info.has_virtual_memory)
> +		dwords += 2;
> +
>  	return dwords;
>  }
> 
>  void r600_gfx_wait_fence(struct r600_common_context *ctx,
>  			 uint64_t va, uint32_t ref, uint32_t mask)
>  {
>  	struct radeon_winsys_cs *cs = ctx->gfx.cs;
> 
>  	radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
>  	radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1));
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
> b/src/gallium/drivers/radeon/r600_pipe_common.h
> index ec7f7c0..cdcc80b 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -696,21 +696,21 @@ struct pipe_resource *
> r600_aligned_buffer_create(struct pipe_screen *screen,
>  						  unsigned alignment);
>  struct pipe_resource *
>  r600_buffer_from_user_memory(struct pipe_screen *screen,
>  			     const struct pipe_resource *templ,
>  			     void *user_memory);
>  void
>  r600_invalidate_resource(struct pipe_context *ctx,
>  			 struct pipe_resource *resource);
> 
>  /* r600_common_pipe.c */
> -void r600_gfx_write_fence(struct r600_common_context *ctx,
> +void r600_gfx_write_fence(struct r600_common_context *ctx, struct
> r600_resource *buf,
>  			  uint64_t va, uint32_t old_value, uint32_t new_value);
>  unsigned r600_gfx_write_fence_dwords(struct r600_common_screen 
> *screen);
>  void r600_gfx_wait_fence(struct r600_common_context *ctx,
>  			 uint64_t va, uint32_t ref, uint32_t mask);
>  void r600_draw_rectangle(struct blitter_context *blitter,
>  			 int x1, int y1, int x2, int y2, float depth,
>  			 enum blitter_attrib_type type,
>  			 const union pipe_color_union *attrib);
>  bool r600_common_screen_init(struct r600_common_screen *rscreen,
>  			     struct radeon_winsys *ws);
> diff --git a/src/gallium/drivers/radeon/r600_query.c
> b/src/gallium/drivers/radeon/r600_query.c
> index 0908a6c..925c950 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -609,32 +609,32 @@ static void r600_query_hw_emit_start(struct
> r600_common_context *ctx,
> 
>  	ctx->num_cs_dw_queries_suspend += query->num_cs_dw_end;
>  }
> 
>  static void r600_query_hw_do_emit_stop(struct r600_common_context 
> *ctx,
>  				       struct r600_query_hw *query,
>  				       struct r600_resource *buffer,
>  				       uint64_t va)
>  {
>  	struct radeon_winsys_cs *cs = ctx->gfx.cs;
> +	uint64_t fence_va = 0;
> 
>  	switch (query->b.type) {
>  	case PIPE_QUERY_OCCLUSION_COUNTER:
>  	case PIPE_QUERY_OCCLUSION_PREDICATE:
>  		va += 8;
>  		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
>  		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
>  		radeon_emit(cs, va);
>  		radeon_emit(cs, (va >> 32) & 0xFFFF);
> 
> -		va += ctx->max_db * 16 - 8;
> -		r600_gfx_write_fence(ctx, va, 0, 0x80000000);
> +		fence_va = va + ctx->max_db * 16 - 8;
>  		break;
>  	case PIPE_QUERY_PRIMITIVES_EMITTED:
>  	case PIPE_QUERY_PRIMITIVES_GENERATED:
>  	case PIPE_QUERY_SO_STATISTICS:
>  	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
>  		va += query->result_size/2;
>  		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
>  		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | 
> EVENT_INDEX(3));
>  		radeon_emit(cs, va);
>  		radeon_emit(cs, (va >> 32) & 0xFFFF);
> @@ -643,41 +643,42 @@ static void r600_query_hw_do_emit_stop(struct
> r600_common_context *ctx,
>  		va += 8;
>  		/* fall through */
>  	case PIPE_QUERY_TIMESTAMP:
>  		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
>  		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | 
> EVENT_INDEX(5));
>  		radeon_emit(cs, va);
>  		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
>  		radeon_emit(cs, 0);
>  		radeon_emit(cs, 0);
> 
> -		va += 8;
> -		r600_gfx_write_fence(ctx, va, 0, 0x80000000);
> +		fence_va = va + 8;
>  		break;
>  	case PIPE_QUERY_PIPELINE_STATISTICS: {
>  		unsigned sample_size = (query->result_size - 8) / 2;
> 
>  		va += sample_size;
>  		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
>  		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | 
> EVENT_INDEX(2));
>  		radeon_emit(cs, va);
>  		radeon_emit(cs, (va >> 32) & 0xFFFF);
> 
> -		va += sample_size;
> -		r600_gfx_write_fence(ctx, va, 0, 0x80000000);
> +		fence_va = va + sample_size;
>  		break;
>  	}
>  	default:
>  		assert(0);
>  	}
>  	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, 
> RADEON_USAGE_WRITE,
>  			RADEON_PRIO_QUERY);
> +
> +	if (fence_va)
> +		r600_gfx_write_fence(ctx, query->buffer.buf, fence_va, 0, 
> 0x80000000);
>  }
> 
>  static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
>  				    struct r600_query_hw *query)
>  {
>  	uint64_t va;
> 
>  	if (!query->buffer.buf)
>  		return; // previous buffer allocation failure
> 
> diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c
> b/src/gallium/drivers/radeonsi/si_perfcounter.c
> index d0c5392..0f5c28c 100644
> --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
> +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
> @@ -584,21 +584,21 @@ static void si_pc_emit_start(struct
> r600_common_context *ctx,
>  			       S_036020_PERFMON_STATE(V_036020_START_COUNTING));
>  }
> 
>  /* Note: The buffer was already added in si_pc_emit_start, so we don't 
> have to
>   * do it again in here. */
>  static void si_pc_emit_stop(struct r600_common_context *ctx,
>  			    struct r600_resource *buffer, uint64_t va)
>  {
>  	struct radeon_winsys_cs *cs = ctx->gfx.cs;
> 
> -	r600_gfx_write_fence(ctx, va, 1, 0);
> +	r600_gfx_write_fence(ctx, buffer, va, 1, 0);
>  	r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
> 
>  	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>  	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | 
> EVENT_INDEX(0));
>  	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>  	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | 
> EVENT_INDEX(0));
>  	radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
>  			       S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
>  			       S_036020_PERFMON_SAMPLE_ENABLE(1));
>  }


More information about the mesa-dev mailing list