[Mesa-dev] [PATCH 2/2] gallium/radeon: remove old_fence parameter from r600_gfx_write_event_eop

Nicolai Hähnle nhaehnle at gmail.com
Fri Aug 18 11:21:36 UTC 2017


For both patches in this series:

Reviewed-by: Nicolai Hähnle <nicolai.haehnle at amd.com>


On 17.08.2017 19:58, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> just use the new scratch buffer.
> ---
>   src/gallium/drivers/radeon/r600_pipe_common.c | 15 +++++++++++----
>   src/gallium/drivers/radeon/r600_pipe_common.h |  3 +--
>   src/gallium/drivers/radeon/r600_query.c       |  6 +++---
>   src/gallium/drivers/radeonsi/si_perfcounter.c |  2 +-
>   src/gallium/drivers/radeonsi/si_state_draw.c  |  3 +--
>   5 files changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
> index b28f385..dc54b5e 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.c
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.c
> @@ -96,22 +96,21 @@ void radeon_shader_binary_clean(struct ac_shader_binary *b)
>    * \param data_sel	1 = fence, 3 = timestamp
>    * \param buf		Buffer
>    * \param va		GPU address
>    * \param old_value	Previous fence value (for a bug workaround)
>    * \param new_value	Fence value to write for this event.
>    */
>   void r600_gfx_write_event_eop(struct r600_common_context *ctx,
>   			      unsigned event, unsigned event_flags,
>   			      unsigned data_sel,
>   			      struct r600_resource *buf, uint64_t va,
> -			      uint32_t old_fence, uint32_t new_fence,
> -			      unsigned query_type)
> +			      uint32_t new_fence, unsigned query_type)
>   {
>   	struct radeon_winsys_cs *cs = ctx->gfx.cs;
>   	unsigned op = EVENT_TYPE(event) |
>   		      EVENT_INDEX(5) |
>   		      event_flags;
>   
>   	if (ctx->chip_class >= GFX9) {
>   		/* A ZPASS_DONE or PIXEL_STAT_DUMP_EVENT (of the DB occlusion
>   		 * counters) must immediately precede every timestamp event to
>   		 * prevent a GPU hang on GFX9.
> @@ -139,30 +138,36 @@ void r600_gfx_write_event_eop(struct r600_common_context *ctx,
>   		radeon_emit(cs, op);
>   		radeon_emit(cs, EOP_DATA_SEL(data_sel));
>   		radeon_emit(cs, va);		/* address lo */
>   		radeon_emit(cs, va >> 32);	/* address hi */
>   		radeon_emit(cs, new_fence);	/* immediate data lo */
>   		radeon_emit(cs, 0); /* immediate data hi */
>   		radeon_emit(cs, 0); /* unused */
>   	} else {
>   		if (ctx->chip_class == CIK ||
>   		    ctx->chip_class == VI) {
> +			struct r600_resource *scratch = ctx->eop_bug_scratch;
> +			uint64_t va = scratch->gpu_address;
> +
>   			/* Two EOP events are required to make all engines go idle
>   			 * (and optional cache flushes executed) before the timestamp
>   			 * is written.
>   			 */
>   			radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
>   			radeon_emit(cs, op);
>   			radeon_emit(cs, va);
>   			radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
> -			radeon_emit(cs, old_fence); /* immediate data */
> +			radeon_emit(cs, 0); /* immediate data */
>   			radeon_emit(cs, 0); /* unused */
> +
> +			radeon_add_to_buffer_list(ctx, &ctx->gfx, scratch,
> +						  RADEON_USAGE_WRITE, RADEON_PRIO_QUERY);
>   		}
>   
>   		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
>   		radeon_emit(cs, op);
>   		radeon_emit(cs, va);
>   		radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel));
>   		radeon_emit(cs, new_fence); /* immediate data */
>   		radeon_emit(cs, 0); /* unused */
>   	}
>   
> @@ -672,21 +677,23 @@ bool r600_common_context_init(struct r600_common_context *rctx,
>   	}
>   
>   	rctx->b.set_device_reset_callback = r600_set_device_reset_callback;
>   
>   	r600_init_context_texture_functions(rctx);
>   	r600_init_viewport_functions(rctx);
>   	r600_streamout_init(rctx);
>   	r600_query_init(rctx);
>   	cayman_init_msaa(&rctx->b);
>   
> -	if (rctx->chip_class == GFX9) {
> +	if (rctx->chip_class == CIK ||
> +	    rctx->chip_class == VI ||
> +	    rctx->chip_class == GFX9) {
>   		rctx->eop_bug_scratch = (struct r600_resource*)
>   			pipe_buffer_create(&rscreen->b, 0, PIPE_USAGE_DEFAULT,
>   					   16 * rscreen->info.num_render_backends);
>   		if (!rctx->eop_bug_scratch)
>   			return false;
>   	}
>   
>   	rctx->allocator_zeroed_memory =
>   		u_suballocator_create(&rctx->b, rscreen->info.gart_page_size,
>   				      0, PIPE_USAGE_DEFAULT, 0, true);
> diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
> index 952fb77..c7e4c8a 100644
> --- a/src/gallium/drivers/radeon/r600_pipe_common.h
> +++ b/src/gallium/drivers/radeon/r600_pipe_common.h
> @@ -741,22 +741,21 @@ r600_invalidate_resource(struct pipe_context *ctx,
>   			 struct pipe_resource *resource);
>   void r600_replace_buffer_storage(struct pipe_context *ctx,
>   				 struct pipe_resource *dst,
>   				 struct pipe_resource *src);
>   
>   /* r600_common_pipe.c */
>   void r600_gfx_write_event_eop(struct r600_common_context *ctx,
>   			      unsigned event, unsigned event_flags,
>   			      unsigned data_sel,
>   			      struct r600_resource *buf, uint64_t va,
> -			      uint32_t old_fence, uint32_t new_fence,
> -			      unsigned query_type);
> +			      uint32_t new_fence, unsigned query_type);
>   unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen);
>   void r600_gfx_wait_fence(struct r600_common_context *ctx,
>   			 uint64_t va, uint32_t ref, uint32_t mask);
>   void r600_draw_rectangle(struct blitter_context *blitter,
>   			 int x1, int y1, int x2, int y2, float depth,
>   			 enum blitter_attrib_type type,
>   			 const union pipe_color_union *attrib);
>   bool r600_common_screen_init(struct r600_common_screen *rscreen,
>   			     struct radeon_winsys *ws);
>   void r600_destroy_common_screen(struct r600_common_screen *rscreen);
> diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
> index 53b7955..bccfe7f 100644
> --- a/src/gallium/drivers/radeon/r600_query.c
> +++ b/src/gallium/drivers/radeon/r600_query.c
> @@ -773,21 +773,21 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
>   					COPY_DATA_DST_SEL(COPY_DATA_MEM_ASYNC));
>   			radeon_emit(cs, 0);
>   			radeon_emit(cs, 0);
>   			radeon_emit(cs, va);
>   			radeon_emit(cs, va >> 32);
>   		} else {
>   			/* Write the timestamp after the last draw is done.
>   			 * (bottom-of-pipe)
>   			 */
>   			r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
> -						 0, 3, NULL, va, 0, 0, query->b.type);
> +						 0, 3, NULL, va, 0, query->b.type);
>   		}
>   		break;
>   	case PIPE_QUERY_PIPELINE_STATISTICS:
>   		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
>   		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
>   		radeon_emit(cs, va);
>   		radeon_emit(cs, va >> 32);
>   		break;
>   	default:
>   		assert(0);
> @@ -858,21 +858,21 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
>   	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
>   		va += 16;
>   		for (unsigned stream = 0; stream < R600_MAX_STREAMS; ++stream)
>   			emit_sample_streamout(cs, va + 32 * stream, stream);
>   		break;
>   	case PIPE_QUERY_TIME_ELAPSED:
>   		va += 8;
>   		/* fall through */
>   	case PIPE_QUERY_TIMESTAMP:
>   		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS,
> -					 0, 3, NULL, va, 0, 0, query->b.type);
> +					 0, 3, NULL, va, 0, query->b.type);
>   		fence_va = va + 8;
>   		break;
>   	case PIPE_QUERY_PIPELINE_STATISTICS: {
>   		unsigned sample_size = (query->result_size - 8) / 2;
>   
>   		va += sample_size;
>   		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
>   		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
>   		radeon_emit(cs, va);
>   		radeon_emit(cs, va >> 32);
> @@ -881,21 +881,21 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
>   		break;
>   	}
>   	default:
>   		assert(0);
>   	}
>   	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
>   			RADEON_PRIO_QUERY);
>   
>   	if (fence_va)
>   		r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
> -					 query->buffer.buf, fence_va, 0, 0x80000000,
> +					 query->buffer.buf, fence_va, 0x80000000,
>   					 query->b.type);
>   }
>   
>   static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
>   				    struct r600_query_hw *query)
>   {
>   	uint64_t va;
>   
>   	if (!query->buffer.buf)
>   		return; // previous buffer allocation failure
> diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
> index 38aa9ad..50b8d1c 100644
> --- a/src/gallium/drivers/radeonsi/si_perfcounter.c
> +++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
> @@ -608,21 +608,21 @@ static void si_pc_emit_start(struct r600_common_context *ctx,
>   }
>   
>   /* Note: The buffer was already added in si_pc_emit_start, so we don't have to
>    * do it again in here. */
>   static void si_pc_emit_stop(struct r600_common_context *ctx,
>   			    struct r600_resource *buffer, uint64_t va)
>   {
>   	struct radeon_winsys_cs *cs = ctx->gfx.cs;
>   
>   	r600_gfx_write_event_eop(ctx, EVENT_TYPE_BOTTOM_OF_PIPE_TS, 0, 1,
> -				 buffer, va, 1, 0, 0);
> +				 buffer, va, 0, 0);
>   	r600_gfx_wait_fence(ctx, va, 0, 0xffffffff);
>   
>   	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>   	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
>   	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>   	radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_PERFCOUNTER_STOP) | EVENT_INDEX(0));
>   	radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
>   			       S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
>   			       S_036020_PERFMON_SAMPLE_ENABLE(1));
>   }
> diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
> index 0961289..a26e38d 100644
> --- a/src/gallium/drivers/radeonsi/si_state_draw.c
> +++ b/src/gallium/drivers/radeonsi/si_state_draw.c
> @@ -891,21 +891,21 @@ void si_emit_cache_flush(struct si_context *sctx)
>   					 S_0085F0_CB2_DEST_BASE_ENA(1) |
>   					 S_0085F0_CB3_DEST_BASE_ENA(1) |
>   					 S_0085F0_CB4_DEST_BASE_ENA(1) |
>   					 S_0085F0_CB5_DEST_BASE_ENA(1) |
>   					 S_0085F0_CB6_DEST_BASE_ENA(1) |
>   					 S_0085F0_CB7_DEST_BASE_ENA(1);
>   
>   			/* Necessary for DCC */
>   			if (rctx->chip_class == VI)
>   				r600_gfx_write_event_eop(rctx, V_028A90_FLUSH_AND_INV_CB_DATA_TS,
> -							 0, 0, NULL, 0, 0, 0, 0);
> +							 0, 0, NULL, 0, 0, 0);
>   		}
>   		if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_DB)
>   			cp_coher_cntl |= S_0085F0_DB_ACTION_ENA(1) |
>   					 S_0085F0_DB_DEST_BASE_ENA(1);
>   	}
>   
>   	if (rctx->flags & SI_CONTEXT_FLUSH_AND_INV_CB) {
>   		/* Flush CMASK/FMASK/DCC. SURFACE_SYNC will wait for idle. */
>   		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
>   		radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
> @@ -992,21 +992,20 @@ void si_emit_cache_flush(struct si_context *sctx)
>   					 SI_CONTEXT_INV_VMEM_L1);
>   			sctx->b.num_L2_invalidates++;
>   		}
>   
>   		/* Do the flush (enqueue the event and wait for it). */
>   		va = sctx->wait_mem_scratch->gpu_address;
>   		sctx->wait_mem_number++;
>   
>   		r600_gfx_write_event_eop(rctx, cb_db_event, tc_flags, 1,
>   					 sctx->wait_mem_scratch, va,
> -					 sctx->wait_mem_number - 1,
>   					 sctx->wait_mem_number, 0);
>   		r600_gfx_wait_fence(rctx, va, sctx->wait_mem_number, 0xffffffff);
>   	}
>   
>   	/* Make sure ME is idle (it executes most packets) before continuing.
>   	 * This prevents read-after-write hazards between PFP and ME.
>   	 */
>   	if (cp_coher_cntl ||
>   	    (rctx->flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
>   			    SI_CONTEXT_INV_VMEM_L1 |
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list