[Mesa-dev] [PATCH 3/6] radeonsi: make si_cp_wait_mem more configurable
Marek Olšák
maraeo at gmail.com
Fri Dec 14 21:23:57 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_fence.c | 6 ++----
src/gallium/drivers/radeonsi/si_perfcounter.c | 2 +-
src/gallium/drivers/radeonsi/si_pipe.h | 2 +-
src/gallium/drivers/radeonsi/si_query.c | 3 ++-
src/gallium/drivers/radeonsi/si_state_draw.c | 3 ++-
5 files changed, 8 insertions(+), 8 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_fence.c b/src/gallium/drivers/radeonsi/si_fence.c
index d385f445774..b6920c95e34 100644
--- a/src/gallium/drivers/radeonsi/si_fence.c
+++ b/src/gallium/drivers/radeonsi/si_fence.c
@@ -153,27 +153,25 @@ unsigned si_cp_write_fence_dwords(struct si_screen *screen)
{
unsigned dwords = 6;
if (screen->info.chip_class == CIK ||
screen->info.chip_class == VI)
dwords *= 2;
return dwords;
}
-void si_cp_wait_mem(struct si_context *ctx,
+void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs,
uint64_t va, uint32_t ref, uint32_t mask, unsigned flags)
{
- struct radeon_cmdbuf *cs = ctx->gfx_cs;
-
radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
- radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1) | flags);
+ radeon_emit(cs, WAIT_REG_MEM_MEM_SPACE(1) | flags);
radeon_emit(cs, va);
radeon_emit(cs, va >> 32);
radeon_emit(cs, ref); /* reference value */
radeon_emit(cs, mask); /* mask */
radeon_emit(cs, 4); /* poll interval */
}
static void si_add_fence_dependency(struct si_context *sctx,
struct pipe_fence_handle *fence)
{
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index cea7d57e518..f0d40e03982 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -577,21 +577,21 @@ static void si_pc_emit_start(struct si_context *sctx,
* do it again in here. */
static void si_pc_emit_stop(struct si_context *sctx,
struct r600_resource *buffer, uint64_t va)
{
struct radeon_cmdbuf *cs = sctx->gfx_cs;
si_cp_release_mem(sctx, V_028A90_BOTTOM_OF_PIPE_TS, 0,
EOP_DST_SEL_MEM, EOP_INT_SEL_NONE,
EOP_DATA_SEL_VALUE_32BIT,
buffer, va, 0, SI_NOT_QUERY);
- si_cp_wait_mem(sctx, va, 0, 0xffffffff, 0);
+ si_cp_wait_mem(sctx, cs, va, 0, 0xffffffff, WAIT_REG_MEM_EQUAL);
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_SAMPLE) | EVENT_INDEX(0));
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_PERFCOUNTER_STOP) | EVENT_INDEX(0));
radeon_set_uconfig_reg(cs, R_036020_CP_PERFMON_CNTL,
S_036020_PERFMON_STATE(V_036020_STOP_COUNTING) |
S_036020_PERFMON_SAMPLE_ENABLE(1));
}
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b3522b60752..23f2089034d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -1206,21 +1206,21 @@ void si_flush_dma_cs(struct si_context *ctx, unsigned flags,
void si_screen_clear_buffer(struct si_screen *sscreen, struct pipe_resource *dst,
uint64_t offset, uint64_t size, unsigned value);
/* si_fence.c */
void si_cp_release_mem(struct si_context *ctx,
unsigned event, unsigned event_flags,
unsigned dst_sel, unsigned int_sel, unsigned data_sel,
struct r600_resource *buf, uint64_t va,
uint32_t new_fence, unsigned query_type);
unsigned si_cp_write_fence_dwords(struct si_screen *screen);
-void si_cp_wait_mem(struct si_context *ctx,
+void si_cp_wait_mem(struct si_context *ctx, struct radeon_cmdbuf *cs,
uint64_t va, uint32_t ref, uint32_t mask, unsigned flags);
void si_init_fence_functions(struct si_context *ctx);
void si_init_screen_fence_functions(struct si_screen *screen);
struct pipe_fence_handle *si_create_fence(struct pipe_context *ctx,
struct tc_unflushed_batch_token *tc_token);
/* si_get.c */
void si_init_screen_get_functions(struct si_screen *sscreen);
/* si_gfx_cs.c */
diff --git a/src/gallium/drivers/radeonsi/si_query.c b/src/gallium/drivers/radeonsi/si_query.c
index aed3e1e80c1..3b55906a3cd 100644
--- a/src/gallium/drivers/radeonsi/si_query.c
+++ b/src/gallium/drivers/radeonsi/si_query.c
@@ -1564,21 +1564,22 @@ static void si_query_hw_get_result_resource(struct si_context *sctx,
if (wait && qbuf == &query->buffer) {
uint64_t va;
/* Wait for result availability. Wait only for readiness
* of the last entry, since the fence writes should be
* serialized in the CP.
*/
va = qbuf->buf->gpu_address + qbuf->results_end - query->result_size;
va += params.fence_offset;
- si_cp_wait_mem(sctx, va, 0x80000000, 0x80000000, 0);
+ si_cp_wait_mem(sctx, sctx->gfx_cs, va, 0x80000000,
+ 0x80000000, WAIT_REG_MEM_EQUAL);
}
sctx->b.launch_grid(&sctx->b, &grid);
sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
}
si_restore_qbo_state(sctx, &saved_state);
pipe_resource_reference(&tmp_buffer, NULL);
}
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index b707a6585c5..c72c59b29d4 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -1045,21 +1045,22 @@ void si_emit_cache_flush(struct si_context *sctx)
/* Do the flush (enqueue the event and wait for it). */
va = sctx->wait_mem_scratch->gpu_address;
sctx->wait_mem_number++;
si_cp_release_mem(sctx, cb_db_event, tc_flags,
EOP_DST_SEL_MEM,
EOP_INT_SEL_SEND_DATA_AFTER_WR_CONFIRM,
EOP_DATA_SEL_VALUE_32BIT,
sctx->wait_mem_scratch, va,
sctx->wait_mem_number, SI_NOT_QUERY);
- si_cp_wait_mem(sctx, va, sctx->wait_mem_number, 0xffffffff, 0);
+ si_cp_wait_mem(sctx, cs, va, sctx->wait_mem_number, 0xffffffff,
+ WAIT_REG_MEM_EQUAL);
}
/* Make sure ME is idle (it executes most packets) before continuing.
* This prevents read-after-write hazards between PFP and ME.
*/
if (cp_coher_cntl ||
(flags & (SI_CONTEXT_CS_PARTIAL_FLUSH |
SI_CONTEXT_INV_VMEM_L1 |
SI_CONTEXT_INV_GLOBAL_L2 |
SI_CONTEXT_WRITEBACK_GLOBAL_L2))) {
--
2.17.1
More information about the mesa-dev
mailing list