[Mesa-dev] [PATCH 13/18] radeonsi: implement clear_buffer using CP DMA, initialize CMASK with it
Marek Olšák
maraeo at gmail.com
Wed Sep 25 18:35:33 PDT 2013
From: Marek Olšák <marek.olsak at amd.com>
More work needs to be done for this to be entirely shared with r600g.
I'm just trying to share r600_texture.c now.
I put the implementation in si_descriptors.c because the emit
function was already there.
---
src/gallium/drivers/radeonsi/r600_texture.c | 5 +-
src/gallium/drivers/radeonsi/radeonsi_pipe.c | 32 +++++-----
src/gallium/drivers/radeonsi/si_descriptors.c | 85 ++++++++++++++++++++++++++-
src/gallium/drivers/radeonsi/si_state_draw.c | 5 +-
4 files changed, 108 insertions(+), 19 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/r600_texture.c b/src/gallium/drivers/radeonsi/r600_texture.c
index aa307fb..53452c8 100644
--- a/src/gallium/drivers/radeonsi/r600_texture.c
+++ b/src/gallium/drivers/radeonsi/r600_texture.c
@@ -462,8 +462,9 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
- char *map = rscreen->b.ws->buffer_map(resource->cs_buf, NULL, PIPE_TRANSFER_WRITE);
- memset(map + rtex->cmask.offset, 0xCC, rtex->cmask.size);
+ r600_screen_clear_buffer(&rscreen->b, &resource->b.b,
+ rtex->cmask.offset, rtex->cmask.size,
+ 0xCCCCCCCC);
}
if (rscreen->b.debug_flags & DBG_TEX_DEPTH && rtex->is_depth) {
diff --git a/src/gallium/drivers/radeonsi/radeonsi_pipe.c b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
index 5528e19..8ed5d26 100644
--- a/src/gallium/drivers/radeonsi/radeonsi_pipe.c
+++ b/src/gallium/drivers/radeonsi/radeonsi_pipe.c
@@ -653,6 +653,8 @@ static void r600_destroy_screen(struct pipe_screen* pscreen)
if (!radeon_winsys_unref(rscreen->b.ws))
return;
+ r600_common_screen_cleanup(&rscreen->b);
+
if (rscreen->fences.bo) {
struct r600_fence_block *entry, *tmp;
@@ -823,18 +825,8 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
return NULL;
}
- r600_common_screen_init(&rscreen->b, ws);
-
- if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE))
- rscreen->b.debug_flags |= DBG_TEX_DEPTH;
- if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
- rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
-
- if (r600_init_tiling(rscreen)) {
- FREE(rscreen);
- return NULL;
- }
-
+ /* Set functions first. */
+ rscreen->b.b.context_create = r600_create_context;
rscreen->b.b.destroy = r600_destroy_screen;
rscreen->b.b.get_name = r600_get_name;
rscreen->b.b.get_vendor = r600_get_vendor;
@@ -844,12 +836,9 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
rscreen->b.b.get_compute_param = r600_get_compute_param;
rscreen->b.b.get_timestamp = r600_get_timestamp;
rscreen->b.b.is_format_supported = si_is_format_supported;
- rscreen->b.b.context_create = r600_create_context;
rscreen->b.b.fence_reference = r600_fence_reference;
rscreen->b.b.fence_signalled = r600_fence_signalled;
rscreen->b.b.fence_finish = r600_fence_finish;
- r600_init_screen_resource_functions(&rscreen->b.b);
-
if (rscreen->b.info.has_uvd) {
rscreen->b.b.get_video_param = ruvd_get_video_param;
rscreen->b.b.is_video_format_supported = ruvd_is_format_supported;
@@ -857,6 +846,19 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
rscreen->b.b.get_video_param = r600_get_video_param;
rscreen->b.b.is_video_format_supported = vl_video_buffer_is_format_supported;
}
+ r600_init_screen_resource_functions(&rscreen->b.b);
+
+ r600_common_screen_init(&rscreen->b, ws);
+
+ if (debug_get_bool_option("RADEON_PRINT_TEXDEPTH", FALSE))
+ rscreen->b.debug_flags |= DBG_TEX_DEPTH;
+ if (debug_get_bool_option("RADEON_DUMP_SHADERS", FALSE))
+ rscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
+
+ if (r600_init_tiling(rscreen)) {
+ FREE(rscreen);
+ return NULL;
+ }
util_format_s3tc_init();
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index a8f8781..93d3684 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -44,7 +44,7 @@ static uint32_t null_desc[8]; /* zeros */
#define SI_CP_DMA_RAW_WAIT (1 << 1) /* SI+ */
/* Emit a CP DMA packet to do a copy from one buffer to another.
- * The size must fit in bits [20:0]. Notes:
+ * The size must fit in bits [20:0].
*/
static void si_emit_cp_dma_copy_buffer(struct r600_context *rctx,
uint64_t dst_va, uint64_t src_va,
@@ -517,6 +517,88 @@ static void si_set_streamout_targets(struct pipe_context *ctx,
si_update_descriptors(rctx, &buffers->desc);
}
+/* CP DMA */
+
+/* The max number of bytes to copy per packet.
+ * The size of a CP DMA packet must fit in bits [20:0] (see
+ * si_emit_cp_dma_copy_buffer); this is the largest multiple of 8 below 2^21. */
+#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
+
+/**
+ * Fill a byte range of a buffer with a repeating 32-bit pattern.
+ *
+ * Dword-aligned ranges are cleared on the GPU with CP DMA packets of at most
+ * CP_DMA_MAX_BYTE_COUNT bytes each; any other range is written by the CPU
+ * through a mapping of the buffer.
+ *
+ * \param ctx     pipe context (must be a struct r600_context)
+ * \param dst     buffer to clear
+ * \param offset  first byte to clear, relative to the start of dst
+ * \param size    number of bytes to clear; 0 is a no-op
+ * \param value   32-bit pattern replicated over the range
+ */
+static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
+			    unsigned offset, unsigned size, unsigned value)
+{
+	struct r600_context *rctx = (struct r600_context*)ctx;
+
+	if (!size)
+		return;
+
+	/* Record the cleared range as valid up front, while "size" still holds
+	 * the full byte count: the CP DMA loop below consumes it down to 0. */
+	util_range_add(&r600_resource(dst)->valid_buffer_range, offset,
+		       offset + size);
+
+	/* Fallback for unaligned clears. */
+	if (offset % 4 != 0 || size % 4 != 0) {
+		uint8_t *map = rctx->b.ws->buffer_map(r600_resource(dst)->cs_buf,
+						      rctx->b.rings.gfx.cs,
+						      PIPE_TRANSFER_WRITE);
+
+		/* The mapping starts at the beginning of the buffer. */
+		map += offset;
+
+		/* Write byte by byte so that neither an unaligned start nor a
+		 * size that is not a multiple of 4 loses data. Each byte is the
+		 * one a dword store of "value" would put at that position within
+		 * its dword (assumes a little-endian CPU view -- TODO confirm). */
+		for (unsigned i = 0; i < size; i++) {
+			unsigned byte_within_dword = (offset + i) % 4;
+
+			*map++ = (value >> (byte_within_dword * 8)) & 0xff;
+		}
+		return;
+	}
+
+	uint64_t va = r600_resource_va(&rctx->screen->b.b, dst) + offset;
+
+	/* Flush the caches where the resource is bound. */
+	/* XXX only flush the caches where the buffer is bound. */
+	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+			 R600_CONTEXT_INV_CONST_CACHE |
+			 R600_CONTEXT_FLUSH_AND_INV_CB |
+			 R600_CONTEXT_FLUSH_AND_INV_DB |
+			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
+			 R600_CONTEXT_FLUSH_AND_INV_DB_META;
+	rctx->b.flags |= R600_CONTEXT_WAIT_3D_IDLE;
+
+	while (size) {
+		unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
+		unsigned dma_flags = 0;
+
+		/* 7 dwords for the packet emitted below, plus the cache flush
+		 * if one is still pending. */
+		si_need_cs_space(rctx, 7 + (rctx->b.flags ? rctx->cache_flush.num_dw : 0),
+				 FALSE);
+
+		/* This must be done after need_cs_space. */
+		r600_context_bo_reloc(&rctx->b, &rctx->b.rings.gfx,
+				      (struct r600_resource*)dst, RADEON_USAGE_WRITE);
+
+		/* Flush the caches for the first copy only.
+		 * Also wait for the previous CP DMA operations. */
+		if (rctx->b.flags) {
+			si_emit_cache_flush(&rctx->b, NULL);
+			dma_flags |= SI_CP_DMA_RAW_WAIT; /* same as WAIT_UNTIL=CP_DMA_IDLE */
+		}
+
+		/* Do the synchronization after the last copy, so that all data is written to memory. */
+		if (size == byte_count)
+			dma_flags |= R600_CP_DMA_SYNC;
+
+		/* Emit the clear packet. */
+		si_emit_cp_dma_clear_buffer(rctx, va, byte_count, value, dma_flags);
+
+		size -= byte_count;
+		va += byte_count;
+	}
+
+	/* Flush the caches again in case the 3D engine has been prefetching
+	 * the resource. */
+	/* XXX only flush the caches where the buffer is bound. */
+	rctx->b.flags |= R600_CONTEXT_INV_TEX_CACHE |
+			 R600_CONTEXT_INV_CONST_CACHE |
+			 R600_CONTEXT_FLUSH_AND_INV_CB |
+			 R600_CONTEXT_FLUSH_AND_INV_DB |
+			 R600_CONTEXT_FLUSH_AND_INV_CB_META |
+			 R600_CONTEXT_FLUSH_AND_INV_DB_META;
+}
+
/* INIT/DEINIT */
void si_init_all_descriptors(struct r600_context *rctx)
@@ -541,6 +623,7 @@ void si_init_all_descriptors(struct r600_context *rctx)
/* Set pipe_context functions. */
rctx->b.b.set_constant_buffer = si_set_constant_buffer;
rctx->b.b.set_stream_output_targets = si_set_streamout_targets;
+ rctx->b.clear_buffer = si_clear_buffer;
}
void si_release_all_descriptors(struct r600_context *rctx)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.c b/src/gallium/drivers/radeonsi/si_state_draw.c
index cb5055a..0213523 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.c
+++ b/src/gallium/drivers/radeonsi/si_state_draw.c
@@ -677,7 +677,10 @@ void si_emit_cache_flush(struct r600_common_context *rctx, struct r600_atom *ato
radeon_emit(cs, EVENT_TYPE(V_028A90_FLUSH_AND_INV_CB_META) | EVENT_INDEX(0));
}
- if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
+ if (rctx->flags & R600_CONTEXT_WAIT_3D_IDLE) {
+ radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
+ radeon_emit(cs, EVENT_TYPE(V_028A90_PS_PARTIAL_FLUSH) | EVENT_INDEX(4));
+ } else if (rctx->flags & R600_CONTEXT_STREAMOUT_FLUSH) {
/* Needed if streamout buffers are going to be used as a source. */
radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
--
1.8.1.2
More information about the mesa-dev
mailing list