[Mesa-dev] [PATCH 06/13] radeonsi: only flush the right set of caches for CP DMA operations
Marek Olšák
maraeo at gmail.com
Mon Jan 5 12:20:56 PST 2015
From: Marek Olšák <marek.olsak at amd.com>
The right set is either the framebuffer caches or the caches for shader
resources; callers select between them with a new is_framebuffer parameter.
The motivation is that the framebuffer caches very rarely need to be
flushed here.
---
src/gallium/drivers/r600/r600_blit.c | 3 +-
src/gallium/drivers/radeon/r600_pipe_common.c | 5 +--
src/gallium/drivers/radeon/r600_pipe_common.h | 6 ++--
src/gallium/drivers/radeon/r600_texture.c | 8 +++--
src/gallium/drivers/radeon/radeon_video.c | 3 +-
src/gallium/drivers/radeonsi/si_blit.c | 2 +-
src/gallium/drivers/radeonsi/si_descriptors.c | 51 +++++++++++++++------------
src/gallium/drivers/radeonsi/si_pipe.c | 2 +-
src/gallium/drivers/radeonsi/si_state.h | 2 +-
9 files changed, 48 insertions(+), 34 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_blit.c b/src/gallium/drivers/r600/r600_blit.c
index bdc5f9f..01262a5 100644
--- a/src/gallium/drivers/r600/r600_blit.c
+++ b/src/gallium/drivers/r600/r600_blit.c
@@ -565,7 +565,8 @@ static void r600_copy_global_buffer(struct pipe_context *ctx,
}
static void r600_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value)
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
{
struct r600_context *rctx = (struct r600_context*)ctx;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c b/src/gallium/drivers/radeon/r600_pipe_common.c
index d4133d8..8449a1f 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -905,12 +905,13 @@ bool r600_can_dump_shader(struct r600_common_screen *rscreen,
}
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value)
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context;
pipe_mutex_lock(rscreen->aux_context_lock);
- rctx->clear_buffer(&rctx->b, dst, offset, size, value);
+ rctx->clear_buffer(&rctx->b, dst, offset, size, value, is_framebuffer);
rscreen->aux_context->flush(rscreen->aux_context, NULL, 0);
pipe_mutex_unlock(rscreen->aux_context_lock);
}
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index 15736d7..a9416b6 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -388,7 +388,8 @@ struct r600_common_context {
const struct pipe_box *src_box);
void (*clear_buffer)(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value);
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer);
void (*blit_decompress_depth)(struct pipe_context *ctx,
struct r600_texture *texture,
@@ -441,7 +442,8 @@ void r600_context_add_resource_size(struct pipe_context *ctx, struct pipe_resour
bool r600_can_dump_shader(struct r600_common_screen *rscreen,
const struct tgsi_token *tokens);
void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value);
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer);
struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen,
const struct pipe_resource *templ);
const char *r600_get_llvm_processor_name(enum radeon_family family);
diff --git a/src/gallium/drivers/radeon/r600_texture.c b/src/gallium/drivers/radeon/r600_texture.c
index fdf4d76..ab8ce7b 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -559,7 +559,8 @@ static void r600_texture_allocate_htile(struct r600_common_screen *rscreen,
* without htile buffer */
R600_ERR("Failed to create buffer object for htile buffer.\n");
} else {
- r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0, htile_size, 0);
+ r600_screen_clear_buffer(rscreen, &rtex->htile_buffer->b.b, 0,
+ htile_size, 0, true);
}
}
@@ -638,7 +639,8 @@ r600_texture_create_object(struct pipe_screen *screen,
if (rtex->cmask.size) {
/* Initialize the cmask to 0xCC (= compressed state). */
r600_screen_clear_buffer(rscreen, &rtex->cmask_buffer->b.b,
- rtex->cmask.offset, rtex->cmask.size, 0xCCCCCCCC);
+ rtex->cmask.offset, rtex->cmask.size,
+ 0xCCCCCCCC, true);
}
/* Initialize the CMASK base register value. */
@@ -1273,7 +1275,7 @@ void evergreen_do_fast_color_clear(struct r600_common_context *rctx,
/* Do the fast clear. */
evergreen_set_clear_color(tex, fb->cbufs[i]->format, color);
rctx->clear_buffer(&rctx->b, &tex->cmask_buffer->b.b,
- tex->cmask.offset, tex->cmask.size, 0);
+ tex->cmask.offset, tex->cmask.size, 0, true);
tex->dirty_level_mask |= 1 << fb->cbufs[i]->u.tex.level;
fb_state->dirty = true;
diff --git a/src/gallium/drivers/radeon/radeon_video.c b/src/gallium/drivers/radeon/radeon_video.c
index f6cfdff..1420798 100644
--- a/src/gallium/drivers/radeon/radeon_video.c
+++ b/src/gallium/drivers/radeon/radeon_video.c
@@ -122,7 +122,8 @@ void rvid_clear_buffer(struct pipe_context *context, struct rvid_buffer* buffer)
{
struct r600_common_context *rctx = (struct r600_common_context*)context;
- rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size, 0);
+ rctx->clear_buffer(context, &buffer->res->b.b, 0, buffer->res->buf->size,
+ 0, false);
context->flush(context, NULL, 0);
}
diff --git a/src/gallium/drivers/radeonsi/si_blit.c b/src/gallium/drivers/radeonsi/si_blit.c
index 4744154..1f2c408 100644
--- a/src/gallium/drivers/radeonsi/si_blit.c
+++ b/src/gallium/drivers/radeonsi/si_blit.c
@@ -556,7 +556,7 @@ void si_resource_copy_region(struct pipe_context *ctx,
/* Fallback for buffers. */
if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
- si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width);
+ si_copy_buffer(sctx, dst, src, dstx, src_box->x, src_box->width, false);
return;
}
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index d46f4e5..c959961 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1054,9 +1054,11 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
#define CP_DMA_MAX_BYTE_COUNT ((1 << 21) - 8)
static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
- unsigned offset, unsigned size, unsigned value)
+ unsigned offset, unsigned size, unsigned value,
+ bool is_framebuffer)
{
struct si_context *sctx = (struct si_context*)ctx;
+ unsigned flush_flags;
if (!size)
return;
@@ -1081,12 +1083,15 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
uint64_t va = r600_resource(dst)->gpu_address + offset;
/* Flush the caches where the resource is bound. */
- /* XXX only flush the caches where the buffer is bound. */
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
- sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH;
+ if (is_framebuffer)
+ flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ else
+ flush_flags = SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE;
+
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ flush_flags;
while (size) {
unsigned byte_count = MIN2(size, CP_DMA_MAX_BYTE_COUNT);
@@ -1120,17 +1125,16 @@ static void si_clear_buffer(struct pipe_context *ctx, struct pipe_resource *dst,
/* Flush the caches again in case the 3D engine has been prefetching
* the resource. */
- /* XXX only flush the caches where the buffer is bound. */
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ sctx->b.flags |= flush_flags;
}
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size)
+ uint64_t dst_offset, uint64_t src_offset, unsigned size,
+ bool is_framebuffer)
{
+ unsigned flush_flags;
+
if (!size)
return;
@@ -1144,11 +1148,15 @@ void si_copy_buffer(struct si_context *sctx,
src_offset += r600_resource(src)->gpu_address;
/* Flush the caches where the resource is bound. */
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER |
- SI_CONTEXT_PS_PARTIAL_FLUSH;
+ if (is_framebuffer)
+ flush_flags = SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ else
+ flush_flags = SI_CONTEXT_INV_TC_L1 |
+ SI_CONTEXT_INV_TC_L2 |
+ SI_CONTEXT_INV_KCACHE;
+
+ sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+ flush_flags;
while (size) {
unsigned sync_flags = 0;
@@ -1180,10 +1188,9 @@ void si_copy_buffer(struct si_context *sctx,
dst_offset += byte_count;
}
- sctx->b.flags |= SI_CONTEXT_INV_TC_L1 |
- SI_CONTEXT_INV_TC_L2 |
- SI_CONTEXT_INV_KCACHE |
- SI_CONTEXT_FLUSH_AND_INV_FRAMEBUFFER;
+ /* Flush the caches again in case the 3D engine has been prefetching
+ * the resource. */
+ sctx->b.flags |= flush_flags;
}
/* INIT/DEINIT */
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 4b66499..8352c0e 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -151,7 +151,7 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen, void *
/* Clear the NULL constant buffer, because loads should return zeros. */
sctx->b.clear_buffer(&sctx->b.b, sctx->null_const_buf.buffer, 0,
- sctx->null_const_buf.buffer->width0, 0);
+ sctx->null_const_buf.buffer->width0, 0, false);
}
return &sctx->b.b;
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 8927e50..3cd252c 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -238,7 +238,7 @@ void si_release_all_descriptors(struct si_context *sctx);
void si_all_descriptors_begin_new_cs(struct si_context *sctx);
void si_copy_buffer(struct si_context *sctx,
struct pipe_resource *dst, struct pipe_resource *src,
- uint64_t dst_offset, uint64_t src_offset, unsigned size);
+ uint64_t dst_offset, uint64_t src_offset, unsigned size, bool is_framebuffer);
void si_upload_const_buffer(struct si_context *sctx, struct r600_resource **rbuffer,
const uint8_t *ptr, unsigned size, uint32_t *const_offset);
--
2.1.0
More information about the mesa-dev
mailing list