[Mesa-dev] [PATCH 8/8] radeonsi: use WRITE_DATA for small MapBuffer(INVALIDATE_RANGE) sizes
Marek Olšák
maraeo at gmail.com
Fri Jan 18 16:43:59 UTC 2019
From: Marek Olšák <marek.olsak at amd.com>
---
src/gallium/drivers/radeonsi/si_buffer.c | 30 ++++++++++++++++++++----
src/gallium/drivers/radeonsi/si_pipe.c | 2 ++
src/gallium/drivers/radeonsi/si_pipe.h | 8 +++++++
3 files changed, 35 insertions(+), 5 deletions(-)
diff --git a/src/gallium/drivers/radeonsi/si_buffer.c b/src/gallium/drivers/radeonsi/si_buffer.c
index a1e421b8b0d..1d4387252a0 100644
--- a/src/gallium/drivers/radeonsi/si_buffer.c
+++ b/src/gallium/drivers/radeonsi/si_buffer.c
@@ -443,32 +443,47 @@ static void *si_buffer_transfer_map(struct pipe_context *ctx,
PIPE_TRANSFER_PERSISTENT))) ||
(rbuffer->flags & RADEON_FLAG_SPARSE))) {
assert(usage & PIPE_TRANSFER_WRITE);
/* Check if mapping this buffer would cause waiting for the GPU.
*/
if (rbuffer->flags & RADEON_FLAG_SPARSE ||
force_discard_range ||
si_rings_is_buffer_referenced(sctx, rbuffer->buf, RADEON_USAGE_READWRITE) ||
!sctx->ws->buffer_wait(rbuffer->buf, 0, RADEON_USAGE_READWRITE)) {
+ unsigned alloc_start = box->x % SI_MAP_BUFFER_ALIGNMENT;
+ unsigned alloc_size = alloc_start + box->width;
+
+ /* Use PKT3_WRITE_DATA for small uploads. */
+ if (box->width <= SI_TRANSFER_WRITE_DATA_THRESHOLD &&
+ box->x % 4 == 0 && box->width % 4 == 0) {
+ void *cpu_map = u_cpu_suballoc(&sctx->cpu_suballoc, alloc_size,
+ SI_MAP_BUFFER_ALIGNMENT);
+ cpu_map = (char*)cpu_map + alloc_start;
+
+ return si_buffer_get_transfer(ctx, resource, usage, box,
+ ptransfer, cpu_map, cpu_map,
+ SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC);
+ }
+
/* Do a wait-free write-only transfer using a temporary buffer. */
unsigned offset;
struct r600_resource *staging = NULL;
u_upload_alloc(ctx->stream_uploader, 0,
- box->width + (box->x % SI_MAP_BUFFER_ALIGNMENT),
+ alloc_size,
sctx->screen->info.tcc_cache_line_size,
&offset, (struct pipe_resource**)&staging,
(void**)&data);
if (staging) {
- data += box->x % SI_MAP_BUFFER_ALIGNMENT;
+ data += alloc_start;
return si_buffer_get_transfer(ctx, resource, usage, box,
ptransfer, data, staging, offset);
} else if (rbuffer->flags & RADEON_FLAG_SPARSE) {
return NULL;
}
} else {
/* At this point, the buffer is always idle (we checked it above). */
usage |= PIPE_TRANSFER_UNSYNCHRONIZED;
}
}
@@ -530,26 +545,30 @@ static void si_buffer_write_data(struct si_context *sctx, struct r600_resource *
si_cp_write_data(sctx, buf, offset, size, V_370_TC_L2, V_370_ME, data);
radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
radeon_emit(cs, 0);
}
static void si_buffer_do_flush_region(struct pipe_context *ctx,
struct pipe_transfer *transfer,
const struct pipe_box *box)
{
+ struct si_context *sctx = (struct si_context*)ctx;
struct si_transfer *stransfer = (struct si_transfer*)transfer;
struct r600_resource *rbuffer = r600_resource(transfer->resource);
- if (stransfer->u.staging) {
+ if (stransfer->offset == SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC) {
+ si_buffer_write_data(sctx, rbuffer, box->x, box->width,
+ stransfer->u.cpu);
+ } else if (stransfer->u.staging) {
/* Copy the staging buffer into the original one. */
- si_copy_buffer((struct si_context*)ctx, transfer->resource,
+ si_copy_buffer(sctx, transfer->resource,
&stransfer->u.staging->b.b, box->x,
stransfer->offset + box->x % SI_MAP_BUFFER_ALIGNMENT,
box->width);
}
util_range_add(&rbuffer->valid_buffer_range, box->x,
box->x + box->width);
}
static void si_buffer_flush_region(struct pipe_context *ctx,
@@ -570,21 +589,22 @@ static void si_buffer_flush_region(struct pipe_context *ctx,
static void si_buffer_transfer_unmap(struct pipe_context *ctx,
struct pipe_transfer *transfer)
{
struct si_context *sctx = (struct si_context*)ctx;
struct si_transfer *stransfer = (struct si_transfer*)transfer;
if (transfer->usage & PIPE_TRANSFER_WRITE &&
!(transfer->usage & PIPE_TRANSFER_FLUSH_EXPLICIT))
si_buffer_do_flush_region(ctx, transfer, &transfer->box);
- r600_resource_reference(&stransfer->u.staging, NULL);
+ if (stransfer->offset != SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC)
+ r600_resource_reference(&stransfer->u.staging, NULL);
assert(stransfer->b.staging == NULL); /* for threaded context only */
pipe_resource_reference(&transfer->resource, NULL);
/* Don't use pool_transfers_unsync. We are always in the driver
* thread. */
slab_free(&sctx->pool_transfers, transfer);
}
static void si_buffer_subdata(struct pipe_context *ctx,
struct pipe_resource *buffer,
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3bb8e04e4ad..a17929c2d5f 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -253,20 +253,21 @@ static void si_destroy_context(struct pipe_context *context)
si_saved_cs_reference(&sctx->current_saved_cs, NULL);
_mesa_hash_table_destroy(sctx->tex_handles, NULL);
_mesa_hash_table_destroy(sctx->img_handles, NULL);
util_dynarray_fini(&sctx->resident_tex_handles);
util_dynarray_fini(&sctx->resident_img_handles);
util_dynarray_fini(&sctx->resident_tex_needs_color_decompress);
util_dynarray_fini(&sctx->resident_img_needs_color_decompress);
util_dynarray_fini(&sctx->resident_tex_needs_depth_decompress);
+ u_cpu_suballoc_deinit(&sctx->cpu_suballoc);
FREE(sctx);
}
static enum pipe_reset_status si_get_reset_status(struct pipe_context *ctx)
{
struct si_context *sctx = (struct si_context *)ctx;
if (sctx->screen->info.has_gpu_reset_status_query)
return sctx->ws->ctx_query_reset_status(sctx->ctx);
@@ -390,20 +391,21 @@ static struct pipe_context *si_create_context(struct pipe_screen *screen,
sctx->b.screen = screen; /* this must be set first */
sctx->b.priv = NULL;
sctx->b.destroy = si_destroy_context;
sctx->b.emit_string_marker = si_emit_string_marker;
sctx->b.set_debug_callback = si_set_debug_callback;
sctx->b.set_log_context = si_set_log_context;
sctx->b.set_context_param = si_set_context_param;
sctx->screen = sscreen; /* Easy accessing of screen/winsys. */
sctx->is_debug = (flags & PIPE_CONTEXT_DEBUG) != 0;
+ u_cpu_suballoc_init(&sctx->cpu_suballoc, 64 * 1024, SI_MAP_BUFFER_ALIGNMENT);
slab_create_child(&sctx->pool_transfers, &sscreen->pool_transfers);
slab_create_child(&sctx->pool_transfers_unsync, &sscreen->pool_transfers);
sctx->ws = sscreen->ws;
sctx->family = sscreen->info.family;
sctx->chip_class = sscreen->info.chip_class;
if (sscreen->info.has_gpu_reset_counter_query) {
sctx->gpu_reset_counter =
sctx->ws->query_value(sctx->ws, RADEON_GPU_RESET_COUNTER);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index f79828f3438..e2cca55a8e2 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -21,20 +21,21 @@
* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
* USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
#ifndef SI_PIPE_H
#define SI_PIPE_H
#include "si_shader.h"
#include "si_state.h"
+#include "util/u_cpu_suballoc.h"
#include "util/u_dynarray.h"
#include "util/u_idalloc.h"
#include "util/u_threaded_context.h"
#ifdef PIPE_ARCH_BIG_ENDIAN
#define SI_BIG_ENDIAN 1
#else
#define SI_BIG_ENDIAN 0
#endif
@@ -244,25 +245,31 @@ struct r600_resource {
bool TC_L2_dirty;
/* Whether this resource is referenced by bindless handles. */
bool texture_handle_allocated;
bool image_handle_allocated;
/* Whether the resource has been exported via resource_get_handle. */
unsigned external_usage; /* PIPE_HANDLE_USAGE_* */
};
+#define SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC UINT_MAX
+
struct si_transfer {
struct threaded_transfer b;
union {
struct r600_resource *staging;
+ uint32_t *cpu;
} u;
+ /* If offset == SI_TRANSFER_SPECIAL_OFFSET_USE_CPU_ALLOC, use "cpu",
+ * else use "staging".
+ */
unsigned offset;
};
struct si_texture {
struct r600_resource buffer;
struct radeon_surf surface;
uint64_t size;
struct si_texture *flushed_depth_texture;
@@ -778,20 +785,21 @@ struct si_context {
struct radeon_winsys *ws;
struct radeon_winsys_ctx *ctx;
struct radeon_cmdbuf *gfx_cs;
struct radeon_cmdbuf *dma_cs;
struct pipe_fence_handle *last_gfx_fence;
struct pipe_fence_handle *last_sdma_fence;
struct r600_resource *eop_bug_scratch;
struct u_upload_mgr *cached_gtt_allocator;
struct threaded_context *tc;
struct u_suballocator *allocator_zeroed_memory;
+ struct u_cpu_suballoc cpu_suballoc;
struct slab_child_pool pool_transfers;
struct slab_child_pool pool_transfers_unsync; /* for threaded_context */
struct pipe_device_reset_callback device_reset_callback;
struct u_log_context *log;
void *query_result_shader;
struct blitter_context *blitter;
void *custom_dsa_flush;
void *custom_blend_resolve;
void *custom_blend_fmask_decompress;
void *custom_blend_eliminate_fastclear;
--
2.17.1
More information about the mesa-dev mailing list