[Mesa-dev] [PATCH] winsys/amdgpu: add a fast exit path into amdgpu_cs_add_buffer
Marek Olšák
maraeo at gmail.com
Thu Jan 26 23:51:43 UTC 2017
From: Marek Olšák <marek.olsak at amd.com>
With this fast path, the time spent in amdgpu_cs_add_buffer dropped by 37% when running torcs.
---
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 16 ++++++++++++++++
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 5 +++++
2 files changed, 21 insertions(+)
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 0bc4ce9..2a1b932 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -437,40 +437,54 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
{
/* Don't use the "domains" parameter. Amdgpu doesn't support changing
* the buffer placement during command submission.
*/
struct amdgpu_cs *acs = amdgpu_cs(rcs);
struct amdgpu_cs_context *cs = acs->csc;
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_cs_buffer *buffer;
int index;
+ /* Fast exit for no-op calls.
+ * This is very effective with suballocators and linear uploaders that
+ * are outside of the winsys.
+ */
+ if (bo == cs->last_added_bo &&
+ (usage & cs->last_added_bo_usage) == usage &&
+ (1ull << priority) & cs->last_added_bo_priority_usage)
+ return cs->last_added_bo_index;
+
if (!bo->bo) {
index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
if (index < 0)
return 0;
buffer = &cs->slab_buffers[index];
buffer->usage |= usage;
usage &= ~RADEON_USAGE_SYNCHRONIZED;
index = buffer->u.slab.real_idx;
} else {
index = amdgpu_lookup_or_add_real_buffer(acs, bo);
if (index < 0)
return 0;
}
buffer = &cs->real_buffers[index];
buffer->u.real.priority_usage |= 1llu << priority;
buffer->usage |= usage;
cs->flags[index] = MAX2(cs->flags[index], priority / 4);
+
+ cs->last_added_bo = bo;
+ cs->last_added_bo_index = index;
+ cs->last_added_bo_usage = buffer->usage;
+ cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
return index;
}
static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
{
struct pb_buffer *pb;
uint8_t *mapped;
unsigned buffer_size;
/* Always create a buffer that is at least as large as the maximum seen IB
@@ -638,20 +652,21 @@ static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
default:
case RING_GFX:
cs->request.ip_type = AMDGPU_HW_IP_GFX;
break;
}
for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
cs->buffer_indices_hashlist[i] = -1;
}
+ cs->last_added_bo = NULL;
cs->request.number_of_ibs = 1;
cs->request.ibs = &cs->ib[IB_MAIN];
cs->ib[IB_CONST].flags = AMDGPU_IB_FLAG_CE;
cs->ib[IB_CONST_PREAMBLE].flags = AMDGPU_IB_FLAG_CE |
AMDGPU_IB_FLAG_PREAMBLE;
return true;
}
@@ -669,20 +684,21 @@ static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
amdgpu_winsys_bo_reference(&cs->slab_buffers[i].bo, NULL);
}
cs->num_real_buffers = 0;
cs->num_slab_buffers = 0;
amdgpu_fence_reference(&cs->fence, NULL);
for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
cs->buffer_indices_hashlist[i] = -1;
}
+ cs->last_added_bo = NULL;
}
static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
{
amdgpu_cs_context_cleanup(cs);
FREE(cs->flags);
FREE(cs->real_buffers);
FREE(cs->handles);
FREE(cs->slab_buffers);
FREE(cs->request.dependencies);
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 90b9e83..495d55b 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -87,20 +87,25 @@ struct amdgpu_cs_context {
amdgpu_bo_handle *handles;
uint8_t *flags;
struct amdgpu_cs_buffer *real_buffers;
unsigned num_slab_buffers;
unsigned max_slab_buffers;
struct amdgpu_cs_buffer *slab_buffers;
int buffer_indices_hashlist[4096];
+ struct amdgpu_winsys_bo *last_added_bo;
+ unsigned last_added_bo_index;
+ unsigned last_added_bo_usage;
+ uint64_t last_added_bo_priority_usage;
+
unsigned max_dependencies;
struct pipe_fence_handle *fence;
/* the error returned from cs_flush for non-async submissions */
int error_code;
};
struct amdgpu_cs {
struct amdgpu_ib main; /* must be first because this is inherited */
--
2.7.4
More information about the mesa-dev mailing list