[Mesa-dev] [PATCH 7/9] radeonsi: rework RADEON_PRIO flags to be <= 31
Marek Olšák
maraeo at gmail.com
Thu Jul 12 05:26:38 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
This decreases sizeof(struct amdgpu_cs_buffer) from 24 to 16 bytes.
---
src/gallium/drivers/radeon/radeon_winsys.h | 39 ++++++++++---------
src/gallium/drivers/radeonsi/si_debug.c | 2 +-
src/gallium/winsys/amdgpu/drm/amdgpu_cs.c | 6 +--
src/gallium/winsys/amdgpu/drm/amdgpu_cs.h | 4 +-
src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 2 +-
src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +-
6 files changed, 28 insertions(+), 27 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index bcd6831ed35..10c63ae4d82 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -108,63 +108,64 @@ enum radeon_value_id {
RADEON_VRAM_USAGE,
RADEON_VRAM_VIS_USAGE,
RADEON_GTT_USAGE,
RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
RADEON_CURRENT_SCLK,
RADEON_CURRENT_MCLK,
RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
RADEON_CS_THREAD_TIME,
};
-/* Each group of four has the same priority. */
enum radeon_bo_priority {
+ /* Each group of two has the same priority. */
RADEON_PRIO_FENCE = 0,
RADEON_PRIO_TRACE,
- RADEON_PRIO_SO_FILLED_SIZE,
+
+ RADEON_PRIO_SO_FILLED_SIZE = 2,
RADEON_PRIO_QUERY,
RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
- RADEON_PRIO_DRAW_INDIRECT,
+
+ RADEON_PRIO_DRAW_INDIRECT = 6,
RADEON_PRIO_INDEX_BUFFER,
- RADEON_PRIO_CP_DMA = 12,
+ RADEON_PRIO_CP_DMA = 8,
+ RADEON_PRIO_BORDER_COLORS,
- RADEON_PRIO_CONST_BUFFER = 16,
+ RADEON_PRIO_CONST_BUFFER = 10,
RADEON_PRIO_DESCRIPTORS,
- RADEON_PRIO_BORDER_COLORS,
- RADEON_PRIO_SAMPLER_BUFFER = 20,
+ RADEON_PRIO_SAMPLER_BUFFER = 12,
RADEON_PRIO_VERTEX_BUFFER,
- RADEON_PRIO_SHADER_RW_BUFFER = 24,
+ RADEON_PRIO_SHADER_RW_BUFFER = 14,
RADEON_PRIO_COMPUTE_GLOBAL,
- RADEON_PRIO_SAMPLER_TEXTURE = 28,
+ RADEON_PRIO_SAMPLER_TEXTURE = 16,
RADEON_PRIO_SHADER_RW_IMAGE,
- RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 32,
-
- RADEON_PRIO_COLOR_BUFFER = 36,
+ RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18,
+ RADEON_PRIO_COLOR_BUFFER,
- RADEON_PRIO_DEPTH_BUFFER = 40,
+ RADEON_PRIO_DEPTH_BUFFER = 20,
- RADEON_PRIO_COLOR_BUFFER_MSAA = 44,
+ RADEON_PRIO_COLOR_BUFFER_MSAA = 22,
- RADEON_PRIO_DEPTH_BUFFER_MSAA = 48,
+ RADEON_PRIO_DEPTH_BUFFER_MSAA = 24,
- RADEON_PRIO_SEPARATE_META = 52,
+ RADEON_PRIO_SEPARATE_META = 26,
RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */
- RADEON_PRIO_SHADER_RINGS = 56,
+ RADEON_PRIO_SHADER_RINGS = 28,
- RADEON_PRIO_SCRATCH_BUFFER = 60,
+ RADEON_PRIO_SCRATCH_BUFFER = 30,
- /* 63 is the maximum value */
+ /* 31 is the maximum value */
};
struct winsys_handle;
struct radeon_winsys_ctx;
struct radeon_cmdbuf_chunk {
unsigned cdw; /* Number of used dwords. */
unsigned max_dw; /* Maximum number of dwords. */
uint32_t *buf; /* The base pointer of the chunk. */
@@ -216,21 +217,21 @@ struct radeon_bo_metadata {
};
enum radeon_feature_id {
RADEON_FID_R300_HYPERZ_ACCESS, /* ZMask + HiZ */
RADEON_FID_R300_CMASK_ACCESS,
};
struct radeon_bo_list_item {
uint64_t bo_size;
uint64_t vm_address;
- uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+ uint32_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
};
struct radeon_winsys {
/**
* The screen object this winsys was created for
*/
struct pipe_screen *screen;
/**
* Decrement the winsys reference count.
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 50375ce7cbe..d6207e68d12 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -562,21 +562,21 @@ static void si_dump_bo_list(struct si_context *sctx,
(va - previous_va_end) / page_size);
}
}
/* Print the buffer. */
fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ",
size / page_size, va / page_size, (va + size) / page_size);
/* Print the usage. */
- for (j = 0; j < 64; j++) {
+ for (j = 0; j < 32; j++) {
- if (!(saved->bo_list[i].priority_usage & (1ull << j)))
+ if (!(saved->bo_list[i].priority_usage & (1u << j)))
continue;
fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
hit = true;
}
fprintf(f, "\n");
}
fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
" Other buffers can still be allocated there.\n\n");
}
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index ec164175dbc..872e67a790a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -622,21 +622,21 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
struct amdgpu_cs_buffer *buffer;
int index;
/* Fast exit for no-op calls.
* This is very effective with suballocators and linear uploaders that
* are outside of the winsys.
*/
if (bo == cs->last_added_bo &&
(usage & cs->last_added_bo_usage) == usage &&
- (1ull << priority) & cs->last_added_bo_priority_usage)
+ (1u << priority) & cs->last_added_bo_priority_usage)
return cs->last_added_bo_index;
if (!bo->sparse) {
if (!bo->bo) {
index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
if (index < 0)
return 0;
buffer = &cs->slab_buffers[index];
buffer->usage |= usage;
@@ -651,21 +651,21 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
buffer = &cs->real_buffers[index];
} else {
index = amdgpu_lookup_or_add_sparse_buffer(acs, bo);
if (index < 0)
return 0;
buffer = &cs->sparse_buffers[index];
}
- buffer->u.real.priority_usage |= 1ull << priority;
+ buffer->u.real.priority_usage |= 1u << priority;
buffer->usage |= usage;
cs->last_added_bo = bo;
cs->last_added_bo_index = index;
cs->last_added_bo_usage = buffer->usage;
cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
return index;
}
static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
@@ -1332,21 +1332,21 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
num_handles = 0;
for (i = 0; i < cs->num_real_buffers; ++i) {
struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
if (buffer->bo->is_local)
continue;
assert(buffer->u.real.priority_usage != 0);
handles[num_handles] = buffer->bo->bo;
- flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+ flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2;
++num_handles;
}
if (num_handles) {
r = amdgpu_bo_list_create(ws->dev, num_handles,
handles, flags, &bo_list);
if (r) {
fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
amdgpu_fence_signalled(cs->fence);
cs->error_code = r;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 3b10cc66c21..9f5a4fd991a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -39,21 +39,21 @@ struct amdgpu_ctx {
uint64_t *user_fence_cpu_address_base;
int refcount;
unsigned initial_num_total_rejected_cs;
unsigned num_rejected_cs;
};
struct amdgpu_cs_buffer {
struct amdgpu_winsys_bo *bo;
union {
struct {
- uint64_t priority_usage;
+ uint32_t priority_usage;
} real;
struct {
uint32_t real_idx; /* index of underlying real BO */
} slab;
} u;
enum radeon_bo_usage usage;
};
enum ib_type {
IB_MAIN,
@@ -87,21 +87,21 @@ struct amdgpu_cs_context {
unsigned num_sparse_buffers;
unsigned max_sparse_buffers;
struct amdgpu_cs_buffer *sparse_buffers;
int buffer_indices_hashlist[4096];
struct amdgpu_winsys_bo *last_added_bo;
unsigned last_added_bo_index;
unsigned last_added_bo_usage;
- uint64_t last_added_bo_priority_usage;
+ uint32_t last_added_bo_priority_usage;
struct pipe_fence_handle **fence_dependencies;
unsigned num_fence_dependencies;
unsigned max_fence_dependencies;
struct pipe_fence_handle **syncobj_to_signal;
unsigned num_syncobj_to_signal;
unsigned max_syncobj_to_signal;
struct pipe_fence_handle *fence;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 90386027235..798be78504c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -359,21 +359,21 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
index = cs->csc->slab_buffers[index].u.slab.real_idx;
} else {
index = radeon_lookup_or_add_real_buffer(cs, bo);
}
reloc = &cs->csc->relocs[index];
added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
reloc->read_domains |= rd;
reloc->write_domain |= wd;
reloc->flags = MAX2(reloc->flags, priority);
- cs->csc->relocs_bo[index].u.real.priority_usage |= 1ull << priority;
+ cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
if (added_domains & RADEON_DOMAIN_VRAM)
cs->base.used_vram += bo->base.size;
else if (added_domains & RADEON_DOMAIN_GTT)
cs->base.used_gart += bo->base.size;
return index;
}
static int radeon_drm_cs_lookup_buffer(struct radeon_cmdbuf *rcs,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 75fb09bd001..f4c6cbe1fa7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -26,21 +26,21 @@
#ifndef RADEON_DRM_CS_H
#define RADEON_DRM_CS_H
#include "radeon_drm_bo.h"
struct radeon_bo_item {
struct radeon_bo *bo;
union {
struct {
- uint64_t priority_usage;
+ uint32_t priority_usage;
} real;
struct {
unsigned real_idx;
} slab;
} u;
};
struct radeon_cs_context {
uint32_t buf[16 * 1024];
--
2.17.1
More information about the mesa-dev
mailing list