[Mesa-dev] [PATCH 7/9] radeonsi: rework RADEON_PRIO flags to be <= 31

Marek Olšák maraeo at gmail.com
Thu Jul 12 05:26:38 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

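All RADEON_PRIO_* values now fit in the range 0..31, so the priority_usage
bitmasks can be narrowed from uint64_t to uint32_t.

This decreases sizeof(struct amdgpu_cs_buffer) from 24 to 16 bytes.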
---
 src/gallium/drivers/radeon/radeon_winsys.h    | 39 ++++++++++---------
 src/gallium/drivers/radeonsi/si_debug.c       |  2 +-
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c     |  6 +--
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h     |  4 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.c |  2 +-
 src/gallium/winsys/radeon/drm/radeon_drm_cs.h |  2 +-
 6 files changed, 28 insertions(+), 27 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index bcd6831ed35..10c63ae4d82 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -108,63 +108,64 @@ enum radeon_value_id {
     RADEON_VRAM_USAGE,
     RADEON_VRAM_VIS_USAGE,
     RADEON_GTT_USAGE,
     RADEON_GPU_TEMPERATURE, /* DRM 2.42.0 */
     RADEON_CURRENT_SCLK,
     RADEON_CURRENT_MCLK,
     RADEON_GPU_RESET_COUNTER, /* DRM 2.43.0 */
     RADEON_CS_THREAD_TIME,
 };
 
-/* Each group of four has the same priority. */
 enum radeon_bo_priority {
+    /* Each group of two has the same priority. */
     RADEON_PRIO_FENCE = 0,
     RADEON_PRIO_TRACE,
-    RADEON_PRIO_SO_FILLED_SIZE,
+
+    RADEON_PRIO_SO_FILLED_SIZE = 2,
     RADEON_PRIO_QUERY,
 
     RADEON_PRIO_IB1 = 4, /* main IB submitted to the kernel */
     RADEON_PRIO_IB2, /* IB executed with INDIRECT_BUFFER */
-    RADEON_PRIO_DRAW_INDIRECT,
+
+    RADEON_PRIO_DRAW_INDIRECT = 6,
     RADEON_PRIO_INDEX_BUFFER,
 
-    RADEON_PRIO_CP_DMA = 12,
+    RADEON_PRIO_CP_DMA = 8,
+    RADEON_PRIO_BORDER_COLORS,
 
-    RADEON_PRIO_CONST_BUFFER = 16,
+    RADEON_PRIO_CONST_BUFFER = 10,
     RADEON_PRIO_DESCRIPTORS,
-    RADEON_PRIO_BORDER_COLORS,
 
-    RADEON_PRIO_SAMPLER_BUFFER = 20,
+    RADEON_PRIO_SAMPLER_BUFFER = 12,
     RADEON_PRIO_VERTEX_BUFFER,
 
-    RADEON_PRIO_SHADER_RW_BUFFER = 24,
+    RADEON_PRIO_SHADER_RW_BUFFER = 14,
     RADEON_PRIO_COMPUTE_GLOBAL,
 
-    RADEON_PRIO_SAMPLER_TEXTURE = 28,
+    RADEON_PRIO_SAMPLER_TEXTURE = 16,
     RADEON_PRIO_SHADER_RW_IMAGE,
 
-    RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 32,
-
-    RADEON_PRIO_COLOR_BUFFER = 36,
+    RADEON_PRIO_SAMPLER_TEXTURE_MSAA = 18,
+    RADEON_PRIO_COLOR_BUFFER,
 
-    RADEON_PRIO_DEPTH_BUFFER = 40,
+    RADEON_PRIO_DEPTH_BUFFER = 20,
 
-    RADEON_PRIO_COLOR_BUFFER_MSAA = 44,
+    RADEON_PRIO_COLOR_BUFFER_MSAA = 22,
 
-    RADEON_PRIO_DEPTH_BUFFER_MSAA = 48,
+    RADEON_PRIO_DEPTH_BUFFER_MSAA = 24,
 
-    RADEON_PRIO_SEPARATE_META = 52,
+    RADEON_PRIO_SEPARATE_META = 26,
     RADEON_PRIO_SHADER_BINARY, /* the hw can't hide instruction cache misses */
 
-    RADEON_PRIO_SHADER_RINGS = 56,
+    RADEON_PRIO_SHADER_RINGS = 28,
 
-    RADEON_PRIO_SCRATCH_BUFFER = 60,
+    RADEON_PRIO_SCRATCH_BUFFER = 30,
-    /* 63 is the maximum value */
+    /* 31 is the maximum value (priority_usage is a 32-bit mask) */
 };
 
 struct winsys_handle;
 struct radeon_winsys_ctx;
 
 struct radeon_cmdbuf_chunk {
     unsigned cdw;  /* Number of used dwords. */
     unsigned max_dw; /* Maximum number of dwords. */
     uint32_t *buf; /* The base pointer of the chunk. */
@@ -216,21 +217,21 @@ struct radeon_bo_metadata {
 };
 
 enum radeon_feature_id {
     RADEON_FID_R300_HYPERZ_ACCESS,     /* ZMask + HiZ */
     RADEON_FID_R300_CMASK_ACCESS,
 };
 
 struct radeon_bo_list_item {
     uint64_t bo_size;
     uint64_t vm_address;
-    uint64_t priority_usage; /* mask of (1 << RADEON_PRIO_*) */
+    uint32_t priority_usage; /* mask of (1u << RADEON_PRIO_*) */
 };
 
 struct radeon_winsys {
     /**
      * The screen object this winsys was created for
      */
     struct pipe_screen *screen;
 
     /**
      * Decrement the winsys reference count.
diff --git a/src/gallium/drivers/radeonsi/si_debug.c b/src/gallium/drivers/radeonsi/si_debug.c
index 50375ce7cbe..d6207e68d12 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -562,21 +562,21 @@ static void si_dump_bo_list(struct si_context *sctx,
 					(va - previous_va_end) / page_size);
 			}
 		}
 
 		/* Print the buffer. */
 		fprintf(f, "  %10"PRIu64"    0x%013"PRIX64"       0x%013"PRIX64"       ",
 			size / page_size, va / page_size, (va + size) / page_size);
 
 		/* Print the usage. */
 		for (j = 0; j < 64; j++) {
-			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
+			if (!(saved->bo_list[i].priority_usage & (1u << j)))
 				continue;
 
 			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
 			hit = true;
 		}
 		fprintf(f, "\n");
 	}
 	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
 		   "      Other buffers can still be allocated there.\n\n");
 }
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index ec164175dbc..872e67a790a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -622,21 +622,21 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
    struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
    struct amdgpu_cs_buffer *buffer;
    int index;
 
    /* Fast exit for no-op calls.
     * This is very effective with suballocators and linear uploaders that
     * are outside of the winsys.
     */
    if (bo == cs->last_added_bo &&
        (usage & cs->last_added_bo_usage) == usage &&
-       (1ull << priority) & cs->last_added_bo_priority_usage)
+       (1u << priority) & cs->last_added_bo_priority_usage)
       return cs->last_added_bo_index;
 
    if (!bo->sparse) {
       if (!bo->bo) {
          index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
          if (index < 0)
             return 0;
 
          buffer = &cs->slab_buffers[index];
          buffer->usage |= usage;
@@ -651,21 +651,21 @@ static unsigned amdgpu_cs_add_buffer(struct radeon_cmdbuf *rcs,
 
       buffer = &cs->real_buffers[index];
    } else {
       index = amdgpu_lookup_or_add_sparse_buffer(acs, bo);
       if (index < 0)
          return 0;
 
       buffer = &cs->sparse_buffers[index];
    }
 
-   buffer->u.real.priority_usage |= 1ull << priority;
+   buffer->u.real.priority_usage |= 1u << priority;
    buffer->usage |= usage;
 
    cs->last_added_bo = bo;
    cs->last_added_bo_index = index;
    cs->last_added_bo_usage = buffer->usage;
    cs->last_added_bo_priority_usage = buffer->u.real.priority_usage;
    return index;
 }
 
 static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib,
@@ -1332,21 +1332,21 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
       num_handles = 0;
       for (i = 0; i < cs->num_real_buffers; ++i) {
          struct amdgpu_cs_buffer *buffer = &cs->real_buffers[i];
 
 	 if (buffer->bo->is_local)
             continue;
 
          assert(buffer->u.real.priority_usage != 0);
 
          handles[num_handles] = buffer->bo->bo;
-         flags[num_handles] = (util_last_bit64(buffer->u.real.priority_usage) - 1) / 4;
+         flags[num_handles] = (util_last_bit(buffer->u.real.priority_usage) - 1) / 2;
 	 ++num_handles;
       }
 
       if (num_handles) {
          r = amdgpu_bo_list_create(ws->dev, num_handles,
                                    handles, flags, &bo_list);
          if (r) {
             fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
             amdgpu_fence_signalled(cs->fence);
             cs->error_code = r;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 3b10cc66c21..9f5a4fd991a 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -39,21 +39,21 @@ struct amdgpu_ctx {
    uint64_t *user_fence_cpu_address_base;
    int refcount;
    unsigned initial_num_total_rejected_cs;
    unsigned num_rejected_cs;
 };
 
 struct amdgpu_cs_buffer {
    struct amdgpu_winsys_bo *bo;
    union {
       struct {
-         uint64_t priority_usage;
+         uint32_t priority_usage; /* mask of (1u << RADEON_PRIO_*) */
       } real;
       struct {
          uint32_t real_idx; /* index of underlying real BO */
       } slab;
    } u;
    enum radeon_bo_usage usage;
 };
 
 enum ib_type {
    IB_MAIN,
@@ -87,21 +87,21 @@ struct amdgpu_cs_context {
 
    unsigned                    num_sparse_buffers;
    unsigned                    max_sparse_buffers;
    struct amdgpu_cs_buffer     *sparse_buffers;
 
    int                         buffer_indices_hashlist[4096];
 
    struct amdgpu_winsys_bo     *last_added_bo;
    unsigned                    last_added_bo_index;
    unsigned                    last_added_bo_usage;
-   uint64_t                    last_added_bo_priority_usage;
+   uint32_t                    last_added_bo_priority_usage;
 
    struct pipe_fence_handle    **fence_dependencies;
    unsigned                    num_fence_dependencies;
    unsigned                    max_fence_dependencies;
 
    struct pipe_fence_handle    **syncobj_to_signal;
    unsigned                    num_syncobj_to_signal;
    unsigned                    max_syncobj_to_signal;
 
    struct pipe_fence_handle    *fence;
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
index 90386027235..798be78504c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c
@@ -359,21 +359,21 @@ static unsigned radeon_drm_cs_add_buffer(struct radeon_cmdbuf *rcs,
         index = cs->csc->slab_buffers[index].u.slab.real_idx;
     } else {
         index = radeon_lookup_or_add_real_buffer(cs, bo);
     }
 
     reloc = &cs->csc->relocs[index];
     added_domains = (rd | wd) & ~(reloc->read_domains | reloc->write_domain);
     reloc->read_domains |= rd;
     reloc->write_domain |= wd;
     reloc->flags = MAX2(reloc->flags, priority);
-    cs->csc->relocs_bo[index].u.real.priority_usage |= 1ull << priority;
+    cs->csc->relocs_bo[index].u.real.priority_usage |= 1u << priority;
 
     if (added_domains & RADEON_DOMAIN_VRAM)
         cs->base.used_vram += bo->base.size;
     else if (added_domains & RADEON_DOMAIN_GTT)
         cs->base.used_gart += bo->base.size;
 
     return index;
 }
 
 static int radeon_drm_cs_lookup_buffer(struct radeon_cmdbuf *rcs,
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
index 75fb09bd001..f4c6cbe1fa7 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h
@@ -26,21 +26,21 @@
 
 #ifndef RADEON_DRM_CS_H
 #define RADEON_DRM_CS_H
 
 #include "radeon_drm_bo.h"
 
 struct radeon_bo_item {
     struct radeon_bo    *bo;
     union {
         struct {
-            uint64_t    priority_usage;
+            uint32_t    priority_usage; /* mask of (1u << RADEON_PRIO_*) */
         } real;
         struct {
             unsigned    real_idx;
         } slab;
     } u;
 };
 
 struct radeon_cs_context {
     uint32_t                    buf[16 * 1024];
 
-- 
2.17.1



More information about the mesa-dev mailing list