[Mesa-dev] [PATCH 08/25] radv: add tessellation ring allocation support.

Dave Airlie airlied at gmail.com
Fri Mar 31 07:26:25 UTC 2017


On 31 March 2017 at 16:59, Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> wrote:
> On Thu, Mar 30, 2017 at 10:00 AM, Dave Airlie <airlied at gmail.com> wrote:
>> From: Dave Airlie <airlied at redhat.com>
>>
>> This patch adds support for the offchip rings for storing
>> tessellation factors and attribute data.
>>
>> It also includes the register setup for the tessellation factor (TF) ring.
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>> ---
>>  src/amd/vulkan/radv_cmd_buffer.c |   6 ++
>>  src/amd/vulkan/radv_device.c     | 210 ++++++++++++++++++++++++++++++++++++---
>>  src/amd/vulkan/radv_private.h    |   4 +
>>  3 files changed, 207 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>> index dbd74de..70f6fad 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -221,6 +221,7 @@ static void  radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>>         cmd_buffer->compute_scratch_size_needed = 0;
>>         cmd_buffer->esgs_ring_size_needed = 0;
>>         cmd_buffer->gsvs_ring_size_needed = 0;
>> +       cmd_buffer->tess_rings_needed = false;
>>
>>         if (cmd_buffer->upload.upload_bo)
>>                 cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
>> @@ -1896,6 +1897,9 @@ void radv_CmdBindPipeline(
>>                 if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
>>                         cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
>>
>> +               if (radv_pipeline_has_tess(pipeline))
>> +                       cmd_buffer->tess_rings_needed = true;
>> +
>>                 if (radv_pipeline_has_gs(pipeline)) {
>>                         struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
>>                                                                              AC_UD_SCRATCH_RING_OFFSETS);
>> @@ -2063,6 +2067,8 @@ void radv_CmdExecuteCommands(
>>                         primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
>>                 if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
>>                         primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
>> +               if (secondary->tess_rings_needed)
>> +                       primary->tess_rings_needed = true;
>>
>>                 if (secondary->ring_offsets_idx != -1) {
>>                         if (primary->ring_offsets_idx == -1)
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index fe531e1..b75d76b 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -845,6 +845,10 @@ radv_queue_finish(struct radv_queue *queue)
>>                 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
>>         if (queue->gsvs_ring_bo)
>>                 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
>> +       if (queue->tess_factor_ring_bo)
>> +               queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
>> +       if (queue->tess_offchip_ring_bo)
>> +               queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
>>         if (queue->compute_scratch_bo)
>>                 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
>>  }
>> @@ -1182,20 +1186,29 @@ static void radv_dump_trace(struct radv_device *device,
>>  }
>>
>>  static void
>> -fill_geom_rings(struct radv_queue *queue,
>> -               uint32_t *map,
>> -               uint32_t esgs_ring_size,
>> -               struct radeon_winsys_bo *esgs_ring_bo,
>> -               uint32_t gsvs_ring_size,
>> -               struct radeon_winsys_bo *gsvs_ring_bo)
>> +fill_geom_tess_rings(struct radv_queue *queue,
>> +                    uint32_t *map,
>> +                    uint32_t esgs_ring_size,
>> +                    struct radeon_winsys_bo *esgs_ring_bo,
>> +                    uint32_t gsvs_ring_size,
>> +                    struct radeon_winsys_bo *gsvs_ring_bo,
>> +                    uint32_t tess_factor_ring_size,
>> +                    struct radeon_winsys_bo *tess_factor_ring_bo,
>> +                    uint32_t tess_offchip_ring_size,
>> +                    struct radeon_winsys_bo *tess_offchip_ring_bo)
>>  {
>>         uint64_t esgs_va = 0, gsvs_va = 0;
>> +       uint64_t tess_factor_va = 0, tess_offchip_va = 0;
>>         uint32_t *desc = &map[4];
>>
>>         if (esgs_ring_bo)
>>                 esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
>>         if (gsvs_ring_bo)
>>                 gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
>> +       if (tess_factor_ring_bo)
>> +               tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
>> +       if (tess_offchip_ring_bo)
>> +               tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
>>
>>         /* stride 0, num records - size, add tid, swizzle, elsize4,
>>            index stride 64 */
>> @@ -1270,6 +1283,88 @@ fill_geom_rings(struct radv_queue *queue,
>>                 S_008F0C_ELEMENT_SIZE(1) |
>>                 S_008F0C_INDEX_STRIDE(1) |
>>                 S_008F0C_ADD_TID_ENABLE(true);
>> +       desc += 4;
>> +
>> +       desc[0] = tess_factor_va;
>> +       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
>> +               S_008F04_STRIDE(0) |
>> +               S_008F04_SWIZZLE_ENABLE(false);
>> +       desc[2] = tess_factor_ring_size;
>> +       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>> +               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>> +               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>> +               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>> +               S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>> +               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
>> +               S_008F0C_ELEMENT_SIZE(0) |
>> +               S_008F0C_INDEX_STRIDE(0) |
>> +               S_008F0C_ADD_TID_ENABLE(false);
>> +       desc += 4;
>> +
>> +       desc[0] = tess_offchip_va;
>> +       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
>> +               S_008F04_STRIDE(0) |
>> +               S_008F04_SWIZZLE_ENABLE(false);
>> +       desc[2] = tess_offchip_ring_size;
>> +       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>> +               S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>> +               S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>> +               S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>> +               S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>> +               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
>> +               S_008F0C_ELEMENT_SIZE(0) |
>> +               S_008F0C_INDEX_STRIDE(0) |
>> +               S_008F0C_ADD_TID_ENABLE(false);
>> +}
>> +
>> +static unsigned
>> +radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
>> +{
>> +       bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
>> +               device->physical_device->rad_info.family != CHIP_CARRIZO &&
>> +               device->physical_device->rad_info.family != CHIP_STONEY;
>> +       unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
>> +       unsigned max_offchip_buffers = max_offchip_buffers_per_se *
>> +               device->physical_device->rad_info.max_se;
>> +       unsigned offchip_granularity;
>> +       unsigned hs_offchip_param;
>> +       switch (device->tess_offchip_block_dw_size) {
>> +       default:
>> +               assert(0);
>> +               /* fall through */
>> +       case 8192:
>> +               offchip_granularity = V_03093C_X_8K_DWORDS;
>> +               break;
>> +       case 4096:
>> +               offchip_granularity = V_03093C_X_4K_DWORDS;
>> +               break;
>> +       }
>> +
>> +       switch (device->physical_device->rad_info.chip_class) {
>> +       case SI:
>> +               max_offchip_buffers = MIN2(max_offchip_buffers, 126);
>> +               break;
>> +       case CIK:
>> +               max_offchip_buffers = MIN2(max_offchip_buffers, 508);
>> +               break;
>> +       case VI:
>> +       default:
>> +               max_offchip_buffers = MIN2(max_offchip_buffers, 512);
>> +               break;
>> +       }
>> +
>> +       *max_offchip_buffers_p = max_offchip_buffers;
>> +       if (device->physical_device->rad_info.chip_class >= CIK) {
>> +               if (device->physical_device->rad_info.chip_class >= VI)
>> +                       --max_offchip_buffers;
>> +               hs_offchip_param =
>> +                       S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
>> +                       S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
>> +       } else {
>> +               hs_offchip_param =
>> +                       S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
>> +       }
>> +       return hs_offchip_param;
>>  }
>>
>>  static VkResult
>> @@ -1278,6 +1373,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
>>                       uint32_t compute_scratch_size,
>>                      uint32_t esgs_ring_size,
>>                      uint32_t gsvs_ring_size,
>> +                    bool needs_tess_rings,
>>                       struct radeon_winsys_cs **initial_preamble_cs,
>>                       struct radeon_winsys_cs **continue_preamble_cs)
>>  {
>> @@ -1286,12 +1382,32 @@ radv_get_preamble_cs(struct radv_queue *queue,
>>         struct radeon_winsys_bo *compute_scratch_bo = NULL;
>>         struct radeon_winsys_bo *esgs_ring_bo = NULL;
>>         struct radeon_winsys_bo *gsvs_ring_bo = NULL;
>> +       struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
>> +       struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
>>         struct radeon_winsys_cs *dest_cs[2] = {0};
>> +       bool add_tess_rings = false;
>> +       unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
>> +       unsigned max_offchip_buffers;
>> +       unsigned hs_offchip_param = 0;
>> +       if (!queue->has_tess_rings) {
>> +               if (needs_tess_rings)
>> +                       add_tess_rings = true;
>> +       }
>> +
>> +       if (add_tess_rings) {
>> +               tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
>> +               hs_offchip_param = radv_get_hs_offchip_param(queue->device,
>> +                                                            &max_offchip_buffers);
> This only sets hs_offchip_param when the tess rings are added. We also
> need it when the tess rings already exist but we re-emit the preamble
> for other reasons.

Yes, there is a later patch that fixes that; I worked it out today.

I'll squash it in here.

Dave.


More information about the mesa-dev mailing list