[Mesa-dev] [PATCH 08/25] radv: add tessellation ring allocation support.
Dave Airlie
airlied at gmail.com
Fri Mar 31 07:26:25 UTC 2017
On 31 March 2017 at 16:59, Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl> wrote:
> On Thu, Mar 30, 2017 at 10:00 AM, Dave Airlie <airlied at gmail.com> wrote:
>> From: Dave Airlie <airlied at redhat.com>
>>
>> This patch adds support for the offchip rings for storing
>> tessellation factors and attribute data.
>>
>> It includes the register setup for the TF ring.
>>
>> Signed-off-by: Dave Airlie <airlied at redhat.com>
>> ---
>> src/amd/vulkan/radv_cmd_buffer.c | 6 ++
>> src/amd/vulkan/radv_device.c | 210 ++++++++++++++++++++++++++++++++++++---
>> src/amd/vulkan/radv_private.h | 4 +
>> 3 files changed, 207 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
>> index dbd74de..70f6fad 100644
>> --- a/src/amd/vulkan/radv_cmd_buffer.c
>> +++ b/src/amd/vulkan/radv_cmd_buffer.c
>> @@ -221,6 +221,7 @@ static void radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
>> cmd_buffer->compute_scratch_size_needed = 0;
>> cmd_buffer->esgs_ring_size_needed = 0;
>> cmd_buffer->gsvs_ring_size_needed = 0;
>> + cmd_buffer->tess_rings_needed = false;
>>
>> if (cmd_buffer->upload.upload_bo)
>> cmd_buffer->device->ws->cs_add_buffer(cmd_buffer->cs,
>> @@ -1896,6 +1897,9 @@ void radv_CmdBindPipeline(
>> if (pipeline->graphics.gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
>> cmd_buffer->gsvs_ring_size_needed = pipeline->graphics.gsvs_ring_size;
>>
>> + if (radv_pipeline_has_tess(pipeline))
>> + cmd_buffer->tess_rings_needed = true;
>> +
>> if (radv_pipeline_has_gs(pipeline)) {
>> struct ac_userdata_info *loc = radv_lookup_user_sgpr(cmd_buffer->state.pipeline, MESA_SHADER_GEOMETRY,
>> AC_UD_SCRATCH_RING_OFFSETS);
>> @@ -2063,6 +2067,8 @@ void radv_CmdExecuteCommands(
>> primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
>> if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
>> primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
>> + if (secondary->tess_rings_needed)
>> + primary->tess_rings_needed = true;
>>
>> if (secondary->ring_offsets_idx != -1) {
>> if (primary->ring_offsets_idx == -1)
>> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
>> index fe531e1..b75d76b 100644
>> --- a/src/amd/vulkan/radv_device.c
>> +++ b/src/amd/vulkan/radv_device.c
>> @@ -845,6 +845,10 @@ radv_queue_finish(struct radv_queue *queue)
>> queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
>> if (queue->gsvs_ring_bo)
>> queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
>> + if (queue->tess_factor_ring_bo)
>> + queue->device->ws->buffer_destroy(queue->tess_factor_ring_bo);
>> + if (queue->tess_offchip_ring_bo)
>> + queue->device->ws->buffer_destroy(queue->tess_offchip_ring_bo);
>> if (queue->compute_scratch_bo)
>> queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
>> }
>> @@ -1182,20 +1186,29 @@ static void radv_dump_trace(struct radv_device *device,
>> }
>>
>> static void
>> -fill_geom_rings(struct radv_queue *queue,
>> - uint32_t *map,
>> - uint32_t esgs_ring_size,
>> - struct radeon_winsys_bo *esgs_ring_bo,
>> - uint32_t gsvs_ring_size,
>> - struct radeon_winsys_bo *gsvs_ring_bo)
>> +fill_geom_tess_rings(struct radv_queue *queue,
>> + uint32_t *map,
>> + uint32_t esgs_ring_size,
>> + struct radeon_winsys_bo *esgs_ring_bo,
>> + uint32_t gsvs_ring_size,
>> + struct radeon_winsys_bo *gsvs_ring_bo,
>> + uint32_t tess_factor_ring_size,
>> + struct radeon_winsys_bo *tess_factor_ring_bo,
>> + uint32_t tess_offchip_ring_size,
>> + struct radeon_winsys_bo *tess_offchip_ring_bo)
>> {
>> uint64_t esgs_va = 0, gsvs_va = 0;
>> + uint64_t tess_factor_va = 0, tess_offchip_va = 0;
>> uint32_t *desc = &map[4];
>>
>> if (esgs_ring_bo)
>> esgs_va = queue->device->ws->buffer_get_va(esgs_ring_bo);
>> if (gsvs_ring_bo)
>> gsvs_va = queue->device->ws->buffer_get_va(gsvs_ring_bo);
>> + if (tess_factor_ring_bo)
>> + tess_factor_va = queue->device->ws->buffer_get_va(tess_factor_ring_bo);
>> + if (tess_offchip_ring_bo)
>> + tess_offchip_va = queue->device->ws->buffer_get_va(tess_offchip_ring_bo);
>>
>> /* stride 0, num records - size, add tid, swizzle, elsize4,
>> index stride 64 */
>> @@ -1270,6 +1283,88 @@ fill_geom_rings(struct radv_queue *queue,
>> S_008F0C_ELEMENT_SIZE(1) |
>> S_008F0C_INDEX_STRIDE(1) |
>> S_008F0C_ADD_TID_ENABLE(true);
>> + desc += 4;
>> +
>> + desc[0] = tess_factor_va;
>> + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_factor_va >> 32) |
>> + S_008F04_STRIDE(0) |
>> + S_008F04_SWIZZLE_ENABLE(false);
>> + desc[2] = tess_factor_ring_size;
>> + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>> + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>> + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
>> + S_008F0C_ELEMENT_SIZE(0) |
>> + S_008F0C_INDEX_STRIDE(0) |
>> + S_008F0C_ADD_TID_ENABLE(false);
>> + desc += 4;
>> +
>> + desc[0] = tess_offchip_va;
>> + desc[1] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32) |
>> + S_008F04_STRIDE(0) |
>> + S_008F04_SWIZZLE_ENABLE(false);
>> + desc[2] = tess_offchip_ring_size;
>> + desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
>> + S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
>> + S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
>> + S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
>> + S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
>> + S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
>> + S_008F0C_ELEMENT_SIZE(0) |
>> + S_008F0C_INDEX_STRIDE(0) |
>> + S_008F0C_ADD_TID_ENABLE(false);
>> +}
>> +
>> +static unsigned
>> +radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
>> +{
>> + bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= CIK &&
>> + device->physical_device->rad_info.family != CHIP_CARRIZO &&
>> + device->physical_device->rad_info.family != CHIP_STONEY;
>> + unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
>> + unsigned max_offchip_buffers = max_offchip_buffers_per_se *
>> + device->physical_device->rad_info.max_se;
>> + unsigned offchip_granularity;
>> + unsigned hs_offchip_param;
>> + switch (device->tess_offchip_block_dw_size) {
>> + default:
>> + assert(0);
>> + /* fall through */
>> + case 8192:
>> + offchip_granularity = V_03093C_X_8K_DWORDS;
>> + break;
>> + case 4096:
>> + offchip_granularity = V_03093C_X_4K_DWORDS;
>> + break;
>> + }
>> +
>> + switch (device->physical_device->rad_info.chip_class) {
>> + case SI:
>> + max_offchip_buffers = MIN2(max_offchip_buffers, 126);
>> + break;
>> + case CIK:
>> + max_offchip_buffers = MIN2(max_offchip_buffers, 508);
>> + break;
>> + case VI:
>> + default:
>> + max_offchip_buffers = MIN2(max_offchip_buffers, 512);
>> + break;
>> + }
>> +
>> + *max_offchip_buffers_p = max_offchip_buffers;
>> + if (device->physical_device->rad_info.chip_class >= CIK) {
>> + if (device->physical_device->rad_info.chip_class >= VI)
>> + --max_offchip_buffers;
>> + hs_offchip_param =
>> + S_03093C_OFFCHIP_BUFFERING(max_offchip_buffers) |
>> + S_03093C_OFFCHIP_GRANULARITY(offchip_granularity);
>> + } else {
>> + hs_offchip_param =
>> + S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
>> + }
>> + return hs_offchip_param;
>> }
>>
>> static VkResult
>> @@ -1278,6 +1373,7 @@ radv_get_preamble_cs(struct radv_queue *queue,
>> uint32_t compute_scratch_size,
>> uint32_t esgs_ring_size,
>> uint32_t gsvs_ring_size,
>> + bool needs_tess_rings,
>> struct radeon_winsys_cs **initial_preamble_cs,
>> struct radeon_winsys_cs **continue_preamble_cs)
>> {
>> @@ -1286,12 +1382,32 @@ radv_get_preamble_cs(struct radv_queue *queue,
>> struct radeon_winsys_bo *compute_scratch_bo = NULL;
>> struct radeon_winsys_bo *esgs_ring_bo = NULL;
>> struct radeon_winsys_bo *gsvs_ring_bo = NULL;
>> + struct radeon_winsys_bo *tess_factor_ring_bo = NULL;
>> + struct radeon_winsys_bo *tess_offchip_ring_bo = NULL;
>> struct radeon_winsys_cs *dest_cs[2] = {0};
>> + bool add_tess_rings = false;
>> + unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
>> + unsigned max_offchip_buffers;
>> + unsigned hs_offchip_param = 0;
>> + if (!queue->has_tess_rings) {
>> + if (needs_tess_rings)
>> + add_tess_rings = true;
>> + }
>> +
>> + if (add_tess_rings) {
>> + tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
>> + hs_offchip_param = radv_get_hs_offchip_param(queue->device,
>> + &max_offchip_buffers);
> This only sets hs_offchip_param when the tess rings are added. We also
> need it if the tess rings already exist but we re-emit the preamble for
> other reasons.
Yes, there is a later patch that fixes that; I worked it out today.
I'll squash it in here.
Dave.
More information about the mesa-dev
mailing list