[Mesa-dev] [PATCH] radv: initial support for shared semaphores (v2)
Bas Nieuwenhuizen
bas at basnieuwenhuizen.nl
Fri Jul 21 17:39:04 UTC 2017
On Fri, Jul 21, 2017 at 9:33 AM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This adds support for sharing semaphores using kernel syncobjects.
>
> Syncobj backed semaphores are used for any semaphore which is
> created with external flags, and when a semaphore is imported,
> otherwise we use the current non-kernel semaphores.
>
> Temporary imports from syncobj fd are also available, these
> just override the current user until the next wait, when the
> temp syncobj is dropped.
>
> v2: allocate more chunks upfront, fix off by one after
> previous refactor of syncobj setup, remove unnecessary null
> check.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
> src/amd/vulkan/radv_device.c | 248 +++++++++++++++++++++++---
> src/amd/vulkan/radv_entrypoints_gen.py | 3 +
> src/amd/vulkan/radv_private.h | 16 +-
> src/amd/vulkan/radv_radeon_winsys.h | 21 ++-
> src/amd/vulkan/radv_wsi.c | 30 +++-
> src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c | 110 ++++++++----
> 6 files changed, 357 insertions(+), 71 deletions(-)
>
> diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
> index d87be66..44bee5c 100644
> --- a/src/amd/vulkan/radv_device.c
> +++ b/src/amd/vulkan/radv_device.c
> @@ -102,6 +102,10 @@ static const VkExtensionProperties instance_extensions[] = {
> .extensionName = VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME,
> .specVersion = 1,
> },
> + {
> + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME,
> + .specVersion = 1,
> + },
> };
>
> static const VkExtensionProperties common_device_extensions[] = {
> @@ -162,6 +166,16 @@ static const VkExtensionProperties common_device_extensions[] = {
> .specVersion = 1,
> },
> };
> +static const VkExtensionProperties ext_sema_device_extensions[] = {
> + {
> + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME,
> + .specVersion = 1,
> + },
> + {
> + .extensionName = VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME,
> + .specVersion = 1,
> + },
> +};
>
> static VkResult
> radv_extensions_register(struct radv_instance *instance,
> @@ -312,6 +326,15 @@ radv_physical_device_init(struct radv_physical_device *device,
> if (result != VK_SUCCESS)
> goto fail;
>
> + if (device->rad_info.has_syncobj) {
> + result = radv_extensions_register(instance,
> + &device->extensions,
> + ext_sema_device_extensions,
> + ARRAY_SIZE(ext_sema_device_extensions));
> + if (result != VK_SUCCESS)
> + goto fail;
> + }
> +
> fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");
> device->name = get_chip_name(device->rad_info.family);
>
> @@ -1885,6 +1908,87 @@ fail:
> return VK_ERROR_OUT_OF_DEVICE_MEMORY;
> }
>
> +static VkResult radv_alloc_sem_counts(struct radv_winsys_sem_counts *counts,
> + int num_sems,
> + const VkSemaphore *sems,
> + bool reset_temp)
> +{
> + int syncobj_idx = 0, sem_idx = 0;
> +
> + if (num_sems == 0)
> + return VK_SUCCESS;
> + for (uint32_t i = 0; i < num_sems; i++) {
> + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
> +
> + if (sem->temp_syncobj || sem->syncobj)
> + counts->syncobj_count++;
> + else
> + counts->sem_count++;
> + }
> +
> + if (counts->syncobj_count) {
> + counts->syncobj = (uint32_t *)malloc(sizeof(uint32_t) * counts->syncobj_count);
> + if (!counts->syncobj)
> + return VK_ERROR_OUT_OF_HOST_MEMORY;
> + }
> +
> + if (counts->sem_count) {
> + counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
> + if (!counts->sem)
> + return VK_ERROR_OUT_OF_HOST_MEMORY;
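Free counts->syncobj here? If this second malloc fails, the syncobj array allocated just above is leaked, since radv_alloc_sem_info returns early on a wait-side failure without calling radv_free_sem_info.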
Otherwise looks reasonable to me,
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
for all 3.
> + }
> +
> + for (uint32_t i = 0; i < num_sems; i++) {
> + RADV_FROM_HANDLE(radv_semaphore, sem, sems[i]);
> +
> + if (sem->temp_syncobj) {
> + counts->syncobj[syncobj_idx++] = sem->temp_syncobj;
> + if (reset_temp) {
> + /* after we wait on a temp import - drop it */
> + sem->temp_syncobj = 0;
> + }
> + }
> + else if (sem->syncobj)
> + counts->syncobj[syncobj_idx++] = sem->syncobj;
> + else {
> + assert(sem->sem);
> + counts->sem[sem_idx++] = sem->sem;
> + }
> + }
> +
> + return VK_SUCCESS;
> +}
> +
> +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
> +{
> + free(sem_info->wait.syncobj);
> + free(sem_info->wait.sem);
> + free(sem_info->signal.syncobj);
> + free(sem_info->signal.sem);
> +}
> +
> +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
> + int num_wait_sems,
> + const VkSemaphore *wait_sems,
> + int num_signal_sems,
> + const VkSemaphore *signal_sems)
> +{
> + VkResult ret;
> + memset(sem_info, 0, sizeof(*sem_info));
> +
> + ret = radv_alloc_sem_counts(&sem_info->wait, num_wait_sems, wait_sems, true);
> + if (ret)
> + return ret;
> + ret = radv_alloc_sem_counts(&sem_info->signal, num_signal_sems, signal_sems, false);
> + if (ret)
> + radv_free_sem_info(sem_info);
> +
> + /* caller can override these */
> + sem_info->cs_emit_wait = true;
> + sem_info->cs_emit_signal = true;
> + return ret;
> +}
> +
> VkResult radv_QueueSubmit(
> VkQueue _queue,
> uint32_t submitCount,
> @@ -1935,16 +2039,22 @@ VkResult radv_QueueSubmit(
> bool do_flush = !i || pSubmits[i].pWaitDstStageMask;
> bool can_patch = !do_flush;
> uint32_t advance;
> + struct radv_winsys_sem_info sem_info;
> +
> + result = radv_alloc_sem_info(&sem_info,
> + pSubmits[i].waitSemaphoreCount,
> + pSubmits[i].pWaitSemaphores,
> + pSubmits[i].signalSemaphoreCount,
> + pSubmits[i].pSignalSemaphores);
> + if (result != VK_SUCCESS)
> + return result;
>
> if (!pSubmits[i].commandBufferCount) {
> if (pSubmits[i].waitSemaphoreCount || pSubmits[i].signalSemaphoreCount) {
> ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
> &queue->device->empty_cs[queue->queue_family_index],
> 1, NULL, NULL,
> - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
> - pSubmits[i].waitSemaphoreCount,
> - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
> - pSubmits[i].signalSemaphoreCount,
> + &sem_info,
> false, base_fence);
> if (ret) {
> radv_loge("failed to submit CS %d\n", i);
> @@ -1952,6 +2062,7 @@ VkResult radv_QueueSubmit(
> }
> fence_emitted = true;
> }
> + radv_free_sem_info(&sem_info);
> continue;
> }
>
> @@ -1976,18 +2087,16 @@ VkResult radv_QueueSubmit(
> for (uint32_t j = 0; j < pSubmits[i].commandBufferCount + do_flush; j += advance) {
> advance = MIN2(max_cs_submission,
> pSubmits[i].commandBufferCount + do_flush - j);
> - bool b = j == 0;
> - bool e = j + advance == pSubmits[i].commandBufferCount + do_flush;
>
> if (queue->device->trace_bo)
> *queue->device->trace_id_ptr = 0;
>
> + sem_info.cs_emit_wait = j == 0;
> + sem_info.cs_emit_signal = j + advance == pSubmits[i].commandBufferCount + do_flush;
> +
> ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
> advance, initial_preamble_cs, continue_preamble_cs,
> - (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores,
> - b ? pSubmits[i].waitSemaphoreCount : 0,
> - (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores,
> - e ? pSubmits[i].signalSemaphoreCount : 0,
> + &sem_info,
> can_patch, base_fence);
>
> if (ret) {
> @@ -2008,16 +2117,19 @@ VkResult radv_QueueSubmit(
> }
> }
> }
> +
> + radv_free_sem_info(&sem_info);
> free(cs_array);
> }
>
> if (fence) {
> - if (!fence_emitted)
> + if (!fence_emitted) {
> + struct radv_winsys_sem_info sem_info = {0};
> ret = queue->device->ws->cs_submit(ctx, queue->queue_idx,
> &queue->device->empty_cs[queue->queue_family_index],
> - 1, NULL, NULL, NULL, 0, NULL, 0,
> + 1, NULL, NULL, &sem_info,
> false, base_fence);
> -
> + }
> fence->submitted = true;
> }
>
> @@ -2445,6 +2557,7 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
> bool fence_emitted = false;
>
> for (uint32_t i = 0; i < bindInfoCount; ++i) {
> + struct radv_winsys_sem_info sem_info;
> for (uint32_t j = 0; j < pBindInfo[i].bufferBindCount; ++j) {
> radv_sparse_buffer_bind_memory(queue->device,
> pBindInfo[i].pBufferBinds + j);
> @@ -2455,19 +2568,28 @@ radv_sparse_image_opaque_bind_memory(struct radv_device *device,
> pBindInfo[i].pImageOpaqueBinds + j);
> }
>
> + VkResult result;
> + result = radv_alloc_sem_info(&sem_info,
> + pBindInfo[i].waitSemaphoreCount,
> + pBindInfo[i].pWaitSemaphores,
> + pBindInfo[i].signalSemaphoreCount,
> + pBindInfo[i].pSignalSemaphores);
> + if (result != VK_SUCCESS)
> + return result;
> +
> if (pBindInfo[i].waitSemaphoreCount || pBindInfo[i].signalSemaphoreCount) {
> queue->device->ws->cs_submit(queue->hw_ctx, queue->queue_idx,
> &queue->device->empty_cs[queue->queue_family_index],
> 1, NULL, NULL,
> - (struct radeon_winsys_sem **)pBindInfo[i].pWaitSemaphores,
> - pBindInfo[i].waitSemaphoreCount,
> - (struct radeon_winsys_sem **)pBindInfo[i].pSignalSemaphores,
> - pBindInfo[i].signalSemaphoreCount,
> + &sem_info,
> false, base_fence);
> fence_emitted = true;
> if (fence)
> fence->submitted = true;
> }
> +
> + radv_free_sem_info(&sem_info);
> +
> }
>
> if (fence && !fence_emitted) {
> @@ -2604,13 +2726,38 @@ VkResult radv_CreateSemaphore(
> VkSemaphore* pSemaphore)
> {
> RADV_FROM_HANDLE(radv_device, device, _device);
> - struct radeon_winsys_sem *sem;
> + const VkExportSemaphoreCreateInfoKHR *export =
> + vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO_KHR);
> + VkExternalSemaphoreHandleTypeFlagsKHR handleTypes =
> + export ? export->handleTypes : 0;
>
> - sem = device->ws->create_sem(device->ws);
> + struct radv_semaphore *sem = vk_alloc2(&device->alloc, pAllocator,
> + sizeof(*sem), 8,
> + VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
> if (!sem)
> return VK_ERROR_OUT_OF_HOST_MEMORY;
>
> - *pSemaphore = radeon_winsys_sem_to_handle(sem);
> + sem->temp_syncobj = 0;
> + /* create a syncobject if we are going to export this semaphore */
> + if (handleTypes) {
> + assert (device->physical_device->rad_info.has_syncobj);
> + assert (handleTypes == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
> + int ret = device->ws->create_syncobj(device->ws, &sem->syncobj);
> + if (ret) {
> + vk_free2(&device->alloc, pAllocator, sem);
> + return VK_ERROR_OUT_OF_HOST_MEMORY;
> + }
> + sem->sem = NULL;
> + } else {
> + sem->sem = device->ws->create_sem(device->ws);
> + if (!sem->sem) {
> + vk_free2(&device->alloc, pAllocator, sem);
> + return VK_ERROR_OUT_OF_HOST_MEMORY;
> + }
> + sem->syncobj = 0;
> + }
> +
> + *pSemaphore = radv_semaphore_to_handle(sem);
> return VK_SUCCESS;
> }
>
> @@ -2620,11 +2767,15 @@ void radv_DestroySemaphore(
> const VkAllocationCallbacks* pAllocator)
> {
> RADV_FROM_HANDLE(radv_device, device, _device);
> - RADV_FROM_HANDLE(radeon_winsys_sem, sem, _semaphore);
> + RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
> if (!_semaphore)
> return;
>
> - device->ws->destroy_sem(sem);
> + if (sem->syncobj)
> + device->ws->destroy_syncobj(device->ws, sem->syncobj);
> + else
> + device->ws->destroy_sem(sem->sem);
> + vk_free2(&device->alloc, pAllocator, sem);
> }
>
> VkResult radv_CreateEvent(
> @@ -3409,3 +3560,56 @@ VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
> */
> return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
> }
> +
> +VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
> + const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
> +{
> + RADV_FROM_HANDLE(radv_device, device, _device);
> + RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
> + uint32_t syncobj_handle = 0;
> + assert(pImportSemaphoreFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
> +
> + int ret = device->ws->import_syncobj(device->ws, pImportSemaphoreFdInfo->fd, &syncobj_handle);
> + if (ret != 0)
> + return VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR;
> +
> + if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT_KHR) {
> + sem->temp_syncobj = syncobj_handle;
> + } else {
> + sem->syncobj = syncobj_handle;
> + }
> + close(pImportSemaphoreFdInfo->fd);
> + return VK_SUCCESS;
> +}
> +
> +VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
> + const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
> + int *pFd)
> +{
> + RADV_FROM_HANDLE(radv_device, device, _device);
> + RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
> + int ret;
> + uint32_t syncobj_handle;
> +
> + assert(pGetFdInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR);
> + if (sem->temp_syncobj)
> + syncobj_handle = sem->temp_syncobj;
> + else
> + syncobj_handle = sem->syncobj;
> + ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
> + if (ret)
> + return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE_KHR);
> + return VK_SUCCESS;
> +}
> +
> +void radv_GetPhysicalDeviceExternalSemaphorePropertiesKHR(
> + VkPhysicalDevice physicalDevice,
> + const VkPhysicalDeviceExternalSemaphoreInfoKHR* pExternalSemaphoreInfo,
> + VkExternalSemaphorePropertiesKHR* pExternalSemaphoreProperties)
> +{
> + pExternalSemaphoreProperties->exportFromImportedHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
> + pExternalSemaphoreProperties->compatibleHandleTypes = VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT_KHR;
> + pExternalSemaphoreProperties->externalSemaphoreFeatures = VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT_KHR |
> + VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT_KHR;
> +
> +}
> diff --git a/src/amd/vulkan/radv_entrypoints_gen.py b/src/amd/vulkan/radv_entrypoints_gen.py
> index 61b2328..9f5a4f3 100644
> --- a/src/amd/vulkan/radv_entrypoints_gen.py
> +++ b/src/amd/vulkan/radv_entrypoints_gen.py
> @@ -49,6 +49,9 @@ supported_extensions = [
> 'VK_KHR_external_memory_fd',
> 'VK_KHR_storage_buffer_storage_class',
> 'VK_KHR_variable_pointers',
> + 'VK_KHR_external_semaphore_capabilities',
> + 'VK_KHR_external_semaphore',
> + 'VK_KHR_external_semaphore_fd'
> ]
>
> # We generate a static hash table for entry point lookup
> diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
> index 891b34e..8cd5ec0 100644
> --- a/src/amd/vulkan/radv_private.h
> +++ b/src/amd/vulkan/radv_private.h
> @@ -1470,6 +1470,20 @@ struct radv_query_pool {
> uint32_t pipeline_stats_mask;
> };
>
> +struct radv_semaphore {
> + /* use a winsys sem for non-exportable */
> + struct radeon_winsys_sem *sem;
> + uint32_t syncobj;
> + uint32_t temp_syncobj;
> +};
> +
> +VkResult radv_alloc_sem_info(struct radv_winsys_sem_info *sem_info,
> + int num_wait_sems,
> + const VkSemaphore *wait_sems,
> + int num_signal_sems,
> + const VkSemaphore *signal_sems);
> +void radv_free_sem_info(struct radv_winsys_sem_info *sem_info);
> +
> void
> radv_update_descriptor_sets(struct radv_device *device,
> struct radv_cmd_buffer *cmd_buffer,
> @@ -1563,6 +1577,6 @@ RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_query_pool, VkQueryPool)
> RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_render_pass, VkRenderPass)
> RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_sampler, VkSampler)
> RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_shader_module, VkShaderModule)
> -RADV_DEFINE_NONDISP_HANDLE_CASTS(radeon_winsys_sem, VkSemaphore)
> +RADV_DEFINE_NONDISP_HANDLE_CASTS(radv_semaphore, VkSemaphore)
>
> #endif /* RADV_PRIVATE_H */
> diff --git a/src/amd/vulkan/radv_radeon_winsys.h b/src/amd/vulkan/radv_radeon_winsys.h
> index 2f3990c..215ef0b 100644
> --- a/src/amd/vulkan/radv_radeon_winsys.h
> +++ b/src/amd/vulkan/radv_radeon_winsys.h
> @@ -131,9 +131,23 @@ struct radeon_bo_metadata {
> uint32_t metadata[64];
> };
>
> +uint32_t syncobj_handle;
> struct radeon_winsys_bo;
> struct radeon_winsys_fence;
> -struct radeon_winsys_sem;
> +
> +struct radv_winsys_sem_counts {
> + uint32_t syncobj_count;
> + uint32_t sem_count;
> + uint32_t *syncobj;
> + struct radeon_winsys_sem **sem;
> +};
> +
> +struct radv_winsys_sem_info {
> + bool cs_emit_signal;
> + bool cs_emit_wait;
> + struct radv_winsys_sem_counts wait;
> + struct radv_winsys_sem_counts signal;
> +};
>
> struct radeon_winsys {
> void (*destroy)(struct radeon_winsys *ws);
> @@ -191,10 +205,7 @@ struct radeon_winsys {
> unsigned cs_count,
> struct radeon_winsys_cs *initial_preamble_cs,
> struct radeon_winsys_cs *continue_preamble_cs,
> - struct radeon_winsys_sem **wait_sem,
> - unsigned wait_sem_count,
> - struct radeon_winsys_sem **signal_sem,
> - unsigned signal_sem_count,
> + struct radv_winsys_sem_info *sem_info,
> bool can_patch,
> struct radeon_winsys_fence *fence);
>
> diff --git a/src/amd/vulkan/radv_wsi.c b/src/amd/vulkan/radv_wsi.c
> index ab3dcd6..adc4311 100644
> --- a/src/amd/vulkan/radv_wsi.c
> +++ b/src/amd/vulkan/radv_wsi.c
> @@ -442,7 +442,6 @@ VkResult radv_AcquireNextImageKHR(
> fence->submitted = true;
> fence->signalled = true;
> }
> -
> return result;
> }
>
> @@ -452,7 +451,6 @@ VkResult radv_QueuePresentKHR(
> {
> RADV_FROM_HANDLE(radv_queue, queue, _queue);
> VkResult result = VK_SUCCESS;
> -
> const VkPresentRegionsKHR *regions =
> vk_find_struct_const(pPresentInfo->pNext, PRESENT_REGIONS_KHR);
>
> @@ -461,6 +459,20 @@ VkResult radv_QueuePresentKHR(
> struct radeon_winsys_cs *cs;
> const VkPresentRegionKHR *region = NULL;
> VkResult item_result;
> + struct radv_winsys_sem_info sem_info;
> +
> + item_result = radv_alloc_sem_info(&sem_info,
> + pPresentInfo->waitSemaphoreCount,
> + pPresentInfo->pWaitSemaphores,
> + 0,
> + NULL);
> + if (pPresentInfo->pResults != NULL)
> + pPresentInfo->pResults[i] = item_result;
> + result = result == VK_SUCCESS ? item_result : result;
> + if (item_result != VK_SUCCESS) {
> + radv_free_sem_info(&sem_info);
> + continue;
> + }
>
> assert(radv_device_from_handle(swapchain->device) == queue->device);
> if (swapchain->fences[0] == VK_NULL_HANDLE) {
> @@ -472,8 +484,10 @@ VkResult radv_QueuePresentKHR(
> if (pPresentInfo->pResults != NULL)
> pPresentInfo->pResults[i] = item_result;
> result = result == VK_SUCCESS ? item_result : result;
> - if (item_result != VK_SUCCESS)
> + if (item_result != VK_SUCCESS) {
> + radv_free_sem_info(&sem_info);
> continue;
> + }
> } else {
> radv_ResetFences(radv_device_to_handle(queue->device),
> 1, &swapchain->fences[0]);
> @@ -487,11 +501,12 @@ VkResult radv_QueuePresentKHR(
> RADV_FROM_HANDLE(radv_fence, fence, swapchain->fences[0]);
> struct radeon_winsys_fence *base_fence = fence->fence;
> struct radeon_winsys_ctx *ctx = queue->hw_ctx;
> +
> queue->device->ws->cs_submit(ctx, queue->queue_idx,
> &cs,
> 1, NULL, NULL,
> - (struct radeon_winsys_sem **)pPresentInfo->pWaitSemaphores,
> - pPresentInfo->waitSemaphoreCount, NULL, 0, false, base_fence);
> + &sem_info,
> + false, base_fence);
> fence->submitted = true;
>
> if (regions && regions->pRegions)
> @@ -504,8 +519,10 @@ VkResult radv_QueuePresentKHR(
> if (pPresentInfo->pResults != NULL)
> pPresentInfo->pResults[i] = item_result;
> result = result == VK_SUCCESS ? item_result : result;
> - if (item_result != VK_SUCCESS)
> + if (item_result != VK_SUCCESS) {
> + radv_free_sem_info(&sem_info);
> continue;
> + }
>
> VkFence last = swapchain->fences[2];
> swapchain->fences[2] = swapchain->fences[1];
> @@ -517,6 +534,7 @@ VkResult radv_QueuePresentKHR(
> 1, &last, true, 1);
> }
>
> + radv_free_sem_info(&sem_info);
> }
>
> return VK_SUCCESS;
> diff --git a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> index 6ed8f32..bc4d460 100644
> --- a/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> +++ b/src/amd/vulkan/winsys/amdgpu/radv_amdgpu_cs.c
> @@ -75,13 +75,6 @@ radv_amdgpu_cs(struct radeon_winsys_cs *base)
> return (struct radv_amdgpu_cs*)base;
> }
>
> -struct radv_amdgpu_sem_info {
> - int wait_sem_count;
> - struct radeon_winsys_sem **wait_sems;
> - int signal_sem_count;
> - struct radeon_winsys_sem **signal_sems;
> -};
> -
> static int ring_to_hw_ip(enum ring_type ring)
> {
> switch (ring) {
> @@ -99,10 +92,10 @@ static int ring_to_hw_ip(enum ring_type ring)
> static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
> uint32_t ip_type,
> uint32_t ring,
> - struct radv_amdgpu_sem_info *sem_info);
> + struct radv_winsys_sem_info *sem_info);
> static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
> struct amdgpu_cs_request *request,
> - struct radv_amdgpu_sem_info *sem_info);
> + struct radv_winsys_sem_info *sem_info);
>
> static void radv_amdgpu_request_to_fence(struct radv_amdgpu_ctx *ctx,
> struct radv_amdgpu_fence *fence,
> @@ -662,7 +655,7 @@ static void radv_assign_last_submit(struct radv_amdgpu_ctx *ctx,
>
> static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
> int queue_idx,
> - struct radv_amdgpu_sem_info *sem_info,
> + struct radv_winsys_sem_info *sem_info,
> struct radeon_winsys_cs **cs_array,
> unsigned cs_count,
> struct radeon_winsys_cs *initial_preamble_cs,
> @@ -740,7 +733,7 @@ static int radv_amdgpu_winsys_cs_submit_chained(struct radeon_winsys_ctx *_ctx,
>
> static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
> int queue_idx,
> - struct radv_amdgpu_sem_info *sem_info,
> + struct radv_winsys_sem_info *sem_info,
> struct radeon_winsys_cs **cs_array,
> unsigned cs_count,
> struct radeon_winsys_cs *initial_preamble_cs,
> @@ -752,7 +745,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
> struct radv_amdgpu_fence *fence = (struct radv_amdgpu_fence *)_fence;
> amdgpu_bo_list_handle bo_list;
> struct amdgpu_cs_request request;
> -
> + bool emit_signal_sem = sem_info->cs_emit_signal;
> assert(cs_count);
>
> for (unsigned i = 0; i < cs_count;) {
> @@ -792,6 +785,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
> }
> }
>
> + sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
> r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
> if (r) {
> if (r == -ENOMEM)
> @@ -818,7 +812,7 @@ static int radv_amdgpu_winsys_cs_submit_fallback(struct radeon_winsys_ctx *_ctx,
>
> static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
> int queue_idx,
> - struct radv_amdgpu_sem_info *sem_info,
> + struct radv_winsys_sem_info *sem_info,
> struct radeon_winsys_cs **cs_array,
> unsigned cs_count,
> struct radeon_winsys_cs *initial_preamble_cs,
> @@ -833,6 +827,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
> amdgpu_bo_list_handle bo_list;
> struct amdgpu_cs_request request;
> uint32_t pad_word = 0xffff1000U;
> + bool emit_signal_sem = sem_info->cs_emit_signal;
>
> if (radv_amdgpu_winsys(ws)->info.chip_class == SI)
> pad_word = 0x80000000;
> @@ -898,6 +893,7 @@ static int radv_amdgpu_winsys_cs_submit_sysmem(struct radeon_winsys_ctx *_ctx,
> request.ibs = &ib;
> request.fence_info = radv_set_cs_fence(ctx, cs0->hw_ip, queue_idx);
>
> + sem_info->cs_emit_signal = (i == cs_count - cnt) ? emit_signal_sem : false;
> r = radv_amdgpu_cs_submit(ctx, &request, sem_info);
> if (r) {
> if (r == -ENOMEM)
> @@ -929,35 +925,27 @@ static int radv_amdgpu_winsys_cs_submit(struct radeon_winsys_ctx *_ctx,
> unsigned cs_count,
> struct radeon_winsys_cs *initial_preamble_cs,
> struct radeon_winsys_cs *continue_preamble_cs,
> - struct radeon_winsys_sem **wait_sem,
> - unsigned wait_sem_count,
> - struct radeon_winsys_sem **signal_sem,
> - unsigned signal_sem_count,
> + struct radv_winsys_sem_info *sem_info,
> bool can_patch,
> struct radeon_winsys_fence *_fence)
> {
> struct radv_amdgpu_cs *cs = radv_amdgpu_cs(cs_array[0]);
> struct radv_amdgpu_ctx *ctx = radv_amdgpu_ctx(_ctx);
> int ret;
> - struct radv_amdgpu_sem_info sem_info = {0};
> -
> - sem_info.wait_sems = wait_sem;
> - sem_info.wait_sem_count = wait_sem_count;
> - sem_info.signal_sems = signal_sem;
> - sem_info.signal_sem_count = signal_sem_count;
>
> + assert(sem_info);
> if (!cs->ws->use_ib_bos) {
> - ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, &sem_info, cs_array,
> + ret = radv_amdgpu_winsys_cs_submit_sysmem(_ctx, queue_idx, sem_info, cs_array,
> cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
> } else if (can_patch && cs_count > AMDGPU_CS_MAX_IBS_PER_SUBMIT && cs->ws->batchchain) {
> - ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, &sem_info, cs_array,
> + ret = radv_amdgpu_winsys_cs_submit_chained(_ctx, queue_idx, sem_info, cs_array,
> cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
> } else {
> - ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, &sem_info, cs_array,
> + ret = radv_amdgpu_winsys_cs_submit_fallback(_ctx, queue_idx, sem_info, cs_array,
> cs_count, initial_preamble_cs, continue_preamble_cs, _fence);
> }
>
> - radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, &sem_info);
> + radv_amdgpu_signal_sems(ctx, cs->hw_ip, queue_idx, sem_info);
> return ret;
> }
>
> @@ -1072,10 +1060,10 @@ static void radv_amdgpu_destroy_sem(struct radeon_winsys_sem *_sem)
> static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
> uint32_t ip_type,
> uint32_t ring,
> - struct radv_amdgpu_sem_info *sem_info)
> + struct radv_winsys_sem_info *sem_info)
> {
> - for (unsigned i = 0; i < sem_info->signal_sem_count; i++) {
> - struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)sem_info->signal_sems[i];
> + for (unsigned i = 0; i < sem_info->signal.sem_count; i++) {
> + struct amdgpu_cs_fence *sem = (struct amdgpu_cs_fence *)(sem_info->signal.sem)[i];
>
> if (sem->context)
> return -EINVAL;
> @@ -1085,9 +1073,27 @@ static int radv_amdgpu_signal_sems(struct radv_amdgpu_ctx *ctx,
> return 0;
> }
>
> +static struct drm_amdgpu_cs_chunk_sem *radv_amdgpu_cs_alloc_syncobj_chunk(struct radv_winsys_sem_counts *counts,
> + struct drm_amdgpu_cs_chunk *chunk, int chunk_id)
> +{
> + struct drm_amdgpu_cs_chunk_sem *syncobj = malloc(sizeof(struct drm_amdgpu_cs_chunk_sem) * counts->syncobj_count);
> + if (!syncobj)
> + return NULL;
> +
> + for (unsigned i = 0; i < counts->syncobj_count; i++) {
> + struct drm_amdgpu_cs_chunk_sem *sem = &syncobj[i];
> + sem->handle = counts->syncobj[i];
> + }
> +
> + chunk->chunk_id = chunk_id;
> + chunk->length_dw = sizeof(struct drm_amdgpu_cs_chunk_sem) / 4 * counts->syncobj_count;
> + chunk->chunk_data = (uint64_t)(uintptr_t)syncobj;
> + return syncobj;
> +}
> +
> static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
> struct amdgpu_cs_request *request,
> - struct radv_amdgpu_sem_info *sem_info)
> + struct radv_winsys_sem_info *sem_info)
> {
> int r;
> int num_chunks;
> @@ -1096,10 +1102,12 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
> struct drm_amdgpu_cs_chunk *chunks;
> struct drm_amdgpu_cs_chunk_data *chunk_data;
> struct drm_amdgpu_cs_chunk_dep *sem_dependencies = NULL;
> + struct drm_amdgpu_cs_chunk_sem *wait_syncobj = NULL, *signal_syncobj = NULL;
> int i;
> struct amdgpu_cs_fence *sem;
> +
> user_fence = (request->fence_info.handle != NULL);
> - size = request->number_of_ibs + (user_fence ? 2 : 1) + 1;
> + size = request->number_of_ibs + (user_fence ? 2 : 1) + 3;
>
> chunks = alloca(sizeof(struct drm_amdgpu_cs_chunk) * size);
>
> @@ -1136,15 +1144,30 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
> &chunk_data[i]);
> }
>
> - if (sem_info->wait_sem_count) {
> - sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_info->wait_sem_count);
> + if (sem_info->wait.syncobj_count && sem_info->cs_emit_wait) {
> + wait_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->wait,
> + &chunks[num_chunks],
> + AMDGPU_CHUNK_ID_SYNCOBJ_IN);
> + if (!wait_syncobj) {
> + r = -ENOMEM;
> + goto error_out;
> + }
> + num_chunks++;
> +
> + if (sem_info->wait.sem_count == 0)
> + sem_info->cs_emit_wait = false;
> +
> + }
> +
> + if (sem_info->wait.sem_count && sem_info->cs_emit_wait) {
> + sem_dependencies = malloc(sizeof(struct drm_amdgpu_cs_chunk_dep) * sem_info->wait.sem_count);
> if (!sem_dependencies) {
> r = -ENOMEM;
> goto error_out;
> }
> int sem_count = 0;
> - for (unsigned j = 0; j < sem_info->wait_sem_count; j++) {
> - sem = (struct amdgpu_cs_fence *)sem_info->wait_sems[j];
> + for (unsigned j = 0; j < sem_info->wait.sem_count; j++) {
> + sem = (struct amdgpu_cs_fence *)sem_info->wait.sem[j];
> if (!sem->context)
> continue;
> struct drm_amdgpu_cs_chunk_dep *dep = &sem_dependencies[sem_count++];
> @@ -1158,7 +1181,18 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
> chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_dep) / 4 * sem_count;
> chunks[i].chunk_data = (uint64_t)(uintptr_t)sem_dependencies;
>
> - sem_info->wait_sem_count = 0;
> + sem_info->cs_emit_wait = false;
> + }
> +
> + if (sem_info->signal.syncobj_count && sem_info->cs_emit_signal) {
> + signal_syncobj = radv_amdgpu_cs_alloc_syncobj_chunk(&sem_info->signal,
> + &chunks[num_chunks],
> + AMDGPU_CHUNK_ID_SYNCOBJ_OUT);
> + if (!signal_syncobj) {
> + r = -ENOMEM;
> + goto error_out;
> + }
> + num_chunks++;
> }
>
> r = amdgpu_cs_submit_raw(ctx->ws->dev,
> @@ -1169,6 +1203,8 @@ static int radv_amdgpu_cs_submit(struct radv_amdgpu_ctx *ctx,
> &request->seq_no);
> error_out:
> free(sem_dependencies);
> + free(wait_syncobj);
> + free(signal_syncobj);
> return r;
> }
>
> --
> 2.9.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list