[PATCH 2/3] lib/amdgpu: add a CS helper function
Kamil Konieczny
kamil.konieczny at linux.intel.com
Tue Feb 6 08:28:05 UTC 2024
Hi Vitaly,
On 2024-01-24 at 22:44:12 -0500, vitaly.prosyak at amd.com wrote:
> From: Vitaly Prosyak <vitaly.prosyak at amd.com>
>
> Add a CS helper function that works like RADV's
> radv_amdgpu_cs_submit. We want to validate and ensure
> the integrity of the following functionality when
> 'amdgpu_cs_submit_raw2' is called:
>
> 1. Gang submission, where several different IPs are combined
>    into a single submission but share the same instance and
>    ring numbers (ip_instance, ring).
> 2. Use 'AMDGPU_CHUNK_ID_BO_HANDLES' instead of an explicit
>    BO list parameter to 'amdgpu_cs_submit_raw2'.
> 3. Do not use 'AMDGPU_CHUNK_ID_DEPENDENCIES'.
> 4. A user fence is always present, except for multimedia
>    ring commands.
>
> RADV uses these scenarios.
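>
> Below is a rough usage sketch, illustrative only: the device handle,
> ring_context, IB GPU address and BO list (ib_gpu_va, ib_size_dw,
> bo_list_entries, num_bos) are placeholders assumed to come from the
> usual IGT amdgpu setup helpers.
>
>	struct amdgpu_cs_request_radv req = { 0 };
>	uint32_t r;
>
>	/* HW IP block, instance and ring shared by all gang members. */
>	req.ip_type = AMD_IP_GFX;
>	req.ip_instance = 0;
>	req.ring = 0;
>
>	/* One gang member here; a gang submission fills more ibs[]
>	 * entries, up to AMDGPU_CS_GANG_SIZE, all sharing the same
>	 * ip_instance and ring.
>	 */
>	req.number_of_ibs = 1;
>	req.ibs[0].ib_mc_address = ib_gpu_va;	/* GPU VA of the IB */
>	req.ibs[0].size = ib_size_dw;		/* IB size in dwords */
>	req.ibs[0].ip_type = AMD_IP_GFX;
>
>	/* BOs referenced by the submission, passed to the kernel via
>	 * an AMDGPU_CHUNK_ID_BO_HANDLES chunk.
>	 */
>	req.handles = bo_list_entries;	/* struct drm_amdgpu_bo_list_entry[] */
>	req.num_handles = num_bos;
>
>	r = amdgpu_cs_submit_radv(device, ring_context, &req);
>	igt_assert_eq(r, 0);	/* req.seq_no holds the CS sequence number */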
>
> Cc: Jesse Zhang <jesse.zhang at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
You forgot the a-b from Christian here. Before pushing to gitlab,
_check_ with 'git log' that _all_ your patches have an acked-by or reviewed-by.
Regards,
Kamil
> ---
> lib/amdgpu/amd_cs_radv.c | 172 +++++++++++++++++++++++++++++++++++++
> lib/amdgpu/amd_cs_radv.h | 61 +++++++++++++
> lib/amdgpu/amd_ip_blocks.h | 2 +
> lib/meson.build | 1 +
> 4 files changed, 236 insertions(+)
> create mode 100644 lib/amdgpu/amd_cs_radv.c
> create mode 100644 lib/amdgpu/amd_cs_radv.h
>
> diff --git a/lib/amdgpu/amd_cs_radv.c b/lib/amdgpu/amd_cs_radv.c
> new file mode 100644
> index 000000000..3c51ff7ef
> --- /dev/null
> +++ b/lib/amdgpu/amd_cs_radv.c
> @@ -0,0 +1,172 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + */
> +#include <time.h>
> +#include <sys/time.h>
> +
> +#include "lib/amdgpu/amd_memory.h"
> +#include "lib/amdgpu/amd_sdma.h"
> +#include "lib/amdgpu/amd_PM4.h"
> +#include "lib/amdgpu/amd_cs_radv.h"
> +
> +#define TIME_MONOTONIC 2
> +#define OS_TIMEOUT_INFINITE 0xffffffffffffffffull
> +
> +enum { MAX_RINGS_PER_TYPE = 8 };
> +
> +static bool
> +amdgpu_cs_has_user_fence(struct amdgpu_cs_request_radv *request)
> +{
> + /* TODO */
> + return false;
> +}
> +
> +static int64_t
> +os_time_get_nano(void)
> +{
> + struct timespec ts;
> +
> + timespec_get(&ts, TIME_MONOTONIC);
> + return ts.tv_nsec + ts.tv_sec*INT64_C(1000000000);
> +}
> +
> +static int64_t
> +os_time_get_absolute_timeout(uint64_t timeout)
> +{
> + int64_t time, abs_timeout;
> +
> + /* Also check for the type upper bound. */
> + if (timeout == OS_TIMEOUT_INFINITE || timeout > INT64_MAX)
> + return OS_TIMEOUT_INFINITE;
> +
> + time = os_time_get_nano();
> + abs_timeout = time + (int64_t)timeout;
> +
> + /* Check for overflow. */
> + if (abs_timeout < time)
> + return OS_TIMEOUT_INFINITE;
> +
> + return abs_timeout;
> +}
> +
> +static void
> +os_time_sleep(int64_t usecs)
> +{
> + struct timespec time;
> +
> + time.tv_sec = usecs / 1000000;
> + time.tv_nsec = (usecs % 1000000) * 1000;
> + while (clock_nanosleep(CLOCK_MONOTONIC, 0, &time, &time) == EINTR)
> + ;
> +}
> +
> +uint32_t
> +amdgpu_get_bo_handle(struct amdgpu_bo *bo)
> +{
> + uint32_t handle;
> + int r;
> +
> + r = amdgpu_bo_export(bo, amdgpu_bo_handle_type_kms, &handle);
> + igt_assert_eq(r, 0);
> + return handle;
> +}
> +
> +uint32_t
> +amdgpu_cs_submit_radv(amdgpu_device_handle dev, struct amdgpu_ring_context *ring_context,
> + struct amdgpu_cs_request_radv *request)
> +{
> + int r, num_chunks, size, i;
> + struct drm_amdgpu_cs_chunk *chunks;
> + struct drm_amdgpu_cs_chunk_data *chunk_data;
> + struct drm_amdgpu_bo_list_in bo_list_in;
> + uint32_t result = 0;
> + uint64_t abs_timeout_ns;
> + bool has_user_fence;
> +
> + has_user_fence = amdgpu_cs_has_user_fence(request);
> + size = request->number_of_ibs + 1 + (has_user_fence ? 1 : 0) + 1 /* bo list */ + 3;
> + chunks = malloc(sizeof(chunks[0]) * size);
> + size = request->number_of_ibs + (has_user_fence ? 1 : 0);
> + chunk_data = malloc(sizeof(chunk_data[0]) * size);
> +
> + num_chunks = request->number_of_ibs;
> + for (i = 0; i < request->number_of_ibs; i++) {
> +
> + struct amdgpu_cs_ib_info_radv *ib;
> +
> + chunks[i].chunk_id = AMDGPU_CHUNK_ID_IB;
> + chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_ib) / 4;
> + chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
> +
> + ib = &request->ibs[i];
> + assert(ib->size);
> +
> + chunk_data[i].ib_data._pad = 0;
> + chunk_data[i].ib_data.va_start = ib->ib_mc_address;
> + chunk_data[i].ib_data.ib_bytes = ib->size * 4;
> + chunk_data[i].ib_data.ip_type = ib->ip_type;
> + chunk_data[i].ib_data.flags = ib->flags;
> +
> + chunk_data[i].ib_data.ip_instance = request->ip_instance;
> + chunk_data[i].ib_data.ring = request->ring;
> + }
> +
> + assert(chunk_data[request->number_of_ibs - 1].ib_data.ip_type == request->ip_type);
> +
> + if (has_user_fence) {
> + i = num_chunks++;
> + chunks[i].chunk_id = AMDGPU_CHUNK_ID_FENCE;
> + chunks[i].length_dw = sizeof(struct drm_amdgpu_cs_chunk_fence) / 4;
> + chunks[i].chunk_data = (uint64_t)(uintptr_t)&chunk_data[i];
> +
> + /* fence bo handle */
> + chunk_data[i].fence_data.handle = amdgpu_get_bo_handle(request->fence_info.handle);
> + /* offset */
> + chunk_data[i].fence_data.offset =
> + request->fence_info.offset * sizeof(uint64_t);
> + }
> +
> + bo_list_in.operation = ~0;
> + bo_list_in.list_handle = ~0;
> + bo_list_in.bo_number = request->num_handles;
> + bo_list_in.bo_info_size = sizeof(struct drm_amdgpu_bo_list_entry);
> + bo_list_in.bo_info_ptr = (uint64_t)(uintptr_t)request->handles;
> +
> + chunks[num_chunks].chunk_id = AMDGPU_CHUNK_ID_BO_HANDLES;
> + chunks[num_chunks].length_dw = sizeof(struct drm_amdgpu_bo_list_in) / 4;
> + chunks[num_chunks].chunk_data = (uintptr_t)&bo_list_in;
> + num_chunks++;
> +
> + /* The kernel returns -ENOMEM with many parallel processes using GDS such as test suites quite
> + * often, but it eventually succeeds after enough attempts. This happens frequently with dEQP
> + * using NGG streamout.
> + */
> + abs_timeout_ns = os_time_get_absolute_timeout(1000000000ull); /* 1s */
> +
> + r = 0;
> + do {
> + /* Wait 1 ms and try again. */
> + if (r == -ENOMEM)
> + os_time_sleep(1000);
> +
> + r = amdgpu_cs_submit_raw2(dev, ring_context->context_handle, 0,
> + num_chunks, chunks, &request->seq_no);
> + } while (r == -ENOMEM && os_time_get_nano() < abs_timeout_ns);
> +
> + if (r) {
> + if (r == -ENOMEM) {
> + igt_info("igt/amdgpu: Not enough memory for command submission.\n");
> + result = ENOMEM;
> + } else if (r == -ECANCELED) {
> + igt_info("igt/amdgpu: The CS has been cancelled because the context is lost.\n");
> + result = ECANCELED;
> + } else {
> + igt_info("igt/amdgpu: The CS has been rejected, see dmesg for more information (%i).\n", r);
> + result = EINVAL;
> + }
> + }
> + free(chunks);
> + free(chunk_data);
> + return result;
> +}
> diff --git a/lib/amdgpu/amd_cs_radv.h b/lib/amdgpu/amd_cs_radv.h
> new file mode 100644
> index 000000000..8b6dcaaa2
> --- /dev/null
> +++ b/lib/amdgpu/amd_cs_radv.h
> @@ -0,0 +1,61 @@
> +/* SPDX-License-Identifier: MIT
> + * Copyright 2024 Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef AMD_CS_RADV
> +#define AMD_CS_RADV
> +
> +#include "amd_ip_blocks.h"
> +#define AMDGPU_CS_GANG_SIZE 4
> +
> +struct amdgpu_cs_ib_info_radv {
> + int64_t flags;
> + uint64_t ib_mc_address;
> + uint32_t size;
> + enum amd_ip_block_type ip_type;
> +};
> +
> +struct amdgpu_cs_request_radv {
> + /** Specify HW IP block type to which to send the IB. */
> + uint32_t ip_type;
> +
> + /** IP instance index if there are several IPs of the same type. */
> + uint32_t ip_instance;
> +
> + /**
> + * Specify ring index of the IP. We could have several rings
> + * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
> + */
> + uint32_t ring;
> +
> + /**
> + * BO list handles used by this request.
> + */
> + struct drm_amdgpu_bo_list_entry *handles;
> + uint32_t num_handles;
> +
> + /** Number of IBs to submit in the field ibs. */
> + uint32_t number_of_ibs;
> +
> + /**
> + * IBs to submit. Those IBs will be submitted together as single entity
> + */
> + struct amdgpu_cs_ib_info_radv ibs[AMDGPU_CS_GANG_SIZE];
> + /**
> + * The returned sequence number for the command submission
> + */
> + uint64_t seq_no;
> + /**
> + * The fence information
> + */
> + struct amdgpu_cs_fence_info fence_info;
> +};
> +
> +uint32_t
> +amdgpu_get_bo_handle(struct amdgpu_bo *bo);
> +
> +uint32_t
> +amdgpu_cs_submit_radv(amdgpu_device_handle device, struct amdgpu_ring_context *ring_context,
> + struct amdgpu_cs_request_radv *request);
> +
> +#endif
> diff --git a/lib/amdgpu/amd_ip_blocks.h b/lib/amdgpu/amd_ip_blocks.h
> index 4cad30d1e..97a9ad489 100644
> --- a/lib/amdgpu/amd_ip_blocks.h
> +++ b/lib/amdgpu/amd_ip_blocks.h
> @@ -20,7 +20,9 @@ enum amd_ip_block_type {
> AMD_IP_UVD_ENC,
> AMD_IP_VCN_DEC,
> AMD_IP_VCN_ENC,
> + AMD_IP_VCN_UNIFIED = AMD_IP_VCN_ENC,
> AMD_IP_VCN_JPEG,
> + AMD_IP_VPE,
> AMD_IP_MAX,
> };
>
> diff --git a/lib/meson.build b/lib/meson.build
> index 0fc11b26c..6122861d8 100644
> --- a/lib/meson.build
> +++ b/lib/meson.build
> @@ -144,6 +144,7 @@ if libdrm_amdgpu.found()
> 'amdgpu/amd_memory.c',
> 'amdgpu/amd_command_submission.c',
> 'amdgpu/amd_compute.c',
> + 'amdgpu/amd_cs_radv.c',
> 'amdgpu/amd_gfx.c',
> 'amdgpu/amd_ip_blocks.c',
> 'amdgpu/amd_shaders.c',
> --
> 2.25.1
>