[PATCH V2 04/10] accel/amdxdna: Add hardware context
Alex Deucher
alexdeucher at gmail.com
Thu Aug 8 21:34:00 UTC 2024
On Mon, Aug 5, 2024 at 1:50 PM Lizhi Hou <lizhi.hou at amd.com> wrote:
>
> The hardware can be shared among multiple user applications. The
> hardware resources are allocated/freed based on requests from
> user applications via driver IOCTLs.
>
> DRM_IOCTL_AMDXDNA_CREATE_HWCTX
> Allocate tile columns and create a hardware context structure to track the
> usage and status of the resources. A hardware context ID is returned for
> XDNA command execution.
>
> DRM_IOCTL_AMDXDNA_DESTROY_HWCTX
> Release hardware context based on its ID. The tile columns belonging to
> this hardware context will be reclaimed.
>
> DRM_IOCTL_AMDXDNA_CONFIG_HWCTX
> Configure the hardware context. Bind the hardware context to the required
> resources.
>
> Co-developed-by: Min Ma <min.ma at amd.com>
> Signed-off-by: Min Ma <min.ma at amd.com>
> Signed-off-by: Lizhi Hou <lizhi.hou at amd.com>
> ---
> drivers/accel/amdxdna/Makefile | 2 +
> drivers/accel/amdxdna/aie2_ctx.c | 178 ++++++++++++++++++++
> drivers/accel/amdxdna/aie2_message.c | 87 ++++++++++
> drivers/accel/amdxdna/aie2_pci.c | 41 +++++
> drivers/accel/amdxdna/aie2_pci.h | 12 ++
> drivers/accel/amdxdna/amdxdna_ctx.c | 210 ++++++++++++++++++++++++
> drivers/accel/amdxdna/amdxdna_ctx.h | 41 +++++
> drivers/accel/amdxdna/amdxdna_drm.c | 107 +++++++++++-
> drivers/accel/amdxdna/amdxdna_drm.h | 21 +++
> drivers/accel/amdxdna/amdxdna_pci_drv.c | 15 ++
> include/uapi/drm/amdxdna_accel.h | 128 +++++++++++++++
> 11 files changed, 841 insertions(+), 1 deletion(-)
> create mode 100644 drivers/accel/amdxdna/aie2_ctx.c
> create mode 100644 drivers/accel/amdxdna/amdxdna_ctx.c
> create mode 100644 drivers/accel/amdxdna/amdxdna_ctx.h
>
> diff --git a/drivers/accel/amdxdna/Makefile b/drivers/accel/amdxdna/Makefile
> index b44de7fe0c9e..9cf90e92564d 100644
> --- a/drivers/accel/amdxdna/Makefile
> +++ b/drivers/accel/amdxdna/Makefile
> @@ -1,11 +1,13 @@
> # SPDX-License-Identifier: GPL-2.0-only
>
> amdxdna-y := \
> + aie2_ctx.o \
> aie2_message.o \
> aie2_pci.o \
> aie2_psp.o \
> aie2_smu.o \
> aie2_solver.o \
> + amdxdna_ctx.o \
> amdxdna_drm.o \
> amdxdna_mailbox.o \
> amdxdna_mailbox_helper.o \
> diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
> new file mode 100644
> index 000000000000..2cfcf9d571f6
> --- /dev/null
> +++ b/drivers/accel/amdxdna/aie2_ctx.c
> @@ -0,0 +1,178 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2024, Advanced Micro Devices, Inc.
> + */
> +
> +#include "aie2_pci.h"
> +#include "aie2_solver.h"
> +
> +static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> + struct amdxdna_dev_hdl *ndev;
> + int start, end, first, last;
> + u32 width = 1, entries = 0;
> + int i;
> +
> + if (!hwctx->num_tiles) {
> + XDNA_ERR(xdna, "Number of tiles is zero");
> + return -EINVAL;
> + }
> +
> + ndev = xdna->dev_handle;
> + if (unlikely(!ndev->metadata.core.row_count)) {
> + XDNA_WARN(xdna, "Core tile row count is zero");
> + return -EINVAL;
> + }
> +
> + hwctx->num_col = hwctx->num_tiles / ndev->metadata.core.row_count;
> + if (!hwctx->num_col || hwctx->num_col > ndev->total_col) {
> + XDNA_ERR(xdna, "Invalid num_col %d", hwctx->num_col);
> + return -EINVAL;
> + }
> +
> + if (ndev->priv->col_align == COL_ALIGN_NATURE)
> + width = hwctx->num_col;
> +
> + /*
> + * In range [start, end], find out columns that is multiple of width.
> + * 'first' is the first column,
> + * 'last' is the last column,
> + * 'entries' is the total number of columns.
> + */
> + start = xdna->dev_info->first_col;
> + end = ndev->total_col - hwctx->num_col;
> + if (start > 0 && end == 0) {
> + XDNA_DBG(xdna, "Force start from col 0");
> + start = 0;
> + }
> + first = start + (width - start % width) % width;
> + last = end - end % width;
> + if (last >= first)
> + entries = (last - first) / width + 1;
> + XDNA_DBG(xdna, "start %d end %d first %d last %d",
> + start, end, first, last);
> +
> + if (unlikely(!entries)) {
> + XDNA_ERR(xdna, "Start %d end %d width %d",
> + start, end, width);
> + return -EINVAL;
> + }
> +
> + hwctx->col_list = kmalloc_array(entries, sizeof(*hwctx->col_list), GFP_KERNEL);
> + if (!hwctx->col_list)
> + return -ENOMEM;
> +
> + hwctx->col_list_len = entries;
> + hwctx->col_list[0] = first;
> + for (i = 1; i < entries; i++)
> + hwctx->col_list[i] = hwctx->col_list[i - 1] + width;
> +
> + print_hex_dump_debug("col_list: ", DUMP_PREFIX_OFFSET, 16, 4, hwctx->col_list,
> + entries * sizeof(*hwctx->col_list), false);
> + return 0;
> +}
> +
> +static int aie2_alloc_resource(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> + struct alloc_requests *xrs_req;
> + int ret;
> +
> + xrs_req = kzalloc(sizeof(*xrs_req), GFP_KERNEL);
> + if (!xrs_req)
> + return -ENOMEM;
> +
> + xrs_req->cdo.start_cols = hwctx->col_list;
> + xrs_req->cdo.cols_len = hwctx->col_list_len;
> + xrs_req->cdo.ncols = hwctx->num_col;
> + xrs_req->cdo.qos_cap.opc = hwctx->max_opc;
> +
> + xrs_req->rqos.gops = hwctx->qos.gops;
> + xrs_req->rqos.fps = hwctx->qos.fps;
> + xrs_req->rqos.dma_bw = hwctx->qos.dma_bandwidth;
> + xrs_req->rqos.latency = hwctx->qos.latency;
> + xrs_req->rqos.exec_time = hwctx->qos.frame_exec_time;
> + xrs_req->rqos.priority = hwctx->qos.priority;
> +
> + xrs_req->rid = (uintptr_t)hwctx;
> +
> + ret = xrs_allocate_resource(xdna->xrs_hdl, xrs_req, hwctx);
> + if (ret)
> + XDNA_ERR(xdna, "Allocate AIE resource failed, ret %d", ret);
> +
> + kfree(xrs_req);
> + return ret;
> +}
> +
> +static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> + int ret;
> +
> + ret = xrs_release_resource(xdna->xrs_hdl, (uintptr_t)hwctx);
> + if (ret)
> + XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
> +}
> +
> +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_client *client = hwctx->client;
> + struct amdxdna_dev *xdna = client->xdna;
> + struct amdxdna_hwctx_priv *priv;
> + int ret;
> +
> + priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
> + if (!priv)
> + return -ENOMEM;
> + hwctx->priv = priv;
> +
> + ret = aie2_hwctx_col_list(hwctx);
> + if (ret) {
> + XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
> + goto free_priv;
> + }
> +
> + ret = aie2_alloc_resource(hwctx);
> + if (ret) {
> + XDNA_ERR(xdna, "Alloc hw resource failed, ret %d", ret);
> + goto free_col_list;
> + }
> +
> + hwctx->status = HWCTX_STAT_INIT;
> +
> + XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
> +
> + return 0;
> +
> +free_col_list:
> + kfree(hwctx->col_list);
> +free_priv:
> + kfree(priv);
> + return ret;
> +}
> +
> +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
> +{
> + aie2_release_resource(hwctx);
> +
> + kfree(hwctx->col_list);
> + kfree(hwctx->priv);
> + kfree(hwctx->cus);
> +}
> +
> +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size)
> +{
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> +
> + drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
> + switch (type) {
> + case DRM_AMDXDNA_HWCTX_CONFIG_CU:
> + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
> + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
> + return -EOPNOTSUPP;
> + default:
> + XDNA_DBG(xdna, "Not supported type %d", type);
> + return -EOPNOTSUPP;
> + }
> +}
> diff --git a/drivers/accel/amdxdna/aie2_message.c b/drivers/accel/amdxdna/aie2_message.c
> index 71df005b7408..54fb0f68587a 100644
> --- a/drivers/accel/amdxdna/aie2_message.c
> +++ b/drivers/accel/amdxdna/aie2_message.c
> @@ -185,3 +185,90 @@ int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
>
> return 0;
> }
> +
> +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
> +{
> + DECLARE_AIE2_MSG(create_ctx, MSG_OP_CREATE_CONTEXT);
> + struct amdxdna_dev *xdna = ndev->xdna;
> + struct xdna_mailbox_chann_res x2i;
> + struct xdna_mailbox_chann_res i2x;
> + struct cq_pair *cq_pair;
> + u32 intr_reg;
> + int ret;
> +
> + req.aie_type = 1;
> + req.start_col = hwctx->start_col;
> + req.num_col = hwctx->num_col;
> + req.num_cq_pairs_requested = 1;
> + req.pasid = hwctx->client->pasid;
> + req.context_priority = 2;
> +
> + ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> + if (ret)
> + return ret;
> +
> + hwctx->fw_ctx_id = resp.context_id;
> + WARN_ONCE(hwctx->fw_ctx_id == -1, "Unexpected context id");
> +
> + cq_pair = &resp.cq_pair[0];
> + x2i.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.head_addr);
> + x2i.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->x2i_q.tail_addr);
> + x2i.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->x2i_q.buf_addr);
> + x2i.rb_size = cq_pair->x2i_q.buf_size;
> +
> + i2x.mb_head_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.head_addr);
> + i2x.mb_tail_ptr_reg = AIE2_MBOX_OFF(ndev, cq_pair->i2x_q.tail_addr);
> + i2x.rb_start_addr = AIE2_SRAM_OFF(ndev, cq_pair->i2x_q.buf_addr);
> + i2x.rb_size = cq_pair->i2x_q.buf_size;
> +
> + ret = pci_irq_vector(to_pci_dev(xdna->ddev.dev), resp.msix_id);
> + if (ret == -EINVAL) {
> + XDNA_ERR(xdna, "not able to create channel");
> + goto out_destroy_context;
> + }
> +
> + intr_reg = i2x.mb_head_ptr_reg + 4;
> + hwctx->priv->mbox_chann = xdna_mailbox_create_channel(ndev->mbox, &x2i, &i2x,
> + intr_reg, ret);
> + if (!hwctx->priv->mbox_chann) {
> + XDNA_ERR(xdna, "not able to create channel");
> + ret = -EINVAL;
> + goto out_destroy_context;
> + }
> +
> + XDNA_DBG(xdna, "%s mailbox channel irq: %d, msix_id: %d",
> + hwctx->name, ret, resp.msix_id);
> + XDNA_DBG(xdna, "%s created fw ctx %d pasid %d", hwctx->name,
> + hwctx->fw_ctx_id, hwctx->client->pasid);
> +
> + return 0;
> +
> +out_destroy_context:
> + aie2_destroy_context(ndev, hwctx);
> + return ret;
> +}
> +
> +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx)
> +{
> + DECLARE_AIE2_MSG(destroy_ctx, MSG_OP_DESTROY_CONTEXT);
> + struct amdxdna_dev *xdna = ndev->xdna;
> + int ret;
> +
> + if (hwctx->fw_ctx_id == -1)
> + return 0;
> +
> + xdna_mailbox_stop_channel(hwctx->priv->mbox_chann);
> +
> + req.context_id = hwctx->fw_ctx_id;
> + ret = aie2_send_mgmt_msg_wait(ndev, &msg);
> + if (ret)
> + XDNA_WARN(xdna, "%s destroy context failed, ret %d", hwctx->name, ret);
> +
> + xdna_mailbox_destroy_channel(hwctx->priv->mbox_chann);
> + XDNA_DBG(xdna, "%s destroyed fw ctx %d", hwctx->name,
> + hwctx->fw_ctx_id);
> + hwctx->priv->mbox_chann = NULL;
> + hwctx->fw_ctx_id = -1;
> +
> + return ret;
> +}
> diff --git a/drivers/accel/amdxdna/aie2_pci.c b/drivers/accel/amdxdna/aie2_pci.c
> index 8fb7ecbf35c1..f60934ae21e0 100644
> --- a/drivers/accel/amdxdna/aie2_pci.c
> +++ b/drivers/accel/amdxdna/aie2_pci.c
> @@ -204,6 +204,43 @@ static void aie2_mgmt_fw_fini(struct amdxdna_dev_hdl *ndev)
> XDNA_DBG(ndev->xdna, "Firmware suspended");
> }
>
> +static int aie2_xrs_load(void *cb_arg, struct xrs_action_load *action)
> +{
> + struct amdxdna_hwctx *hwctx = cb_arg;
> + struct amdxdna_dev *xdna;
> + int ret;
> +
> + xdna = hwctx->client->xdna;
> +
> + hwctx->start_col = action->part.start_col;
> + hwctx->num_col = action->part.ncols;
> + ret = aie2_create_context(xdna->dev_handle, hwctx);
> + if (ret)
> + XDNA_ERR(xdna, "create context failed, ret %d", ret);
> +
> + return ret;
> +}
> +
> +static int aie2_xrs_unload(void *cb_arg)
> +{
> + struct amdxdna_hwctx *hwctx = cb_arg;
> + struct amdxdna_dev *xdna;
> + int ret;
> +
> + xdna = hwctx->client->xdna;
> +
> + ret = aie2_destroy_context(xdna->dev_handle, hwctx);
> + if (ret)
> + XDNA_ERR(xdna, "destroy context failed, ret %d", ret);
> +
> + return ret;
> +}
> +
> +static struct xrs_action_ops aie2_xrs_actions = {
> + .load = aie2_xrs_load,
> + .unload = aie2_xrs_unload,
> +};
> +
> static void aie2_hw_stop(struct amdxdna_dev *xdna)
> {
> struct pci_dev *pdev = to_pci_dev(xdna->ddev.dev);
> @@ -416,6 +453,7 @@ static int aie2_init(struct amdxdna_dev *xdna)
> xrs_cfg.clk_list.cu_clk_list[2] = 1000;
> xrs_cfg.sys_eff_factor = 1;
> xrs_cfg.dev = xdna->ddev.dev;
> + xrs_cfg.actions = &aie2_xrs_actions;
> xrs_cfg.total_col = ndev->total_col;
>
> xdna->xrs_hdl = xrsm_init(&xrs_cfg);
> @@ -452,4 +490,7 @@ static void aie2_fini(struct amdxdna_dev *xdna)
> const struct amdxdna_dev_ops aie2_ops = {
> .init = aie2_init,
> .fini = aie2_fini,
> + .hwctx_init = aie2_hwctx_init,
> + .hwctx_fini = aie2_hwctx_fini,
> + .hwctx_config = aie2_hwctx_config,
> };
> diff --git a/drivers/accel/amdxdna/aie2_pci.h b/drivers/accel/amdxdna/aie2_pci.h
> index a6b734f24955..818fc52dabbf 100644
> --- a/drivers/accel/amdxdna/aie2_pci.h
> +++ b/drivers/accel/amdxdna/aie2_pci.h
> @@ -128,6 +128,10 @@ struct rt_config {
> u32 value;
> };
>
> +struct amdxdna_hwctx_priv {
> + void *mbox_chann;
> +};
> +
> struct amdxdna_dev_hdl {
> struct amdxdna_dev *xdna;
> const struct amdxdna_dev_priv *priv;
> @@ -200,4 +204,12 @@ int aie2_query_aie_version(struct amdxdna_dev_hdl *ndev, struct aie_version *ver
> int aie2_query_aie_metadata(struct amdxdna_dev_hdl *ndev, struct aie_metadata *metadata);
> int aie2_query_firmware_version(struct amdxdna_dev_hdl *ndev,
> struct amdxdna_fw_ver *fw_ver);
> +int aie2_create_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
> +int aie2_destroy_context(struct amdxdna_dev_hdl *ndev, struct amdxdna_hwctx *hwctx);
> +
> +/* aie2_ctx.c */
> +int aie2_hwctx_init(struct amdxdna_hwctx *hwctx);
> +void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx);
> +int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
> +
> #endif /* _AIE2_PCI_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.c b/drivers/accel/amdxdna/amdxdna_ctx.c
> new file mode 100644
> index 000000000000..3daf7065acf4
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.c
> @@ -0,0 +1,210 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +#include "amdxdna_drm.h"
> +
> +#define MAX_HWCTX_ID 255
> +
> +static void amdxdna_hwctx_destroy(struct amdxdna_hwctx *hwctx)
> +{
> + struct amdxdna_dev *xdna = hwctx->client->xdna;
> +
> + /* At this point, user is not able to submit new commands */
> + mutex_lock(&xdna->dev_lock);
> + xdna->dev_info->ops->hwctx_fini(hwctx);
> + mutex_unlock(&xdna->dev_lock);
> +
> + kfree(hwctx->name);
> + kfree(hwctx);
> +}
> +
> +/*
> + * This should be called in close() and remove(). DO NOT call in other syscalls.
> + * This guarantees that the hwctx and its resources will be released even if
> + * the user doesn't call amdxdna_drm_destroy_hwctx_ioctl.
> + */
> +void amdxdna_hwctx_remove_all(struct amdxdna_client *client)
> +{
> + struct amdxdna_hwctx *hwctx;
> + int next = 0;
> +
> + mutex_lock(&client->hwctx_lock);
> + idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
> + XDNA_DBG(client->xdna, "PID %d close HW context %d",
> + client->pid, hwctx->id);
> + idr_remove(&client->hwctx_idr, hwctx->id);
> + mutex_unlock(&client->hwctx_lock);
> + amdxdna_hwctx_destroy(hwctx);
> + mutex_lock(&client->hwctx_lock);
> + }
> + mutex_unlock(&client->hwctx_lock);
> +}
> +
> +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> + struct amdxdna_client *client = filp->driver_priv;
> + struct amdxdna_drm_create_hwctx *args = data;
> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
> + struct amdxdna_hwctx *hwctx;
> + int ret, idx;
> +
> + if (args->ext_flags)
> + return -EINVAL;
> +
> + if (!drm_dev_enter(dev, &idx))
> + return -ENODEV;
> +
> + hwctx = kzalloc(sizeof(*hwctx), GFP_KERNEL);
> + if (!hwctx) {
> + ret = -ENOMEM;
> + goto exit;
> + }
> +
> + if (copy_from_user(&hwctx->qos, u64_to_user_ptr(args->qos_p), sizeof(hwctx->qos))) {
> + XDNA_ERR(xdna, "Access QoS info failed");
> + ret = -EFAULT;
> + goto free_hwctx;
> + }
> +
> + hwctx->client = client;
> + hwctx->fw_ctx_id = -1;
> + hwctx->num_tiles = args->num_tiles;
> + hwctx->mem_size = args->mem_size;
> + hwctx->max_opc = args->max_opc;
> + mutex_lock(&client->hwctx_lock);
> + ret = idr_alloc_cyclic(&client->hwctx_idr, hwctx, 0, MAX_HWCTX_ID, GFP_KERNEL);
> + if (ret < 0) {
> + mutex_unlock(&client->hwctx_lock);
> + XDNA_ERR(xdna, "Allocate hwctx ID failed, ret %d", ret);
> + goto free_hwctx;
> + }
> + hwctx->id = ret;
> + mutex_unlock(&client->hwctx_lock);
> +
> + hwctx->name = kasprintf(GFP_KERNEL, "hwctx.%d.%d", client->pid, hwctx->id);
> + if (!hwctx->name) {
> + ret = -ENOMEM;
> + goto rm_id;
> + }
> +
> + mutex_lock(&xdna->dev_lock);
> + ret = xdna->dev_info->ops->hwctx_init(hwctx);
> + if (ret) {
> + mutex_unlock(&xdna->dev_lock);
> + XDNA_ERR(xdna, "Init hwctx failed, ret %d", ret);
> + goto free_name;
> + }
> + args->handle = hwctx->id;
> + mutex_unlock(&xdna->dev_lock);
> +
> + XDNA_DBG(xdna, "PID %d create HW context %d, ret %d", client->pid, args->handle, ret);
> + drm_dev_exit(idx);
> + return 0;
> +
> +free_name:
> + kfree(hwctx->name);
> +rm_id:
> + mutex_lock(&client->hwctx_lock);
> + idr_remove(&client->hwctx_idr, hwctx->id);
> + mutex_unlock(&client->hwctx_lock);
> +free_hwctx:
> + kfree(hwctx);
> +exit:
> + drm_dev_exit(idx);
> + return ret;
> +}
> +
> +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> + struct amdxdna_client *client = filp->driver_priv;
> + struct amdxdna_drm_destroy_hwctx *args = data;
> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
> + struct amdxdna_hwctx *hwctx;
> + int ret = 0, idx;
> +
> + if (!drm_dev_enter(dev, &idx))
> + return -ENODEV;
> +
> + mutex_lock(&client->hwctx_lock);
> + hwctx = idr_find(&client->hwctx_idr, args->handle);
> + if (!hwctx) {
> + mutex_unlock(&client->hwctx_lock);
> + ret = -EINVAL;
> + XDNA_DBG(xdna, "PID %d HW context %d not exist",
> + client->pid, args->handle);
> + goto out;
> + }
> + idr_remove(&client->hwctx_idr, hwctx->id);
> + mutex_unlock(&client->hwctx_lock);
> +
> + amdxdna_hwctx_destroy(hwctx);
> +
> + XDNA_DBG(xdna, "PID %d destroyed HW context %d", client->pid, args->handle);
> +out:
> + drm_dev_exit(idx);
> + return ret;
> +}
> +
> +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
> +{
> + struct amdxdna_client *client = filp->driver_priv;
> + struct amdxdna_drm_config_hwctx *args = data;
> + struct amdxdna_dev *xdna = to_xdna_dev(dev);
> + struct amdxdna_hwctx *hwctx;
> + u32 buf_size;
> + void *buf;
> + u64 val;
> + int ret;
> +
> + if (!xdna->dev_info->ops->hwctx_config)
> + return -EOPNOTSUPP;
> +
> + val = args->param_val;
> + buf_size = args->param_val_size;
> +
> + switch (args->param_type) {
> + case DRM_AMDXDNA_HWCTX_CONFIG_CU:
> + /* For those types that param_val is pointer */
> + if (buf_size > PAGE_SIZE) {
> + XDNA_ERR(xdna, "Config CU param buffer too large");
> + return -E2BIG;
> + }
> +
> + /* Hwctx needs to keep buf */
> + buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
> + if (!buf)
> + return -ENOMEM;
> +
> + if (copy_from_user(buf, u64_to_user_ptr(val), buf_size)) {
> + kfree(buf);
> + return -EFAULT;
> + }
> +
> + break;
> + case DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF:
> + case DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF:
> + /* For those types that param_val is a value */
> + buf = NULL;
> + buf_size = 0;
> + break;
> + default:
> + XDNA_DBG(xdna, "Unknown HW context config type %d", args->param_type);
> + return -EINVAL;
> + }
> +
> + mutex_lock(&xdna->dev_lock);
> + hwctx = idr_find(&client->hwctx_idr, args->handle);
> + if (!hwctx) {
> + XDNA_DBG(xdna, "PID %d failed to get hwctx %d", client->pid, args->handle);
> + ret = -EINVAL;
> + goto unlock;
> + }
> +
> + ret = xdna->dev_info->ops->hwctx_config(hwctx, args->param_type, val, buf, buf_size);
> +
> +unlock:
> + mutex_unlock(&xdna->dev_lock);
> + kfree(buf);
> + return ret;
> +}
> diff --git a/drivers/accel/amdxdna/amdxdna_ctx.h b/drivers/accel/amdxdna/amdxdna_ctx.h
> new file mode 100644
> index 000000000000..ceaebe3daa03
> --- /dev/null
> +++ b/drivers/accel/amdxdna/amdxdna_ctx.h
> @@ -0,0 +1,41 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +/*
> + * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> + */
> +
> +#ifndef _AMDXDNA_CTX_H_
> +#define _AMDXDNA_CTX_H_
> +
> +#include <drm/drm_drv.h>
> +#include <linux/kref.h>
> +
> +struct amdxdna_hwctx {
> + struct amdxdna_client *client;
> + struct amdxdna_hwctx_priv *priv;
> + char *name;
> +
> + u32 id;
> + u32 max_opc;
> + u32 num_tiles;
> + u32 mem_size;
> + u32 fw_ctx_id;
> + u32 col_list_len;
> + u32 *col_list;
> + u32 start_col;
> + u32 num_col;
> +#define HWCTX_STAT_INIT 0
> +#define HWCTX_STAT_READY 1
> +#define HWCTX_STAT_STOP 2
> + u32 status;
> + u32 old_status;
> +
> + struct amdxdna_qos_info qos;
> + struct amdxdna_hwctx_param_config_cu *cus;
> +};
> +
> +void amdxdna_hwctx_remove_all(struct amdxdna_client *client);
> +int amdxdna_drm_create_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_config_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +int amdxdna_drm_destroy_hwctx_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
> +
> +#endif /* _AMDXDNA_CTX_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_drm.c b/drivers/accel/amdxdna/amdxdna_drm.c
> index 91e4f9c9dac9..2365c064bad6 100644
> --- a/drivers/accel/amdxdna/amdxdna_drm.c
> +++ b/drivers/accel/amdxdna/amdxdna_drm.c
> @@ -3,12 +3,113 @@
> * Copyright (C) 2022-2024, Advanced Micro Devices, Inc.
> */
>
> +#include <linux/iommu.h>
> #include <drm/drm_ioctl.h>
> #include <drm/drm_accel.h>
>
> #include "amdxdna_drm.h"
>
> -DEFINE_DRM_ACCEL_FOPS(amdxdna_fops);
> +static int amdxdna_drm_open(struct drm_device *ddev, struct drm_file *filp)
> +{
> + struct amdxdna_dev *xdna = to_xdna_dev(ddev);
> + struct amdxdna_client *client;
> + int ret;
> +
> + client = kzalloc(sizeof(*client), GFP_KERNEL);
> + if (!client)
> + return -ENOMEM;
> +
> + client->pid = pid_nr(filp->pid);
> + client->xdna = xdna;
> +
> + client->sva = iommu_sva_bind_device(xdna->ddev.dev, current->mm);
> + if (IS_ERR(client->sva)) {
> + ret = PTR_ERR(client->sva);
> + XDNA_ERR(xdna, "SVA bind device failed, ret %d", ret);
> + goto failed;
> + }
> + client->pasid = iommu_sva_get_pasid(client->sva);
> + if (client->pasid == IOMMU_PASID_INVALID) {
> + XDNA_ERR(xdna, "SVA get pasid failed");
> + ret = -ENODEV;
> + goto unbind_sva;
> + }
> + mutex_init(&client->hwctx_lock);
> + idr_init_base(&client->hwctx_idr, AMDXDNA_INVALID_CTX_HANDLE + 1);
> +
> + mutex_lock(&xdna->dev_lock);
> + list_add_tail(&client->node, &xdna->client_list);
> + mutex_unlock(&xdna->dev_lock);
> +
> + filp->driver_priv = client;
> + client->filp = filp;
> +
> + XDNA_DBG(xdna, "pid %d opened", client->pid);
> + return 0;
> +
> +unbind_sva:
> + iommu_sva_unbind_device(client->sva);
> +failed:
> + kfree(client);
> +
> + return ret;
> +}
> +
> +static void amdxdna_drm_close(struct drm_device *ddev, struct drm_file *filp)
> +{
> + struct amdxdna_client *client = filp->driver_priv;
> + struct amdxdna_dev *xdna = to_xdna_dev(ddev);
> +
> + XDNA_DBG(xdna, "closing pid %d", client->pid);
> +
> + idr_destroy(&client->hwctx_idr);
> + mutex_destroy(&client->hwctx_lock);
> +
> + iommu_sva_unbind_device(client->sva);
> +
> + XDNA_DBG(xdna, "pid %d closed", client->pid);
> + kfree(client);
> +}
> +
> +static int amdxdna_flush(struct file *f, fl_owner_t id)
> +{
> + struct drm_file *filp = f->private_data;
> + struct amdxdna_client *client = filp->driver_priv;
> + struct amdxdna_dev *xdna = client->xdna;
> + int idx;
> +
> + XDNA_DBG(xdna, "PID %d flushing...", client->pid);
> + if (!drm_dev_enter(&xdna->ddev, &idx))
> + return 0;
> +
> + mutex_lock(&xdna->dev_lock);
> + list_del_init(&client->node);
> + mutex_unlock(&xdna->dev_lock);
> + amdxdna_hwctx_remove_all(client);
> +
> + drm_dev_exit(idx);
> + return 0;
> +}
> +
> +static const struct drm_ioctl_desc amdxdna_drm_ioctls[] = {
> + /* Context */
> + DRM_IOCTL_DEF_DRV(AMDXDNA_CREATE_HWCTX, amdxdna_drm_create_hwctx_ioctl, 0),
> + DRM_IOCTL_DEF_DRV(AMDXDNA_DESTROY_HWCTX, amdxdna_drm_destroy_hwctx_ioctl, 0),
> + DRM_IOCTL_DEF_DRV(AMDXDNA_CONFIG_HWCTX, amdxdna_drm_config_hwctx_ioctl, 0),
> +};
> +
> +static const struct file_operations amdxdna_fops = {
> + .owner = THIS_MODULE,
> + .open = accel_open,
> + .release = drm_release,
> + .flush = amdxdna_flush,
> + .unlocked_ioctl = drm_ioctl,
> + .compat_ioctl = drm_compat_ioctl,
> + .poll = drm_poll,
> + .read = drm_read,
> + .llseek = noop_llseek,
> + .mmap = drm_gem_mmap,
> +};
>
> const struct drm_driver amdxdna_drm_drv = {
> .driver_features = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
> @@ -17,4 +118,8 @@ const struct drm_driver amdxdna_drm_drv = {
> .desc = "AMD XDNA DRM implementation",
> .major = AMDXDNA_DRIVER_MAJOR,
> .minor = AMDXDNA_DRIVER_MINOR,
> + .open = amdxdna_drm_open,
> + .postclose = amdxdna_drm_close,
> + .ioctls = amdxdna_drm_ioctls,
> + .num_ioctls = ARRAY_SIZE(amdxdna_drm_ioctls),
> };
> diff --git a/drivers/accel/amdxdna/amdxdna_drm.h b/drivers/accel/amdxdna/amdxdna_drm.h
> index bf4b0b786606..08331c51a130 100644
> --- a/drivers/accel/amdxdna/amdxdna_drm.h
> +++ b/drivers/accel/amdxdna/amdxdna_drm.h
> @@ -13,6 +13,8 @@
> #include <drm/drm_print.h>
> #include <drm/drm_file.h>
>
> +#include "amdxdna_ctx.h"
> +
> #define XDNA_INFO(xdna, fmt, args...) drm_info(&(xdna)->ddev, fmt, ##args)
> #define XDNA_WARN(xdna, fmt, args...) drm_warn(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
> #define XDNA_ERR(xdna, fmt, args...) drm_err(&(xdna)->ddev, "%s: "fmt, __func__, ##args)
> @@ -32,6 +34,9 @@ struct amdxdna_dev;
> struct amdxdna_dev_ops {
> int (*init)(struct amdxdna_dev *xdna);
> void (*fini)(struct amdxdna_dev *xdna);
> + int (*hwctx_init)(struct amdxdna_hwctx *hwctx);
> + void (*hwctx_fini)(struct amdxdna_hwctx *hwctx);
> + int (*hwctx_config)(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *buf, u32 size);
> };
>
> /*
> @@ -68,7 +73,23 @@ struct amdxdna_dev {
> void *xrs_hdl;
>
> struct mutex dev_lock; /* per device lock */
> + struct list_head client_list;
> struct amdxdna_fw_ver fw_ver;
> };
>
> +/*
> + * struct amdxdna_client - amdxdna client
> + * A per-fd data structure for managing contexts and other user process state.
> + */
> +struct amdxdna_client {
> + struct list_head node;
> + pid_t pid;
> + struct mutex hwctx_lock; /* protect hwctx */
> + struct idr hwctx_idr;
> + struct amdxdna_dev *xdna;
> + struct drm_file *filp;
> + struct iommu_sva *sva;
> + int pasid;
> +};
> +
> #endif /* _AMDXDNA_DRM_H_ */
> diff --git a/drivers/accel/amdxdna/amdxdna_pci_drv.c b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> index 7d0cfd918b0e..924ee8b96ebb 100644
> --- a/drivers/accel/amdxdna/amdxdna_pci_drv.c
> +++ b/drivers/accel/amdxdna/amdxdna_pci_drv.c
> @@ -58,6 +58,7 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> return -ENODEV;
>
> drmm_mutex_init(&xdna->ddev, &xdna->dev_lock);
> + INIT_LIST_HEAD(&xdna->client_list);
> pci_set_drvdata(pdev, xdna);
>
> mutex_lock(&xdna->dev_lock);
> @@ -94,11 +95,25 @@ static int amdxdna_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> static void amdxdna_remove(struct pci_dev *pdev)
> {
> struct amdxdna_dev *xdna = pci_get_drvdata(pdev);
> + struct amdxdna_client *client;
>
> drm_dev_unplug(&xdna->ddev);
> amdxdna_sysfs_fini(xdna);
>
> mutex_lock(&xdna->dev_lock);
> + client = list_first_entry_or_null(&xdna->client_list,
> + struct amdxdna_client, node);
> + while (client) {
> + list_del_init(&client->node);
> + mutex_unlock(&xdna->dev_lock);
> +
> + amdxdna_hwctx_remove_all(client);
> +
> + mutex_lock(&xdna->dev_lock);
> + client = list_first_entry_or_null(&xdna->client_list,
> + struct amdxdna_client, node);
> + }
> +
> xdna->dev_info->ops->fini(xdna);
> mutex_unlock(&xdna->dev_lock);
> }
> diff --git a/include/uapi/drm/amdxdna_accel.h b/include/uapi/drm/amdxdna_accel.h
> index 1b699464150e..5c1d9707d6da 100644
> --- a/include/uapi/drm/amdxdna_accel.h
> +++ b/include/uapi/drm/amdxdna_accel.h
> @@ -6,6 +6,7 @@
> #ifndef _UAPI_AMDXDNA_ACCEL_H_
> #define _UAPI_AMDXDNA_ACCEL_H_
>
> +#include <linux/stddef.h>
> #include "drm.h"
>
> #if defined(__cplusplus)
> @@ -15,11 +16,138 @@ extern "C" {
> #define AMDXDNA_DRIVER_MAJOR 1
> #define AMDXDNA_DRIVER_MINOR 0
>
> +#define AMDXDNA_INVALID_CTX_HANDLE 0
> +
> enum amdxdna_device_type {
> AMDXDNA_DEV_TYPE_UNKNOWN = -1,
> AMDXDNA_DEV_TYPE_KMQ,
> };
>
> +enum amdxdna_drm_ioctl_id {
> + DRM_AMDXDNA_CREATE_HWCTX,
> + DRM_AMDXDNA_DESTROY_HWCTX,
> + DRM_AMDXDNA_CONFIG_HWCTX,
> +};
> +
> +/**
> + * struct amdxdna_qos_info - QoS information for driver.
> + * @gops: Giga operations per second.
> + * @fps: Frames per second.
> + * @dma_bandwidth: DMA bandwidth.
> + * @latency: Frame response latency.
> + * @frame_exec_time: Frame execution time.
> + * @priority: Request priority.
> + *
> + * User program can provide QoS hints to driver.
> + */
> +struct amdxdna_qos_info {
> + __u32 gops;
> + __u32 fps;
> + __u32 dma_bandwidth;
> + __u32 latency;
> + __u32 frame_exec_time;
> + __u32 priority;
> +};
> +
> +/**
> + * struct amdxdna_drm_create_hwctx - Create hardware context.
> + * @ext: MBZ.
> + * @ext_flags: MBZ.
> + * @qos_p: Address of QoS info.
> + * @umq_bo: BO handle for user mode queue(UMQ).
> + * @log_buf_bo: BO handle for log buffer.
> + * @max_opc: Maximum operations per cycle.
> + * @num_tiles: Number of AIE tiles.
> + * @mem_size: Size of AIE tile memory.
> + * @umq_doorbell: Returned offset of doorbell associated with UMQ.
> + * @handle: Returned hardware context handle.
> + */
> +struct amdxdna_drm_create_hwctx {
> + __u64 ext;
> + __u64 ext_flags;
> + __u64 qos_p;
> + __u32 umq_bo;
> + __u32 log_buf_bo;
> + __u32 max_opc;
> + __u32 num_tiles;
> + __u32 mem_size;
> + __u32 umq_doorbell;
> + __u32 handle;
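You should pad this structure to a multiple of 64 bits for 32-bit
compatibility: three __u64 members followed by seven __u32 members give
52 bytes, so the size differs between ABIs that align __u64 to 4 bytes
(e.g. 32-bit x86) and those that align it to 8 bytes (56 bytes).
I.e., add a __u32 pad;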
Alex
> +};
> +
> +/**
> + * struct amdxdna_drm_destroy_hwctx - Destroy hardware context.
> + * @handle: Hardware context handle.
> + * @pad: MBZ.
> + */
> +struct amdxdna_drm_destroy_hwctx {
> + __u32 handle;
> + __u32 pad;
> +};
> +
> +/**
> + * struct amdxdna_cu_config - configuration for one CU
> + * @cu_bo: CU configuration buffer bo handle
> + * @cu_func: Function of a CU
> + * @pad: MBZ
> + */
> +struct amdxdna_cu_config {
> + __u32 cu_bo;
> + __u8 cu_func;
> + __u8 pad[3];
> +};
> +
> +/**
> + * struct amdxdna_hwctx_param_config_cu - configuration for CUs in hardware context
> + * @num_cus: Number of CUs to configure
> + * @pad: MBZ
> + * @cu_configs: Array of CU configurations of struct amdxdna_cu_config
> + */
> +struct amdxdna_hwctx_param_config_cu {
> + __u16 num_cus;
> + __u16 pad[3];
> + struct amdxdna_cu_config cu_configs[] __counted_by(num_cus);
> +};
> +
> +enum amdxdna_drm_config_hwctx_param {
> + DRM_AMDXDNA_HWCTX_CONFIG_CU,
> + DRM_AMDXDNA_HWCTX_ASSIGN_DBG_BUF,
> + DRM_AMDXDNA_HWCTX_REMOVE_DBG_BUF,
> + DRM_AMDXDNA_HWCTX_CONFIG_NUM
> +};
> +
> +/**
> + * struct amdxdna_drm_config_hwctx - Configure hardware context.
> + * @handle: hardware context handle.
> + * @param_type: Value in enum amdxdna_drm_config_hwctx_param. Specifies the
> + * structure passed in via param_val.
> + * @param_val: A structure specified by the param_type struct member.
> + * @param_val_size: Size of the parameter buffer pointed to by the param_val.
> + * If param_val is not a pointer, driver can ignore this.
> + *
> + * Note: if the param_val is a pointer pointing to a buffer, the maximum size
> + * of the buffer is 4KiB(PAGE_SIZE).
> + */
> +struct amdxdna_drm_config_hwctx {
> + __u32 handle;
> + __u32 param_type;
> + __u64 param_val;
> + __u32 param_val_size;
> + __u32 pad;
> +};
> +
> +#define DRM_IOCTL_AMDXDNA_CREATE_HWCTX \
> + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CREATE_HWCTX, \
> + struct amdxdna_drm_create_hwctx)
> +
> +#define DRM_IOCTL_AMDXDNA_DESTROY_HWCTX \
> + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_DESTROY_HWCTX, \
> + struct amdxdna_drm_destroy_hwctx)
> +
> +#define DRM_IOCTL_AMDXDNA_CONFIG_HWCTX \
> + DRM_IOWR(DRM_COMMAND_BASE + DRM_AMDXDNA_CONFIG_HWCTX, \
> + struct amdxdna_drm_config_hwctx)
> +
> #if defined(__cplusplus)
> } /* extern c end */
> #endif
> --
> 2.34.1
>
More information about the dri-devel
mailing list