[PATCH 2/2] accel/qaic: Expand DRM device lifecycle
Jacek Lawrynowicz
jacek.lawrynowicz at linux.intel.com
Tue Nov 21 08:17:08 UTC 2023
Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz at linux.intel.com>
On 17.11.2023 18:43, Jeffrey Hugo wrote:
> From: Carl Vanderlip <quic_carlv at quicinc.com>
>
> Currently the QAIC DRM device registers itself when the MHI QAIC_CONTROL
> channel becomes available. This is when the device is able to process
> workloads. However, the DRM driver also provides the debugfs interface
> bootlog for the device. If the device fails to boot to the QSM (which
> brings up the MHI QAIC_CONTROL channel), the bootlog won't be available for
> debugging why it failed to boot.
>
> Change when the DRM device registers itself from when QAIC_CONTROL is
> available to when the card is first probed on the PCI bus. Additionally,
> make the DRM driver persist through reset/error cases so the driver
> doesn't have to be reloaded to access the card again. Send
> KOBJ_ONLINE/OFFLINE uevents so userspace can know when DRM device is
> ready to handle requests.
>
> Signed-off-by: Carl Vanderlip <quic_carlv at quicinc.com>
> Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy at quicinc.com>
> Reviewed-by: Jeffrey Hugo <quic_jhugo at quicinc.com>
> Signed-off-by: Jeffrey Hugo <quic_jhugo at quicinc.com>
> ---
> Documentation/accel/qaic/qaic.rst | 9 +++++-
> drivers/accel/qaic/mhi_controller.c | 2 +-
> drivers/accel/qaic/qaic.h | 2 +-
> drivers/accel/qaic/qaic_drv.c | 44 +++++++++++------------------
> 4 files changed, 27 insertions(+), 30 deletions(-)
>
> diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
> index f81020736ebf..efb7771273bb 100644
> --- a/Documentation/accel/qaic/qaic.rst
> +++ b/Documentation/accel/qaic/qaic.rst
> @@ -93,8 +93,15 @@ commands (does not impact QAIC).
> uAPI
> ====
>
> +QAIC creates an accel device per phsyical PCIe device. This accel device exists
> +for as long as the PCIe device is known to Linux.
> +
> +The PCIe device may not be in the state to accept requests from userspace at
> +all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the
> +device can accept requests (ONLINE) and when the device is no longer accepting
> +requests (OFFLINE) because of a reset or other state transition.
> +
> QAIC defines a number of driver specific IOCTLs as part of the userspace API.
> -This section describes those APIs.
>
> DRM_IOCTL_QAIC_MANAGE
> This IOCTL allows userspace to send a NNC request to the QSM. The call will
> diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
> index 5d3cc30009cc..832464f2833a 100644
> --- a/drivers/accel/qaic/mhi_controller.c
> +++ b/drivers/accel/qaic/mhi_controller.c
> @@ -469,7 +469,7 @@ static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback re
> pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n");
> /* this event occurs in non-atomic context */
> if (reason == MHI_CB_SYS_ERROR)
> - qaic_dev_reset_clean_local_state(qdev, true);
> + qaic_dev_reset_clean_local_state(qdev);
> }
>
> static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
> diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
> index bd0c884e6bf7..66f4abf6c4c4 100644
> --- a/drivers/accel/qaic/qaic.h
> +++ b/drivers/accel/qaic/qaic.h
> @@ -283,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
> void release_dbc(struct qaic_device *qdev, u32 dbc_id);
>
> void wake_all_cntl(struct qaic_device *qdev);
> -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset);
> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev);
>
> struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
>
> diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
> index 02fe23248da4..c19bc83b249c 100644
> --- a/drivers/accel/qaic/qaic_drv.c
> +++ b/drivers/accel/qaic/qaic_drv.c
> @@ -8,6 +8,7 @@
> #include <linux/idr.h>
> #include <linux/interrupt.h>
> #include <linux/list.h>
> +#include <linux/kobject.h>
> #include <linux/kref.h>
> #include <linux/mhi.h>
> #include <linux/module.h>
> @@ -43,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
> static bool link_up;
> static DEFINE_IDA(qaic_usrs);
>
> -static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id);
> -static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id);
> -
> static void free_usr(struct kref *kref)
> {
> struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
> @@ -183,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
>
> qddev->partition_id = partition_id;
>
> - /*
> - * drm_dev_unregister() sets the driver data to NULL and
> - * drm_dev_register() does not update the driver data. During a SOC
> - * reset drm dev is unregistered and registered again leaving the
> - * driver data to NULL.
> - */
> - dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
> ret = drm_dev_register(drm, 0);
> if (ret)
> pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
> @@ -203,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
> struct drm_device *drm = to_drm(qddev);
> struct qaic_user *usr;
>
> - drm_dev_get(drm);
> drm_dev_unregister(drm);
> qddev->partition_id = 0;
> /*
> @@ -232,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
> mutex_lock(&qddev->users_mutex);
> }
> mutex_unlock(&qddev->users_mutex);
> - drm_dev_put(drm);
> }
>
> static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
> @@ -254,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
>
> qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
>
> - qdev->reset_state = QAIC_ONLINE;
> -
> dev_set_drvdata(&mhi_dev->dev, qdev);
> qdev->cntl_ch = mhi_dev;
>
> @@ -265,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
> return ret;
> }
>
> + qdev->reset_state = QAIC_BOOT;
> ret = get_cntl_version(qdev, NULL, &major, &minor);
> if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
> pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n",
> @@ -272,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
> ret = -EINVAL;
> goto close_control;
> }
> -
> - ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
> + qdev->reset_state = QAIC_ONLINE;
> + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE);
>
> return ret;
>
> @@ -291,6 +279,7 @@ static void qaic_notify_reset(struct qaic_device *qdev)
> {
> int i;
>
> + kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE);
> qdev->reset_state = QAIC_OFFLINE;
> /* wake up any waiters to avoid waiting for timeouts at sync */
> wake_all_cntl(qdev);
> @@ -299,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev)
> synchronize_srcu(&qdev->dev_lock);
> }
>
> -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
> {
> int i;
>
> qaic_notify_reset(qdev);
>
> - /* remove drmdevs to prevent new users from coming in */
> - qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
> -
> /* start tearing things down */
> for (i = 0; i < qdev->num_dbc; ++i)
> release_dbc(qdev, i);
> -
> - if (exit_reset)
> - qdev->reset_state = QAIC_ONLINE;
> }
>
> static void cleanup_qdev(struct qaic_device *qdev)
> @@ -338,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
> if (!qdev)
> return NULL;
>
> + qdev->reset_state = QAIC_OFFLINE;
> if (id->device == PCI_DEV_AIC100) {
> qdev->num_dbc = 16;
> qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
> @@ -499,15 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
> goto cleanup_qdev;
> }
>
> + ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
> + if (ret)
> + goto cleanup_qdev;
> +
> qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
> qdev->single_msi);
> if (IS_ERR(qdev->mhi_cntrl)) {
> ret = PTR_ERR(qdev->mhi_cntrl);
> - goto cleanup_qdev;
> + goto cleanup_drm_dev;
> }
>
> return 0;
>
> +cleanup_drm_dev:
> + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
> cleanup_qdev:
> cleanup_qdev(qdev);
> return ret;
> @@ -520,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
> if (!qdev)
> return;
>
> - qaic_dev_reset_clean_local_state(qdev, false);
> + qaic_dev_reset_clean_local_state(qdev);
> + qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
> qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
> cleanup_qdev(qdev);
> }
> @@ -543,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev)
>
> qaic_notify_reset(qdev);
> qaic_mhi_start_reset(qdev->mhi_cntrl);
> - qaic_dev_reset_clean_local_state(qdev, false);
> + qaic_dev_reset_clean_local_state(qdev);
> }
>
> static void qaic_pci_reset_done(struct pci_dev *pdev)
> {
> struct qaic_device *qdev = pci_get_drvdata(pdev);
>
> - qdev->reset_state = QAIC_ONLINE;
> qaic_mhi_reset_done(qdev->mhi_cntrl);
> }
>
More information about the dri-devel
mailing list