[PATCH v2 2/8] accel/qaic: Add uapi and core driver file

Jeffrey Hugo quic_jhugo at quicinc.com
Wed Mar 1 19:46:04 UTC 2023


On 2/17/2023 11:15 AM, Jeffrey Hugo wrote:
> On 2/16/2023 7:13 AM, Jacek Lawrynowicz wrote:
>> Hi,
>>
>> On 06.02.2023 16:41, Jeffrey Hugo wrote:
>>> Add the QAIC driver uapi file and core driver file that binds to the 
>>> PCIe
>>> device.  The core driver file also creates the accel device and manages
>>> all the interconnections between the different parts of the driver.
>>>
>>> The driver can be built as a module.  If so, it will be called 
>>> "qaic.ko".
>>>
>>> Signed-off-by: Jeffrey Hugo <quic_jhugo at quicinc.com>
>>> Reviewed-by: Carl Vanderlip <quic_carlv at quicinc.com>
>>> ---
>>>   drivers/accel/qaic/qaic.h     | 321 ++++++++++++++++++
>>>   drivers/accel/qaic/qaic_drv.c | 771 
>>> ++++++++++++++++++++++++++++++++++++++++++
>>>   include/uapi/drm/qaic_accel.h | 283 ++++++++++++++++
>>>   3 files changed, 1375 insertions(+)
>>>   create mode 100644 drivers/accel/qaic/qaic.h
>>>   create mode 100644 drivers/accel/qaic/qaic_drv.c
>>>   create mode 100644 include/uapi/drm/qaic_accel.h
>>>
>>> diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
>>> new file mode 100644
>>> index 0000000..3f7ea76
>>> --- /dev/null
>>> +++ b/drivers/accel/qaic/qaic.h
>>> @@ -0,0 +1,321 @@
>>> +/* SPDX-License-Identifier: GPL-2.0-only
>>> + *
>>> + * Copyright (c) 2019-2021, The Linux Foundation. All rights reserved.
>>> + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All 
>>> rights reserved.
>>> + */
>>> +
>>> +#ifndef QAICINTERNAL_H_
>>
>> Please use guard macro that matches the file name: _QAIC_H_
> 
> Before moving to DRM/ACCEL, this conflicted with the uapi file. However, 
> that is no longer the case, so yes, this should be changed. Will do.
> 
>>
>>> +#define QAICINTERNAL_H_
>>> +
>>> +#include <linux/interrupt.h>
>>> +#include <linux/kref.h>
>>> +#include <linux/mhi.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/spinlock.h>
>>> +#include <linux/srcu.h>
>>> +#include <linux/wait.h>
>>> +#include <linux/workqueue.h>
>>> +#include <drm/drm_device.h>
>>> +#include <drm/drm_gem.h>
>>> +
>>> +#define QAIC_DBC_BASE        0x20000
>>> +#define QAIC_DBC_SIZE        0x1000
>>> +
>>> +#define QAIC_NO_PARTITION    -1
>>> +
>>> +#define QAIC_DBC_OFF(i)        ((i) * QAIC_DBC_SIZE + QAIC_DBC_BASE)
>>> +
>>> +#define to_qaic_bo(obj) container_of(obj, struct qaic_bo, base)
>>> +
>>> +extern bool poll_datapath;
>>> +
>>> +struct qaic_user {
>>> +    /* Uniquely identifies this user for the device */
>>> +    int            handle;
>>> +    struct kref        ref_count;
>>> +    /* Char device opened by this user */
>>> +    struct qaic_drm_device    *qddev;
>>> +    /* Node in list of users that opened this drm device */
>>> +    struct list_head    node;
>>> +    /* SRCU used to synchronize this user during cleanup */
>>> +    struct srcu_struct    qddev_lock;
>>> +    atomic_t        chunk_id;
>>> +};
>>> +
>>> +struct dma_bridge_chan {
>>> +    /* Pointer to device strcut maintained by driver */
>>> +    struct qaic_device    *qdev;
>>> +    /* ID of this DMA bridge channel(DBC) */
>>> +    unsigned int        id;
>>> +    /* Synchronizes access to xfer_list */
>>> +    spinlock_t        xfer_lock;
>>> +    /* Base address of request queue */
>>> +    void            *req_q_base;
>>> +    /* Base address of response queue */
>>> +    void            *rsp_q_base;
>>> +    /*
>>> +     * Base bus address of request queue. Response queue bus address 
>>> can be
>>> +     * calculated by adding request queue size to this variable
>>> +     */
>>> +    dma_addr_t        dma_addr;
>>> +    /* Total size of request and response queue in byte */
>>> +    u32            total_size;
>>> +    /* Capacity of request/response queue */
>>> +    u32            nelem;
>>> +    /* The user that opened this DBC */
>>> +    struct qaic_user    *usr;
>>> +    /*
>>> +     * Request ID of next memory handle that goes in request queue. One
>>> +     * memory handle can enqueue more than one request elements, all
>>> +     * this requests that belong to same memory handle have same 
>>> request ID
>>> +     */
>>> +    u16            next_req_id;
>>> +    /* TRUE: DBC is in use; FALSE: DBC not in use */
>>
>> Use standard "true"/"false" instead of custom "TRUE"/"FALSE" macros.
>> This applies here and in multiple other places in the driver.
> 
> I think you are getting at that the documentation could be confusing.  I 
> don't appear to see custom macro use in the code.  Will try to clarify 
> that here.
> 
>>> +    bool            in_use;
>>> +    /*
>>> +     * Base address of device registers. Used to read/write request and
>>> +     * response queue's head and tail pointer of this DBC.
>>> +     */
>>> +    void __iomem        *dbc_base;
>>> +    /* Head of list where each node is a memory handle queued in 
>>> request queue */
>>> +    struct list_head    xfer_list;
>>> +    /* Synchronizes DBC readers during cleanup */
>>> +    struct srcu_struct    ch_lock;
>>> +    /*
>>> +     * When this DBC is released, any thread waiting on this wait 
>>> queue is
>>> +     * woken up
>>> +     */
>>> +    wait_queue_head_t    dbc_release;
>>> +    /* Head of list where each node is a bo associated with this DBC */
>>> +    struct list_head    bo_lists;
>>> +    /* The irq line for this DBC.  Used for polling */
>>> +    unsigned int        irq;
>>> +    /* Polling work item to simulate interrupts */
>>> +    struct work_struct    poll_work;
>>> +};
>>> +
>>> +struct qaic_device {
>>> +    /* Pointer to base PCI device struct of our physical device */
>>> +    struct pci_dev        *pdev;
>>> +    /* Mask of all bars of this device */
>>> +    int            bars;
>>> +    /* Req. ID of request that will be queued next in MHI control 
>>> device */
>>> +    u32            next_seq_num;
>>> +    /* Base address of bar 0 */
>>> +    void __iomem        *bar_0;
>>> +    /* Base address of bar 2 */
>>> +    void __iomem        *bar_2;
>>> +    /* Controller structure for MHI devices */
>>> +    struct mhi_controller    *mhi_cntl;
>>> +    /* MHI control channel device */
>>> +    struct mhi_device    *cntl_ch;
>>> +    /* List of requests queued in MHI control device */
>>> +    struct list_head    cntl_xfer_list;
>>> +    /* Synchronizes MHI control device transactions and its xfer 
>>> list */
>>> +    struct mutex        cntl_mutex;
>>> +    /* Base actual physical representation of drm device */
>>> +    struct qaic_drm_device    *base_dev;
>>> +    /* Array of DBC struct of this device */
>>> +    struct dma_bridge_chan    *dbc;
>>> +    /* Work queue for tasks related to MHI control device */
>>> +    struct workqueue_struct    *cntl_wq;
>>> +    /* Synchronizes all the users of device during cleanup */
>>> +    struct srcu_struct    dev_lock;
>>> +    /* TRUE: Device under reset; FALSE: Device not under reset */
>>> +    bool            in_reset;
>>> +    /*
>>> +     * TRUE: A tx MHI transaction has failed and a rx buffer is 
>>> still queued
>>> +     * in control device. Such a buffer is considered lost rx buffer
>>> +     * FALSE: No rx buffer is lost in control device
>>> +     */
>>> +    bool            cntl_lost_buf;
>>> +    /* Maximum number of DBC supported by this device */
>>> +    u32            num_dbc;
>>> +    /* Head in list of drm device created on top of this device */
>>> +    struct list_head    qaic_drm_devices;
>>> +    /* Synchronizes access of qaic_drm_devices list */
>>> +    struct mutex        qaic_drm_devices_mutex;
>>> +    /* Generate the CRC of a control message */
>>> +    u32 (*gen_crc)(void *msg);
>>> +    /* Validate the CRC of a control message */
>>> +    bool (*valid_crc)(void *msg);
>>> +};
>>> +
>>> +struct qaic_drm_device {
>>> +    /* Pointer to the root device struct driven by this driver */
>>> +    struct qaic_device    *qdev;
>>> +    /* Node in list of drm devices maintained by root device */
>>> +    struct list_head    node;
>>> +    /*
>>> +     * The physical device can be partition in number of logical 
>>> devices.
>>> +     * And each logical device is given a partition id. This member 
>>> stores
>>> +     * that id. QAIC_NO_PARTITION is a sentinel used to mark that 
>>> this drm
>>> +     * device is the actual physical device
>>> +     */
>>> +    s32            partition_id;
>>> +    /*
>>> +     * It points to the user that created this drm device. It is NULL
>>> +     * when this drm device represents the physical device i.e.
>>> +     * partition_id is QAIC_NO_PARTITION
>>> +     */
>>> +    struct qaic_user    *owner;
>>> +    /* Pointer to the drm device struct of this drm device */
>>> +    struct drm_device    *ddev;
>>> +    /* Head in list of users who have opened this drm device */
>>> +    struct list_head    users;
>>> +    /* Synchronizes access to users list */
>>> +    struct mutex        users_mutex;
>>> +};
>>> +
>>> +struct qaic_bo {
>>> +    struct drm_gem_object    base;
>>
>> Any reason why drm_gem_shmem_object cannot be used as a base?
> 
> I beleive that is incompatible with our memory allocator in patch 5.
> 
>>> +    /* Scatter/gather table for allocate/imported BO */
>>> +    struct sg_table        *sgt;
>>> +    /* BO size requested by user. GEM object might be bigger in 
>>> size. */
>>> +    u64            size;
>>> +    /* Head in list of slices of this BO */
>>> +    struct list_head    slices;
>>> +    /* Total nents, for all slices of this BO */
>>> +    int            total_slice_nents;
>>> +    /*
>>> +     * Direction of transfer. It can assume only two value 
>>> DMA_TO_DEVICE and
>>> +     * DMA_FROM_DEVICE.
>>> +     */
>>> +    int            dir;
>>> +    /* The pointer of the DBC which operates on this BO */
>>> +    struct dma_bridge_chan    *dbc;
>>> +    /* Number of slice that belongs to this buffer */
>>> +    u32            nr_slice;
>>> +    /* Number of slice that have been transferred by DMA engine */
>>> +    u32            nr_slice_xfer_done;
>>> +    /* TRUE = BO is queued for execution, FALSE = BO is not queued */
>>> +    bool            queued;
>>> +    /*
>>> +     * If TRUE then user has attached slicing information to this BO by
>>> +     * calling DRM_IOCTL_QAIC_ATTACH_SLICE_BO ioctl.
>>> +     */
>>> +    bool            sliced;
>>> +    /* Request ID of this BO if it is queued for execution */
>>> +    u16            req_id;
>>> +    /* Handle assigned to this BO */
>>> +    u32            handle;
>>> +    /* Wait on this for completion of DMA transfer of this BO */
>>> +    struct completion    xfer_done;
>>> +    /*
>>> +     * Node in linked list where head is dbc->xfer_list.
>>> +     * This link list contain BO's that are queued for DMA transfer.
>>> +     */
>>> +    struct list_head    xfer_list;
>>> +    /*
>>> +     * Node in linked list where head is dbc->bo_lists.
>>> +     * This link list contain BO's that are associated with the DBC 
>>> it is
>>> +     * linked to.
>>> +     */
>>> +    struct list_head    bo_list;
>>> +    struct {
>>> +        /*
>>> +         * Latest timestamp(ns) at which kernel received a request to
>>> +         * execute this BO
>>> +         */
>>> +        u64        req_received_ts;
>>> +        /*
>>> +         * Latest timestamp(ns) at which kernel enqueued requests of
>>> +         * this BO for execution in DMA queue
>>> +         */
>>> +        u64        req_submit_ts;
>>> +        /*
>>> +         * Latest timestamp(ns) at which kernel received a completion
>>> +         * interrupt for requests of this BO
>>> +         */
>>> +        u64        req_processed_ts;
>>> +        /*
>>> +         * Number of elements already enqueued in DMA queue before
>>> +         * enqueuing requests of this BO
>>> +         */
>>> +        u32        queue_level_before;
>>> +    } perf_stats;
>>> +
>>> +};
>>> +
>>> +struct bo_slice {
>>> +    /* Mapped pages */
>>> +    struct sg_table        *sgt;
>>> +    /* Number of requests required to queue in DMA queue */
>>> +    int            nents;
>>> +    /* See enum dma_data_direction */
>>> +    int            dir;
>>> +    /* Actual requests that will be copied in DMA queue */
>>> +    struct dbc_req        *reqs;
>>> +    struct kref        ref_count;
>>> +    /* TRUE: No DMA transfer required */
>>> +    bool            no_xfer;
>>> +    /* Pointer to the parent BO handle */
>>> +    struct qaic_bo        *bo;
>>> +    /* Node in list of slices maintained by parent BO */
>>> +    struct list_head    slice;
>>> +    /* Size of this slice in bytes */
>>> +    u64            size;
>>> +    /* Offset of this slice in buffer */
>>> +    u64            offset;
>>> +};
>>> +
>>> +int get_dbc_req_elem_size(void);
>>> +int get_dbc_rsp_elem_size(void);
>>> +int get_cntl_version(struct qaic_device *qdev, struct qaic_user *usr,
>>> +             u16 *major, u16 *minor);
>>> +int qaic_manage_ioctl(struct drm_device *dev, void *data,
>>> +              struct drm_file *file_priv);
>>> +int qaic_execute_ioctl(struct qaic_device *qdev, struct qaic_user *usr,
>>> +               unsigned long arg, bool is_partial);
>>> +int qaic_wait_exec_ioctl(struct qaic_device *qdev, struct qaic_user 
>>> *usr,
>>> +             unsigned long arg);
>>> +int qaic_query_ioctl(struct qaic_device *qdev, struct qaic_user *usr,
>>> +             unsigned long arg);
>>> +int qaic_data_mmap(struct qaic_device *qdev, struct qaic_user *usr,
>>> +           struct vm_area_struct *vma);
>>> +int qaic_data_get_reservation(struct qaic_device *qdev, struct 
>>> qaic_user *usr,
>>> +                  void *data, u32 *partition_id,
>>> +                  u16 *remove);
>>> +void qaic_mhi_ul_xfer_cb(struct mhi_device *mhi_dev,
>>> +             struct mhi_result *mhi_result);
>>> +
>>> +void qaic_mhi_dl_xfer_cb(struct mhi_device *mhi_dev,
>>> +             struct mhi_result *mhi_result);
>>> +
>>> +int qaic_control_open(struct qaic_device *qdev);
>>> +void qaic_control_close(struct qaic_device *qdev);
>>> +void qaic_release_usr(struct qaic_device *qdev, struct qaic_user *usr);
>>> +
>>> +irqreturn_t dbc_irq_threaded_fn(int irq, void *data);
>>> +irqreturn_t dbc_irq_handler(int irq, void *data);
>>> +int disable_dbc(struct qaic_device *qdev, u32 dbc_id, struct 
>>> qaic_user *usr);
>>> +void enable_dbc(struct qaic_device *qdev, u32 dbc_id, struct 
>>> qaic_user *usr);
>>> +void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
>>> +void release_dbc(struct qaic_device *qdev, u32 dbc_id);
>>> +
>>> +void wake_all_cntl(struct qaic_device *qdev);
>>> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool 
>>> exit_reset);
>>> +
>>> +struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev,
>>> +                         struct dma_buf *dma_buf);
>>> +
>>> +int qaic_create_bo_ioctl(struct drm_device *dev, void *data,
>>> +             struct drm_file *file_priv);
>>> +int qaic_mmap_bo_ioctl(struct drm_device *dev, void *data,
>>> +               struct drm_file *file_priv);
>>> +int qaic_attach_slice_bo_ioctl(struct drm_device *dev, void *data,
>>> +                   struct drm_file *file_priv);
>>> +int qaic_execute_bo_ioctl(struct drm_device *dev, void *data,
>>> +              struct drm_file *file_priv);
>>> +int qaic_partial_execute_bo_ioctl(struct drm_device *dev, void *data,
>>> +                  struct drm_file *file_priv);
>>> +int qaic_wait_bo_ioctl(struct drm_device *dev, void *data,
>>> +               struct drm_file *file_priv);
>>> +int qaic_test_print_bo_ioctl(struct drm_device *dev, void *data,
>>> +                 struct drm_file *file_priv);
>>> +int qaic_perf_stats_bo_ioctl(struct drm_device *dev, void *data,
>>> +                 struct drm_file *file_priv);
>>
>> You don't need to break these lines. You can use up to 100 columns in 
>> the whole driver.
>> It will be more readable and checkpatch won't complain.
> 
> Some of this predates the 100 column change.  Checkpatch already 
> complains about the uapi file.
> 
> Will try to fix here.
> 
>>> +void irq_polling_work(struct work_struct *work);
>>> +
>>> +#endif /* QAICINTERNAL_H_ */
>>> diff --git a/drivers/accel/qaic/qaic_drv.c 
>>> b/drivers/accel/qaic/qaic_drv.c
>>> new file mode 100644
>>> index 0000000..602a784
>>> --- /dev/null
>>> +++ b/drivers/accel/qaic/qaic_drv.c
>>> @@ -0,0 +1,771 @@
>>> +// SPDX-License-Identifier: GPL-2.0-only
>>> +
>>> +/* Copyright (c) 2019-2021, The Linux Foundation. All rights 
>>> reserved. */
>>> +/* Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All 
>>> rights reserved. */
>>> +
>>> +#include <linux/delay.h>
>>> +#include <linux/dma-mapping.h>
>>> +#include <linux/idr.h>
>>> +#include <linux/interrupt.h>
>>> +#include <linux/list.h>
>>> +#include <linux/kref.h>
>>> +#include <linux/mhi.h>
>>> +#include <linux/module.h>
>>> +#include <linux/msi.h>
>>> +#include <linux/mutex.h>
>>> +#include <linux/pci.h>
>>> +#include <linux/sched.h>
>>
>> Is <linux/sched.h> used here?
>> Feels like there are couple other unused includes in this file.
> 
> Actually I think that can be removed now.  Will check.
> The other ones look sane to me.  Are there specific ones you question?
> 
>>
>>> +#include <linux/spinlock.h>
>>> +#include <linux/workqueue.h>
>>> +#include <linux/wait.h>
>>> +#include <drm/drm_accel.h>
>>> +#include <drm/drm_drv.h>
>>> +#include <drm/drm_file.h>
>>> +#include <drm/drm_gem.h>
>>> +#include <drm/drm_ioctl.h>
>>> +#include <uapi/drm/qaic_accel.h>
>>> +
>>> +#include "mhi_controller.h"
>>> +#include "mhi_qaic_ctrl.h"
>>> +#include "qaic.h"
>>> +
>>> +MODULE_IMPORT_NS(DMA_BUF);
>>> +
>>> +#define PCI_DEV_AIC100            0xa100
>>> +#define QAIC_NAME            "qaic"
>>> +#define QAIC_DESC            "Qualcomm Cloud AI Accelerators"
>>> +
>>> +static unsigned int datapath_polling;
>>> +module_param(datapath_polling, uint, 0400);
>>> +bool poll_datapath;
>>> +
>>> +static u16 cntl_major = 5;
>>> +static u16 cntl_minor;/* 0 */
>>
>> Missing space before the comment.
>> And also you could convert both vars to macros as they are constants.
> 
> Sure
> 
>>> +static bool link_up;
>>> +static DEFINE_IDA(qaic_usrs);
>>> +
>>> +static int qaic_create_drm_device(struct qaic_device *qdev, s32 
>>> partition_id,
>>> +                  struct qaic_user *owner);
>>> +static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 
>>> partition_id,
>>> +                    struct qaic_user *owner);
>>> +
>>> +static void free_usr(struct kref *kref)
>>> +{
>>> +    struct qaic_user *usr = container_of(kref, struct qaic_user, 
>>> ref_count);
>>> +
>>> +    cleanup_srcu_struct(&usr->qddev_lock);
>>> +    ida_free(&qaic_usrs, usr->handle);
>>> +    kfree(usr);
>>> +}
>>> +
>>> +static int qaic_open(struct drm_device *dev, struct drm_file *file)
>>> +{
>>> +    struct qaic_drm_device *qddev = dev->dev_private;
>>> +    struct qaic_device *qdev = qddev->qdev;
>>> +    struct qaic_user *usr;
>>> +    int rcu_id;
>>> +    int ret;
>>> +
>>> +    rcu_id = srcu_read_lock(&qdev->dev_lock);
>>> +    if (qdev->in_reset) {
>>> +        srcu_read_unlock(&qdev->dev_lock, rcu_id);
>>> +        return -ENODEV;
>>> +    }
>>> +
>>> +    usr = kmalloc(sizeof(*usr), GFP_KERNEL);
>>> +    if (!usr) {
>>> +        srcu_read_unlock(&qdev->dev_lock, rcu_id);
>>> +        return -ENOMEM;
>>> +    }
>>> +
>>> +    usr->handle = ida_alloc(&qaic_usrs, GFP_KERNEL);
>>> +    if (usr->handle < 0) {
>>> +        srcu_read_unlock(&qdev->dev_lock, rcu_id);
>>> +        kfree(usr);
>>> +        return usr->handle;
>>> +    }
>>> +    usr->qddev = qddev;
>>> +    atomic_set(&usr->chunk_id, 0);
>>> +    init_srcu_struct(&usr->qddev_lock);
>>> +    kref_init(&usr->ref_count);
>>> +
>>> +    ret = mutex_lock_interruptible(&qddev->users_mutex);
>>> +    if (ret) {
>>> +        cleanup_srcu_struct(&usr->qddev_lock);
>>> +        kfree(usr);
>>> +        srcu_read_unlock(&qdev->dev_lock, rcu_id);
>>> +        return ret;
>>> +    }
>>> +
>>> +    list_add(&usr->node, &qddev->users);
>>> +    mutex_unlock(&qddev->users_mutex);
>>> +
>>> +    file->driver_priv = usr;
>>> +
>>> +    srcu_read_unlock(&qdev->dev_lock, rcu_id);
>>> +    return 0;
>>> +}
>>> +
>>> +static void qaic_postclose(struct drm_device *dev, struct drm_file 
>>> *file)
>>> +{
>>> +    struct qaic_user *usr = file->driver_priv;
>>> +    struct qaic_drm_device *qddev;
>>> +    struct qaic_device *qdev;
>>> +    int qdev_rcu_id;
>>> +    int usr_rcu_id;
>>> +    int i;
>>> +
>>> +    qddev = usr->qddev;
>>> +    usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
>>> +    if (qddev) {
>>> +        qdev = qddev->qdev;
>>> +        qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
>>> +        if (!qdev->in_reset) {
>>> +            qaic_release_usr(qdev, usr);
>>> +            for (i = 0; i < qdev->num_dbc; ++i)
>>> +                if (qdev->dbc[i].usr &&
>>> +                    qdev->dbc[i].usr->handle == usr->handle)
>>> +                    release_dbc(qdev, i);
>>> +
>>> +            /* Remove child devices */
>>> +            if (qddev->partition_id == QAIC_NO_PARTITION)
>>> +                qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION, usr);
>>> +        }
>>> +        srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
>>> +
>>> +        mutex_lock(&qddev->users_mutex);
>>> +        if (!list_empty(&usr->node))
>>> +            list_del_init(&usr->node);
>>> +        mutex_unlock(&qddev->users_mutex);
>>> +    }
>>> +
>>> +    srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
>>> +    kref_put(&usr->ref_count, free_usr);
>>> +
>>> +    file->driver_priv = NULL;
>>> +}
>>> +
>>> +static int qaic_part_dev_ioctl(struct drm_device *dev, void *data,
>>> +                   struct drm_file *file_priv)
>>> +{
>>> +    struct qaic_device *qdev;
>>> +    struct qaic_user *usr;
>>> +    u32 partition_id;
>>> +    int qdev_rcu_id;
>>> +    int usr_rcu_id;
>>> +    int ret = 0;
>>> +    u16 remove;
>>> +
>>> +    usr = file_priv->driver_priv;
>>> +    if (!usr)
>>> +        return -EINVAL;
>>> +
>>> +    usr_rcu_id = srcu_read_lock(&usr->qddev_lock);
>>> +    if (!usr->qddev) {
>>> +        srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
>>> +        return -ENODEV;
>>> +    }
>>> +
>>> +    qdev = usr->qddev->qdev;
>>> +    if (!qdev) {
>>> +        srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
>>> +        return -ENODEV;
>>> +    }
>>> +
>>> +    qdev_rcu_id = srcu_read_lock(&qdev->dev_lock);
>>> +    if (qdev->in_reset) {
>>> +        ret = -ENODEV;
>>> +        goto out;
>>> +    }
>>> +
>>> +    /* This IOCTL is only supported for base devices. */
>>> +    if (usr->qddev->partition_id != QAIC_NO_PARTITION) {
>>> +        ret = -ENOTTY;
>>> +        goto out;
>>> +    }
>>> +
>>> +    ret = qaic_data_get_reservation(qdev, usr, data, &partition_id,
>>> +                    &remove);
>>> +    if (ret)
>>> +        goto out;
>>> +
>>> +    if (remove == 1)
>>> +        qaic_destroy_drm_device(qdev, partition_id, usr);
>>> +    else
>>> +        ret = qaic_create_drm_device(qdev, partition_id, usr);
>>> +
>>> +out:
>>> +    srcu_read_unlock(&qdev->dev_lock, qdev_rcu_id);
>>> +    srcu_read_unlock(&usr->qddev_lock, usr_rcu_id);
>>> +
>>> +    return ret;
>>> +}
>>> +
>>> +DEFINE_DRM_ACCEL_FOPS(qaic_accel_fops);
>>> +
>>> +static const struct drm_ioctl_desc qaic_drm_ioctls[] = {
>>> +    DRM_IOCTL_DEF_DRV(QAIC_MANAGE, qaic_manage_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_CREATE_BO, qaic_create_bo_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_MMAP_BO, qaic_mmap_bo_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_ATTACH_SLICE_BO, 
>>> qaic_attach_slice_bo_ioctl, DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_EXECUTE_BO, qaic_execute_bo_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_PARTIAL_EXECUTE_BO, 
>>> qaic_partial_execute_bo_ioctl, DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_WAIT_BO, qaic_wait_bo_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_PERF_STATS_BO, qaic_perf_stats_bo_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +    DRM_IOCTL_DEF_DRV(QAIC_PART_DEV, qaic_part_dev_ioctl, 
>>> DRM_RENDER_ALLOW),
>>> +};
>>> +
>>> +static const struct drm_driver qaic_accel_driver = {
>>> +    .driver_features    = DRIVER_GEM | DRIVER_COMPUTE_ACCEL,
>>> +
>>> +    .name            = QAIC_NAME,
>>> +    .desc            = QAIC_DESC,
>>> +    .date            = "20190618",
>>> +
>>> +    .fops            = &qaic_accel_fops,
>>> +    .open            = qaic_open,
>>> +    .postclose        = qaic_postclose,
>>> +
>>> +    .ioctls            = qaic_drm_ioctls,
>>> +    .num_ioctls        = ARRAY_SIZE(qaic_drm_ioctls),
>>> +    .prime_fd_to_handle    = drm_gem_prime_fd_to_handle,
>>> +    .gem_prime_import    = qaic_gem_prime_import,
>>> +};
>>> +
>>> +static int qaic_create_drm_device(struct qaic_device *qdev, s32 
>>> partition_id,
>>> +                  struct qaic_user *owner)
>>> +{
>>> +    struct qaic_drm_device *qddev;
>>> +    struct drm_device *ddev;
>>> +    struct device *pdev;
>>> +    int ret;
>>> +
>>> +    /*
>>> +     * Partition id QAIC_NO_PARTITION indicates that the device was 
>>> created
>>> +     * on mhi_probe and id > QAIC_NO_PARTITION indicates a partition
>>> +     * created using IOCTL. So, pdev for primary device is the pci 
>>> dev and
>>> +     * the parent for partition dev is the primary device.
>>> +     */
>>> +    if (partition_id == QAIC_NO_PARTITION)
>>> +        pdev = &qdev->pdev->dev;
>>> +    else
>>> +        pdev = qdev->base_dev->ddev->dev;
>>> +
>>> +    qddev = kzalloc(sizeof(*qddev), GFP_KERNEL);
>>> +    if (!qddev) {
>>> +        ret = -ENOMEM;
>>> +        goto qddev_fail;
>>> +    }
>>> +
>>> +    ddev = drm_dev_alloc(&qaic_accel_driver, pdev);
>>> +    if (IS_ERR(ddev)) {
>>> +        ret = PTR_ERR(ddev);
>>> +        goto ddev_fail;
>>> +    }
>>> +
>>> +    ddev->dev_private = qddev;
>>> +    qddev->ddev = ddev;
>>> +
>>> +    if (partition_id == QAIC_NO_PARTITION)
>>> +        qdev->base_dev = qddev;
>>> +    qddev->qdev = qdev;
>>> +    qddev->partition_id = partition_id;
>>> +    qddev->owner = owner;
>>> +    INIT_LIST_HEAD(&qddev->users);
>>> +    mutex_init(&qddev->users_mutex);
>>> +
>>> +    mutex_lock(&qdev->qaic_drm_devices_mutex);
>>> +    list_add(&qddev->node, &qdev->qaic_drm_devices);
>>> +    mutex_unlock(&qdev->qaic_drm_devices_mutex);
>>> +
>>> +    ret = drm_dev_register(ddev, 0);
>>> +    if (ret) {
>>> +        pci_dbg(qdev->pdev, "%s: drm_dev_register failed %d\n", 
>>> __func__, ret);
>>> +        goto drm_reg_fail;
>>> +    }
>>> +
>>> +    return 0;
>>> +
>>> +drm_reg_fail:
>>> +    mutex_destroy(&qddev->users_mutex);
>>> +    mutex_lock(&qdev->qaic_drm_devices_mutex);
>>> +    list_del(&qddev->node);
>>> +    mutex_unlock(&qdev->qaic_drm_devices_mutex);
>>> +    if (partition_id == QAIC_NO_PARTITION)
>>> +        qdev->base_dev = NULL;
>>> +    drm_dev_put(ddev);
>>> +ddev_fail:
>>> +    kfree(qddev);
>>> +qddev_fail:
>>> +    return ret;
>>> +}
>>> +
>>> +static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 
>>> partition_id,
>>> +                    struct qaic_user *owner)
>>> +{
>>> +    struct qaic_drm_device *qddev;
>>> +    struct qaic_drm_device *q;
>>> +    struct qaic_user *usr;
>>> +
>>> +    list_for_each_entry_safe(qddev, q, &qdev->qaic_drm_devices, node) {
>>> +        /*
>>> +         * Skip devices in case we just want to remove devices
>>> +         * specific to a owner or partition id.
>>> +         *
>>> +         * owner    partition_id    notes
>>> +         * ----------------------------------
>>> +         * NULL        NO_PARTITION    delete base + all derived (qdev
>>> +         *                reset)
>>> +         * !NULL    NO_PARTITION    delete derived devs created by
>>> +         *                owner.
>>> +         * !NULL    >NO_PARTITION    delete derived dev identified by
>>> +         *                the partition id and created by
>>> +         *                owner
>>> +         * NULL        >NO_PARTITION    invalid (no-op)
>>> +         *
>>> +         * if partition_id is any value < QAIC_NO_PARTITION this 
>>> will be
>>> +         * a no-op.
>>> +         */
>>> +        if (owner && owner != qddev->owner)
>>> +            continue;
>>> +
>>> +        if (partition_id != QAIC_NO_PARTITION &&
>>> +            partition_id != qddev->partition_id && !owner)
>>> +            continue;
>>> +
>>> +        /*
>>> +         * Existing users get unresolvable errors till they close FDs.
>>> +         * Need to sync carefully with users calling close().  The
>>> +         * list of users can be modified elsewhere when the lock isn't
>>> +         * held here, but the sync'ing the srcu with the mutex held
>>> +         * could deadlock.  Grab the mutex so that the list will be
>>> +         * unmodified.  The user we get will exist as long as the
>>> +         * lock is held.  Signal that the qcdev is going away, and
>>> +         * grab a reference to the user so they don't go away for
>>> +         * synchronize_srcu().  Then release the mutex to avoid
>>> +         * deadlock and make sure the user has observed the signal.
>>> +         * With the lock released, we cannot maintain any state of the
>>> +         * user list.
>>> +         */
>>> +        mutex_lock(&qddev->users_mutex);
>>> +        while (!list_empty(&qddev->users)) {
>>> +            usr = list_first_entry(&qddev->users, struct qaic_user,
>>> +                           node);
>>> +            list_del_init(&usr->node);
>>> +            kref_get(&usr->ref_count);
>>> +            usr->qddev = NULL;
>>> +            mutex_unlock(&qddev->users_mutex);
>>> +            synchronize_srcu(&usr->qddev_lock);
>>> +            kref_put(&usr->ref_count, free_usr);
>>> +            mutex_lock(&qddev->users_mutex);
>>> +        }
>>> +        mutex_unlock(&qddev->users_mutex);
>>> +
>>> +        if (qddev->ddev) {
>>> +            drm_dev_unregister(qddev->ddev);
>>> +            drm_dev_put(qddev->ddev);
>>> +        }
>>> +
>>> +        list_del(&qddev->node);
>>> +        kfree(qddev);
>>> +    }
>>> +}
>>> +
>>> +static int qaic_mhi_probe(struct mhi_device *mhi_dev,
>>> +              const struct mhi_device_id *id)
>>> +{
>>> +    struct qaic_device *qdev;
>>> +    u16 major, minor;
>>> +    int ret;
>>> +
>>> +    /*
>>> +     * Invoking this function indicates that the control channel to the
>>> +     * device is available.  We use that as a signal to indicate that
>>> +     * the device side firmware has booted.  The device side firmware
>>> +     * manages the device resources, so we need to communicate with it
>>> +     * via the control channel in order to utilize the device.  
>>> Therefore
>>> +     * we wait until this signal to create the drm dev that 
>>> userspace will
>>> +     * use to control the device, because without the device side 
>>> firmware,
>>> +     * userspace can't do anything useful.
>>> +     */
>>> +
>>> +    qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
>>> +
>>> +    qdev->in_reset = false;
>>> +
>>> +    dev_set_drvdata(&mhi_dev->dev, qdev);
>>> +    qdev->cntl_ch = mhi_dev;
>>> +
>>> +    ret = qaic_control_open(qdev);
>>> +    if (ret) {
>>> +        pci_dbg(qdev->pdev, "%s: control_open failed %d\n", 
>>> __func__, ret);
>>> +        goto err;
>>> +    }
>>> +
>>> +    ret = get_cntl_version(qdev, NULL, &major, &minor);
>>> +    if (ret || major != cntl_major || minor > cntl_minor) {
>>> +        pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) 
>>> not supported.  Supported version is (%d.%d). Ret: %d\n",
>>> +            __func__, major, minor, cntl_major, cntl_minor, ret);
>>> +        ret = -EINVAL;
>>> +        goto close_control;
>>> +    }
>>> +
>>> +    ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION, NULL);
>>> +
>>> +    return ret;
>>> +
>>> +close_control:
>>> +    qaic_control_close(qdev);
>>> +err:
>>> +    return ret;
>>> +}
>>> +
>>> +static void qaic_mhi_remove(struct mhi_device *mhi_dev)
>>> +{
>>
>> Add a comment here
> 
> Presumably why this is an empty function?
> Will do.
> 
>>> +}
>>> +
>>> +static void qaic_notify_reset(struct qaic_device *qdev)
>>> +{
>>> +    int i;
>>> +
>>> +    qdev->in_reset = true;
>>> +    /* wake up any waiters to avoid waiting for timeouts at sync */
>>> +    wake_all_cntl(qdev);
>>> +    for (i = 0; i < qdev->num_dbc; ++i)
>>> +        wakeup_dbc(qdev, i);
>>> +    synchronize_srcu(&qdev->dev_lock);
>>> +}
>>> +
>>> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool 
>>> exit_reset)
>>> +{
>>> +    int i;
>>> +
>>> +    qaic_notify_reset(qdev);
>>> +
>>> +    /* remove drmdevs to prevent new users from coming in */
>>> +    if (qdev->base_dev)
>>> +        qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION, NULL);
>>> +
>>> +    /* start tearing things down */
>>> +    for (i = 0; i < qdev->num_dbc; ++i)
>>> +        release_dbc(qdev, i);
>>> +
>>> +    if (exit_reset)
>>> +        qdev->in_reset = false;
>>> +}
>>> +
>>> +static int qaic_pci_probe(struct pci_dev *pdev,
>>> +              const struct pci_device_id *id)
>>
>> Please try to simplify this function. Maybe move irq init to separate 
>> function.
>> It will be more readable and there will less of a error handling 
>> spaghetti at the bottom.
> 
> I guess I tend towards not breaking something up if there is not reuse 
> elsewhere, but I think I see your point.  Will have a look.
> 
>>
>>> +{
>>> +    int ret;
>>> +    int i;
>>> +    int mhi_irq;
>>> +    struct qaic_device *qdev;
>>> +
>>> +    qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
>>> +    if (!qdev)
>>> +        return -ENOMEM;
>>> +
>>> +    if (id->device == PCI_DEV_AIC100) {
>>> +        qdev->num_dbc = 16;
>>> +        qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, 
>>> sizeof(*qdev->dbc),
>>> +                     GFP_KERNEL);
>>> +        if (!qdev->dbc)
>>> +            return -ENOMEM;
>>> +    }
>>> +
>>> +    qdev->cntl_wq = alloc_workqueue("qaic_cntl", WQ_UNBOUND, 0);
>>> +    if (!qdev->cntl_wq) {
>>> +        ret = -ENOMEM;
>>> +        goto wq_fail;
>>> +    }
>>> +    pci_set_drvdata(pdev, qdev);
>>> +    qdev->pdev = pdev;
>>> +    mutex_init(&qdev->cntl_mutex);
>>> +    INIT_LIST_HEAD(&qdev->cntl_xfer_list);
>>> +    init_srcu_struct(&qdev->dev_lock);
>>> +    INIT_LIST_HEAD(&qdev->qaic_drm_devices);
>>> +    mutex_init(&qdev->qaic_drm_devices_mutex);
>>> +    for (i = 0; i < qdev->num_dbc; ++i) {
>>> +        spin_lock_init(&qdev->dbc[i].xfer_lock);
>>> +        qdev->dbc[i].qdev = qdev;
>>> +        qdev->dbc[i].id = i;
>>> +        INIT_LIST_HEAD(&qdev->dbc[i].xfer_list);
>>> +        init_srcu_struct(&qdev->dbc[i].ch_lock);
>>> +        init_waitqueue_head(&qdev->dbc[i].dbc_release);
>>> +        INIT_LIST_HEAD(&qdev->dbc[i].bo_lists);
>>> +    }
>>> +
>>> +    qdev->bars = pci_select_bars(pdev, IORESOURCE_MEM);
>>> +
>>> +    /* make sure the device has the expected BARs */
>>> +    if (qdev->bars != (BIT(0) | BIT(2) | BIT(4))) {
>>> +        pci_dbg(pdev, "%s: expected BARs 0, 2, and 4 not found in 
>>> device.  Found 0x%x\n",
>>> +            __func__, qdev->bars);
>>> +        ret = -EINVAL;
>>> +        goto bar_fail;
>>> +    }
>>> +
>>> +    ret = pci_enable_device(pdev);
>>> +    if (ret)
>>> +        goto enable_fail;
>>> +
>>> +    ret = pci_request_selected_regions(pdev, qdev->bars, "aic100");
>>> +    if (ret)
>>> +        goto request_regions_fail;
>>> +
>>> +    pci_set_master(pdev);
>>> +
>>> +    ret = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
>>> +    if (ret)
>>> +        goto dma_mask_fail;
>>> +    ret = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
>>> +    if (ret)
>>> +        goto dma_mask_fail;
>>> +    ret = dma_set_max_seg_size(&pdev->dev, UINT_MAX);
>>> +    if (ret)
>>> +        goto dma_mask_fail;
>>> +
>>> +    qdev->bar_0 = pci_ioremap_bar(pdev, 0);
>>> +    if (!qdev->bar_0) {
>>> +        ret = -ENOMEM;
>>> +        goto ioremap_0_fail;
>>> +    }
>>> +
>>> +    qdev->bar_2 = pci_ioremap_bar(pdev, 2);
>>> +    if (!qdev->bar_2) {
>>> +        ret = -ENOMEM;
>>> +        goto ioremap_2_fail;
>>> +    }
>>> +
>>> +    for (i = 0; i < qdev->num_dbc; ++i)
>>> +        qdev->dbc[i].dbc_base = qdev->bar_2 + QAIC_DBC_OFF(i);
>>> +
>>> +    ret = pci_alloc_irq_vectors(pdev, 1, 32, PCI_IRQ_MSI);
>>> +    if (ret < 0)
>>> +        goto alloc_irq_fail;
>>> +
>>> +    if (ret < 32) {
>>> +        pci_err(pdev, "%s: Requested 32 MSIs.  Obtained %d MSIs 
>>> which is less than the 32 required.\n",
>>> +            __func__, ret);
>>> +        ret = -ENODEV;
>>> +        goto invalid_msi_config;
>>> +    }
>>> +
>>> +    mhi_irq = pci_irq_vector(pdev, 0);
>>> +    if (mhi_irq < 0) {
>>> +        ret = mhi_irq;
>>> +        goto get_mhi_irq_fail;
>>> +    }
>>> +
>>> +    for (i = 0; i < qdev->num_dbc; ++i) {
>>> +        ret = devm_request_threaded_irq(&pdev->dev,
>>> +                        pci_irq_vector(pdev, i + 1),
>>> +                        dbc_irq_handler,
>>> +                        dbc_irq_threaded_fn,
>>> +                        IRQF_SHARED,
>>> +                        "qaic_dbc",
>>> +                        &qdev->dbc[i]);
>>> +        if (ret)
>>> +            goto get_dbc_irq_failed;
>>> +
>>> +        if (poll_datapath) {
>>> +            qdev->dbc[i].irq = pci_irq_vector(pdev, i + 1);
>>> +            disable_irq_nosync(qdev->dbc[i].irq);
>>> +            INIT_WORK(&qdev->dbc[i].poll_work, irq_polling_work);
>>> +        }
>>> +    }
>>> +
>>> +    qdev->mhi_cntl = qaic_mhi_register_controller(pdev, qdev->bar_0, 
>>> mhi_irq);
>>> +    if (IS_ERR(qdev->mhi_cntl)) {
>>> +        ret = PTR_ERR(qdev->mhi_cntl);
>>> +        goto mhi_register_fail;
>>> +    }
>>> +
>>> +    return 0;
>>> +
>>> +mhi_register_fail:
>>> +get_dbc_irq_failed:
>>
>> I don't think that duplicated goto statements are allowed by the 
>> coding style.
>> These should be rather named something like err_free_irq.
>> See 
>> https://www.kernel.org/doc/html/v4.10/process/coding-style.html#centralized-exiting-of-functions 
>>
> 
> I took another look at that link, and I do not beleive it prohibits 
> duplicated goto labels, but they probably could be renamed and 
> simplified as you indicate.  Will have a look at that.
> 
>>
>>> +    for (i = 0; i < qdev->num_dbc; ++i)
>>> +        devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i + 1),
>>> +                  &qdev->dbc[i]);
>>> +get_mhi_irq_fail:
>>> +invalid_msi_config:
>>> +    pci_free_irq_vectors(pdev);
>>> +alloc_irq_fail:
>>> +    iounmap(qdev->bar_2);
>>> +ioremap_2_fail:
>>> +    iounmap(qdev->bar_0);
>>> +ioremap_0_fail:
>>> +dma_mask_fail:
>>> +    pci_clear_master(pdev);
>>> +    pci_release_selected_regions(pdev, qdev->bars);
>>> +request_regions_fail:
>>> +    pci_disable_device(pdev);
>>> +enable_fail:
>>> +    pci_set_drvdata(pdev, NULL);
>>> +bar_fail:
>>> +    for (i = 0; i < qdev->num_dbc; ++i)
>>> +        cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
>>> +    cleanup_srcu_struct(&qdev->dev_lock);
>>> +    destroy_workqueue(qdev->cntl_wq);
>>> +wq_fail:
>>> +    return ret;
>>> +}
>>> +
>>> +static void qaic_pci_remove(struct pci_dev *pdev)
>>> +{
>>> +    struct qaic_device *qdev = pci_get_drvdata(pdev);
>>> +    int i;
>>> +
>>> +    if (!qdev)
>>> +        return;
>>> +
>>> +    qaic_dev_reset_clean_local_state(qdev, false);
>>> +    qaic_mhi_free_controller(qdev->mhi_cntl, link_up);
>>> +    for (i = 0; i < qdev->num_dbc; ++i) {
>>> +        devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i + 1),
>>> +                  &qdev->dbc[i]);
>>> +        cleanup_srcu_struct(&qdev->dbc[i].ch_lock);
>>> +    }
>>> +    destroy_workqueue(qdev->cntl_wq);
>>> +    pci_free_irq_vectors(pdev);
>>> +    iounmap(qdev->bar_0);
>>> +    pci_clear_master(pdev);
>>> +    pci_release_selected_regions(pdev, qdev->bars);
>>> +    pci_disable_device(pdev);
>>> +    pci_set_drvdata(pdev, NULL);
>>> +}
>>> +
>>> +static void qaic_pci_shutdown(struct pci_dev *pdev)
>>> +{
>>> +    /* see qaic_exit for what link_up is doing */
>>> +    link_up = true;
>>> +    qaic_pci_remove(pdev);
>>> +}
>>> +
>>> +static pci_ers_result_t qaic_pci_error_detected(struct pci_dev *pdev,
>>> +                        pci_channel_state_t error)
>>> +{
>>> +    return PCI_ERS_RESULT_NEED_RESET;
>>> +}
>>> +
>>> +static void qaic_pci_reset_prepare(struct pci_dev *pdev)
>>> +{
>>> +    struct qaic_device *qdev = pci_get_drvdata(pdev);
>>> +
>>> +    qaic_notify_reset(qdev);
>>> +    qaic_mhi_start_reset(qdev->mhi_cntl);
>>> +    qaic_dev_reset_clean_local_state(qdev, false);
>>> +}
>>> +
>>> +static void qaic_pci_reset_done(struct pci_dev *pdev)
>>> +{
>>> +    struct qaic_device *qdev = pci_get_drvdata(pdev);
>>> +
>>> +    qdev->in_reset = false;
>>> +    qaic_mhi_reset_done(qdev->mhi_cntl);
>>> +}
>>> +
>>> +static const struct mhi_device_id qaic_mhi_match_table[] = {
>>> +    { .chan = "QAIC_CONTROL", },
>>> +    {},
>>> +};
>>> +
>>> +static struct mhi_driver qaic_mhi_driver = {
>>> +    .id_table = qaic_mhi_match_table,
>>> +    .remove = qaic_mhi_remove,
>>> +    .probe = qaic_mhi_probe,
>>> +    .ul_xfer_cb = qaic_mhi_ul_xfer_cb,
>>> +    .dl_xfer_cb = qaic_mhi_dl_xfer_cb,
>>> +    .driver = {
>>> +        .name = "qaic_mhi",
>>> +    },
>>> +};
>>> +
>>> +static const struct pci_device_id qaic_ids[] = {
>>> +    { PCI_DEVICE(PCI_VENDOR_ID_QCOM, PCI_DEV_AIC100), },
>>> +    { }
>>> +};
>>> +MODULE_DEVICE_TABLE(pci, qaic_ids);
>>> +
>>> +static const struct pci_error_handlers qaic_pci_err_handler = {
>>> +    .error_detected = qaic_pci_error_detected,
>>> +    .reset_prepare = qaic_pci_reset_prepare,
>>> +    .reset_done = qaic_pci_reset_done,
>>> +};
>>> +
>>> +static struct pci_driver qaic_pci_driver = {
>>> +    .name = QAIC_NAME,
>>> +    .id_table = qaic_ids,
>>> +    .probe = qaic_pci_probe,
>>> +    .remove = qaic_pci_remove,
>>> +    .shutdown = qaic_pci_shutdown,
>>> +    .err_handler = &qaic_pci_err_handler,
>>> +};
>>> +
>>> +static int __init qaic_init(void)
>>> +{
>>> +    int ret;
>>> +
>>> +    if (datapath_polling)
>>> +        poll_datapath = true;
>>> +
>>> +    ret = mhi_driver_register(&qaic_mhi_driver);
>>> +    if (ret) {
>>> +        pr_debug("qaic: mhi_driver_register failed %d\n", ret);
>>> +        goto free_class;
>>> +    }
>>> +
>>> +    ret = pci_register_driver(&qaic_pci_driver);
>>> +    if (ret) {
>>> +        pr_debug("qaic: pci_register_driver failed %d\n", ret);
>>> +        goto free_mhi;
>>> +    }
>>> +
>>> +    ret = mhi_qaic_ctrl_init();
>>> +    if (ret) {
>>> +        pr_debug("qaic: mhi_qaic_ctrl_init failed %d\n", ret);
>>> +        goto free_pci;
>>> +    }
>>> +
>>> +    return 0;
>>> +
>>> +free_pci:
>>> +    pci_unregister_driver(&qaic_pci_driver);
>>> +free_mhi:
>>> +    mhi_driver_unregister(&qaic_mhi_driver);
>>> +free_class:
>>
>> This label doesn't free anything. It should be renamed.
> 
> Doh.  Holdover from a previous version.  It does need to be renamed.
> 
>>
>>> +    return ret;
>>> +}
>>> +
>>> +static void __exit qaic_exit(void)
>>> +{
>>> +    /*
>>> +     * We assume that qaic_pci_remove() is called due to a hotplug 
>>> event
>>> +     * which would mean that the link is down, and thus
>>> +     * qaic_mhi_free_controller() should not try to access the 
>>> device during
>>> +     * cleanup.
>>> +     * We call pci_unregister_driver() below, which also triggers
>>> +     * qaic_pci_remove(), but since this is module exit, we expect 
>>> the link
>>> +     * to the device to be up, in which case qaic_mhi_free_controller()
>>> +     * should try to access the device during cleanup to put the 
>>> device in
>>> +     * a sane state.
>>> +     * For that reason, we set link_up here to let 
>>> qaic_mhi_free_controller
>>> +     * know the expected link state.  Since the module is going to be
>>> +     * removed at the end of this, we don't need to worry about
>>> +     * reinitializing the link_up state after the cleanup is done.
>>> +     */
>>> +    link_up = true;
>>
>> Maybe you could just use pdev->current_state instead of link_up?
> 
> Maybe.  Will have a look at that.

Looks like no.  current_state does not appear to be modified by any of 
the hotplug code for remove events, and current_state is not updated by 
the pci framework until after the driver's remove() is invoked.

> 
>>
>>> +    mhi_qaic_ctrl_deinit();
>>> +    pci_unregister_driver(&qaic_pci_driver);
>>> +    mhi_driver_unregister(&qaic_mhi_driver);
>>> +}
>>> +
>>> +module_init(qaic_init);
>>> +module_exit(qaic_exit);
>>> +
>>> +MODULE_AUTHOR(QAIC_DESC " Kernel Driver Team");
>>> +MODULE_DESCRIPTION(QAIC_DESC " Accel Driver");
>>> +MODULE_LICENSE("GPL");
>>> diff --git a/include/uapi/drm/qaic_accel.h 
>>> b/include/uapi/drm/qaic_accel.h
>>> new file mode 100644
>>> index 0000000..d5fa6f5
>>> --- /dev/null
>>> +++ b/include/uapi/drm/qaic_accel.h
>>> @@ -0,0 +1,283 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
>>> + *
>>> + * Copyright (c) 2019-2020, The Linux Foundation. All rights reserved.
>>> + * Copyright (c) 2021-2023 Qualcomm Innovation Center, Inc. All 
>>> rights reserved.
>>> + */
>>> +
>>> +#ifndef QAIC_ACCEL_H_
>>> +#define QAIC_ACCEL_H_
>>> +
>>> +#include <linux/ioctl.h>
>>> +#include <linux/types.h>
>>
>> These two headers should not be needed here.
> 
> Fair enough.  Listed out of paranoia since they define things used in 
> this header.
> 
>>
>>> +#include "drm.h"
>>> +
>>> +#if defined(__CPLUSPLUS)
>>
>> Use lowercase here: __cplusplus
> 
> Doh.  Not sure why uppercase was used.  The referenced examples sure 
> didn't have that.
> 
>>
>>> +extern "C" {
>>> +#endif
>>> +
>>> +#define QAIC_MANAGE_MAX_MSG_LENGTH SZ_4K    /**<
>>> +                          * The length(4K) includes len and
>>> +                          * count fields of qaic_manage_msg
>>> +                          */
>>
>> I guess these are doxygen style commands but you should be using 
>> kernel-doc here.
>> See https://docs.kernel.org/doc-guide/kernel-doc.html.
>> This can be used to verify the header:
>> $ scripts/kernel-doc -v -none include/uapi/drm/qaic_accel.h
> 
> include/uapi/drm/drm.h was used as the example for these, and thus it 
> was assumed that was the DRM style.
> 
> I'm not convinced kernel-doc is applicable to all of these.  Will review.
> 
>>
>>> +
>>> +enum qaic_sem_flags {
>>
>> Is there any specific reason for enums if all values are explicitly 
>> given?
> 
> It is a style pulled from elsewhere.  I can remove the enums.
> 
>>
>>> +    SEM_INSYNCFENCE =    0x1,
>>
>> All these enums/defines end up in a global user space namespace.
>> I would advise to prefix everything with QAIC_ or DRM_QAIC_ (e.g. 
>> QAIC_SEM_INSYNCFENCE)
>> to avoid conflicts with other drivers or user space libs.
> 
> Good point.  Will do.
> 
>>
>>> +    SEM_OUTSYNCFENCE =    0x2,
>>> +};
>>> +
>>> +enum qaic_sem_cmd {
>>> +    SEM_NOP =        0,
>>> +    SEM_INIT =        1,
>>> +    SEM_INC =        2,
>>> +    SEM_DEC =        3,
>>> +    SEM_WAIT_EQUAL =    4,
>>> +    SEM_WAIT_GT_EQ =    5, /**< Greater than or equal */
>>> +    SEM_WAIT_GT_0 =        6, /**< Greater than 0 */
>>> +};
>>> +
>>> +enum qaic_manage_transaction_type {
>>> +    TRANS_UNDEFINED =            0,
>>> +    TRANS_PASSTHROUGH_FROM_USR =        1,
>>> +    TRANS_PASSTHROUGH_TO_USR =        2,
>>> +    TRANS_PASSTHROUGH_FROM_DEV =        3,
>>> +    TRANS_PASSTHROUGH_TO_DEV =        4,
>>> +    TRANS_DMA_XFER_FROM_USR =        5,
>>> +    TRANS_DMA_XFER_TO_DEV =            6,
>>> +    TRANS_ACTIVATE_FROM_USR =        7,
>>> +    TRANS_ACTIVATE_FROM_DEV =        8,
>>> +    TRANS_ACTIVATE_TO_DEV =            9,
>>> +    TRANS_DEACTIVATE_FROM_USR =        10,
>>> +    TRANS_DEACTIVATE_FROM_DEV =        11,
>>> +    TRANS_STATUS_FROM_USR =            12,
>>> +    TRANS_STATUS_TO_USR =            13,
>>> +    TRANS_STATUS_FROM_DEV =            14,
>>> +    TRANS_STATUS_TO_DEV =            15,
>>> +    TRANS_TERMINATE_FROM_DEV =        16,
>>> +    TRANS_TERMINATE_TO_DEV =        17,
>>> +    TRANS_DMA_XFER_CONT =            18,
>>> +    TRANS_VALIDATE_PARTITION_FROM_DEV =    19,
>>> +    TRANS_VALIDATE_PARTITION_TO_DEV =    20,
>>> +    TRANS_MAX =                21
>>> +};
>>> +
>>> +struct qaic_manage_trans_hdr {
>>> +    __u32 type;    /**< in, value from enum manage_transaction_type */
>>> +    __u32 len;    /**< in, length of this transaction, including the 
>>> header */
>>> +};
>>> +
>>> +struct qaic_manage_trans_passthrough {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +    __u8 data[];    /**< in, userspace must encode in little endian */
>>> +};
>>> +
>>> +struct qaic_manage_trans_dma_xfer {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +    __u32 tag;    /**< in, device specific */
>>> +    __u32 count;    /**< in */
>>> +    __u64 addr;    /**< in, address of the data to transferred via 
>>> DMA */
>>> +    __u64 size;    /**< in, length of the data to transferred via 
>>> DMA */
>>> +};
>>> +
>>> +struct qaic_manage_trans_activate_to_dev {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +    __u32 queue_size;    /**<
>>> +                  * in, number of elements in DBC request
>>> +                  * and respose queue
>>> +                  */
>>> +    __u32 eventfd;        /**< in */
>>> +    __u32 options;        /**< in, device specific */
>>> +    __u32 pad;        /**< pad must be 0 */
>>> +};
>>> +
>>> +struct qaic_manage_trans_activate_from_dev {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +    __u32 status;    /**< out, status of activate transaction */
>>> +    __u32 dbc_id;    /**< out, Identifier of assigned DMA Bridge 
>>> channel */
>>> +    __u64 options;    /**< out */
>>> +};
>>> +
>>> +struct qaic_manage_trans_deactivate {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +    __u32 dbc_id;    /**< in, Identifier of assigned DMA Bridge 
>>> channel */
>>> +    __u32 pad;    /**< pad must be 0 */
>>> +};
>>> +
>>> +struct qaic_manage_trans_status_to_dev {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +};
>>> +
>>> +struct qaic_manage_trans_status_from_dev {
>>> +    struct qaic_manage_trans_hdr hdr;
>>> +    __u16 major;    /**< out, major vesrion of NNC protocol used by 
>>> device */
>>> +    __u16 minor;    /**< out, minor vesrion of NNC protocol used by 
>>> device */
>>
>> vesrion -> version
> 
> Will fix.
> 
>>
>>> +    __u32 status;    /**< out, status of query transaction  */
>>> +    __u64 status_flags;    /**<
>>> +                  * out
>>> +                  * 0    : If set then device has CRC check enabled
>>> +                  * 1:63 : Unused
>>> +                  */
>>> +};
>>> +
>>> +struct qaic_manage_msg {
>>> +    __u32 len;    /**< in, Length of valid data - ie sum of all 
>>> transactions */
>>> +    __u32 count;    /**< in, Number of transactions in message */
>>> +    __u64 data;    /**< in, Pointer to array of transactions */
>>> +};
>>> +
>>> +struct qaic_create_bo {
>>> +    __u64 size;    /**< in, Size of BO in byte */
>>> +    __u32 handle;    /**< out, Returned GEM handle for the BO */
>>> +    __u32 pad;    /**< pad must be 0 */
>>> +};
>>> +
>>> +struct qaic_mmap_bo {
>>> +    __u32 handle;    /**< in, Handle for the BO being mapped. */
>>
>> The comment is missleading. BO is not mapped by this ioctl().
> 
> Its mapping the handle to the offset, which is a type of mapping.  I 
> think I get your point though.  Will try to wordsmith this better.
> 
>>
>>> +    __u32 pad;    /**< pad must be 0 */
>>> +    __u64 offset;    /**<
>>> +              * out, offset into the drm node to use for
>>> +              * subsequent mmap call
>>> +              */
>>> +};
>>> +
>>> +/**
>>> + * @brief semaphore command
>>> + */
>>> +struct qaic_sem {
>>> +    __u16 val;    /**< in, Only lower 12 bits are valid */
>>> +    __u8  index;    /**< in, Only lower 5 bits are valid */
>>> +    __u8  presync;    /**< in, 1 if presync operation, 0 if postsync */
>>> +    __u8  cmd;    /**< in, See enum sem_cmd */
>>> +    __u8  flags;    /**< in, See sem_flags for valid bits.  All 
>>> others must be 0 */
>>> +    __u16 pad;    /**< pad must be 0 */
>>> +};
>>> +
>>> +struct qaic_attach_slice_entry {
>>> +    __u64 size;        /**< in, Size memory to allocate for this BO 
>>> slice */
>>> +    struct qaic_sem    sem0;    /**< in, Must be zero if not valid */
>>> +    struct qaic_sem    sem1;    /**< in, Must be zero if not valid */
>>> +    struct qaic_sem    sem2;    /**< in, Must be zero if not valid */
>>> +    struct qaic_sem    sem3;    /**< in, Must be zero if not valid */
>>> +    __u64 dev_addr;        /**< in, Address in device to/from which 
>>> data is copied */
>>> +    __u64 db_addr;        /**< in, Doorbell address */
>>> +    __u32 db_data;        /**< in, Data to write to doorbell */
>>> +    __u32 db_len;        /**<
>>> +                  * in, Doorbell length - 32, 16, or 8 bits.
>>> +                  * 0 means doorbell is inactive
>>> +                  */
>>> +    __u64 offset;        /**< in, Offset from start of buffer */
>>> +};
>>> +
>>> +struct qaic_attach_slice_hdr {
>>> +    __u32 count;    /**< in, Number of slices for this BO */
>>> +    __u32 dbc_id;    /**< in, Associate this BO with this DMA Bridge 
>>> channel */
>>> +    __u32 handle;    /**< in, Handle of BO to which slicing 
>>> information is to be attached */
>>> +    __u32 dir;    /**< in, Direction of data: 1 = DMA_TO_DEVICE, 2 = 
>>> DMA_FROM_DEVICE */
>>> +    __u64 size;    /**<
>>> +              * in, Total length of BO
>>> +              * If BO is imported (DMABUF/PRIME) then this size
>>> +              * should not exceed the size of DMABUF provided.
>>> +              * If BO is allocated using DRM_IOCTL_QAIC_CREATE_BO
>>> +              * then this size should be exactly same as the size
>>> +              * provided during DRM_IOCTL_QAIC_CREATE_BO.
>>> +              */
>>> +};
>>> +
>>> +struct qaic_attach_slice {
>>> +    struct qaic_attach_slice_hdr hdr;
>>> +    __u64 data;    /**<
>>> +              * in, Pointer to a buffer which is container of
>>> +              * struct qaic_attach_slice_entry[]
>>> +              */
>>> +};
>>> +
>>> +struct qaic_execute_entry {
>>> +    __u32 handle;    /**< in, buffer handle */
>>> +    __u32 dir;    /**< in, 1 = to device, 2 = from device */
>>> +};
>>> +
>>> +struct qaic_partial_execute_entry {
>>> +    __u32 handle;    /**< in, buffer handle */
>>> +    __u32 dir;    /**< in, 1 = to device, 2 = from device */
>>> +    __u64 resize;    /**< in, 0 = no resize */
>>> +};
>>> +
>>> +struct qaic_execute_hdr {
>>> +    __u32 count;    /**< in, number of executes following this 
>>> header */
>>> +    __u32 dbc_id;    /**< in, Identifier of assigned DMA Bridge 
>>> channel */
>>> +};
>>> +
>>> +struct qaic_execute {
>>> +    struct qaic_execute_hdr hdr;
>>> +    __u64 data;    /**< in, qaic_execute_entry or 
>>> qaic_partial_execute_entry container */
>>> +};
>>> +
>>> +struct qaic_wait {
>>> +    __u32 handle;    /**< in, handle to wait on until execute is 
>>> complete */
>>> +    __u32 timeout;    /**< in, timeout for wait(in ms) */
>>> +    __u32 dbc_id;    /**< in, Identifier of assigned DMA Bridge 
>>> channel */
>>> +    __u32 pad;    /**< pad must be 0 */
>>> +};
>>> +
>>> +struct qaic_perf_stats_hdr {
>>> +    __u16 count;    /**< in, Total number BOs requested */
>>> +    __u16 pad;    /**< pad must be 0 */
>>> +    __u32 dbc_id;    /**< in, Identifier of assigned DMA Bridge 
>>> channel */
>>> +};
>>> +
>>> +struct qaic_perf_stats {
>>> +    struct qaic_perf_stats_hdr hdr;
>>> +    __u64 data;    /**< in, qaic_perf_stats_entry container */
>>> +};
>>> +
>>> +struct qaic_perf_stats_entry {
>>> +    __u32 handle;            /**< in, Handle of the memory request */
>>> +    __u32 queue_level_before;    /**<
>>> +                      * out, Number of elements in queue
>>> +                      * before submission given memory request
>>> +                      */
>>> +    __u32 num_queue_element;    /**<
>>> +                      * out, Number of elements to add in the
>>> +                      * queue for given memory request
>>> +                      */
>>> +    __u32 submit_latency_us;    /**<
>>> +                      * out, Time taken by kernel to submit
>>> +                      * the request to device
>>> +                      */
>>> +    __u32 device_latency_us;    /**<
>>> +                      * out, Time taken by device to execute the
>>> +                      * request. 0 if request is not completed
>>> +                      */
>>> +    __u32 pad;            /**< pad must be 0 */
>>> +};
>>> +
>>> +struct qaic_part_dev {
>>> +    __u32 partition_id;    /**< in, reservation id */
>>> +    __u16 remove;        /**< in, 1 - Remove device 0 - Create 
>>> device */
>>> +    __u16 pad;        /**< pad must be 0 */
>>> +};
>>> +
>>> +#define DRM_QAIC_MANAGE                0x00
>>> +#define DRM_QAIC_CREATE_BO            0x01
>>> +#define DRM_QAIC_MMAP_BO            0x02
>>
>> I know that MMAP_BO ioctl is common in drm drivers but in my opinion 
>> it is a very poor name.
>> I suggest naming it BO_INFO so in future you could extend it with 
>> other bo params besides
>> mmap offset.
> 
> I see your point, but I don't see how to implement it.
> 
> Let us assume this IOCTL is renamed DRM_QAIC_BO_INFO, and struct 
> qaic_mmap_bo is named qaic_bo_info.
> 
> qaic_bo_info is part of the uAPI.  If, "tomorrow" we need to add a field 
> to it, we cannot.  That changes the structure size and breaks the uAPI.
> 
> I can't add reserved fields in anticipation of future use since GregKH 
> had said not to do that - 
> https://lore.kernel.org/all/20200514163734.GB3154055@kroah.com/
> 
> So, I'm thinking the only approach would be a linked list similar to 
> EXECUTE_BO where qaic_bo_info holds a userspace pointer that is the head 
> of the list, and new list nodes can be defined in future.  That seems 
> like an overengineered solution to some hypothetical future which may 
> not come to pass.  If this is the solution, then I think I'd prefer to 
> leave this ioctl as is, and deprecate it if the extension usecase ever 
> comes to pass.
> 
> Thoughts?  Am I missing something?
> 
>>
>>> +#define DRM_QAIC_ATTACH_SLICE_BO        0x03
>>> +#define DRM_QAIC_EXECUTE_BO            0x04
>>> +#define DRM_QAIC_PARTIAL_EXECUTE_BO        0x05
>>> +#define DRM_QAIC_WAIT_BO            0x06
>>> +#define DRM_QAIC_PERF_STATS_BO            0x07
>>> +#define DRM_QAIC_PART_DEV            0x08
>>> +
>>> +#define DRM_IOCTL_QAIC_MANAGE            DRM_IOWR(DRM_COMMAND_BASE + 
>>> DRM_QAIC_MANAGE, struct qaic_manage_msg)
>>> +#define DRM_IOCTL_QAIC_CREATE_BO        DRM_IOWR(DRM_COMMAND_BASE + 
>>> DRM_QAIC_CREATE_BO,    struct qaic_create_bo)
>>> +#define DRM_IOCTL_QAIC_MMAP_BO            DRM_IOWR(DRM_COMMAND_BASE 
>>> + DRM_QAIC_MMAP_BO, struct qaic_mmap_bo)
>>> +#define DRM_IOCTL_QAIC_ATTACH_SLICE_BO        
>>> DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_ATTACH_SLICE_BO, struct 
>>> qaic_attach_slice)
>>> +#define DRM_IOCTL_QAIC_EXECUTE_BO        DRM_IOW(DRM_COMMAND_BASE + 
>>> DRM_QAIC_EXECUTE_BO,    struct qaic_execute)
>>> +#define DRM_IOCTL_QAIC_PARTIAL_EXECUTE_BO    
>>> DRM_IOW(DRM_COMMAND_BASE + DRM_QAIC_PARTIAL_EXECUTE_BO,    struct 
>>> qaic_execute)
>>> +#define DRM_IOCTL_QAIC_WAIT_BO            DRM_IOW(DRM_COMMAND_BASE + 
>>> DRM_QAIC_WAIT_BO, struct qaic_wait)
>>> +#define DRM_IOCTL_QAIC_PERF_STATS_BO        
>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_QAIC_PERF_STATS_BO, struct 
>>> qaic_perf_stats)
>>> +#define DRM_IOCTL_QAIC_PART_DEV            DRM_IOWR(DRM_COMMAND_BASE 
>>> + DRM_QAIC_PART_DEV, struct qaic_part_dev)
>>> +
>>> +#if defined(__CPLUSPLUS)
>>
>> Use lowercase here: __cplusplus
> 
> Will do.
> 
>>
>>> +}
>>> +#endif
>>> +
>>> +#endif /* QAIC_ACCEL_H_ */
>>
>> Regards,
>> Jacek
>>
> 



More information about the dri-devel mailing list