[PATCH v2 8/8] drm/xe: msix support for hw engines

Ilia Levi illevi at habana.ai
Wed Jul 17 08:40:35 UTC 2024


On 10/07/2024 19:43, Niranjana Vishwanathapura wrote:
> On Thu, Jun 27, 2024 at 03:40:43PM +0300, Dani Liberman wrote:
>> From: Ilia Levi <illevi at habana.ai>
>>
>> For devices that support MSIX, we would like to be able to configure
>> the hw engines to work with MSIX. This patch allocates MSIX vectors
>> for exec queues (via MSIX allocator), registers a handler and
>> programs the lrc the same way vf does it (using memirq). An
>> additional field added to the lrc is CS_INT_VEC.
>>
>> MSIX vector 0 is used for GuC-to-host interrupt.
>>
>> bspec: 60342, 72547
>>
>> Signed-off-by: Ilia Levi <illevi at habana.ai>
>> ---
>> drivers/gpu/drm/xe/regs/xe_engine_regs.h |  3 ++
>> drivers/gpu/drm/xe/regs/xe_lrc_layout.h  |  3 ++
>> drivers/gpu/drm/xe/xe_exec_queue.c       |  2 +-
>> drivers/gpu/drm/xe/xe_execlist.c         | 12 +++++--
>> drivers/gpu/drm/xe/xe_execlist_types.h   |  1 +
>> drivers/gpu/drm/xe/xe_hw_engine.c        |  7 ++--
>> drivers/gpu/drm/xe/xe_irq.c              | 42 ++++++++++++++++++++++--
>> drivers/gpu/drm/xe/xe_lrc.c              | 22 ++++++++++---
>> drivers/gpu/drm/xe/xe_lrc.h              |  2 +-
>> 9 files changed, 80 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h 
>> b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> index c38db2a74614..4c9e4f467e64 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> @@ -83,6 +83,8 @@
>> #define RING_IMR(base)                XE_REG((base) + 0xa8)
>> #define RING_INT_STATUS_RPT_PTR(base)        XE_REG((base) + 0xac)
>>
>> +#define CS_INT_VEC(base)            XE_REG((base) + 0x1b8)
>> +
>> #define RING_EIR(base)                XE_REG((base) + 0xb0)
>> #define RING_EMR(base)                XE_REG((base) + 0xb4)
>> #define RING_ESR(base)                XE_REG((base) + 0xb8)
>> @@ -137,6 +139,7 @@
>>
>> #define RING_MODE(base)                XE_REG((base) + 0x29c)
>> #define   GFX_DISABLE_LEGACY_MODE        REG_BIT(3)
>> +#define   GFX_MSIX_INTERRUPT_ENABLE        REG_BIT(13)
>>
>> #define RING_TIMESTAMP(base)            XE_REG((base) + 0x358)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h 
>> b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
>> index 045dfd09db99..9b3eafd2bdc4 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
>> @@ -25,6 +25,9 @@
>> #define CTX_INT_SRC_REPORT_REG        (CTX_LRI_INT_REPORT_PTR + 3)
>> #define CTX_INT_SRC_REPORT_PTR        (CTX_LRI_INT_REPORT_PTR + 4)
>>
>> +#define CTX_CS_INT_VEC_REG        0x5a
>> +#define CTX_CS_INT_VEC_DATA        (0x5a + 1)
>> +
>> #define INDIRECT_CTX_RING_HEAD        (0x02 + 1)
>> #define INDIRECT_CTX_RING_TAIL        (0x04 + 1)
>> #define INDIRECT_CTX_RING_START        (0x06 + 1)
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c 
>> b/drivers/gpu/drm/xe/xe_exec_queue.c
>> index e40c5380e292..8b709968ec56 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>> @@ -143,7 +143,7 @@ static int __xe_exec_queue_init(struct 
>> xe_exec_queue *q)
>>     int i, err;
>>
>>     for (i = 0; i < q->width; ++i) {
>> -        q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
>> +        q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, 
>> q->msix_number);
>>         if (IS_ERR(q->lrc[i])) {
>>             err = PTR_ERR(q->lrc[i]);
>>             goto err_lrc;
>> diff --git a/drivers/gpu/drm/xe/xe_execlist.c 
>> b/drivers/gpu/drm/xe/xe_execlist.c
>> index 354f85b591b1..858a77c83b5c 100644
>> --- a/drivers/gpu/drm/xe/xe_execlist.c
>> +++ b/drivers/gpu/drm/xe/xe_execlist.c
>> @@ -17,6 +17,7 @@
>> #include "xe_exec_queue.h"
>> #include "xe_gt.h"
>> #include "xe_hw_fence.h"
>> +#include "xe_irq.h"
>> #include "xe_lrc.h"
>> #include "xe_macros.h"
>> #include "xe_mmio.h"
>> @@ -254,7 +255,7 @@ struct xe_execlist_port 
>> *xe_execlist_port_create(struct xe_device *xe,
>> {
>>     struct drm_device *drm = &xe->drm;
>>     struct xe_execlist_port *port;
>> -    int i;
>> +    int i, err;
>>
>>     port = drmm_kzalloc(drm, sizeof(*port), GFP_KERNEL);
>>     if (!port)
>> @@ -262,7 +263,14 @@ struct xe_execlist_port 
>> *xe_execlist_port_create(struct xe_device *xe,
>>
>>     port->hwe = hwe;
>>
>> -    port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
>> +    if (xe_device_has_msix(xe)) {
>> +        err = xe_request_irq(xe, xe_irq_hwe_handler, hwe,
>> +                     hwe->name, true, &port->msix_number);
>> +        if (err)
>> +            return ERR_PTR(err);
>> +    }
>> +
>> +    port->lrc = xe_lrc_create(hwe, NULL, SZ_16K, port->msix_number);
>
> Where is this port->msix_number set?
>
>>     if (IS_ERR(port->lrc))
>>         return ERR_PTR(PTR_ERR(port->lrc));
>>
>> diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h 
>> b/drivers/gpu/drm/xe/xe_execlist_types.h
>> index 415140936f11..bbb05310368e 100644
>> --- a/drivers/gpu/drm/xe/xe_execlist_types.h
>> +++ b/drivers/gpu/drm/xe/xe_execlist_types.h
>> @@ -23,6 +23,7 @@ struct xe_execlist_port {
>>     struct list_head active[XE_EXEC_QUEUE_PRIORITY_COUNT];
>>
>>     u32 last_ctx_id;
>> +    u32 msix_number;
>>
>>     struct xe_execlist_exec_queue *running_exl;
>>
>> diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c 
>> b/drivers/gpu/drm/xe/xe_hw_engine.c
>> index 90660d9382a0..667393d70d6d 100644
>> --- a/drivers/gpu/drm/xe/xe_hw_engine.c
>> +++ b/drivers/gpu/drm/xe/xe_hw_engine.c
>> @@ -298,16 +298,19 @@ void xe_hw_engine_enable_ring(struct 
>> xe_hw_engine *hwe)
>> {
>>     u32 ccs_mask =
>>         xe_hw_engine_mask_per_class(hwe->gt, XE_ENGINE_CLASS_COMPUTE);
>> +    u32 ring_mode = _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE);
>>
>>     if (hwe->class == XE_ENGINE_CLASS_COMPUTE && ccs_mask)
>>         xe_mmio_write32(hwe->gt, RCU_MODE,
>>                 _MASKED_BIT_ENABLE(RCU_MODE_CCS_ENABLE));
>>
>> +    if (xe_device_has_msix(gt_to_xe(hwe->gt)))
>> +        ring_mode |= _MASKED_BIT_ENABLE(GFX_MSIX_INTERRUPT_ENABLE);
>> +
>>     hw_engine_mmio_write32(hwe, RING_HWSTAM(0), ~0x0);
>>     hw_engine_mmio_write32(hwe, RING_HWS_PGA(0),
>>                    xe_bo_ggtt_addr(hwe->hwsp));
>> -    hw_engine_mmio_write32(hwe, RING_MODE(0),
>> - _MASKED_BIT_ENABLE(GFX_DISABLE_LEGACY_MODE));
>> +    hw_engine_mmio_write32(hwe, RING_MODE(0), ring_mode);
>>     hw_engine_mmio_write32(hwe, RING_MI_MODE(0),
>>                    _MASKED_BIT_DISABLE(STOP_RING));
>>     hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
>> diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
>> index 24e8445b2a76..62ed569bfcc7 100644
>> --- a/drivers/gpu/drm/xe/xe_irq.c
>> +++ b/drivers/gpu/drm/xe/xe_irq.c
>> @@ -11,6 +11,7 @@
>>
>> #include "display/xe_display.h"
>> #include "regs/xe_gt_regs.h"
>> +#include "regs/xe_guc_regs.h"
>> #include "regs/xe_regs.h"
>> #include "xe_device.h"
>> #include "xe_drv.h"
>> @@ -31,6 +32,7 @@
>> #define IER(offset)                XE_REG(offset + 0xc)
>>
>> enum static_msix_allocations {
>> +    GUC2HOST_MSIX = 0,
>>     NUM_OF_STATIC_MSIX,
>> };
>>
>> @@ -686,6 +688,13 @@ static void xe_irq_msi_free(struct xe_device *xe)
>>
>> static void xe_irq_msix_free(struct xe_device *xe)
>> {
>> +    unsigned long idx;
>> +    u32 *dummy;
>> +
>> +    xa_for_each(&xe->irq.msix_indexes, idx, dummy)
>> +        xe_free_irq(xe, idx);
>> +
>> +    xa_destroy(&xe->irq.msix_indexes);
>> }
>>
>> static void xe_irq_free(struct xe_device *xe)
>> @@ -731,8 +740,31 @@ static int xe_irq_msi_request(struct xe_device *xe)
>>     return 0;
>> }
>>
>> +static irqreturn_t guc2host_irq_handler(int irq, void *arg)
>> +{
>> +    struct xe_device *xe = arg;
>> +    struct xe_tile *tile;
>> +    u8 id;
>> +
>> +    for_each_tile(tile, xe, id)
>> +        xe_guc_irq_handler(&tile->primary_gt->uc.guc,
>> +                   GUC_INTR_GUC2HOST);
>> +
>> +    return IRQ_HANDLED;
>> +}
>> +
>> static int xe_irq_msix_request(struct xe_device *xe)
>> {
>> +    int err;
>> +    u32 msix = GUC2HOST_MSIX;
>> +
>> +    err = xe_request_irq(xe, guc2host_irq_handler, xe, DRIVER_NAME,
>> +                 false, &msix);
>> +    if (err) {
>> +        drm_err(&xe->drm, "Failed to request MSIX IRQ %d\n", err);
>> +        return err;
>> +    }
>
> Why only one g2h interrupt? Possible to have separate g2h interrupt
> for each tile?
AFAIK currently guc does not support this. This handler is basically a
stripped version of dg1_irq_handler, where we leave only the guc related
parts.

Ilia
>
>> +
>>     return 0;
>> }
>>
>> @@ -798,12 +830,16 @@ int xe_irq_install(struct xe_device *xe)
>>
>> static void xe_irq_msix_synchronize_irq(struct xe_device *xe)
>> {
>> +    struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
>> +    unsigned long msix_irq;
>> +    u32 *dummy;
>> +
>> +    xa_for_each(&xe->irq.msix_indexes, msix_irq, dummy)
>> +        synchronize_irq(pci_irq_vector(pdev, msix_irq));
>> }
>>
>> void xe_irq_suspend(struct xe_device *xe)
>> {
>> -    int irq = to_pci_dev(xe->drm.dev)->irq;
>> -
>>     spin_lock_irq(&xe->irq.lock);
>>     xe->irq.enabled = false; /* no new irqs */
>>     spin_unlock_irq(&xe->irq.lock);
>> @@ -812,7 +848,7 @@ void xe_irq_suspend(struct xe_device *xe)
>>     if (xe->irq.msix_enabled)
>>         xe_irq_msix_synchronize_irq(xe);
>>     else
>> -        synchronize_irq(irq);
>> + synchronize_irq(pci_irq_vector(to_pci_dev(xe->drm.dev), 0));
>>
>>     xe_irq_reset(xe); /* turn irqs off */
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
>> index 838609915916..575b4833be84 100644
>> --- a/drivers/gpu/drm/xe/xe_lrc.c
>> +++ b/drivers/gpu/drm/xe/xe_lrc.c
>> @@ -598,8 +598,9 @@ static void set_memory_based_intr(u32 *regs, 
>> struct xe_hw_engine *hwe)
>> {
>>     struct xe_memirq *memirq = &gt_to_tile(hwe->gt)->memirq;
>>     struct xe_device *xe = gt_to_xe(hwe->gt);
>> +    u8 num_regs;
>>
>> -    if (!IS_SRIOV_VF(xe) || !xe_device_has_memirq(xe))
>> +    if (!xe_device_needs_memirq(xe))
>>         return;
>>
>>     regs[CTX_LRM_INT_MASK_ENABLE] = MI_LOAD_REGISTER_MEM |
>> @@ -607,12 +608,18 @@ static void set_memory_based_intr(u32 *regs, 
>> struct xe_hw_engine *hwe)
>>     regs[CTX_INT_MASK_ENABLE_REG] = RING_IMR(0).addr;
>>     regs[CTX_INT_MASK_ENABLE_PTR] = xe_memirq_enable_ptr(memirq);
>>
>> -    regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | 
>> MI_LRI_NUM_REGS(2) |
>> +    num_regs = xe_device_has_msix(xe) ? 3 : 2;
>> +    regs[CTX_LRI_INT_REPORT_PTR] = MI_LOAD_REGISTER_IMM | 
>> MI_LRI_NUM_REGS(num_regs) |
>>                        MI_LRI_LRM_CS_MMIO | MI_LRI_FORCE_POSTED;
>>     regs[CTX_INT_STATUS_REPORT_REG] = RING_INT_STATUS_RPT_PTR(0).addr;
>>     regs[CTX_INT_STATUS_REPORT_PTR] = xe_memirq_status_ptr(memirq);
>>     regs[CTX_INT_SRC_REPORT_REG] = RING_INT_SRC_RPT_PTR(0).addr;
>>     regs[CTX_INT_SRC_REPORT_PTR] = xe_memirq_source_ptr(memirq);
>> +
>> +    if (xe_device_has_msix(xe)) {
>> +        regs[CTX_CS_INT_VEC_REG] = CS_INT_VEC(0).addr;
>> +        /* CTX_CS_INT_VEC_DATA will be set in xe_lrc_init */
>> +    }
>> }
>>
>> static int lrc_ring_mi_mode(struct xe_hw_engine *hwe)
>> @@ -890,7 +897,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
>> #define PVC_CTX_ACC_CTR_THOLD    (0x2a + 1)
>>
>> static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
>> -               struct xe_vm *vm, u32 ring_size)
>> +               struct xe_vm *vm, u32 ring_size, u32 msix)
>> {
>>     struct xe_gt *gt = hwe->gt;
>>     struct xe_tile *tile = gt_to_tile(gt);
>> @@ -959,6 +966,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, 
>> struct xe_hw_engine *hwe,
>>             xe_drm_client_add_bo(vm->xef->client, lrc->bo);
>>     }
>>
>> +    if (xe_device_has_msix(xe)) {
>> +        xe_lrc_write_ctx_reg(lrc, CTX_CS_INT_VEC_DATA, msix);
>> +    }
>> +
>
> Looks like upper 16bits of CTX_CS_INT_VEC_DATA is a different field 
> than lower 16.
> What should be the value for upper 16 bit? Shouls msix variable (here 
> and elsewhere)
> should be u16 then?
>
> Niranjana
>
Good idea - we're limited to 2048 by the PCI spec anyway.
Per my understanding the upper 16 bits will be used for
post sync interrupt - a feature that is yet to be implemented.

Ilia
>>     if (xe_gt_has_indirect_ring_state(gt)) {
>>         xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
>>                      __xe_lrc_indirect_ring_ggtt_addr(lrc));
>> @@ -1019,6 +1030,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, 
>> struct xe_hw_engine *hwe,
>>  * @hwe: Hardware Engine
>>  * @vm: The VM (address space)
>>  * @ring_size: LRC ring size
>> + * @msix: MSI-X vector (used only for MSI-X capable devices)
>>  *
>>  * Allocate and initialize the Logical Ring Context (LRC).
>>  *
>> @@ -1026,7 +1038,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, 
>> struct xe_hw_engine *hwe,
>>  * upon failure.
>>  */
>> struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
>> -                 u32 ring_size)
>> +                 u32 ring_size, u32 msix)
>> {
>>     struct xe_lrc *lrc;
>>     int err;
>> @@ -1035,7 +1047,7 @@ struct xe_lrc *xe_lrc_create(struct 
>> xe_hw_engine *hwe, struct xe_vm *vm,
>>     if (!lrc)
>>         return ERR_PTR(-ENOMEM);
>>
>> -    err = xe_lrc_init(lrc, hwe, vm, ring_size);
>> +    err = xe_lrc_init(lrc, hwe, vm, ring_size, msix);
>>     if (err) {
>>         kfree(lrc);
>>         return ERR_PTR(err);
>> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
>> index c24542e89318..1eded8919d73 100644
>> --- a/drivers/gpu/drm/xe/xe_lrc.h
>> +++ b/drivers/gpu/drm/xe/xe_lrc.h
>> @@ -23,7 +23,7 @@ struct xe_vm;
>> #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
>>
>> struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
>> -                 u32 ring_size);
>> +                 u32 ring_size, u32 msix);
>> void xe_lrc_destroy(struct kref *ref);
>>
>> /**
>> -- 
>> 2.34.1
>>



More information about the Intel-xe mailing list