[Intel-xe] [PATCH v4 02/31] drm/xe: Introduce xe_tile

Ofir Bitton obitton at habana.ai
Sun Jun 18 06:19:18 UTC 2023


On 15/06/2023 2:36, Matt Roper wrote:
> On Tue, Jun 13, 2023 at 10:41:04AM -0700, Ofir Bitton wrote:
>> Hey, sorry for jumping in so late, few comments, not a must, we can
>> refactor this later.
>>
>> On 01/06/2023 3:03, Matt Roper wrote:
>>> Create a new xe_tile structure to begin separating the concept of "tile"
>>> from "GT."  A tile is effectively a complete GPU, and a GT is just one
>>> part of that.  On platforms like MTL, there's only a single full GPU
>>> (tile) which has its IP blocks provided by two GTs.  In contrast, a
>>> "multi-tile" platform like PVC is basically multiple complete GPUs
>>> packed behind a single PCI device.
>>>
>>> For now, just create xe_tile as a simple wrapper around xe_gt.  The
>>> items in xe_gt that are truly tied to the tile rather than the GT will
>>> be moved in future patches.  Support for multiple GTs per tile (i.e.,
>>> the MTL standalone media case) will also be re-introduced in a future
>>> patch.
>>>
>>> v2:
>>>    - Fix kunit test build
>>>    - Move hunk from next patch to use local tile variable rather than
>>>      direct xe->tiles[id] accesses.  (Lucas)
>>>    - Mention compute in kerneldoc.  (Rodrigo)
>>>
>>> Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>> Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
>>> ---
>>>    drivers/gpu/drm/xe/tests/xe_bo.c       |  2 +-
>>>    drivers/gpu/drm/xe/tests/xe_rtp_test.c |  6 ++--
>>>    drivers/gpu/drm/xe/xe_device.h         | 11 +++++--
>>>    drivers/gpu/drm/xe/xe_device_types.h   | 40 ++++++++++++++++++++++++--
>>>    drivers/gpu/drm/xe/xe_gt_types.h       | 15 ++++++----
>>>    drivers/gpu/drm/xe/xe_mmio.c           | 13 +++++----
>>>    drivers/gpu/drm/xe/xe_pci.c            |  7 ++++-
>>>    drivers/gpu/drm/xe/xe_vm.c             |  2 +-
>>>    drivers/gpu/drm/xe/xe_vm_types.h       |  8 +++---
>>>    9 files changed, 78 insertions(+), 26 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
>>> index 9bd381e5b7a6..6075f12a1962 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_bo.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_bo.c
>>> @@ -174,7 +174,7 @@ static int evict_test_run_gt(struct xe_device *xe, struct xe_gt *gt, struct kuni
>>>        struct xe_bo *bo, *external;
>>>        unsigned int bo_flags = XE_BO_CREATE_USER_BIT |
>>>                XE_BO_CREATE_VRAM_IF_DGFX(gt);
>>> -     struct xe_vm *vm = xe_migrate_get_vm(xe->gt[0].migrate);
>>> +     struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->primary_gt.migrate);
>>>        struct ww_acquire_ctx ww;
>>>        int err, i;
>>>
>>> diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> index ab6f7a47db50..45f2614f91ec 100644
>>> --- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> +++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
>>> @@ -13,6 +13,7 @@
>>>
>>>    #include "regs/xe_gt_regs.h"
>>>    #include "regs/xe_reg_defs.h"
>>> +#include "xe_device.h"
>>>    #include "xe_device_types.h"
>>>    #include "xe_pci_test.h"
>>>    #include "xe_reg_sr.h"
>>> @@ -236,9 +237,10 @@ static void xe_rtp_process_tests(struct kunit *test)
>>>    {
>>>        const struct rtp_test_case *param = test->param_value;
>>>        struct xe_device *xe = test->priv;
>>> -     struct xe_reg_sr *reg_sr = &xe->gt[0].reg_sr;
>>> +     struct xe_gt *gt = &xe_device_get_root_tile(xe)->primary_gt;
>>> +     struct xe_reg_sr *reg_sr = &gt->reg_sr;
>>>        const struct xe_reg_sr_entry *sre, *sr_entry = NULL;
>>> -     struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(&xe->gt[0]);
>>> +     struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
>>>        unsigned long idx, count = 0;
>>>
>>>        xe_reg_sr_init(reg_sr, "xe_rtp_tests", xe);
>>> diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
>>> index cbae480a2092..f7acaf51a1fc 100644
>>> --- a/drivers/gpu/drm/xe/xe_device.h
>>> +++ b/drivers/gpu/drm/xe/xe_device.h
>>> @@ -48,12 +48,17 @@ static inline struct xe_file *to_xe_file(const struct drm_file *file)
>>>        return file->driver_priv;
>>>    }
>>>
>>> +static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
>>> +{
>>> +     return &xe->tiles[0];
>>> +}
>>> +
>>>    static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
>>>    {
>>>        struct xe_gt *gt;
>>>
>>> -     XE_BUG_ON(gt_id > XE_MAX_GT);
>>> -     gt = xe->gt + gt_id;
>>> +     XE_BUG_ON(gt_id > XE_MAX_TILES_PER_DEVICE);
>>
>> Wouldn't it be robust to check (gt_id > xe->info.tile_count) ?
> 
> This code changes a bit later on in the series, although it's still
> using build-time constants at the end.  Your suggestion of using runtime
> counts (tile_count and/or gt_count) is valid as a follow-up, although
> the handling of those counts, as well as how IDs get assigned is an area
> where we're still expecting further changes once we settle on exactly
> how we want to expose tiles and GTs through the uapi.  There's a good
> chance that we won't even be looking up GTs with device-wide IDs anymore
> once the uapi changes are in place.

ok

> 
>>
>>> +     gt = &xe->tiles[gt_id].primary_gt;
>>>        XE_BUG_ON(gt->info.id != gt_id);
>>>        XE_BUG_ON(gt->info.type == XE_GT_TYPE_UNINITIALIZED);
>>>
>>> @@ -65,7 +70,7 @@ static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
>>>     */
>>>    static inline struct xe_gt *to_gt(struct xe_device *xe)
>>>    {
>>> -     return xe->gt;
>>> +     return &xe_device_get_root_tile(xe)->primary_gt;
>>>    }
>>>
>>>    static inline bool xe_device_guc_submission_enabled(struct xe_device *xe)
>>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>>> index 51c16d207780..396ee281e499 100644
>>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>>> @@ -35,7 +35,7 @@
>>>
>>>    #define XE_GT0              0
>>>    #define XE_GT1              1
>>> -#define XE_MAX_GT    (XE_GT1 + 1)
>>> +#define XE_MAX_TILES_PER_DEVICE      (XE_GT1 + 1)
>>
>> This define will change each time we increase tile count.
>> I suggest removing this define and use dynamic allocations everywhere
>> according to 'tile_count.
>> Another (worst) option is to use some higher maximum (32?) and
>> initialize only the relevant elements in each array that uses this define.
> 
> These defines are driver internal defines; they aren't used anywhere in
> the uapi, so having them change occasionally when a new platform shows
> up that breaks the old limits is normal and expected; there's no problem
> with that.  We could change various internal arrays of constant size to
> be dynamically allocated at runtime, but that's a sizeable design and
> behavior change to the driver that's orthogonal to the GT vs tile
> refactor, so this series wouldn't have been the place to make a change
> like that.
> 
> However we do need to make XE_MAX_TILES_PER_DEVICE just a direct define
> as '2' since that's exactly what we want for now; it shouldn't be based
> on those "GT" defines anymore (which themselves should either be changed
> to something like XE_GT_PRIMARY / XE_GT_MEDIA or removed entirely).
> I'll send a patch or two to clean those up when I get some free time.
> 

I understand that my suggstion is probably not in the scope of this 
patch-set, However the hard-coded values will change in the near future 
in order to support other platforms. XE_GT_PRIMARY sounds like a good 
choise, but the other GT types and count can vary. Anyway this is just a 
heads up, I'm not expecting any changes right now but we should deal 
with this once this patch-set is merged.

>>
>>>
>>>    #define XE_MAX_ASID (BIT(20))
>>>
>>> @@ -49,6 +49,40 @@
>>>         (_xe)->info.step.graphics >= (min_step) &&                     \
>>>         (_xe)->info.step.graphics < (max_step))
>>>
>>> +#define tile_to_xe(tile__)                                                           \
>>> +     _Generic(tile__,                                                                \
>>> +              const struct xe_tile *: (const struct xe_device *)((tile__)->xe),      \
>>> +              struct xe_tile *: (tile__)->xe)
>>> +
>>> +/**
>>> + * struct xe_tile - hardware tile structure
>>> + *
>>> + * From a driver perspective, a "tile" is effectively a complete GPU, containing
>>> + * an SGunit, 1-2 GTs, and (for discrete platforms) VRAM.
>>> + *
>>> + * Multi-tile platforms effectively bundle multiple GPUs behind a single PCI
>>> + * device and designate one "root" tile as being responsible for external PCI
>>> + * communication.  PCI BAR0 exposes the GGTT and MMIO register space for each
>>> + * tile in a stacked layout, and PCI BAR2 exposes the local memory associated
>>> + * with each tile similarly.  Device-wide interrupts can be enabled/disabled
>>> + * at the root tile, and the MSTR_TILE_INTR register will report which tiles
>>> + * have interrupts that need servicing.
>>> + */
>>> +struct xe_tile {
>>> +     /** @xe: Backpointer to tile's PCI device */
>>> +     struct xe_device *xe;
>>> +
>>> +     /** @id: ID of the tile */
>>> +     u8 id;
>>> +
>>> +     /**
>>> +      * @primary_gt: Primary GT
>>> +      */
>>> +     struct xe_gt primary_gt;
>>> +
>>> +     /* TODO: Add media GT here */
>>> +};
>>> +
>>>    /**
>>>     * struct xe_device - Top level struct of XE device
>>>     */
>>> @@ -256,8 +290,8 @@ struct xe_device {
>>>        /** @ordered_wq: used to serialize compute mode resume */
>>>        struct workqueue_struct *ordered_wq;
>>>
>>> -     /** @gt: graphics tile */
>>> -     struct xe_gt gt[XE_MAX_GT];
>>> +     /** @tiles: device tiles */
>>> +     struct xe_tile tiles[XE_MAX_TILES_PER_DEVICE];
>>>
>>>        /**
>>>         * @mem_access: keep track of memory access in the device, possibly
>>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>>> index 093d650c35f4..456e3e447a2e 100644
>>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>>> @@ -77,12 +77,17 @@ enum xe_steering_type {
>>>    };
>>>
>>>    /**
>>> - * struct xe_gt - Top level struct of a graphics tile
>>> + * struct xe_gt - A "Graphics Technology" unit of the GPU
>>>     *
>>> - * A graphics tile may be a physical split (duplicate pieces of silicon,
>>> - * different GGTT + VRAM) or a virtual split (shared GGTT + VRAM). Either way
>>> - * this structure encapsulates of everything a GT is (MMIO, VRAM, memory
>>> - * management, microcontrols, and a hardware set of engines).
>>> + * A GT ("Graphics Technology") is the subset of a GPU primarily responsible
>>> + * for implementing the graphics, compute, and/or media IP.  It encapsulates
>>> + * the hardware engines, programmable execution units, and GuC.   Each GT has
>>> + * its own handling of power management (RC6+forcewake) and multicast register
>>> + * steering.
>>> + *
>>> + * A GPU/tile may have a single GT that supplies all graphics, compute, and
>>> + * media functionality, or the graphics/compute and media may be split into
>>> + * separate GTs within a tile.
>>>     */
>>>    struct xe_gt {
>>>        /** @xe: backpointer to XE device */
>>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>>> index ef2353eef6fe..9bc5715e9ebe 100644
>>> --- a/drivers/gpu/drm/xe/xe_mmio.c
>>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
>>> @@ -438,6 +438,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>                  struct drm_file *file)
>>>    {
>>>        struct xe_device *xe = to_xe_device(dev);
>>> +     struct xe_gt *gt = xe_device_get_gt(xe, 0);
>>
>> Better to have some define for '0' for example 'XE_ROOT_GT_ID'.
> 
> This is just patch #2 of a series of 31 patches; this line gets replaced
> with
> 
>          struct xe_gt *gt = xe_root_mmio_gt(xe);
> 
> later in the patch series.
> 
> 
> Matt
> 

Yeah I saw it after commenting here, thanks.

Ofir

>>
>> Ofir
>>
>>>        struct drm_xe_mmio *args = data;
>>>        unsigned int bits_flag, bytes;
>>>        struct xe_reg reg;
>>> @@ -480,7 +481,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>         */
>>>        reg = XE_REG(args->addr);
>>>
>>> -     xe_force_wake_get(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
>>> +     xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>
>>>        if (args->flags & DRM_XE_MMIO_WRITE) {
>>>                switch (bits_flag) {
>>> @@ -489,10 +490,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>                                ret = -EINVAL;
>>>                                goto exit;
>>>                        }
>>> -                     xe_mmio_write32(to_gt(xe), reg, args->value);
>>> +                     xe_mmio_write32(gt, reg, args->value);
>>>                        break;
>>>                case DRM_XE_MMIO_64BIT:
>>> -                     xe_mmio_write64(to_gt(xe), reg, args->value);
>>> +                     xe_mmio_write64(gt, reg, args->value);
>>>                        break;
>>>                default:
>>>                        drm_dbg(&xe->drm, "Invalid MMIO bit size");
>>> @@ -507,10 +508,10 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>        if (args->flags & DRM_XE_MMIO_READ) {
>>>                switch (bits_flag) {
>>>                case DRM_XE_MMIO_32BIT:
>>> -                     args->value = xe_mmio_read32(to_gt(xe), reg);
>>> +                     args->value = xe_mmio_read32(gt, reg);
>>>                        break;
>>>                case DRM_XE_MMIO_64BIT:
>>> -                     args->value = xe_mmio_read64(to_gt(xe), reg);
>>> +                     args->value = xe_mmio_read64(gt, reg);
>>>                        break;
>>>                default:
>>>                        drm_dbg(&xe->drm, "Invalid MMIO bit size");
>>> @@ -522,7 +523,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>        }
>>>
>>>    exit:
>>> -     xe_force_wake_put(gt_to_fw(&xe->gt[0]), XE_FORCEWAKE_ALL);
>>> +     xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>
>>>        return ret;
>>>    }
>>> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
>>> index 6f0c4203fca3..0ecf875e4fb4 100644
>>> --- a/drivers/gpu/drm/xe/xe_pci.c
>>> +++ b/drivers/gpu/drm/xe/xe_pci.c
>>> @@ -496,6 +496,7 @@ static int xe_info_init(struct xe_device *xe,
>>>        const struct xe_graphics_desc *graphics_desc = NULL;
>>>        const struct xe_media_desc *media_desc = NULL;
>>>        u32 graphics_gmdid_revid = 0, media_gmdid_revid = 0;
>>> +     struct xe_tile *tile;
>>>        struct xe_gt *gt;
>>>        u8 id;
>>>
>>> @@ -558,7 +559,11 @@ static int xe_info_init(struct xe_device *xe,
>>>        xe->info.tile_count = 1 + graphics_desc->max_remote_tiles;
>>>
>>>        for (id = 0; id < xe->info.tile_count; ++id) {
>>> -             gt = xe->gt + id;
>>> +             tile = &xe->tiles[id];
>>> +             tile->xe = xe;
>>> +             tile->id = id;
>>> +
>>> +             gt = &tile->primary_gt;
>>>                gt->info.id = id;
>>>                gt->xe = xe;
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>>> index dd1335e12d4c..87416b8bb878 100644
>>> --- a/drivers/gpu/drm/xe/xe_vm.c
>>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>>> @@ -3389,7 +3389,7 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
>>>        struct xe_device *xe = vma->vm->xe;
>>>        struct xe_gt *gt;
>>>        u32 gt_needs_invalidate = 0;
>>> -     int seqno[XE_MAX_GT];
>>> +     int seqno[XE_MAX_TILES_PER_DEVICE];
>>>        u8 id;
>>>        int ret;
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>>> index fada7896867f..203ba9d946b8 100644
>>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>>> @@ -159,7 +159,7 @@ struct xe_vm {
>>>        struct kref refcount;
>>>
>>>        /* engine used for (un)binding vma's */
>>> -     struct xe_engine *eng[XE_MAX_GT];
>>> +     struct xe_engine *eng[XE_MAX_TILES_PER_DEVICE];
>>>
>>>        /** Protects @rebind_list and the page-table structures */
>>>        struct dma_resv resv;
>>> @@ -167,9 +167,9 @@ struct xe_vm {
>>>        u64 size;
>>>        struct rb_root vmas;
>>>
>>> -     struct xe_pt *pt_root[XE_MAX_GT];
>>> -     struct xe_bo *scratch_bo[XE_MAX_GT];
>>> -     struct xe_pt *scratch_pt[XE_MAX_GT][XE_VM_MAX_LEVEL];
>>> +     struct xe_pt *pt_root[XE_MAX_TILES_PER_DEVICE];
>>> +     struct xe_bo *scratch_bo[XE_MAX_TILES_PER_DEVICE];
>>> +     struct xe_pt *scratch_pt[XE_MAX_TILES_PER_DEVICE][XE_VM_MAX_LEVEL];
>>>
>>>        /** @flags: flags for this VM, statically setup a creation time */
>>>    #define XE_VM_FLAGS_64K                     BIT(0)
>>
> 



More information about the Intel-xe mailing list