[Intel-xe] [PATCH 3/6] drm/xe/vram: start tracking the io_size

Matthew Auld matthew.auld at intel.com
Tue Mar 7 12:23:33 UTC 2023


On 07/03/2023 11:55, Gwan-gyeong Mun wrote:
> 
> 
> On 3/1/23 4:48 PM, Matthew Auld wrote:
>> First step towards supporting small-bar is to track the io_size for
>> vram. We can no longer assume that the io_size == vram size. This way we
>> know how much is CPU accessible via the BAR, and how much is not.
>> Effectively giving us a two tiered vram, where in some later patches we
>> can support different allocation strategies depending on if the memory
>> needs to be CPU accessible or not.
>>
>> Note at this stage we still clamp the vram size to the usable vram size.
>> Only in the final patch do we turn this on for real, and allow distinct
>> io_size and vram_size.
>>
>> v2: (Lucas):
>>    - Improve the commit message, plus improve the kernel-doc for the
>>      io_size to give a better sense of what it actually is.
>>
>> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
>> Cc: Lucas De Marchi <lucas.demarchi at intel.com>
>> Reviewed-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_device_types.h | 14 +++++++--
>>   drivers/gpu/drm/xe/xe_gt_types.h     | 14 +++++++--
>>   drivers/gpu/drm/xe/xe_mmio.c         | 44 ++++++++++++++++++++--------
>>   3 files changed, 55 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>> index 9743987fc883..9e998b4738e1 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -173,9 +173,19 @@ struct xe_device {
>>       struct {
>>           /** @vram: VRAM info for device */
>>           struct {
>> -            /** @io_start: start address of VRAM */
>> +            /** @io_start: IO start address of VRAM */
>>               resource_size_t io_start;
>> -            /** @size: size of VRAM */
>> +            /**
>> +             * @io_size: IO size of VRAM.
>> +             *
>> +             * This represents how much of VRAM we can access via
>> +             * the CPU through the VRAM BAR. This can be smaller
>> +             * than @size, in which case only part of VRAM is CPU
>> +             * accessible (typically the first 256M). This
>> +             * configuration is known as small-bar.
>> +             */
>> +            resource_size_t io_size;
>> +            /** @size: Total size of VRAM */
>>               resource_size_t size;
>>               /** @mapping: pointer to VRAM mappable space */
>>               void *__iomem mapping;
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>> index b01edd3fdc4d..00c43f3a33a2 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -143,9 +143,19 @@ struct xe_gt {
>>            * (virtual split), can be subset of global device VRAM
>>            */
>>           struct {
>> -            /** @io_start: start address of VRAM */
>> +            /** @io_start: IO start address of this VRAM instance */
>>               resource_size_t io_start;
>> -            /** @size: size of VRAM */
>> +            /**
>> +             * @io_size: IO size of this VRAM instance
>> +             *
>> +             * This represents how much of this VRAM we can access
>> +             * via the CPU through the VRAM BAR. This can be smaller
>> +             * than @size, in which case only part of VRAM is CPU
>> +             * accessible (typically the first 256M). This
>> +             * configuration is known as small-bar.
>> +             */
>> +            resource_size_t io_size;
>> +            /** @size: size of VRAM. */
>>               resource_size_t size;
>>               /** @mapping: pointer to VRAM mappable space */
>>               void *__iomem mapping;
>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>> index e5bd4609aaee..5cacaa05759a 100644
>> --- a/drivers/gpu/drm/xe/xe_mmio.c
>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
>> @@ -182,7 +182,6 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>       u8 id;
>>       u64 vram_size;
>>       u64 original_size;
>> -    u64 current_size;
>>       u64 usable_size;
>>       int resize_result, err;
>> @@ -190,11 +189,13 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>           xe->mem.vram.mapping = 0;
>>           xe->mem.vram.size = 0;
>>           xe->mem.vram.io_start = 0;
>> +        xe->mem.vram.io_size = 0;
>>           for_each_gt(gt, xe, id) {
>>               gt->mem.vram.mapping = 0;
>>               gt->mem.vram.size = 0;
>>               gt->mem.vram.io_start = 0;
>> +            gt->mem.vram.io_size = 0;
>>           }
>>           return 0;
>>       }
>> @@ -212,10 +213,10 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>           return err;
>>       resize_result = xe_resize_vram_bar(xe, vram_size);
>> -    current_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
>>       xe->mem.vram.io_start = pci_resource_start(pdev, GEN12_LMEM_BAR);
>> -
>> -    xe->mem.vram.size = min(current_size, vram_size);
>> +    xe->mem.vram.io_size = min(usable_size,
>> +                   pci_resource_len(pdev, GEN12_LMEM_BAR));
> Hi Matt,
> xe_mmio_total_vram_size() calculates usable_size, why is there an 
> additional check here?

You mean the min() here? We are just clamping the io_size so that it is 
always <= usable_size. On small-bar it will be < usable_size, and on 
full-bar it will be >= usable_size. The BAR length can even be larger 
than the probed VRAM size, since the BAR size must always be a 
power-of-two, while VRAM can be non-power-of-two, like the 6G DG2.
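
To make that clamping concrete, here is a quick standalone sketch 
(userspace, not the driver code; the helper name and the 6G/8G/256M 
numbers are purely illustrative):

#include <stdint.h>
#include <stdio.h>

/* io_size is whatever the BAR exposes, but never more than usable VRAM */
static uint64_t clamp_io_size(uint64_t usable_size, uint64_t bar_len)
{
	return usable_size < bar_len ? usable_size : bar_len;
}

int main(void)
{
	const uint64_t MiB = 1ull << 20;
	const uint64_t GiB = 1ull << 30;

	/* 6G card with a resized (power-of-two) 8G BAR: clamp down to 6G */
	printf("full-bar:  %llu MiB\n",
	       (unsigned long long)(clamp_io_size(6 * GiB, 8 * GiB) / MiB));

	/* same 6G, but an unresized 256M BAR: small-bar, io_size < size */
	printf("small-bar: %llu MiB\n",
	       (unsigned long long)(clamp_io_size(6 * GiB, 256 * MiB) / MiB));

	return 0;
}

On full-bar the min() is effectively a no-op (io_size == usable_size); 
on small-bar it is what makes io_size strictly smaller than the total 
vram size.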

> 
> G.G.
>> +    xe->mem.vram.size = xe->mem.vram.io_size;
>>       if (!xe->mem.vram.size)
>>           return -EIO;
>> @@ -223,15 +224,15 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>       if (resize_result > 0)
>>           drm_info(&xe->drm, "Successfully resize VRAM from %lluMiB to %lluMiB\n",
>>                (u64)original_size >> 20,
>> -             (u64)current_size >> 20);
>> -    else if (xe->mem.vram.size < vram_size && !xe_force_vram_bar_size)
>> +             (u64)xe->mem.vram.io_size >> 20);
>> +    else if (xe->mem.vram.io_size < usable_size && !xe_force_vram_bar_size)
>>           drm_info(&xe->drm, "Using a reduced BAR size of %lluMiB. Consider enabling 'Resizable BAR' support in your BIOS.\n",
>>                (u64)xe->mem.vram.size >> 20);
>>       if (xe->mem.vram.size < vram_size)
>>           drm_warn(&xe->drm, "Restricting VRAM size to PCI resource size (0x%llx->0x%llx)\n",
>>                vram_size, (u64)xe->mem.vram.size);
>> -    xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.size);
>> +    xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
>>       xe->mem.vram.size = min_t(u64, xe->mem.vram.size, usable_size);
>>       drm_info(&xe->drm, "TOTAL VRAM: %pa, %pa\n", &xe->mem.vram.io_start, &xe->mem.vram.size);
>> @@ -239,7 +240,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>       /* FIXME: Assuming equally partitioned VRAM, incorrect */
>>       if (xe->info.tile_count > 1) {
>>           u8 adj_tile_count = xe->info.tile_count;
>> -        resource_size_t size, io_start;
>> +        resource_size_t size, io_start, io_size;
>>           for_each_gt(gt, xe, id)
>>               if (xe_gt_is_media_type(gt))
>> @@ -249,15 +250,31 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>           size = xe->mem.vram.size / adj_tile_count;
>>           io_start = xe->mem.vram.io_start;
>> +        io_size = xe->mem.vram.io_size;
>>           for_each_gt(gt, xe, id) {
>> -            if (id && !xe_gt_is_media_type(gt))
>> -                io_start += size;
>> +            if (id && !xe_gt_is_media_type(gt)) {
>> +                io_size -= min(io_size, size);
>> +                io_start += io_size;
>> +            }
>>               gt->mem.vram.size = size;
>> -            gt->mem.vram.io_start = io_start;
>> -            gt->mem.vram.mapping = xe->mem.vram.mapping +
>> -                (io_start - xe->mem.vram.io_start);
>> +
>> +            /*
>> +             * XXX: multi-tile small-bar might be wild. Hopefully
>> +             * full tile without any mappable vram is not something
>> +             * we care about.
>> +             */
>> +
>> +            gt->mem.vram.io_size = min(size, io_size);
>> +            if (io_size) {
>> +                gt->mem.vram.io_start = io_start;
>> +                gt->mem.vram.mapping = xe->mem.vram.mapping +
>> +                    (io_start - xe->mem.vram.io_start);
>> +            } else {
>> +                drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
>> +                return -ENODEV;
>> +            }
>>               drm_info(&xe->drm, "VRAM[%u, %u]: %pa, %pa\n",
>>                    id, gt->info.vram_id, &gt->mem.vram.io_start,
>> @@ -266,6 +283,7 @@ int xe_mmio_probe_vram(struct xe_device *xe)
>>       } else {
>>           gt->mem.vram.size = xe->mem.vram.size;
>>           gt->mem.vram.io_start = xe->mem.vram.io_start;
>> +        gt->mem.vram.io_size = xe->mem.vram.io_size;
>>           gt->mem.vram.mapping = xe->mem.vram.mapping;
>>           drm_info(&xe->drm, "VRAM: %pa\n", &gt->mem.vram.size);

