[Intel-gfx] [PATCH 12/19] drm/i915/lmem: Bypass aperture when lmem is available
Matthew Auld
matthew.auld at intel.com
Fri Apr 16 14:25:29 UTC 2021
On 14/04/2021 16:33, Tvrtko Ursulin wrote:
>
> On 12/04/2021 10:05, Matthew Auld wrote:
>> From: Anusha Srivatsa <anusha.srivatsa at intel.com>
>>
>> In the scenario where local memory is available, we have to
>> rely on CPU access via lmem directly instead of the aperture.
>>
>> v2:
>> gmch is only relevant for much older hw, therefore we can drop the
>> has_aperture check since it should always be present on such platforms.
>> (Chris)
>>
>> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
>> Cc: Dhinakaran Pandiyan <dhinakaran.pandiyan at intel.com>
>> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> Cc: Chris P Wilson <chris.p.wilson at intel.com>
>> Cc: Daniel Vetter <daniel.vetter at intel.com>
>> Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: CQ Tang <cq.tang at intel.com>
>> Signed-off-by: Anusha Srivatsa <anusha.srivatsa at intel.com>
>> ---
>> drivers/gpu/drm/i915/display/intel_fbdev.c | 22 +++++++++++++++-------
>> drivers/gpu/drm/i915/gem/i915_gem_lmem.c | 15 +++++++++++++++
>> drivers/gpu/drm/i915/gem/i915_gem_lmem.h | 5 +++++
>> drivers/gpu/drm/i915/i915_vma.c | 19 +++++++++++++------
>> 4 files changed, 48 insertions(+), 13 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/display/intel_fbdev.c b/drivers/gpu/drm/i915/display/intel_fbdev.c
>> index 2b37959da747..4af40229f5ec 100644
>> --- a/drivers/gpu/drm/i915/display/intel_fbdev.c
>> +++ b/drivers/gpu/drm/i915/display/intel_fbdev.c
>> @@ -139,14 +139,22 @@ static int intelfb_alloc(struct drm_fb_helper *helper,
>> size = mode_cmd.pitches[0] * mode_cmd.height;
>> size = PAGE_ALIGN(size);
>> - /* If the FB is too big, just don't use it since fbdev is not very
>> - * important and we should probably use that space with FBC or other
>> - * features. */
>> obj = ERR_PTR(-ENODEV);
>> - if (size * 2 < dev_priv->stolen_usable_size)
>> - obj = i915_gem_object_create_stolen(dev_priv, size);
>> - if (IS_ERR(obj))
>> - obj = i915_gem_object_create_shmem(dev_priv, size);
>> + if (HAS_LMEM(dev_priv)) {
>> + obj = i915_gem_object_create_lmem(dev_priv, size,
>> + I915_BO_ALLOC_CONTIGUOUS);
>
> Has to be contiguous? Question for display experts I guess.
>
> [Comes back later.] Ah for iomap? Put a comment to that effect perhaps?
I don't think it has to be, since we could in theory just use pin_map()
underneath, which can already deal with non-contiguous chunks of lmem,
although that might bring in ww locking. I think for now just add a
comment and mark this as XXX, and potentially revisit as a follow up?
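
Something along these lines maybe (just a sketch of the comment I have in
mind, wording very much open to bikeshedding):

	if (HAS_LMEM(dev_priv)) {
		/*
		 * XXX: ALLOC_CONTIGUOUS is only needed here so that the
		 * object can be io-mapped as a single linear window later;
		 * pin_map() could cope with non-contiguous lmem, but might
		 * drag in ww locking. Revisit as a follow up.
		 */
		obj = i915_gem_object_create_lmem(dev_priv, size,
						  I915_BO_ALLOC_CONTIGUOUS);
	}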
>
>> + } else {
>> + /*
>> + * If the FB is too big, just don't use it since fbdev is not very
>> + * important and we should probably use that space with FBC or other
>> + * features.
>> + */
>> + if (size * 2 < dev_priv->stolen_usable_size)
>> + obj = i915_gem_object_create_stolen(dev_priv, size);
>> + if (IS_ERR(obj))
>> + obj = i915_gem_object_create_shmem(dev_priv, size);
>> + }
>
> Could we keep the IS_ERR ordered allocation order to save having to
> re-indent? Bike shed so optional..
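
For reference, one way to keep the fallback chain flat (untested sketch,
keeping the stolen/shmem fallbacks for the non-lmem path only, so the
behaviour matches the patch):

	obj = ERR_PTR(-ENODEV);
	if (HAS_LMEM(dev_priv))
		obj = i915_gem_object_create_lmem(dev_priv, size,
						  I915_BO_ALLOC_CONTIGUOUS);
	/*
	 * If the FB is too big, just don't use it since fbdev is not very
	 * important and we should probably use that space with FBC or other
	 * features.
	 */
	if (IS_ERR(obj) && !HAS_LMEM(dev_priv) &&
	    size * 2 < dev_priv->stolen_usable_size)
		obj = i915_gem_object_create_stolen(dev_priv, size);
	if (IS_ERR(obj) && !HAS_LMEM(dev_priv))
		obj = i915_gem_object_create_shmem(dev_priv, size);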
>
>> +
>> if (IS_ERR(obj)) {
>> drm_err(&dev_priv->drm, "failed to allocate framebuffer\n");
>> return PTR_ERR(obj);
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
>> index 017db8f71130..f44bdd08f7cb 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.c
>> @@ -17,6 +17,21 @@ const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops = {
>> .release = i915_gem_object_release_memory_region,
>> };
>> +void __iomem *
>> +i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
>> + unsigned long n,
>> + unsigned long size)
>> +{
>> + resource_size_t offset;
>> +
>> + GEM_BUG_ON(!i915_gem_object_is_contiguous(obj));
>> +
>> + offset = i915_gem_object_get_dma_address(obj, n);
>> + offset -= obj->mm.region->region.start;
>> +
>> + return io_mapping_map_wc(&obj->mm.region->iomap, offset, size);
>> +}
>> +
>> bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj)
>> {
>> struct intel_memory_region *mr = obj->mm.region;
>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
>> index 036d53c01de9..fac6bc5a5ebb 100644
>> --- a/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_lmem.h
>> @@ -14,6 +14,11 @@ struct intel_memory_region;
>> extern const struct drm_i915_gem_object_ops i915_gem_lmem_obj_ops;
>> +void __iomem *
>> +i915_gem_object_lmem_io_map(struct drm_i915_gem_object *obj,
>> + unsigned long n,
>> + unsigned long size);
>> +
>> bool i915_gem_object_is_lmem(struct drm_i915_gem_object *obj);
>> struct drm_i915_gem_object *
>> diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>> index 07490db51cdc..e24d33aecac4 100644
>> --- a/drivers/gpu/drm/i915/i915_vma.c
>> +++ b/drivers/gpu/drm/i915/i915_vma.c
>> @@ -27,6 +27,7 @@
>> #include "display/intel_frontbuffer.h"
>> +#include "gem/i915_gem_lmem.h"
>> #include "gt/intel_engine.h"
>> #include "gt/intel_engine_heartbeat.h"
>> #include "gt/intel_gt.h"
>> @@ -448,9 +449,11 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>> void __iomem *ptr;
>> int err;
>> - if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
>> - err = -ENODEV;
>> - goto err;
>> + if (!i915_gem_object_is_lmem(vma->obj)) {
>> + if (GEM_WARN_ON(!i915_vma_is_map_and_fenceable(vma))) {
>> + err = -ENODEV;
>> + goto err;
>> + }
>> }
>> GEM_BUG_ON(!i915_vma_is_ggtt(vma));
>> @@ -458,9 +461,13 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
>> ptr = READ_ONCE(vma->iomap);
>> if (ptr == NULL) {
>> - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
>> - vma->node.start,
>> - vma->node.size);
>> + if (i915_gem_object_is_lmem(vma->obj))
>> + ptr = i915_gem_object_lmem_io_map(vma->obj, 0,
>> + vma->obj->base.size);
>
> Can the vma size be bigger than the object here? Given how below works
> of vma->node.size.
I don't know tbh. But in general node.size can definitely be larger than
vma->size/obj->base.size.
For the iomap version below, it's using the mappable aperture, which
requires reserving a vma node in the mappable part of the GGTT first,
so using node.size there makes sense, since the node reflects the window
into the mappable aperture.
For the lmem case though that might be bogus, since the vma has no
relationship with LMEM_BAR; it's really the object that does, hence why
we use obj->base.size instead. Although it might make more sense to just
use pin_map() for the lmem case, if that's possible.
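
i.e. something roughly like the below in pin_iomap (very rough sketch,
assuming pin_map() with I915_MAP_WC is actually usable at this point, and
ignoring the ww locking question and the matching unpin side):

	if (i915_gem_object_is_lmem(vma->obj)) {
		/* pin_map() already copes with non-contiguous lmem */
		void *vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WC);

		if (IS_ERR(vaddr)) {
			err = PTR_ERR(vaddr);
			goto err;
		}
		ptr = (void __iomem *)vaddr;
	} else {
		ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
					vma->node.start,
					vma->node.size);
	}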
>
>> + else
>> + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
>> + vma->node.start,
>> + vma->node.size);
>
> Looks a bit odd that this calls the same io_mapping_map_wc as
> i915_gem_object_lmem_io_map ends up doing. Perhaps that suggests there
> should be a single helper here but I am not sure what would be elegant.
>
> Regards,
>
> Tvrtko
>
>> if (ptr == NULL) {
>> err = -ENOMEM;
>> goto err;
>>
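
On the single-helper point above: maybe something like the below would
do, as one option (helper name made up on the spot):

static void __iomem *vma_iomap(struct i915_vma *vma)
{
	if (i915_gem_object_is_lmem(vma->obj))
		return i915_gem_object_lmem_io_map(vma->obj, 0,
						   vma->obj->base.size);

	return io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap,
				 vma->node.start,
				 vma->node.size);
}

with i915_vma_pin_iomap() then just doing ptr = vma_iomap(vma) when
vma->iomap is still NULL.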