[Intel-gfx] [PATCH 11/17] drm/i915: Support to create write combined type vmaps

Goel, Akash akash.goel at intel.com
Fri Jul 15 16:30:43 UTC 2016



On 7/15/2016 5:01 PM, Tvrtko Ursulin wrote:
>
> On 10/07/16 14:41, akash.goel at intel.com wrote:
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> vmaps has a provision for controlling the page protection bits, with
>> which
>> we can use to control the mapping type, e.g. WB, WC, UC or even WT.
>> To allow the caller to choose their mapping type, we add a parameter to
>> i915_gem_object_pin_map - but we still only allow one vmap to be cached
>> per object. If the object is currently not pinned, then we recreate the
>> previous vmap with the new access type, but if it was pinned we report an
>> error. This effectively limits the access via i915_gem_object_pin_map
>> to a
>> single mapping type for the lifetime of the object. Not usually a
>> problem,
>> but something to be aware of when setting up the object's vmap.
>>
>> We will want to vary the access type to enable WC mappings of ringbuffer
>> and context objects on !llc platforms, as well as other objects where we
>> need coherent access to the GPU's pages without going through the GTT
>>
>> v2: Remove the redundant braces around pin count check and fix the marker
>>      in documentation (Chris)
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Akash Goel <akash.goel at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_drv.h            |  4 ++-
>>   drivers/gpu/drm/i915/i915_gem.c            | 57
>> +++++++++++++++++++++++-------
>>   drivers/gpu/drm/i915/i915_gem_dmabuf.c     |  2 +-
>>   drivers/gpu/drm/i915/i915_guc_submission.c |  2 +-
>>   drivers/gpu/drm/i915/intel_lrc.c           |  8 ++---
>>   drivers/gpu/drm/i915/intel_ringbuffer.c    |  2 +-
>>   6 files changed, 54 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 6e2ddfa..84afa17 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -3248,6 +3248,7 @@ static inline void
>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>>   /**
>>    * i915_gem_object_pin_map - return a contiguous mapping of the
>> entire object
>>    * @obj - the object to map into kernel address space
>> + * @use_wc - whether the mapping should be using WC or WB pgprot_t
>>    *
>>    * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
>>    * pages and then returns a contiguous mapping of the backing
>> storage into
>> @@ -3259,7 +3260,8 @@ static inline void
>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>>    * Returns the pointer through which to access the mapped object, or an
>>    * ERR_PTR() on error.
>>    */
>> -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object
>> *obj);
>> +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object
>> *obj,
>> +                     bool use_wc);
>
> Could you make it an enum instead of a bool? Commit message suggests
> more modes will potentially be added and if so, and we start with an
> enum straight away, it will make for less churn in the future.
>
> func(something, true) is always also quite unreadabe in the code because
> one has to remember or remind himself what it really means.
>
> Something like func(something, MAP_WC) would be simply self-documenting.
>
Thanks nice suggestion, will do that.
enum only or macros also will do ?
#define MAP_CACHED	0x1
#define MAP_WC		0x2

>>
>>   /**
>>    * i915_gem_object_unpin_map - releases an earlier mapping
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 8f50919..c431b40 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2471,10 +2471,11 @@ i915_gem_object_put_pages(struct
>> drm_i915_gem_object *obj)
>>       list_del(&obj->global_list);
>>
>>       if (obj->mapping) {
>> -        if (is_vmalloc_addr(obj->mapping))
>> -            vunmap(obj->mapping);
>> +        void *ptr = (void *)((uintptr_t)obj->mapping & ~1);
>
> How many bits we have to play with here? Is there a suitable define
> somewhere we could use for a mask instead of hardcoded "1" or we could
> add one if you think that would be better?

As Chris said, will use PAGE_MASK.

>
>> +        if (is_vmalloc_addr(ptr))
>> +            vunmap(ptr);
>>           else
>> -            kunmap(kmap_to_page(obj->mapping));
>> +            kunmap(kmap_to_page(ptr));
>>           obj->mapping = NULL;
>>       }
>>
>> @@ -2647,7 +2648,8 @@ i915_gem_object_get_pages(struct
>> drm_i915_gem_object *obj)
>>   }
>>
>>   /* The 'mapping' part of i915_gem_object_pin_map() below */
>> -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
>> +                bool use_wc)
>>   {
>>       unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
>>       struct sg_table *sgt = obj->pages;
>> @@ -2659,7 +2661,7 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>>       void *addr;
>>
>>       /* A single page can always be kmapped */
>> -    if (n_pages == 1)
>> +    if (n_pages == 1 && !use_wc)
>>           return kmap(sg_page(sgt->sgl));
>>
>>       if (n_pages > ARRAY_SIZE(stack_pages)) {
>> @@ -2675,7 +2677,8 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>>       /* Check that we have the expected number of pages */
>>       GEM_BUG_ON(i != n_pages);
>>
>> -    addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
>> +    addr = vmap(pages, n_pages, VM_NO_GUARD,
>> +            use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL);
>
> For educational benefit, what is the importance and difference between
> PAGE_KERNEL and PAGE_KERNEL_IO here?
>
>>
>>       if (pages != stack_pages)
>>           drm_free_large(pages);
>> @@ -2684,27 +2687,55 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>>   }
>>
>>   /* get, pin, and map the pages of the object into kernel space */
>> -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
>> +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool
>> use_wc)
>>   {
>> +    void *ptr;
>> +    bool has_wc;
>> +    bool pinned;
>>       int ret;
>>
>>       lockdep_assert_held(&obj->base.dev->struct_mutex);
>> +    GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) ==
>> 0);
>>
>>       ret = i915_gem_object_get_pages(obj);
>>       if (ret)
>>           return ERR_PTR(ret);
>>
>> +    GEM_BUG_ON(obj->pages == NULL);
>
> Looks like belongs to i915_gem_object_get_pages and not to callers.
>

As Chris said, this is superfluous, so will remove it.

Best regards
Akash

>>       i915_gem_object_pin_pages(obj);
>>
>> -    if (!obj->mapping) {
>> -        obj->mapping = i915_gem_object_map(obj);
>> -        if (!obj->mapping) {
>> -            i915_gem_object_unpin_pages(obj);
>> -            return ERR_PTR(-ENOMEM);
>> +    pinned = obj->pages_pin_count > 1;
>> +    ptr = (void *)((uintptr_t)obj->mapping & ~1);
>> +    has_wc = (uintptr_t)obj->mapping & 1;
>> +
>> +    if (ptr && has_wc != use_wc) {
>> +        if (pinned) {
>> +            ret = -EBUSY;
>> +            goto err;
>> +        }
>> +
>> +        if (is_vmalloc_addr(ptr))
>> +            vunmap(ptr);
>> +        else
>> +            kunmap(kmap_to_page(ptr));
>> +        ptr = obj->mapping = NULL;
>> +    }
>> +
>> +    if (!ptr) {
>> +        ptr = i915_gem_object_map(obj, use_wc);
>> +        if (!ptr) {
>> +            ret = -ENOMEM;
>> +            goto err;
>>           }
>> +
>> +        obj->mapping = (void *)((uintptr_t)ptr | use_wc);
>>       }
>>
>> -    return obj->mapping;
>> +    return ptr;
>> +
>> +err:
>> +    i915_gem_object_unpin_pages(obj);
>> +    return ERR_PTR(ret);
>>   }
>>
>>   void i915_vma_move_to_active(struct i915_vma *vma,
>> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> index 80bbe43..edcadce 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> @@ -115,7 +115,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf
>> *dma_buf)
>>       if (ret)
>>           return ERR_PTR(ret);
>>
>> -    addr = i915_gem_object_pin_map(obj);
>> +    addr = i915_gem_object_pin_map(obj, false);
>>       mutex_unlock(&dev->struct_mutex);
>>
>>       return addr;
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 009d7c0..c468619 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -1096,7 +1096,7 @@ static int guc_create_log_extras(struct
>> intel_guc *guc)
>>
>>       if (!guc->log.buf_addr) {
>>           /* Create a vmalloc mapping of log buffer pages */
>> -        vaddr = i915_gem_object_pin_map(guc->log.obj);
>> +        vaddr = i915_gem_object_pin_map(guc->log.obj, false);
>>           if (IS_ERR(vaddr)) {
>>               ret = PTR_ERR(vaddr);
>>               DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 70c6990..0d41047 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -971,7 +971,7 @@ static int intel_lr_context_pin(struct
>> i915_gem_context *ctx,
>>       if (ret)
>>           goto err;
>>
>> -    vaddr = i915_gem_object_pin_map(ce->state);
>> +    vaddr = i915_gem_object_pin_map(ce->state, false);
>>       if (IS_ERR(vaddr)) {
>>           ret = PTR_ERR(vaddr);
>>           goto unpin_ctx_obj;
>> @@ -1993,7 +1993,7 @@ lrc_setup_hws(struct intel_engine_cs *engine,
>>       /* The HWSP is part of the default context object in LRC mode. */
>>       engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
>>                          LRC_PPHWSP_PN * PAGE_SIZE;
>> -    hws = i915_gem_object_pin_map(dctx_obj);
>> +    hws = i915_gem_object_pin_map(dctx_obj, false);
>>       if (IS_ERR(hws))
>>           return PTR_ERR(hws);
>>       engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
>> @@ -2324,7 +2324,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>>           return ret;
>>       }
>>
>> -    vaddr = i915_gem_object_pin_map(ctx_obj);
>> +    vaddr = i915_gem_object_pin_map(ctx_obj, false);
>>       if (IS_ERR(vaddr)) {
>>           ret = PTR_ERR(vaddr);
>>           DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
>> @@ -2558,7 +2558,7 @@ void intel_lr_context_reset(struct
>> drm_i915_private *dev_priv,
>>           if (!ctx_obj)
>>               continue;
>>
>> -        vaddr = i915_gem_object_pin_map(ctx_obj);
>> +        vaddr = i915_gem_object_pin_map(ctx_obj, false);
>>           if (WARN_ON(IS_ERR(vaddr)))
>>               continue;
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 736ddba..a195f65 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2007,7 +2007,7 @@ int intel_pin_and_map_ringbuffer_obj(struct
>> drm_i915_private *dev_priv,
>>           if (ret)
>>               goto err_unpin;
>>
>> -        addr = i915_gem_object_pin_map(obj);
>> +        addr = i915_gem_object_pin_map(obj, false);
>>           if (IS_ERR(addr)) {
>>               ret = PTR_ERR(addr);
>>               goto err_unpin;
>>
>
> The rest looks fine to me.
>
> Regards,
>
> Tvrtko


More information about the Intel-gfx mailing list