[Intel-gfx] [PATCH 11/17] drm/i915: Support to create write combined type vmaps
Goel, Akash
akash.goel at intel.com
Fri Jul 15 16:30:43 UTC 2016
On 7/15/2016 5:01 PM, Tvrtko Ursulin wrote:
>
> On 10/07/16 14:41, akash.goel at intel.com wrote:
>> From: Chris Wilson <chris at chris-wilson.co.uk>
>>
>> vmaps has a provision for controlling the page protection bits, with
>> which
>> we can use to control the mapping type, e.g. WB, WC, UC or even WT.
>> To allow the caller to choose their mapping type, we add a parameter to
>> i915_gem_object_pin_map - but we still only allow one vmap to be cached
>> per object. If the object is currently not pinned, then we recreate the
>> previous vmap with the new access type, but if it was pinned we report an
>> error. This effectively limits the access via i915_gem_object_pin_map
>> to a
>> single mapping type for the lifetime of the object. Not usually a
>> problem,
>> but something to be aware of when setting up the object's vmap.
>>
>> We will want to vary the access type to enable WC mappings of ringbuffer
>> and context objects on !llc platforms, as well as other objects where we
>> need coherent access to the GPU's pages without going through the GTT
>>
>> v2: Remove the redundant braces around pin count check and fix the marker
>> in documentation (Chris)
>>
>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> Signed-off-by: Akash Goel <akash.goel at intel.com>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 4 ++-
>> drivers/gpu/drm/i915/i915_gem.c | 57
>> +++++++++++++++++++++++-------
>> drivers/gpu/drm/i915/i915_gem_dmabuf.c | 2 +-
>> drivers/gpu/drm/i915/i915_guc_submission.c | 2 +-
>> drivers/gpu/drm/i915/intel_lrc.c | 8 ++---
>> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 +-
>> 6 files changed, 54 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h
>> b/drivers/gpu/drm/i915/i915_drv.h
>> index 6e2ddfa..84afa17 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -3248,6 +3248,7 @@ static inline void
>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>> /**
>> * i915_gem_object_pin_map - return a contiguous mapping of the
>> entire object
>> * @obj - the object to map into kernel address space
>> + * @use_wc - whether the mapping should be using WC or WB pgprot_t
>> *
>> * Calls i915_gem_object_pin_pages() to prevent reaping of the object's
>> * pages and then returns a contiguous mapping of the backing
>> storage into
>> @@ -3259,7 +3260,8 @@ static inline void
>> i915_gem_object_unpin_pages(struct drm_i915_gem_object *obj)
>> * Returns the pointer through which to access the mapped object, or an
>> * ERR_PTR() on error.
>> */
>> -void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object
>> *obj);
>> +void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object
>> *obj,
>> + bool use_wc);
>
> Could you make it an enum instead of a bool? Commit message suggests
> more modes will potentially be added and if so, and we start with an
> enum straight away, it will make for less churn in the future.
>
> func(something, true) is always also quite unreadabe in the code because
> one has to remember or remind himself what it really means.
>
> Something like func(something, MAP_WC) would be simply self-documenting.
>
Thanks nice suggestion, will do that.
enum only or macros also will do ?
#define MAP_CACHED 0x1
#define MAP_WC 0x2
>>
>> /**
>> * i915_gem_object_unpin_map - releases an earlier mapping
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c
>> b/drivers/gpu/drm/i915/i915_gem.c
>> index 8f50919..c431b40 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2471,10 +2471,11 @@ i915_gem_object_put_pages(struct
>> drm_i915_gem_object *obj)
>> list_del(&obj->global_list);
>>
>> if (obj->mapping) {
>> - if (is_vmalloc_addr(obj->mapping))
>> - vunmap(obj->mapping);
>> + void *ptr = (void *)((uintptr_t)obj->mapping & ~1);
>
> How many bits we have to play with here? Is there a suitable define
> somewhere we could use for a mask instead of hardcoded "1" or we could
> add one if you think that would be better?
As Chris said, will use PAGE_MASK.
>
>> + if (is_vmalloc_addr(ptr))
>> + vunmap(ptr);
>> else
>> - kunmap(kmap_to_page(obj->mapping));
>> + kunmap(kmap_to_page(ptr));
>> obj->mapping = NULL;
>> }
>>
>> @@ -2647,7 +2648,8 @@ i915_gem_object_get_pages(struct
>> drm_i915_gem_object *obj)
>> }
>>
>> /* The 'mapping' part of i915_gem_object_pin_map() below */
>> -static void *i915_gem_object_map(const struct drm_i915_gem_object *obj)
>> +static void *i915_gem_object_map(const struct drm_i915_gem_object *obj,
>> + bool use_wc)
>> {
>> unsigned long n_pages = obj->base.size >> PAGE_SHIFT;
>> struct sg_table *sgt = obj->pages;
>> @@ -2659,7 +2661,7 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>> void *addr;
>>
>> /* A single page can always be kmapped */
>> - if (n_pages == 1)
>> + if (n_pages == 1 && !use_wc)
>> return kmap(sg_page(sgt->sgl));
>>
>> if (n_pages > ARRAY_SIZE(stack_pages)) {
>> @@ -2675,7 +2677,8 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>> /* Check that we have the expected number of pages */
>> GEM_BUG_ON(i != n_pages);
>>
>> - addr = vmap(pages, n_pages, 0, PAGE_KERNEL);
>> + addr = vmap(pages, n_pages, VM_NO_GUARD,
>> + use_wc ? pgprot_writecombine(PAGE_KERNEL_IO) : PAGE_KERNEL);
>
> For educational benefit, what is the importance and difference between
> PAGE_KERNEL and PAGE_KERNEL_IO here?
>
>>
>> if (pages != stack_pages)
>> drm_free_large(pages);
>> @@ -2684,27 +2687,55 @@ static void *i915_gem_object_map(const struct
>> drm_i915_gem_object *obj)
>> }
>>
>> /* get, pin, and map the pages of the object into kernel space */
>> -void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj)
>> +void *i915_gem_object_pin_map(struct drm_i915_gem_object *obj, bool
>> use_wc)
>> {
>> + void *ptr;
>> + bool has_wc;
>> + bool pinned;
>> int ret;
>>
>> lockdep_assert_held(&obj->base.dev->struct_mutex);
>> + GEM_BUG_ON((obj->ops->flags & I915_GEM_OBJECT_HAS_STRUCT_PAGE) ==
>> 0);
>>
>> ret = i915_gem_object_get_pages(obj);
>> if (ret)
>> return ERR_PTR(ret);
>>
>> + GEM_BUG_ON(obj->pages == NULL);
>
> Looks like belongs to i915_gem_object_get_pages and not to callers.
>
As Chris said, this is superfluous, so will remove it.
Best regards
Akash
>> i915_gem_object_pin_pages(obj);
>>
>> - if (!obj->mapping) {
>> - obj->mapping = i915_gem_object_map(obj);
>> - if (!obj->mapping) {
>> - i915_gem_object_unpin_pages(obj);
>> - return ERR_PTR(-ENOMEM);
>> + pinned = obj->pages_pin_count > 1;
>> + ptr = (void *)((uintptr_t)obj->mapping & ~1);
>> + has_wc = (uintptr_t)obj->mapping & 1;
>> +
>> + if (ptr && has_wc != use_wc) {
>> + if (pinned) {
>> + ret = -EBUSY;
>> + goto err;
>> + }
>> +
>> + if (is_vmalloc_addr(ptr))
>> + vunmap(ptr);
>> + else
>> + kunmap(kmap_to_page(ptr));
>> + ptr = obj->mapping = NULL;
>> + }
>> +
>> + if (!ptr) {
>> + ptr = i915_gem_object_map(obj, use_wc);
>> + if (!ptr) {
>> + ret = -ENOMEM;
>> + goto err;
>> }
>> +
>> + obj->mapping = (void *)((uintptr_t)ptr | use_wc);
>> }
>>
>> - return obj->mapping;
>> + return ptr;
>> +
>> +err:
>> + i915_gem_object_unpin_pages(obj);
>> + return ERR_PTR(ret);
>> }
>>
>> void i915_vma_move_to_active(struct i915_vma *vma,
>> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> index 80bbe43..edcadce 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
>> @@ -115,7 +115,7 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf
>> *dma_buf)
>> if (ret)
>> return ERR_PTR(ret);
>>
>> - addr = i915_gem_object_pin_map(obj);
>> + addr = i915_gem_object_pin_map(obj, false);
>> mutex_unlock(&dev->struct_mutex);
>>
>> return addr;
>> diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c
>> b/drivers/gpu/drm/i915/i915_guc_submission.c
>> index 009d7c0..c468619 100644
>> --- a/drivers/gpu/drm/i915/i915_guc_submission.c
>> +++ b/drivers/gpu/drm/i915/i915_guc_submission.c
>> @@ -1096,7 +1096,7 @@ static int guc_create_log_extras(struct
>> intel_guc *guc)
>>
>> if (!guc->log.buf_addr) {
>> /* Create a vmalloc mapping of log buffer pages */
>> - vaddr = i915_gem_object_pin_map(guc->log.obj);
>> + vaddr = i915_gem_object_pin_map(guc->log.obj, false);
>> if (IS_ERR(vaddr)) {
>> ret = PTR_ERR(vaddr);
>> DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index 70c6990..0d41047 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -971,7 +971,7 @@ static int intel_lr_context_pin(struct
>> i915_gem_context *ctx,
>> if (ret)
>> goto err;
>>
>> - vaddr = i915_gem_object_pin_map(ce->state);
>> + vaddr = i915_gem_object_pin_map(ce->state, false);
>> if (IS_ERR(vaddr)) {
>> ret = PTR_ERR(vaddr);
>> goto unpin_ctx_obj;
>> @@ -1993,7 +1993,7 @@ lrc_setup_hws(struct intel_engine_cs *engine,
>> /* The HWSP is part of the default context object in LRC mode. */
>> engine->status_page.gfx_addr = i915_gem_obj_ggtt_offset(dctx_obj) +
>> LRC_PPHWSP_PN * PAGE_SIZE;
>> - hws = i915_gem_object_pin_map(dctx_obj);
>> + hws = i915_gem_object_pin_map(dctx_obj, false);
>> if (IS_ERR(hws))
>> return PTR_ERR(hws);
>> engine->status_page.page_addr = hws + LRC_PPHWSP_PN * PAGE_SIZE;
>> @@ -2324,7 +2324,7 @@ populate_lr_context(struct i915_gem_context *ctx,
>> return ret;
>> }
>>
>> - vaddr = i915_gem_object_pin_map(ctx_obj);
>> + vaddr = i915_gem_object_pin_map(ctx_obj, false);
>> if (IS_ERR(vaddr)) {
>> ret = PTR_ERR(vaddr);
>> DRM_DEBUG_DRIVER("Could not map object pages! (%d)\n", ret);
>> @@ -2558,7 +2558,7 @@ void intel_lr_context_reset(struct
>> drm_i915_private *dev_priv,
>> if (!ctx_obj)
>> continue;
>>
>> - vaddr = i915_gem_object_pin_map(ctx_obj);
>> + vaddr = i915_gem_object_pin_map(ctx_obj, false);
>> if (WARN_ON(IS_ERR(vaddr)))
>> continue;
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 736ddba..a195f65 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2007,7 +2007,7 @@ int intel_pin_and_map_ringbuffer_obj(struct
>> drm_i915_private *dev_priv,
>> if (ret)
>> goto err_unpin;
>>
>> - addr = i915_gem_object_pin_map(obj);
>> + addr = i915_gem_object_pin_map(obj, false);
>> if (IS_ERR(addr)) {
>> ret = PTR_ERR(addr);
>> goto err_unpin;
>>
>
> The rest looks fine to me.
>
> Regards,
>
> Tvrtko
More information about the Intel-gfx
mailing list