[Intel-gfx] [PATCH] drm/i915: Use the existing pages when retrying to DMA map
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 9 13:04:55 UTC 2017
On Mon, Jan 09, 2017 at 12:40:07PM +0000, Tvrtko Ursulin wrote:
>
> On 09/01/2017 12:28, Chris Wilson wrote:
> >On Mon, Jan 09, 2017 at 12:23:37PM +0000, Tvrtko Ursulin wrote:
> >>
> >>Hi,
> >>
> >>On 20/12/2016 13:42, Tvrtko Ursulin wrote:
> >>>From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> >>>
> >>>Rather than freeing and re-allocating the pages when DMA mapping
> >>>in large chunks fails, we can just rebuild the sg table with no
> >>>coalescing.
> >>>
> >>>Also change back the page counter to unsigned int because that
> >>>is what the sg API supports.
> >>>
> >>>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> >>>Cc: Chris Wilson <chris at chris-wilson.co.uk>
> >>>---
> >>>Only compile tested!
> >>>---
> >>>drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++++++++----------
> >>>1 file changed, 30 insertions(+), 10 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>index 5275f6248ce3..e73f9f5a5d23 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>@@ -2340,12 +2340,36 @@ static void i915_sg_trim(struct sg_table *orig_st)
> >>> *orig_st = new_st;
> >>>}
> >>>
> >>>+static void i915_sg_uncoalesce(struct sg_table *orig_st, unsigned long nents)
> >>>+{
> >>>+ struct sg_table new_st;
> >>>+ struct scatterlist *new_sg;
> >>>+ struct sgt_iter sgt_iter;
> >>>+ struct page *page;
> >>>+
> >>>+ if (sg_alloc_table(&new_st, nents, GFP_KERNEL))
> >>>+ return;
> >>>+
> >>>+ new_sg = new_st.sgl;
> >>>+ for_each_sgt_page(page, sgt_iter, orig_st) {
> >>>+ sg_set_page(new_sg, page, PAGE_SIZE, 0);
> >>>+ /* called before being DMA mapped, no need to copy sg->dma_* */
> >>>+ new_sg = sg_next(new_sg);
> >>>+ }
> >>>+
> >>>+ GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
> >>>+
> >>>+ sg_free_table(orig_st);
> >>>+
> >>>+ *orig_st = new_st;
> >>>+}
> >>>+
> >>>static struct sg_table *
> >>>i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>>{
> >>> struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> >>>- const unsigned long page_count = obj->base.size / PAGE_SIZE;
> >>>- unsigned long i;
> >>>+ const unsigned int page_count = obj->base.size / PAGE_SIZE;
> >>>+ unsigned int i;
> >>> struct address_space *mapping;
> >>> struct sg_table *st;
> >>> struct scatterlist *sg;
> >>>@@ -2371,7 +2395,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>> if (st == NULL)
> >>> return ERR_PTR(-ENOMEM);
> >>>
> >>>-rebuild_st:
> >>> if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> >>> kfree(st);
> >>> return ERR_PTR(-ENOMEM);
> >>>@@ -2429,6 +2452,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>> /* Trim unused sg entries to avoid wasting memory. */
> >>> i915_sg_trim(st);
> >>>
> >>>+prepare_gtt:
> >>> ret = i915_gem_gtt_prepare_pages(obj, st);
> >>> if (ret) {
> >>> /* DMA remapping failed? One possible cause is that
> >>>@@ -2436,16 +2460,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>> * for PAGE_SIZE chunks instead may be helpful.
> >>> */
> >>> if (max_segment > PAGE_SIZE) {
> >>>- for_each_sgt_page(page, sgt_iter, st)
> >>>- put_page(page);
> >>>- sg_free_table(st);
> >>>-
> >>>+ i915_sg_uncoalesce(st, page_count);
> >>> max_segment = PAGE_SIZE;
> >>>- goto rebuild_st;
> >>>+ goto prepare_gtt;
> >>> } else {
> >>> dev_warn(&dev_priv->drm.pdev->dev,
> >>>- "Failed to DMA remap %lu pages\n",
> >>>- page_count);
> >>>+ "Failed to DMA remap %u pages\n", page_count);
> >>> goto err_pages;
> >>> }
> >>> }
> >>>
> >>
> >>Are you still against this? As a reminder it saves a
> >>put_page/allocate page-from-shmemfs cycle on dma mapping failures.
> >
> >I still regard it as incomplete. Why bother saving that trip and not the
> >actual page allocation itself.
>
> Page allocation is exactly what it avoids re-doing!?
It is not. The shmemfs pages are not being freed nor reallocated, just
their use count being manipulated.
> You meant sg_alloc_table maybe? We could move the trim to be last
> and do the uncoalesce in place but it would be so much more complex
> that I am not sure it is worth it for this edge case.
Only the sg_table is being reallocated and doing actual kfree/kmalloc.
> The patch above was a fast and simple improvement to the DMA remapping
> retry you added.
I didn't value it as being a significant improvement and thought you
had a more complete solution in mind.
For starters we can reduce the duplication here with
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed3cd1a5f9bb..1bfec811f9b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2199,20 +2199,12 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
invalidate_mapping_pages(mapping, 0, (loff_t)-1);
}
-static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
+static void free_sg_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct sgt_iter sgt_iter;
struct page *page;
- __i915_gem_object_release_shmem(obj, pages, true);
-
- i915_gem_gtt_finish_pages(obj, pages);
-
- if (i915_gem_object_needs_bit17_swizzle(obj))
- i915_gem_object_save_bit_17_swizzle(obj, pages);
-
for_each_sgt_page(page, sgt_iter, pages) {
if (obj->mm.dirty)
set_page_dirty(page);
@@ -2222,9 +2214,23 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
put_page(page);
}
+ sg_free_table(pages);
+
obj->mm.dirty = false;
+}
- sg_free_table(pages);
+static void
+i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ __i915_gem_object_release_shmem(obj, pages, true);
+
+ i915_gem_gtt_finish_pages(obj, pages);
+
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+ free_sg_pages(obj, pages);
kfree(pages);
}
@@ -2322,7 +2328,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
struct address_space *mapping;
struct sg_table *st;
struct scatterlist *sg;
- struct sgt_iter sgt_iter;
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment;
@@ -2409,10 +2414,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
* for PAGE_SIZE chunks instead may be helpful.
*/
if (max_segment > PAGE_SIZE) {
- for_each_sgt_page(page, sgt_iter, st)
- put_page(page);
- sg_free_table(st);
-
+ free_sg_pages(obj, st);
max_segment = PAGE_SIZE;
goto rebuild_st;
} else {
@@ -2431,9 +2433,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
err_sg:
sg_mark_end(sg);
err_pages:
- for_each_sgt_page(page, sgt_iter, st)
- put_page(page);
- sg_free_table(st);
+ free_sg_pages(obj, st);
kfree(st);
/* shmemfs first checks if there is enough memory to allocate the page
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list