[Intel-gfx] [PATCH] drm/i915: Use the existing pages when retrying to DMA map
Chris Wilson
chris at chris-wilson.co.uk
Mon Jan 9 13:04:55 UTC 2017
On Mon, Jan 09, 2017 at 12:40:07PM +0000, Tvrtko Ursulin wrote:
>
> On 09/01/2017 12:28, Chris Wilson wrote:
> >On Mon, Jan 09, 2017 at 12:23:37PM +0000, Tvrtko Ursulin wrote:
> >>
> >>Hi,
> >>
> >>On 20/12/2016 13:42, Tvrtko Ursulin wrote:
> >>>From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> >>>
> >>>Rather than freeing and re-allocating the pages when DMA mapping
> >>>in large chunks fails, we can just rebuild the sg table with no
> >>>coalescing.
> >>>
> >>>Also change back the page counter to unsigned int because that
> >>>is what the sg API supports.
> >>>
> >>>Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> >>>Cc: Chris Wilson <chris at chris-wilson.co.uk>
> >>>---
> >>>Only compile tested!
> >>>---
> >>>drivers/gpu/drm/i915/i915_gem.c | 40 ++++++++++++++++++++++++++++++----------
> >>>1 file changed, 30 insertions(+), 10 deletions(-)
> >>>
> >>>diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> >>>index 5275f6248ce3..e73f9f5a5d23 100644
> >>>--- a/drivers/gpu/drm/i915/i915_gem.c
> >>>+++ b/drivers/gpu/drm/i915/i915_gem.c
> >>>@@ -2340,12 +2340,36 @@ static void i915_sg_trim(struct sg_table *orig_st)
> >>> *orig_st = new_st;
> >>>}
> >>>
> >>>+static void i915_sg_uncoalesce(struct sg_table *orig_st, unsigned long nents)
> >>>+{
> >>>+ struct sg_table new_st;
> >>>+ struct scatterlist *new_sg;
> >>>+ struct sgt_iter sgt_iter;
> >>>+ struct page *page;
> >>>+
> >>>+ if (sg_alloc_table(&new_st, nents, GFP_KERNEL))
> >>>+ return;
> >>>+
> >>>+ new_sg = new_st.sgl;
> >>>+ for_each_sgt_page(page, sgt_iter, orig_st) {
> >>>+ sg_set_page(new_sg, page, PAGE_SIZE, 0);
> >>>+ /* called before being DMA mapped, no need to copy sg->dma_* */
> >>>+ new_sg = sg_next(new_sg);
> >>>+ }
> >>>+
> >>>+ GEM_BUG_ON(new_sg); /* Should walk exactly nents and hit the end */
> >>>+
> >>>+ sg_free_table(orig_st);
> >>>+
> >>>+ *orig_st = new_st;
> >>>+}
> >>>+
> >>>static struct sg_table *
> >>>i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>>{
> >>> struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
> >>>- const unsigned long page_count = obj->base.size / PAGE_SIZE;
> >>>- unsigned long i;
> >>>+ const unsigned int page_count = obj->base.size / PAGE_SIZE;
> >>>+ unsigned int i;
> >>> struct address_space *mapping;
> >>> struct sg_table *st;
> >>> struct scatterlist *sg;
> >>>@@ -2371,7 +2395,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>> if (st == NULL)
> >>> return ERR_PTR(-ENOMEM);
> >>>
> >>>-rebuild_st:
> >>> if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> >>> kfree(st);
> >>> return ERR_PTR(-ENOMEM);
> >>>@@ -2429,6 +2452,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>> /* Trim unused sg entries to avoid wasting memory. */
> >>> i915_sg_trim(st);
> >>>
> >>>+prepare_gtt:
> >>> ret = i915_gem_gtt_prepare_pages(obj, st);
> >>> if (ret) {
> >>> /* DMA remapping failed? One possible cause is that
> >>>@@ -2436,16 +2460,12 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> >>> * for PAGE_SIZE chunks instead may be helpful.
> >>> */
> >>> if (max_segment > PAGE_SIZE) {
> >>>- for_each_sgt_page(page, sgt_iter, st)
> >>>- put_page(page);
> >>>- sg_free_table(st);
> >>>-
> >>>+ i915_sg_uncoalesce(st, page_count);
> >>> max_segment = PAGE_SIZE;
> >>>- goto rebuild_st;
> >>>+ goto prepare_gtt;
> >>> } else {
> >>> dev_warn(&dev_priv->drm.pdev->dev,
> >>>- "Failed to DMA remap %lu pages\n",
> >>>- page_count);
> >>>+ "Failed to DMA remap %u pages\n", page_count);
> >>> goto err_pages;
> >>> }
> >>> }
> >>>
> >>
> >>Are you still against this? As a reminder it saves a
> >>put_page/allocate page-from-shmemfs cycle on dma mapping failures.
> >
> >I still regard it as incomplete. Why bother saving that trip and not the
> >actual page allocation itself.
>
> Page allocation is exactly what it avoids re-doing!?
It is not. The shmemfs pages are not being freed nor reallocated, just
their use count being manipulated.
> You meant sg_alloc_table maybe? We could move the trim to be last
> and do the uncoalesce in place but it would be so much more complex
> that I am not sure it is worth it for this edge case.
Only the sg_table is being reallocated and doing actual kfree/kmalloc.
> The patch above was a fast and simple improvement to the DMA remapping
> retry you added.
I didn't value it as being a significant improvement and thought you
had a more complete solution in mind.
For starters we can reduce the duplication here with
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index ed3cd1a5f9bb..1bfec811f9b7 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2199,20 +2199,12 @@ void __i915_gem_object_invalidate(struct drm_i915_gem_object *obj)
invalidate_mapping_pages(mapping, 0, (loff_t)-1);
}
-static void
-i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
- struct sg_table *pages)
+static void free_sg_pages(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
{
struct sgt_iter sgt_iter;
struct page *page;
- __i915_gem_object_release_shmem(obj, pages, true);
-
- i915_gem_gtt_finish_pages(obj, pages);
-
- if (i915_gem_object_needs_bit17_swizzle(obj))
- i915_gem_object_save_bit_17_swizzle(obj, pages);
-
for_each_sgt_page(page, sgt_iter, pages) {
if (obj->mm.dirty)
set_page_dirty(page);
@@ -2222,9 +2214,23 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
put_page(page);
}
+ sg_free_table(pages);
+
obj->mm.dirty = false;
+}
- sg_free_table(pages);
+static void
+i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj,
+ struct sg_table *pages)
+{
+ __i915_gem_object_release_shmem(obj, pages, true);
+
+ i915_gem_gtt_finish_pages(obj, pages);
+
+ if (i915_gem_object_needs_bit17_swizzle(obj))
+ i915_gem_object_save_bit_17_swizzle(obj, pages);
+
+ free_sg_pages(obj, pages);
kfree(pages);
}
@@ -2322,7 +2328,6 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
struct address_space *mapping;
struct sg_table *st;
struct scatterlist *sg;
- struct sgt_iter sgt_iter;
struct page *page;
unsigned long last_pfn = 0; /* suppress gcc warning */
unsigned int max_segment;
@@ -2409,10 +2414,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
* for PAGE_SIZE chunks instead may be helpful.
*/
if (max_segment > PAGE_SIZE) {
- for_each_sgt_page(page, sgt_iter, st)
- put_page(page);
- sg_free_table(st);
-
+ free_sg_pages(obj, st);
max_segment = PAGE_SIZE;
goto rebuild_st;
} else {
@@ -2431,9 +2433,7 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
err_sg:
sg_mark_end(sg);
err_pages:
- for_each_sgt_page(page, sgt_iter, st)
- put_page(page);
- sg_free_table(st);
+ free_sg_pages(obj, st);
kfree(st);
/* shmemfs first checks if there is enough memory to allocate the page
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list