[Intel-gfx] [PATCH v8 21/22] drm/i915/vm_bind: Properly build persistent map sg table
Niranjana Vishwanathapura
niranjana.vishwanathapura at intel.com
Wed Dec 14 04:58:34 UTC 2022
On Mon, Dec 12, 2022 at 06:17:01PM +0000, Matthew Auld wrote:
>On 29/11/2022 07:26, Niranjana Vishwanathapura wrote:
>>Properly build the sg table for persistent mapping which can
>>be partial map of the underlying object. Ensure the sg pages
>>are properly set for page backed regions. The dump capture
>>support requires this for page backed regions.
>>
>>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>>---
>> drivers/gpu/drm/i915/i915_vma.c | 120 +++++++++++++++++++++++++++++++-
>> 1 file changed, 119 insertions(+), 1 deletion(-)
>>
>>diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c
>>index 1b9033865768..68a9ac77b4f2 100644
>>--- a/drivers/gpu/drm/i915/i915_vma.c
>>+++ b/drivers/gpu/drm/i915/i915_vma.c
>>@@ -1298,6 +1298,120 @@ intel_partial_pages(const struct i915_gtt_view *view,
>> return ERR_PTR(ret);
>> }
>>+static unsigned int
>>+intel_copy_dma_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+ u64 offset, u64 length, bool dry_run)
>>+{
>>+ struct scatterlist *dst_sg, *src_sg;
>>+ unsigned int i, len, nents = 0;
>>+
>>+ dst_sg = dst_st->sgl;
>>+ for_each_sgtable_dma_sg(src_st, src_sg, i) {
>>+ if (sg_dma_len(src_sg) <= offset) {
>>+ offset -= sg_dma_len(src_sg);
>>+ continue;
>>+ }
>>+
>>+ nents++;
>>+ len = min(sg_dma_len(src_sg) - offset, length);
>>+ if (!dry_run) {
>>+ sg_dma_address(dst_sg) = sg_dma_address(src_sg) + offset;
>>+ sg_dma_len(dst_sg) = len;
>>+ dst_sg = sg_next(dst_sg);
>>+ }
>>+
>>+ length -= len;
>>+ offset = 0;
>>+ if (!length)
>>+ break;
>>+ }
>>+ WARN_ON_ONCE(length);
>>+
>>+ return nents;
>>+}
>>+
>>+static unsigned int
>>+intel_copy_sg(struct sg_table *src_st, struct sg_table *dst_st,
>>+ u64 offset, u64 length, bool dry_run)
>>+{
>>+ struct scatterlist *dst_sg, *src_sg;
>>+ unsigned int i, len, nents = 0;
>>+
>>+ dst_sg = dst_st->sgl;
>>+ for_each_sgtable_sg(src_st, src_sg, i) {
>>+ if (src_sg->length <= offset) {
>>+ offset -= src_sg->length;
>>+ continue;
>>+ }
>>+
>>+ nents++;
>>+ len = min(src_sg->length - offset, length);
>>+ if (!dry_run) {
>>+ unsigned long pfn;
>>+
>>+ pfn = page_to_pfn(sg_page(src_sg)) + offset / PAGE_SIZE;
>>+ sg_set_page(dst_sg, pfn_to_page(pfn), len, 0);
>>+ dst_sg = sg_next(dst_sg);
>>+ }
>>+
>>+ length -= len;
>>+ offset = 0;
>>+ if (!length)
>>+ break;
>>+ }
>>+ WARN_ON_ONCE(length);
>>+
>>+ return nents;
>>+}
>>+
>>+static noinline struct sg_table *
>>+intel_persistent_partial_pages(const struct i915_gtt_view *view,
>>+ struct drm_i915_gem_object *obj)
>>+{
>>+ u64 offset = view->partial.offset << PAGE_SHIFT;
>>+ struct sg_table *st, *obj_st = obj->mm.pages;
>>+ u64 length = view->partial.size << PAGE_SHIFT;
>>+ struct scatterlist *sg;
>>+ unsigned int nents;
>>+ int ret = -ENOMEM;
>>+
>>+ st = kmalloc(sizeof(*st), GFP_KERNEL);
>>+ if (!st)
>>+ goto err_st_alloc;
>>+
>>+ /* Get required sg_table size */
>>+ nents = intel_copy_dma_sg(obj_st, st, offset, length, true);
>>+ if (i915_gem_object_has_struct_page(obj)) {
>>+ unsigned int pg_nents;
>>+
>>+ pg_nents = intel_copy_sg(obj_st, st, offset, length, true);
>>+ if (nents < pg_nents)
>>+ nents = pg_nents;
>>+ }
>>+
>>+ ret = sg_alloc_table(st, nents, GFP_KERNEL);
>>+ if (ret)
>>+ goto err_sg_alloc;
>>+
>>+ /* Build sg_table for specified <offset, length> section */
>>+ intel_copy_dma_sg(obj_st, st, offset, length, false);
>>+ if (i915_gem_object_has_struct_page(obj))
>>+ intel_copy_sg(obj_st, st, offset, length, false);
>>+
>>+ /* Mark last sg */
>>+ sg = st->sgl;
>>+ while (sg_next(sg))
>>+ sg = sg_next(sg);
>>+ sg_mark_end(sg);
>
>Do we need this bit? The nents is exactly orig_nents, and
>sg_alloc_table will already mark the end for you.
>
Ok, looks like we don't need it as sg_alloc_table() sets it.
While looking at sg_alloc_table(), looks like it is possible for it
to return with -ENOMEM with partial built table, but we are not
cleaning it anywhere. Something to consider later may be.
https://elixir.bootlin.com/linux/latest/source/lib/scatterlist.c#L330
>Is it not possible to re-use remap_contiguous_pages() somehow? Also do
>we need the dry_run bit if we use sg_trim()? Maybe something like:
>
>dst = sg_alloc_table(partial.size);
>
>remap_contigious_pages_sg(dst, src);
>i915_sg_trim(dst);
>
>dst->nents = 0;
>sg = remap_contigious_pages_dma_sg(dst, src);
>
But then those remap_contiguous_pages[_dma]_sg would look just like
out intel_copy[_dma]_sg().
And the problem I have with i915_sg_trim() is that it uses _sg iterator
only and not _dma_sg iterator. I think at least in theory, it is possible
to have more number of dma_sg elements than the sg (page) elements. Right?
That is why I am doing a dry run of both above and getting the max of both.
Niranjana
>>+
>>+ return st;
>>+
>>+err_sg_alloc:
>>+ kfree(st);
>>+err_st_alloc:
>>+ return ERR_PTR(ret);
>>+}
>>+
>> static int
>> __i915_vma_get_pages(struct i915_vma *vma)
>> {
>>@@ -1330,7 +1444,11 @@ __i915_vma_get_pages(struct i915_vma *vma)
>> break;
>> case I915_GTT_VIEW_PARTIAL:
>>- pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>>+ if (i915_vma_is_persistent(vma))
>>+ pages = intel_persistent_partial_pages(&vma->gtt_view,
>>+ vma->obj);
>>+ else
>>+ pages = intel_partial_pages(&vma->gtt_view, vma->obj);
>> break;
>> }
More information about the Intel-gfx
mailing list