[Intel-gfx] [PATCH 05/24] drm/i915: Replace the array of pages with a scatterlist
Ben Widawsky
ben at bwidawsk.net
Fri Sep 7 03:49:24 CEST 2012
On Tue, 4 Sep 2012 21:02:57 +0100
Chris Wilson <chris at chris-wilson.co.uk> wrote:
> Rather than have multiple data structures for describing our page layout
> in conjunction with the array of pages, we can migrate all users over to
> a scatterlist.
>
> One major advantage, other than unifying the page tracking structures,
> this offers is that we replace the vmalloc'ed array (which can be up to
> a megabyte in size) with a chain of individual pages which helps reduce
> memory pressure.
>
> The disadvantage is that we then do not have a simple array to iterate,
> or to access randomly. The common case for this is in the relocation
> processing, which will typically fit within a single scatterlist page
> and so be almost the same cost as the simple array. For iterating over
> the array, the extra function call could be optimised away, but in
> reality is an insignificant cost of either binding the pages, or
> performing the pwrite/pread.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Now that my eyes are done bleeding, easy ones:
ERROR: space required after that ',' (ctx:VxV)
#69: FILE: drivers/char/agp/intel-gtt.c:99:
+ for_each_sg(st->sgl, sg, num_entries,i)
^
WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
#189: FILE: drivers/gpu/drm/drm_cache.c:117:
+ printk(KERN_ERR "Timed out waiting for cache
flush.\n");
WARNING: Prefer pr_err(... to printk(KERN_ERR, ...
#191: FILE: drivers/gpu/drm/drm_cache.c:119:
+ printk(KERN_ERR "Architecture has no drm_cache.c support\n");
In addition to the inline comments: the review would have been slightly
easier without the s/page/i rename, since it seems to have been done for
no compelling reason anyway.
> ---
> drivers/char/agp/intel-gtt.c | 51 +++++-------
> drivers/gpu/drm/drm_cache.c | 23 ++++++
> drivers/gpu/drm/i915/i915_drv.h | 18 +++--
> drivers/gpu/drm/i915/i915_gem.c | 79 ++++++++++++------
> drivers/gpu/drm/i915/i915_gem_dmabuf.c | 99 +++++++++++++++--------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 +-
> drivers/gpu/drm/i915/i915_gem_gtt.c | 121 ++++++----------------------
> drivers/gpu/drm/i915/i915_gem_tiling.c | 16 ++--
> drivers/gpu/drm/i915/i915_irq.c | 25 +++---
> drivers/gpu/drm/i915/intel_ringbuffer.c | 9 ++-
> include/drm/drmP.h | 1 +
> include/drm/intel-gtt.h | 10 +--
> 12 files changed, 236 insertions(+), 219 deletions(-)
>
> diff --git a/drivers/char/agp/intel-gtt.c b/drivers/char/agp/intel-gtt.c
> index 58e32f7..511d1b1 100644
> --- a/drivers/char/agp/intel-gtt.c
> +++ b/drivers/char/agp/intel-gtt.c
> @@ -84,40 +84,33 @@ static struct _intel_private {
> #define IS_IRONLAKE intel_private.driver->is_ironlake
> #define HAS_PGTBL_EN intel_private.driver->has_pgtbl_enable
>
> -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
> - struct scatterlist **sg_list, int *num_sg)
> +static int intel_gtt_map_memory(struct page **pages,
> + unsigned int num_entries,
> + struct sg_table *st)
> {
> - struct sg_table st;
> struct scatterlist *sg;
> int i;
>
> - if (*sg_list)
> - return 0; /* already mapped (for e.g. resume */
> -
> DBG("try mapping %lu pages\n", (unsigned long)num_entries);
>
> - if (sg_alloc_table(&st, num_entries, GFP_KERNEL))
> + if (sg_alloc_table(st, num_entries, GFP_KERNEL))
> goto err;
>
> - *sg_list = sg = st.sgl;
> -
> - for (i = 0 ; i < num_entries; i++, sg = sg_next(sg))
> + for_each_sg(st->sgl, sg, num_entries,i)
> sg_set_page(sg, pages[i], PAGE_SIZE, 0);
>
> - *num_sg = pci_map_sg(intel_private.pcidev, *sg_list,
> - num_entries, PCI_DMA_BIDIRECTIONAL);
> - if (unlikely(!*num_sg))
> + if (!pci_map_sg(intel_private.pcidev,
> + st->sgl, st->nents, PCI_DMA_BIDIRECTIONAL))
> goto err;
>
> return 0;
>
> err:
> - sg_free_table(&st);
> + sg_free_table(st);
> return -ENOMEM;
> }
> -EXPORT_SYMBOL(intel_gtt_map_memory);
>
> -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
> +static void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
> {
> struct sg_table st;
> DBG("try unmapping %lu pages\n", (unsigned long)mem->page_count);
> @@ -130,7 +123,6 @@ void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg)
>
> sg_free_table(&st);
> }
> -EXPORT_SYMBOL(intel_gtt_unmap_memory);
>
> static void intel_fake_agp_enable(struct agp_bridge_data *bridge, u32 mode)
> {
> @@ -879,8 +871,7 @@ static bool i830_check_flags(unsigned int flags)
> return false;
> }
>
> -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> - unsigned int sg_len,
> +void intel_gtt_insert_sg_entries(struct sg_table *st,
> unsigned int pg_start,
> unsigned int flags)
> {
> @@ -892,12 +883,11 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
>
> /* sg may merge pages, but we have to separate
> * per-page addr for GTT */
> - for_each_sg(sg_list, sg, sg_len, i) {
> + for_each_sg(st->sgl, sg, st->nents, i) {
> len = sg_dma_len(sg) >> PAGE_SHIFT;
> for (m = 0; m < len; m++) {
> dma_addr_t addr = sg_dma_address(sg) + (m << PAGE_SHIFT);
> - intel_private.driver->write_entry(addr,
> - j, flags);
> + intel_private.driver->write_entry(addr, j, flags);
> j++;
> }
> }
> @@ -905,8 +895,10 @@ void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> }
> EXPORT_SYMBOL(intel_gtt_insert_sg_entries);
>
> -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> - struct page **pages, unsigned int flags)
> +static void intel_gtt_insert_pages(unsigned int first_entry,
> + unsigned int num_entries,
> + struct page **pages,
> + unsigned int flags)
> {
> int i, j;
>
> @@ -917,7 +909,6 @@ void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> }
> readl(intel_private.gtt+j-1);
> }
> -EXPORT_SYMBOL(intel_gtt_insert_pages);
>
> static int intel_fake_agp_insert_entries(struct agp_memory *mem,
> off_t pg_start, int type)
> @@ -953,13 +944,15 @@ static int intel_fake_agp_insert_entries(struct agp_memory *mem,
> global_cache_flush();
>
> if (intel_private.base.needs_dmar) {
> - ret = intel_gtt_map_memory(mem->pages, mem->page_count,
> - &mem->sg_list, &mem->num_sg);
> + struct sg_table st;
> +
> + ret = intel_gtt_map_memory(mem->pages, mem->page_count, &st);
> if (ret != 0)
> return ret;
>
> - intel_gtt_insert_sg_entries(mem->sg_list, mem->num_sg,
> - pg_start, type);
> + intel_gtt_insert_sg_entries(&st, pg_start, type);
> + mem->sg_list = st.sgl;
> + mem->num_sg = st.nents;
Can you explain how the corresponding free for the sg_table gets called
here?
> } else
> intel_gtt_insert_pages(pg_start, mem->page_count, mem->pages,
> type);
> diff --git a/drivers/gpu/drm/drm_cache.c b/drivers/gpu/drm/drm_cache.c
> index 08758e0..628a2e0 100644
> --- a/drivers/gpu/drm/drm_cache.c
> +++ b/drivers/gpu/drm/drm_cache.c
> @@ -100,6 +100,29 @@ drm_clflush_pages(struct page *pages[], unsigned long num_pages)
> EXPORT_SYMBOL(drm_clflush_pages);
>
> void
> +drm_clflush_sg(struct sg_table *st)
> +{
> +#if defined(CONFIG_X86)
> + if (cpu_has_clflush) {
> + struct scatterlist *sg;
> + int i;
> +
> + mb();
> + for_each_sg(st->sgl, sg, st->nents, i)
> + drm_clflush_page(sg_page(sg));
> + mb();
> + }
> +
> + if (on_each_cpu(drm_clflush_ipi_handler, NULL, 1) != 0)
> + printk(KERN_ERR "Timed out waiting for cache flush.\n");
> +#else
> + printk(KERN_ERR "Architecture has no drm_cache.c support\n");
> + WARN_ON_ONCE(1);
> +#endif
> +}
> +EXPORT_SYMBOL(drm_clflush_sg);
> +
> +void
> drm_clflush_virt_range(char *addr, unsigned long length)
> {
> #if defined(CONFIG_X86)
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 0747472..1a714fa 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -992,16 +992,11 @@ struct drm_i915_gem_object {
>
> unsigned int has_aliasing_ppgtt_mapping:1;
> unsigned int has_global_gtt_mapping:1;
> + unsigned int has_dma_mapping:1;
>
> - struct page **pages;
> + struct sg_table *pages;
> int pages_pin_count;
>
> - /**
> - * DMAR support
> - */
> - struct scatterlist *sg_list;
> - int num_sg;
> -
> /* prime dma-buf support */
> struct sg_table *sg_table;
> void *dma_buf_vmapping;
> @@ -1328,6 +1323,15 @@ void i915_gem_release_mmap(struct drm_i915_gem_object *obj);
> void i915_gem_lastclose(struct drm_device *dev);
>
> int __must_check i915_gem_object_get_pages(struct drm_i915_gem_object *obj);
> +static inline struct page *i915_gem_object_get_page(struct drm_i915_gem_object *obj, int n)
> +{
> + struct scatterlist *sg = obj->pages->sgl;
> + while (n >= SG_MAX_SINGLE_ALLOC) {
> + sg = sg_chain_ptr(sg + SG_MAX_SINGLE_ALLOC - 1);
> + n -= SG_MAX_SINGLE_ALLOC - 1;
> + }
> + return sg_page(sg+n);
> +}
> static inline void i915_gem_object_pin_pages(struct drm_i915_gem_object *obj)
> {
> BUG_ON(obj->pages == NULL);
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 171bc51..06589a9 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -411,6 +411,8 @@ i915_gem_shmem_pread(struct drm_device *dev,
> int hit_slowpath = 0;
> int prefaulted = 0;
> int needs_clflush = 0;
> + struct scatterlist *sg;
> + int i;
>
> user_data = (char __user *) (uintptr_t) args->data_ptr;
> remain = args->size;
> @@ -439,9 +441,15 @@ i915_gem_shmem_pread(struct drm_device *dev,
>
> offset = args->offset;
>
> - while (remain > 0) {
> + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> struct page *page;
>
> + if (i < offset >> PAGE_SHIFT)
> + continue;
> +
> + if (remain <= 0)
> + break;
> +
> /* Operation in this page
> *
> * shmem_page_offset = offset within page in shmem file
> @@ -452,7 +460,7 @@ i915_gem_shmem_pread(struct drm_device *dev,
> if ((shmem_page_offset + page_length) > PAGE_SIZE)
> page_length = PAGE_SIZE - shmem_page_offset;
>
> - page = obj->pages[offset >> PAGE_SHIFT];
> + page = sg_page(sg);
> page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> (page_to_phys(page) & (1 << 17)) != 0;
>
> @@ -731,6 +739,8 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> int hit_slowpath = 0;
> int needs_clflush_after = 0;
> int needs_clflush_before = 0;
> + int i;
> + struct scatterlist *sg;
>
> user_data = (char __user *) (uintptr_t) args->data_ptr;
> remain = args->size;
> @@ -765,10 +775,16 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> offset = args->offset;
> obj->dirty = 1;
>
> - while (remain > 0) {
> + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i) {
> struct page *page;
> int partial_cacheline_write;
>
> + if (i < offset >> PAGE_SHIFT)
> + continue;
> +
> + if (remain <= 0)
> + break;
> +
> /* Operation in this page
> *
> * shmem_page_offset = offset within page in shmem file
> @@ -787,7 +803,7 @@ i915_gem_shmem_pwrite(struct drm_device *dev,
> ((shmem_page_offset | page_length)
> & (boot_cpu_data.x86_clflush_size - 1));
>
> - page = obj->pages[offset >> PAGE_SHIFT];
> + page = sg_page(sg);
> page_do_bit17_swizzling = obj_do_bit17_swizzling &&
> (page_to_phys(page) & (1 << 17)) != 0;
>
> @@ -1633,6 +1649,7 @@ static void
> i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
> {
> int page_count = obj->base.size / PAGE_SIZE;
> + struct scatterlist *sg;
> int ret, i;
>
> BUG_ON(obj->madv == __I915_MADV_PURGED);
> @@ -1653,19 +1670,21 @@ i915_gem_object_put_pages_gtt(struct drm_i915_gem_object *obj)
> if (obj->madv == I915_MADV_DONTNEED)
> obj->dirty = 0;
>
> - for (i = 0; i < page_count; i++) {
> + for_each_sg(obj->pages->sgl, sg, page_count, i) {
> + struct page *page = sg_page(sg);
> +
> if (obj->dirty)
> - set_page_dirty(obj->pages[i]);
> + set_page_dirty(page);
>
> if (obj->madv == I915_MADV_WILLNEED)
> - mark_page_accessed(obj->pages[i]);
> + mark_page_accessed(page);
>
> - page_cache_release(obj->pages[i]);
> + page_cache_release(page);
> }
> obj->dirty = 0;
>
> - drm_free_large(obj->pages);
> - obj->pages = NULL;
> + sg_free_table(obj->pages);
> + kfree(obj->pages);
> }
>
> static int
> @@ -1682,6 +1701,7 @@ i915_gem_object_put_pages(struct drm_i915_gem_object *obj)
> return -EBUSY;
>
> ops->put_pages(obj);
> + obj->pages = NULL;
>
> list_del(&obj->gtt_list);
> if (i915_gem_object_is_purgeable(obj))
> @@ -1739,6 +1759,8 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
> int page_count, i;
> struct address_space *mapping;
> + struct sg_table *st;
> + struct scatterlist *sg;
> struct page *page;
> gfp_t gfp;
>
> @@ -1749,20 +1771,27 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> BUG_ON(obj->base.read_domains & I915_GEM_GPU_DOMAINS);
> BUG_ON(obj->base.write_domain & I915_GEM_GPU_DOMAINS);
>
> - /* Get the list of pages out of our struct file. They'll be pinned
> - * at this point until we release them.
> - */
> + st = kmalloc(sizeof(*st), GFP_KERNEL);
> + if (st == NULL)
> + return -ENOMEM;
> +
> page_count = obj->base.size / PAGE_SIZE;
> - obj->pages = drm_malloc_ab(page_count, sizeof(struct page *));
> - if (obj->pages == NULL)
> + if (sg_alloc_table(st, page_count, GFP_KERNEL)) {
> + sg_free_table(st);
> + kfree(st);
> return -ENOMEM;
> + }
I think the call here to sg_free_table is bogus.
>
> - /* Fail silently without starting the shrinker */
> + /* Get the list of pages out of our struct file. They'll be pinned
> + * at this point until we release them.
> + *
> + * Fail silently without starting the shrinker
> + */
> mapping = obj->base.filp->f_path.dentry->d_inode->i_mapping;
> gfp = mapping_gfp_mask(mapping);
> gfp |= __GFP_NORETRY | __GFP_NOWARN;
> gfp &= ~(__GFP_IO | __GFP_WAIT);
> - for (i = 0; i < page_count; i++) {
> + for_each_sg(st->sgl, sg, page_count, i) {
> page = shmem_read_mapping_page_gfp(mapping, i, gfp);
> if (IS_ERR(page)) {
> i915_gem_purge(dev_priv, page_count);
> @@ -1785,20 +1814,20 @@ i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
> gfp &= ~(__GFP_IO | __GFP_WAIT);
> }
>
> - obj->pages[i] = page;
> + sg_set_page(sg, page, PAGE_SIZE, 0);
> }
>
> if (i915_gem_object_needs_bit17_swizzle(obj))
> i915_gem_object_do_bit_17_swizzle(obj);
>
> + obj->pages = st;
> return 0;
>
> err_pages:
> - while (i--)
> - page_cache_release(obj->pages[i]);
> -
> - drm_free_large(obj->pages);
> - obj->pages = NULL;
> + for_each_sg(st->sgl, sg, i, page_count)
> + page_cache_release(sg_page(sg));
> + sg_free_table(st);
> + kfree(st);
> return PTR_ERR(page);
> }
>
> @@ -2974,7 +3003,7 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
>
> trace_i915_gem_object_clflush(obj);
>
> - drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
> + drm_clflush_sg(obj->pages);
> }
>
> /** Flushes the GTT write domain for the object if it's dirty. */
> @@ -3724,6 +3753,8 @@ void i915_gem_free_object(struct drm_gem_object *gem_obj)
> i915_gem_object_put_pages(obj);
> i915_gem_object_free_mmap_offset(obj);
>
> + BUG_ON(obj->pages);
> +
> drm_gem_object_release(&obj->base);
> i915_gem_info_remove_obj(dev_priv, obj->base.size);
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> index eca4726..4bb1b94 100644
> --- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> +++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
> @@ -28,33 +28,57 @@
> #include <linux/dma-buf.h>
>
> static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
> - enum dma_data_direction dir)
> + enum dma_data_direction dir)
> {
> struct drm_i915_gem_object *obj = attachment->dmabuf->priv;
> - struct drm_device *dev = obj->base.dev;
> - int npages = obj->base.size / PAGE_SIZE;
> - struct sg_table *sg;
> - int ret;
> - int nents;
> + struct sg_table *st;
> + struct scatterlist *src, *dst;
> + int ret, i;
>
> - ret = i915_mutex_lock_interruptible(dev);
> + ret = i915_mutex_lock_interruptible(obj->base.dev);
> if (ret)
> return ERR_PTR(ret);
>
> ret = i915_gem_object_get_pages(obj);
> if (ret) {
> - sg = ERR_PTR(ret);
> + st = ERR_PTR(ret);
> + goto out;
> + }
> +
> + /* Copy sg so that we make an independent mapping */
> + st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
> + if (st == NULL) {
> + st = ERR_PTR(-ENOMEM);
> + goto out;
> + }
> +
> + ret = sg_alloc_table(st, obj->pages->nents, GFP_KERNEL);
> + if (ret) {
> + kfree(st);
> + st = ERR_PTR(ret);
> + goto out;
> + }
> +
> + src = obj->pages->sgl;
> + dst = st->sgl;
> + for (i = 0; i < obj->pages->nents; i++) {
> + sg_set_page(dst, sg_page(src), PAGE_SIZE, 0);
> + dst = sg_next(dst);
> + src = sg_next(src);
> + }
> +
> + if (!dma_map_sg(attachment->dev, st->sgl, st->nents, dir)) {
> + sg_free_table(st);
> + kfree(st);
> + st = ERR_PTR(-ENOMEM);
> goto out;
> }
>
> - /* link the pages into an SG then map the sg */
> - sg = drm_prime_pages_to_sg(obj->pages, npages);
> - nents = dma_map_sg(attachment->dev, sg->sgl, sg->nents, dir);
> i915_gem_object_pin_pages(obj);
<bikeshed>
I think the right way to go about this is to add drm_prime_pages_to_st,
since you're pushing the whole sg_table-over-scatterlist approach —
other drivers could leverage it too.
</bikeshed>
The lifetime description we discussed on IRC would have helped here as
well.
>
> out:
> - mutex_unlock(&dev->struct_mutex);
> - return sg;
> + mutex_unlock(&obj->base.dev->struct_mutex);
> + return st;
> }
>
> static void i915_gem_unmap_dma_buf(struct dma_buf_attachment *attachment,
> @@ -80,7 +104,9 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
> {
> struct drm_i915_gem_object *obj = dma_buf->priv;
> struct drm_device *dev = obj->base.dev;
> - int ret;
> + struct scatterlist *sg;
> + struct page **pages;
> + int ret, i;
>
> ret = i915_mutex_lock_interruptible(dev);
> if (ret)
> @@ -92,22 +118,33 @@ static void *i915_gem_dmabuf_vmap(struct dma_buf *dma_buf)
> }
>
> ret = i915_gem_object_get_pages(obj);
> - if (ret) {
> - mutex_unlock(&dev->struct_mutex);
> - return ERR_PTR(ret);
> - }
> + if (ret)
> + goto error;
>
> - obj->dma_buf_vmapping = vmap(obj->pages, obj->base.size / PAGE_SIZE, 0, PAGE_KERNEL);
> - if (!obj->dma_buf_vmapping) {
> - DRM_ERROR("failed to vmap object\n");
> - goto out_unlock;
> - }
> + ret = -ENOMEM;
> +
> + pages = drm_malloc_ab(obj->pages->nents, sizeof(struct page *));
> + if (pages == NULL)
> + goto error;
> +
> + for_each_sg(obj->pages->sgl, sg, obj->pages->nents, i)
> + pages[i] = sg_page(sg);
> +
> + obj->dma_buf_vmapping = vmap(pages, obj->pages->nents, 0, PAGE_KERNEL);
> + drm_free_large(pages);
> +
> + if (!obj->dma_buf_vmapping)
> + goto error;
>
> obj->vmapping_count = 1;
> i915_gem_object_pin_pages(obj);
> out_unlock:
> mutex_unlock(&dev->struct_mutex);
> return obj->dma_buf_vmapping;
> +
> +error:
> + mutex_unlock(&dev->struct_mutex);
> + return ERR_PTR(ret);
> }
>
> static void i915_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
The return on vmap failing looks incorrect to me here. Also, I think
leaving the DRM_ERROR would have been nice.
> @@ -184,22 +221,19 @@ static const struct dma_buf_ops i915_dmabuf_ops = {
> };
>
> struct dma_buf *i915_gem_prime_export(struct drm_device *dev,
> - struct drm_gem_object *gem_obj, int flags)
> + struct drm_gem_object *gem_obj, int flags)
> {
> struct drm_i915_gem_object *obj = to_intel_bo(gem_obj);
>
> - return dma_buf_export(obj, &i915_dmabuf_ops,
> - obj->base.size, 0600);
> + return dma_buf_export(obj, &i915_dmabuf_ops, obj->base.size, 0600);
> }
>
> struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> - struct dma_buf *dma_buf)
> + struct dma_buf *dma_buf)
> {
> struct dma_buf_attachment *attach;
> struct sg_table *sg;
> struct drm_i915_gem_object *obj;
> - int npages;
> - int size;
> int ret;
>
> /* is this one of own objects? */
> @@ -223,21 +257,19 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
> goto fail_detach;
> }
>
> - size = dma_buf->size;
> - npages = size / PAGE_SIZE;
> -
> obj = kzalloc(sizeof(*obj), GFP_KERNEL);
> if (obj == NULL) {
> ret = -ENOMEM;
> goto fail_unmap;
> }
>
> - ret = drm_gem_private_object_init(dev, &obj->base, size);
> + ret = drm_gem_private_object_init(dev, &obj->base, dma_buf->size);
> if (ret) {
> kfree(obj);
> goto fail_unmap;
> }
>
> + obj->has_dma_mapping = true;
> obj->sg_table = sg;
> obj->base.import_attach = attach;
>
> @@ -249,3 +281,4 @@ fail_detach:
> dma_buf_detach(dma_buf, attach);
> return ERR_PTR(ret);
> }
> +
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index e6b2205..4ab0083 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -210,7 +210,8 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
> if (ret)
> return ret;
>
> - vaddr = kmap_atomic(obj->pages[reloc->offset >> PAGE_SHIFT]);
> + vaddr = kmap_atomic(i915_gem_object_get_page(obj,
> + reloc->offset >> PAGE_SHIFT));
> *(uint32_t *)(vaddr + page_offset) = reloc->delta;
> kunmap_atomic(vaddr);
> } else {
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 1847731..6746109 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -167,8 +167,7 @@ void i915_gem_cleanup_aliasing_ppgtt(struct drm_device *dev)
> }
>
> static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> - struct scatterlist *sg_list,
> - unsigned sg_len,
> + const struct sg_table *pages,
> unsigned first_entry,
> uint32_t pte_flags)
> {
> @@ -180,12 +179,12 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> struct scatterlist *sg;
>
> /* init sg walking */
> - sg = sg_list;
> + sg = pages->sgl;
> i = 0;
> segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
> m = 0;
>
> - while (i < sg_len) {
> + while (i < pages->nents) {
> pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
>
> for (j = first_pte; j < I915_PPGTT_PT_ENTRIES; j++) {
> @@ -194,13 +193,11 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> pt_vaddr[j] = pte | pte_flags;
>
> /* grab the next page */
> - m++;
> - if (m == segment_len) {
> - sg = sg_next(sg);
> - i++;
> - if (i == sg_len)
> + if (++m == segment_len) {
> + if (++i == pages->nents)
> break;
>
> + sg = sg_next(sg);
> segment_len = sg_dma_len(sg) >> PAGE_SHIFT;
> m = 0;
> }
> @@ -213,44 +210,10 @@ static void i915_ppgtt_insert_sg_entries(struct i915_hw_ppgtt *ppgtt,
> }
> }
>
> -static void i915_ppgtt_insert_pages(struct i915_hw_ppgtt *ppgtt,
> - unsigned first_entry, unsigned num_entries,
> - struct page **pages, uint32_t pte_flags)
> -{
> - uint32_t *pt_vaddr, pte;
> - unsigned act_pd = first_entry / I915_PPGTT_PT_ENTRIES;
> - unsigned first_pte = first_entry % I915_PPGTT_PT_ENTRIES;
> - unsigned last_pte, i;
> - dma_addr_t page_addr;
> -
> - while (num_entries) {
> - last_pte = first_pte + num_entries;
> - last_pte = min_t(unsigned, last_pte, I915_PPGTT_PT_ENTRIES);
> -
> - pt_vaddr = kmap_atomic(ppgtt->pt_pages[act_pd]);
> -
> - for (i = first_pte; i < last_pte; i++) {
> - page_addr = page_to_phys(*pages);
> - pte = GEN6_PTE_ADDR_ENCODE(page_addr);
> - pt_vaddr[i] = pte | pte_flags;
> -
> - pages++;
> - }
> -
> - kunmap_atomic(pt_vaddr);
> -
> - num_entries -= last_pte - first_pte;
> - first_pte = 0;
> - act_pd++;
> - }
> -}
> -
> void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> struct drm_i915_gem_object *obj,
> enum i915_cache_level cache_level)
> {
> - struct drm_device *dev = obj->base.dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> uint32_t pte_flags = GEN6_PTE_VALID;
>
> switch (cache_level) {
Methinks this isn't what you wanted to do.
> @@ -270,26 +233,10 @@ void i915_ppgtt_bind_object(struct i915_hw_ppgtt *ppgtt,
> BUG();
> }
>
> - if (obj->sg_table) {
> - i915_ppgtt_insert_sg_entries(ppgtt,
> - obj->sg_table->sgl,
> - obj->sg_table->nents,
> - obj->gtt_space->start >> PAGE_SHIFT,
> - pte_flags);
> - } else if (dev_priv->mm.gtt->needs_dmar) {
> - BUG_ON(!obj->sg_list);
> -
> - i915_ppgtt_insert_sg_entries(ppgtt,
> - obj->sg_list,
> - obj->num_sg,
> - obj->gtt_space->start >> PAGE_SHIFT,
> - pte_flags);
> - } else
> - i915_ppgtt_insert_pages(ppgtt,
> - obj->gtt_space->start >> PAGE_SHIFT,
> - obj->base.size >> PAGE_SHIFT,
> - obj->pages,
> - pte_flags);
> + i915_ppgtt_insert_sg_entries(ppgtt,
> + obj->sg_table ?: obj->pages,
> + obj->gtt_space->start >> PAGE_SHIFT,
> + pte_flags);
> }
I got lost here. Is it, if there is a prime sg_table use that, otherwise
just use the object's sgt? If so, I think has_dma_mapping is more
readable.
Also, I wonder if ?: pissed off the clang people?
>
> void i915_ppgtt_unbind_object(struct i915_hw_ppgtt *ppgtt,
> @@ -361,44 +308,26 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>
> int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> {
> - struct drm_device *dev = obj->base.dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> -
> - /* don't map imported dma buf objects */
> - if (dev_priv->mm.gtt->needs_dmar && !obj->sg_table)
> - return intel_gtt_map_memory(obj->pages,
> - obj->base.size >> PAGE_SHIFT,
> - &obj->sg_list,
> - &obj->num_sg);
> - else
> + if (obj->has_dma_mapping)
> return 0;
> +
> + if (!dma_map_sg(&obj->base.dev->pdev->dev,
> + obj->pages->sgl, obj->pages->nents,
> + PCI_DMA_BIDIRECTIONAL))
> + return -ENOSPC;
> +
> + return 0;
> }
>
> void i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj,
> enum i915_cache_level cache_level)
> {
> struct drm_device *dev = obj->base.dev;
> - struct drm_i915_private *dev_priv = dev->dev_private;
> unsigned int agp_type = cache_level_to_agp_type(dev, cache_level);
>
> - if (obj->sg_table) {
> - intel_gtt_insert_sg_entries(obj->sg_table->sgl,
> - obj->sg_table->nents,
> - obj->gtt_space->start >> PAGE_SHIFT,
> - agp_type);
> - } else if (dev_priv->mm.gtt->needs_dmar) {
> - BUG_ON(!obj->sg_list);
> -
> - intel_gtt_insert_sg_entries(obj->sg_list,
> - obj->num_sg,
> - obj->gtt_space->start >> PAGE_SHIFT,
> - agp_type);
> - } else
> - intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
> - obj->base.size >> PAGE_SHIFT,
> - obj->pages,
> - agp_type);
> -
> + intel_gtt_insert_sg_entries(obj->sg_table ?: obj->pages,
> + obj->gtt_space->start >> PAGE_SHIFT,
> + agp_type);
> obj->has_global_gtt_mapping = 1;
> }
>
> @@ -418,10 +347,10 @@ void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
>
> interruptible = do_idling(dev_priv);
>
> - if (obj->sg_list) {
> - intel_gtt_unmap_memory(obj->sg_list, obj->num_sg);
> - obj->sg_list = NULL;
> - }
> + if (!obj->has_dma_mapping)
> + dma_unmap_sg(&dev->pdev->dev,
> + obj->pages->sgl, obj->pages->nents,
> + PCI_DMA_BIDIRECTIONAL);
>
> undo_idling(dev_priv, interruptible);
> }
> diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
> index b964df5..8093ecd 100644
> --- a/drivers/gpu/drm/i915/i915_gem_tiling.c
> +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
> @@ -470,18 +470,20 @@ i915_gem_swizzle_page(struct page *page)
> void
> i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
> {
> + struct scatterlist *sg;
> int page_count = obj->base.size >> PAGE_SHIFT;
> int i;
>
> if (obj->bit_17 == NULL)
> return;
>
> - for (i = 0; i < page_count; i++) {
> - char new_bit_17 = page_to_phys(obj->pages[i]) >> 17;
> + for_each_sg(obj->pages->sgl, sg, page_count, i) {
> + struct page *page = sg_page(sg);
> + char new_bit_17 = page_to_phys(page) >> 17;
> if ((new_bit_17 & 0x1) !=
> (test_bit(i, obj->bit_17) != 0)) {
> - i915_gem_swizzle_page(obj->pages[i]);
> - set_page_dirty(obj->pages[i]);
> + i915_gem_swizzle_page(page);
> + set_page_dirty(page);
> }
> }
> }
> @@ -489,6 +491,7 @@ i915_gem_object_do_bit_17_swizzle(struct drm_i915_gem_object *obj)
> void
> i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
> {
> + struct scatterlist *sg;
> int page_count = obj->base.size >> PAGE_SHIFT;
> int i;
>
> @@ -502,8 +505,9 @@ i915_gem_object_save_bit_17_swizzle(struct drm_i915_gem_object *obj)
> }
> }
>
> - for (i = 0; i < page_count; i++) {
> - if (page_to_phys(obj->pages[i]) & (1 << 17))
> + for_each_sg(obj->pages->sgl, sg, page_count, i) {
> + struct page *page = sg_page(sg);
> + if (page_to_phys(page) & (1 << 17))
> __set_bit(i, obj->bit_17);
> else
> __clear_bit(i, obj->bit_17);
> diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
> index d601013..dd49046 100644
> --- a/drivers/gpu/drm/i915/i915_irq.c
> +++ b/drivers/gpu/drm/i915/i915_irq.c
> @@ -888,20 +888,20 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
> struct drm_i915_gem_object *src)
> {
> struct drm_i915_error_object *dst;
> - int page, page_count;
> + int i, count;
> u32 reloc_offset;
>
> if (src == NULL || src->pages == NULL)
> return NULL;
>
> - page_count = src->base.size / PAGE_SIZE;
> + count = src->base.size / PAGE_SIZE;
>
> - dst = kmalloc(sizeof(*dst) + page_count * sizeof(u32 *), GFP_ATOMIC);
> + dst = kmalloc(sizeof(*dst) + count * sizeof(u32 *), GFP_ATOMIC);
> if (dst == NULL)
> return NULL;
>
> reloc_offset = src->gtt_offset;
> - for (page = 0; page < page_count; page++) {
> + for (i = 0; i < count; i++) {
> unsigned long flags;
> void *d;
>
> @@ -924,30 +924,33 @@ i915_error_object_create(struct drm_i915_private *dev_priv,
> memcpy_fromio(d, s, PAGE_SIZE);
> io_mapping_unmap_atomic(s);
> } else {
> + struct page *page;
> void *s;
>
> - drm_clflush_pages(&src->pages[page], 1);
> + page = i915_gem_object_get_page(src, i);
> +
> + drm_clflush_pages(&page, 1);
>
> - s = kmap_atomic(src->pages[page]);
> + s = kmap_atomic(page);
> memcpy(d, s, PAGE_SIZE);
> kunmap_atomic(s);
>
> - drm_clflush_pages(&src->pages[page], 1);
> + drm_clflush_pages(&page, 1);
> }
> local_irq_restore(flags);
>
> - dst->pages[page] = d;
> + dst->pages[i] = d;
>
> reloc_offset += PAGE_SIZE;
> }
> - dst->page_count = page_count;
> + dst->page_count = count;
> dst->gtt_offset = src->gtt_offset;
>
> return dst;
>
> unwind:
> - while (page--)
> - kfree(dst->pages[page]);
> + while (i--)
> + kfree(dst->pages[i]);
> kfree(dst);
> return NULL;
> }
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 55cdb4d..984a0c5 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -464,7 +464,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
> goto err_unref;
>
> pc->gtt_offset = obj->gtt_offset;
> - pc->cpu_page = kmap(obj->pages[0]);
> + pc->cpu_page = kmap(sg_page(obj->pages->sgl));
> if (pc->cpu_page == NULL)
> goto err_unpin;
>
> @@ -491,7 +491,8 @@ cleanup_pipe_control(struct intel_ring_buffer *ring)
> return;
>
> obj = pc->obj;
> - kunmap(obj->pages[0]);
> +
> + kunmap(sg_page(obj->pages->sgl));
> i915_gem_object_unpin(obj);
> drm_gem_object_unreference(&obj->base);
>
> @@ -1026,7 +1027,7 @@ static void cleanup_status_page(struct intel_ring_buffer *ring)
> if (obj == NULL)
> return;
>
> - kunmap(obj->pages[0]);
> + kunmap(sg_page(obj->pages->sgl));
> i915_gem_object_unpin(obj);
> drm_gem_object_unreference(&obj->base);
> ring->status_page.obj = NULL;
> @@ -1053,7 +1054,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
> }
>
> ring->status_page.gfx_addr = obj->gtt_offset;
> - ring->status_page.page_addr = kmap(obj->pages[0]);
> + ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
> if (ring->status_page.page_addr == NULL) {
> ret = -ENOMEM;
> goto err_unpin;
> diff --git a/include/drm/drmP.h b/include/drm/drmP.h
> index d6b67bb..d5f0c16 100644
> --- a/include/drm/drmP.h
> +++ b/include/drm/drmP.h
> @@ -1367,6 +1367,7 @@ extern int drm_remove_magic(struct drm_master *master, drm_magic_t magic);
>
> /* Cache management (drm_cache.c) */
> void drm_clflush_pages(struct page *pages[], unsigned long num_pages);
> +void drm_clflush_sg(struct sg_table *st);
> void drm_clflush_virt_range(char *addr, unsigned long length);
>
> /* Locking IOCTL support (drm_lock.h) */
> diff --git a/include/drm/intel-gtt.h b/include/drm/intel-gtt.h
> index 8e29d55..2e37e9f 100644
> --- a/include/drm/intel-gtt.h
> +++ b/include/drm/intel-gtt.h
> @@ -30,16 +30,10 @@ void intel_gmch_remove(void);
> bool intel_enable_gtt(void);
>
> void intel_gtt_chipset_flush(void);
> -void intel_gtt_unmap_memory(struct scatterlist *sg_list, int num_sg);
> -void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
> -int intel_gtt_map_memory(struct page **pages, unsigned int num_entries,
> - struct scatterlist **sg_list, int *num_sg);
> -void intel_gtt_insert_sg_entries(struct scatterlist *sg_list,
> - unsigned int sg_len,
> +void intel_gtt_insert_sg_entries(struct sg_table *st,
> unsigned int pg_start,
> unsigned int flags);
> -void intel_gtt_insert_pages(unsigned int first_entry, unsigned int num_entries,
> - struct page **pages, unsigned int flags);
> +void intel_gtt_clear_range(unsigned int first_entry, unsigned int num_entries);
>
> /* Special gtt memory types */
> #define AGP_DCACHE_MEMORY 1
--
Ben Widawsky, Intel Open Source Technology Center
More information about the Intel-gfx
mailing list