[Intel-gfx] [PATCH v2] drm/i915: Add ppgtt->kunmap_page_dma vfunc

Chris Wilson chris at chris-wilson.co.uk
Wed May 18 12:22:26 UTC 2016


On Wed, May 18, 2016 at 01:06:06PM +0100, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> Rather than asking itself "am I a Broadwell, am I a Cherryview,
> or am I neither of the two" on low-level page table operations,
> like inserting and clearing PTEs, add a new vfunc kunmap_page_dma
> and set it to the appropriate flavour at ppgtt init time.
> 
> v2: Fix platform condition and group vfunc init more together.
>     (Daniele Ceraolo Spurio)
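
For reference, the vfunc approach above boils down to picking the
flush-vs-no-flush flavour once at init time, roughly along these lines
(paraphrased sketch, function names made up, not the actual hunks):

static void gen8_kunmap_page_dma(struct drm_device *dev, void *vaddr)
{
        /* The gen8 GPU page-table walker snoops the CPU cache, so no
         * flush is needed after updating PTEs.
         */
        kunmap_atomic(vaddr);
}

static void chv_kunmap_page_dma(struct drm_device *dev, void *vaddr)
{
        /* chv (and possibly bxt) walkers are not coherent, so push the
         * PTE writes out of the CPU cache before unmapping.
         */
        drm_clflush_virt_range(vaddr, PAGE_SIZE);
        kunmap_atomic(vaddr);
}

        /* ... and in gen8_ppgtt_init(), instead of asking the platform
         * question on every PTE update:
         */
        if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
                ppgtt->kunmap_page_dma = chv_kunmap_page_dma;
        else
                ppgtt->kunmap_page_dma = gen8_kunmap_page_dma;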

Or we can take a different approach and use a WC mapping for the page.

The patch is a bit messy since we need to feed the vm, rather than the
device, into the unmap function, but the guts are:

@@ -323,19 +323,21 @@ static gen6_pte_t iris_pte_encode(dma_addr_t addr,
        return pte;
 }
 
-static int __setup_page_dma(struct drm_device *dev,
-                           struct i915_page_dma *p, gfp_t flags)
+static int __setup_page_dma(struct i915_address_space *vm,
+                           struct i915_page_dma *p,
+                           gfp_t flags)
 {
-       struct device *device = &dev->pdev->dev;
-
        p->page = alloc_page(flags);
        if (!p->page)
                return -ENOMEM;
 
-       p->daddr = dma_map_page(device,
-                               p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
+       if (vm->pt_kmap_wc)
+               set_pages_array_wc(&p->page, 1);
+
+       p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
+                               PCI_DMA_BIDIRECTIONAL);
 
-       if (dma_mapping_error(device, p->daddr)) {
+       if (dma_mapping_error(vm->dma, p->daddr)) {
                __free_page(p->page);
                return -EINVAL;
        }
@@ -343,94 +345,89 @@ static int __setup_page_dma(struct drm_device *dev,
        return 0;
 }
 
-static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
+static int setup_page_dma(struct i915_address_space *vm,
+                         struct i915_page_dma *p)
 {
-       return __setup_page_dma(dev, p, I915_GFP_DMA);
+       return __setup_page_dma(vm, p, I915_GFP_DMA);
 }
 
-static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
+static void cleanup_page_dma(struct i915_address_space *vm,
+                            struct i915_page_dma *p)
 {
-       if (WARN_ON(!p->page))
-               return;
+       dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+
+       if (vm->pt_kmap_wc)
+               set_pages_array_wb(&p->page, 1);
 
-       dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
        __free_page(p->page);
-       memset(p, 0, sizeof(*p));
 }
 


@@ -1484,8 +1475,16 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
        ppgtt->base.bind_vma = ppgtt_bind_vma;
        ppgtt->debug_dump = gen8_dump_ppgtt;
 
+       /* There are only a few exceptions for gen >= 6: chv and bxt.
+        * And we are not sure about the latter, so play safe for now.
+        */
+       if (IS_CHERRYVIEW(ppgtt->base.dev) || IS_BROXTON(ppgtt->base.dev)) {
+               ppgtt->base.pt_kmap_wc = true;
+               ppgtt->base.pt_kmap_prot = pgprot_writecombine(PAGE_KERNEL_IO);
+       }
+

Advantage: we avoid the clflush after every update.
Disadvantage: we invoke set_memory_*() on every page used by the ppgtt.
(To reduce that cost, I have in mind keeping a pagevec cache of WC
pages.)
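
Something along these lines, say (sketch only; the free_pages pagevec in
i915_address_space is hypothetical, nothing like it exists yet):

/* Recycle PT pages through a small pagevec so the expensive
 * set_pages_array_wc()/set_pages_array_wb() calls are amortised over
 * PAGEVEC_SIZE pages rather than paid for every page.
 */
static struct page *vm_alloc_page(struct i915_address_space *vm)
{
        struct pagevec *pvec = &vm->free_pages; /* hypothetical member */
        struct page *page;

        if (pagevec_count(pvec))
                return pvec->pages[--pvec->nr];

        page = alloc_page(I915_GFP_DMA);
        if (page && vm->pt_kmap_wc)
                set_pages_array_wc(&page, 1);

        return page;
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
        struct pagevec *pvec = &vm->free_pages;

        if (!pagevec_add(pvec, page)) {
                /* The cache is full: flip the whole batch back to WB
                 * and release it in one go.
                 */
                if (vm->pt_kmap_wc)
                        set_pages_array_wb(pvec->pages, pagevec_count(pvec));
                __pagevec_release(pvec);
        }
}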

Sadly, we can't just use kmap_atomic_prot() as it is 32-bit only!!!
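
On 64-bit kmap_atomic() is effectively just page_address(), so to get a
WC view of a PT page we have to switch the attribute of its linear-map
alias up front with set_pages_array_wc(), as the hunk above does; the map
side then reduces to roughly this (sketch, helper name made up):

/* 32-bit could have done
 *   kmap_atomic_prot(p->page, pgprot_writecombine(PAGE_KERNEL_IO));
 * per access. On 64-bit we instead rely on __setup_page_dma() having
 * already flipped the page's linear mapping to WC, so a plain
 * kmap_atomic() already gives us a write-combined view on chv/bxt.
 */
static void *kmap_pt_page(struct i915_page_dma *p)
{
        return kmap_atomic(p->page);
}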

Note that I started with exactly this patch (using a kunmap vfunc) many
moons ago and switched to the pgprot_t based approach instead.
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre

