[Intel-gfx] [PATCH v4 09/24] drm/i915: Track GEN6 page table usage
Michel Thierry
michel.thierry at intel.com
Mon Feb 23 07:39:25 PST 2015
On 2/20/2015 4:41 PM, Mika Kuoppala wrote:
> Michel Thierry <michel.thierry at intel.com> writes:
>
>> From: Ben Widawsky <benjamin.widawsky at intel.com>
>>
>> Instead of implementing the full tracking + dynamic allocation, this
>> patch does a bit less than half of the work, by tracking and warning on
>> unexpected conditions. The tracking itself follows which PTEs within a
>> page table are currently being used for objects. The next patch will
>> modify this to actually allocate the page tables only when necessary.
>>
>> With the current patch there isn't much in the way of making a gen
>> agnostic range allocation function. However, in the next patch we'll add
>> more specificity which makes having separate functions a bit easier to
>> manage.
>>
>> One important change introduced here is that DMA mappings are
>> created/destroyed at the same time that page directories/tables are
>> allocated/deallocated.
>>
>> Notice that aliasing PPGTT is not managed here. The patch which actually
>> begins dynamic allocation/teardown explains the reasoning for this.
>>
>> v2: s/pdp.page_directory/pdp.page_directorys
>> Make a scratch page allocation helper
>>
>> v3: Rebase and expand commit message.
>>
>> v4: Allocate required pagetables only when they are needed, _bind_to_vm
>> instead of bind_vma (Daniel).
>>
>> v5: Rebased to remove the unnecessary noise in the diff, also:
>> - PDE mask is GEN agnostic, renamed GEN6_PDE_MASK to I915_PDE_MASK.
>> - Removed unnecessary checks in gen6_alloc_va_range.
>> - Changed map/unmap_px_single macros to use dma functions directly and
>> be part of a static inline function instead.
>> - Moved drm_device plumbing through page tables operation to its own
>> patch.
>> - Moved allocate/teardown_va_range calls until they are fully
>> implemented (in subsequent patch).
>> - Merged pt and scratch_pt unmap_and_free path.
>> - Moved scratch page allocator helper to the patch that will use it.
>>
>> v6: Reduce complexity by not tearing down pagetables dynamically, the
>> same can be achieved while freeing empty vms. (Daniel)
>>
>> Cc: Daniel Vetter <daniel at ffwll.ch>
>> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
>> Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v3+)
>> ---
>> drivers/gpu/drm/i915/i915_gem_gtt.c | 191 +++++++++++++++++++++++++-----------
>> drivers/gpu/drm/i915/i915_gem_gtt.h | 75 ++++++++++++++
>> 2 files changed, 206 insertions(+), 60 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> index e2bcd10..760585e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
>> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
>> @@ -274,29 +274,88 @@ static gen6_gtt_pte_t iris_pte_encode(dma_addr_t addr,
>> return pte;
>> }
>>
>> -static void unmap_and_free_pt(struct i915_page_table_entry *pt, struct drm_device *dev)
>> +#define i915_dma_unmap_single(px, dev) \
>> + __i915_dma_unmap_single((px)->daddr, dev)
>> +
>> +static inline void __i915_dma_unmap_single(dma_addr_t daddr,
>> + struct drm_device *dev)
>> +{
>> + struct device *device = &dev->pdev->dev;
>> +
>> + dma_unmap_page(device, daddr, 4096, PCI_DMA_BIDIRECTIONAL);
>> +}
>> +
>> +/**
>> + * i915_dma_map_px_single() - Create a dma mapping for a page table/dir/etc.
>> + * @px: Page table/dir/etc to get a DMA map for
>> + * @dev: drm device
>> + *
>> + * Page table allocations are unified across all gens. They always require a
>> + * single 4k allocation, as well as a DMA mapping. If we keep the structs
>> + * symmetric here, the simple macro covers us for every page table type.
>> + *
>> + * Return: 0 if success.
>> + */
>> +#define i915_dma_map_px_single(px, dev) \
>> + i915_dma_map_page_single((px)->page, (dev), &(px)->daddr)
>> +
> If this is symmetrical to i915_dma_unmap_single() is the _px_ needed?
>
>> +static inline int i915_dma_map_page_single(struct page *page,
>> + struct drm_device *dev,
>> + dma_addr_t *daddr)
>> +{
>> + struct device *device = &dev->pdev->dev;
>> +
>> + *daddr = dma_map_page(device, page, 0, 4096, PCI_DMA_BIDIRECTIONAL);
>> + return dma_mapping_error(device, *daddr);
>> +}
>> +
>> +static void unmap_and_free_pt(struct i915_page_table_entry *pt,
>> + struct drm_device *dev)
>> {
>> if (WARN_ON(!pt->page))
>> return;
>> +
>> + i915_dma_unmap_single(pt, dev);
>> __free_page(pt->page);
>> + kfree(pt->used_ptes);
>> kfree(pt);
>> }
>>
>> static struct i915_page_table_entry *alloc_pt_single(struct drm_device *dev)
>> {
>> struct i915_page_table_entry *pt;
>> + const size_t count = INTEL_INFO(dev)->gen >= 8 ?
>> + GEN8_PTES_PER_PAGE : I915_PPGTT_PT_ENTRIES;
>> + int ret = -ENOMEM;
>>
>> pt = kzalloc(sizeof(*pt), GFP_KERNEL);
>> if (!pt)
>> return ERR_PTR(-ENOMEM);
>>
>> + pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
>> + GFP_KERNEL);
>> +
>> + if (!pt->used_ptes)
>> + goto fail_bitmap;
>> +
>> pt->page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>> - if (!pt->page) {
>> - kfree(pt);
>> - return ERR_PTR(-ENOMEM);
>> - }
>> + if (!pt->page)
>> + goto fail_page;
>> +
>> + ret = i915_dma_map_px_single(pt, dev);
>> + if (ret)
>> + goto fail_dma;
>>
>> return pt;
>> +
>> +fail_dma:
>> + __free_page(pt->page);
>> +fail_page:
>> + kfree(pt->used_ptes);
>> +fail_bitmap:
>> + kfree(pt);
>> +
>> + return ERR_PTR(ret);
>> }
>>
>> /**
>> @@ -836,26 +895,36 @@ static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
>> }
>> }
>>
>> -static void gen6_write_pdes(struct i915_hw_ppgtt *ppgtt)
>> +/* Write pde (index) from the page directory @pd to the page table @pt */
>> +static void gen6_write_pdes(struct i915_page_directory_entry *pd,
> For me it seems that you will write only one pde entry so s/pdes/pde ?
>
>> + const int pde, struct i915_page_table_entry *pt)
>> {
>> - struct drm_i915_private *dev_priv = ppgtt->base.dev->dev_private;
>> - gen6_gtt_pte_t __iomem *pd_addr;
>> - uint32_t pd_entry;
>> - int i;
>> + struct i915_hw_ppgtt *ppgtt =
>> + container_of(pd, struct i915_hw_ppgtt, pd);
>> + u32 pd_entry;
>>
>> - WARN_ON(ppgtt->pd.pd_offset & 0x3f);
>> - pd_addr = (gen6_gtt_pte_t __iomem*)dev_priv->gtt.gsm +
>> - ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
>> - for (i = 0; i < ppgtt->num_pd_entries; i++) {
>> - dma_addr_t pt_addr;
>> + pd_entry = GEN6_PDE_ADDR_ENCODE(pt->daddr);
>> + pd_entry |= GEN6_PDE_VALID;
>>
>> - pt_addr = ppgtt->pd.page_tables[i]->daddr;
>> - pd_entry = GEN6_PDE_ADDR_ENCODE(pt_addr);
>> - pd_entry |= GEN6_PDE_VALID;
>> + writel(pd_entry, ppgtt->pd_addr + pde);
>>
>> - writel(pd_entry, pd_addr + i);
>> - }
>> - readl(pd_addr);
>> + /* XXX: Caller needs to make sure the write completes if necessary */
>> +}
> Move this comment on top of the function and lift the XXX:
>
>> +
>> +/* Write all the page tables found in the ppgtt structure to incrementing page
>> + * directories. */
>> +static void gen6_write_page_range(struct drm_i915_private *dev_priv,
>> + struct i915_page_directory_entry *pd, uint32_t start, uint32_t length)
>> +{
>> + struct i915_page_table_entry *pt;
>> + uint32_t pde, temp;
>> +
>> + gen6_for_each_pde(pt, pd, start, length, temp, pde)
>> + gen6_write_pdes(pd, pde, pt);
>> +
>> + /* Make sure write is complete before other code can use this page
>> + * table. Also require for WC mapped PTEs */
>> + readl(dev_priv->gtt.gsm);
>> }
>>
>> static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
>> @@ -1071,6 +1140,28 @@ static void gen6_ppgtt_unmap_pages(struct i915_hw_ppgtt *ppgtt)
>> 4096, PCI_DMA_BIDIRECTIONAL);
>> }
>>
>> +static int gen6_alloc_va_range(struct i915_address_space *vm,
>> + uint64_t start, uint64_t length)
>> +{
>> + struct i915_hw_ppgtt *ppgtt =
>> + container_of(vm, struct i915_hw_ppgtt, base);
>> + struct i915_page_table_entry *pt;
>> + uint32_t pde, temp;
>> +
>> + gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
>> + DECLARE_BITMAP(tmp_bitmap, I915_PPGTT_PT_ENTRIES);
>> +
>> + bitmap_zero(tmp_bitmap, I915_PPGTT_PT_ENTRIES);
>> + bitmap_set(tmp_bitmap, gen6_pte_index(start),
>> + gen6_pte_count(start, length));
>> +
>> + bitmap_or(pt->used_ptes, pt->used_ptes, tmp_bitmap,
>> + I915_PPGTT_PT_ENTRIES);
>> + }
>> +
>> + return 0;
>> +}
>> +
>> static void gen6_ppgtt_free(struct i915_hw_ppgtt *ppgtt)
>> {
>> int i;
>> @@ -1117,20 +1208,24 @@ alloc:
>> 0, dev_priv->gtt.base.total,
>> 0);
>> if (ret)
>> - return ret;
>> + goto err_out;
>>
>> retried = true;
>> goto alloc;
>> }
>>
>> if (ret)
>> - return ret;
>> + goto err_out;
>> +
>>
>> if (ppgtt->node.start < dev_priv->gtt.mappable_end)
>> DRM_DEBUG("Forced to use aperture for PDEs\n");
>>
>> ppgtt->num_pd_entries = GEN6_PPGTT_PD_ENTRIES;
>> return 0;
>> +
>> +err_out:
>> + return ret;
>> }
>>
>> static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
>> @@ -1152,30 +1247,6 @@ static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
>> return 0;
>> }
>>
>> -static int gen6_ppgtt_setup_page_tables(struct i915_hw_ppgtt *ppgtt)
>> -{
>> - struct drm_device *dev = ppgtt->base.dev;
>> - int i;
>> -
>> - for (i = 0; i < ppgtt->num_pd_entries; i++) {
>> - struct page *page;
>> - dma_addr_t pt_addr;
>> -
>> - page = ppgtt->pd.page_tables[i]->page;
>> - pt_addr = pci_map_page(dev->pdev, page, 0, 4096,
>> - PCI_DMA_BIDIRECTIONAL);
>> -
>> - if (pci_dma_mapping_error(dev->pdev, pt_addr)) {
>> - gen6_ppgtt_unmap_pages(ppgtt);
>> - return -EIO;
>> - }
>> -
>> - ppgtt->pd.page_tables[i]->daddr = pt_addr;
>> - }
>> -
>> - return 0;
>> -}
>> -
>> static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>> {
>> struct drm_device *dev = ppgtt->base.dev;
>> @@ -1196,12 +1267,7 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>> if (ret)
>> return ret;
>>
>> - ret = gen6_ppgtt_setup_page_tables(ppgtt);
>> - if (ret) {
>> - gen6_ppgtt_free(ppgtt);
>> - return ret;
>> - }
>> -
>> + ppgtt->base.allocate_va_range = gen6_alloc_va_range;
>> ppgtt->base.clear_range = gen6_ppgtt_clear_range;
>> ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
>> ppgtt->base.cleanup = gen6_ppgtt_cleanup;
>> @@ -1212,13 +1278,17 @@ static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>> ppgtt->pd.pd_offset =
>> ppgtt->node.start / PAGE_SIZE * sizeof(gen6_gtt_pte_t);
>>
>> + ppgtt->pd_addr = (gen6_gtt_pte_t __iomem *)dev_priv->gtt.gsm +
>> + ppgtt->pd.pd_offset / sizeof(gen6_gtt_pte_t);
>> +
>> ppgtt->base.clear_range(&ppgtt->base, 0, ppgtt->base.total, true);
>>
>> + gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);
>> +
>> DRM_DEBUG_DRIVER("Allocated pde space (%ldM) at GTT entry: %lx\n",
>> ppgtt->node.size >> 20,
>> ppgtt->node.start / PAGE_SIZE);
>>
>> - gen6_write_pdes(ppgtt);
>> DRM_DEBUG("Adding PPGTT at offset %x\n",
>> ppgtt->pd.pd_offset << 10);
>>
>> @@ -1491,13 +1561,14 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
>>
>> list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
>> /* TODO: Perhaps it shouldn't be gen6 specific */
>> - if (i915_is_ggtt(vm)) {
>> - if (dev_priv->mm.aliasing_ppgtt)
>> - gen6_write_pdes(dev_priv->mm.aliasing_ppgtt);
>> - continue;
>> - }
>>
>> - gen6_write_pdes(container_of(vm, struct i915_hw_ppgtt, base));
>> + struct i915_hw_ppgtt *ppgtt =
>> + container_of(vm, struct i915_hw_ppgtt, base);
>> +
>> + if (i915_is_ggtt(vm))
>> + ppgtt = dev_priv->mm.aliasing_ppgtt;
> If we have ggtt but aliasing is not enabled we get NULL here and oops
> over in next function?
>
> -Mika
>
I applied the changes in v7.
Thanks for the review.
-Michel
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/pkcs7-signature
Size: 5510 bytes
Desc: S/MIME Cryptographic Signature
URL: <http://lists.freedesktop.org/archives/intel-gfx/attachments/20150223/5288e9b7/attachment-0001.bin>
More information about the Intel-gfx
mailing list