[PATCH 095/131] drm/i915: Micro-optimise gen6_ppgtt_insert_entries()
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 6 07:37:02 UTC 2016
Inline the address computation to avoid the vfunc call for every page.
We still have to pay the high overhead of sg_page_iter_next(), but now
at least GCC can optimise the innermost loop, giving a significant
boost to some thrashing Unreal Engine workloads.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 20 +++++++-------------
1 file changed, 7 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index fb543941578f..dc5295497f9d 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -1815,28 +1815,22 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
unsigned first_entry = start >> PAGE_SHIFT;
unsigned act_pt = first_entry / GEN6_PTES;
- unsigned act_pte = first_entry % GEN6_PTES;
- gen6_pte_t *pt_vaddr = NULL;
+ unsigned act_pte = first_entry % GEN6_PTES - 1;
+ u32 pte_encode = vm->pte_encode(0, cache_level, true, flags);
struct sgt_iter sgt_iter;
+ gen6_pte_t *pt_vaddr;
dma_addr_t addr;
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
for_each_sgt_dma(addr, sgt_iter, pages) {
- if (pt_vaddr == NULL)
- pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);
-
- pt_vaddr[act_pte] =
- vm->pte_encode(addr, cache_level, true, flags);
-
if (++act_pte == GEN6_PTES) {
kunmap_px(ppgtt, pt_vaddr);
- pt_vaddr = NULL;
- act_pt++;
+ pt_vaddr = kmap_px(ppgtt->pd.page_table[++act_pt]);
act_pte = 0;
}
+ pt_vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(addr);
}
-
- if (pt_vaddr)
- kunmap_px(ppgtt, pt_vaddr);
+ kunmap_px(ppgtt, pt_vaddr);
}
static int gen6_alloc_va_range(struct i915_address_space *vm,
--
2.8.1
More information about the Intel-gfx-trybot
mailing list