[PATCH 7/7] drm/i915/gtt: Fully initialise 32b pt
Chris Wilson
chris at chris-wilson.co.uk
Fri Jul 5 21:53:08 UTC 2019
Fill the unused portion of the 32b page directories with scratch entries so
that the same code paths can be used throughout.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 582 +++++++++-------------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 105 +----
2 files changed, 198 insertions(+), 489 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index 8c9bb4a9235f..1dbd80465612 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -592,10 +592,10 @@ static void cleanup_page_dma(struct i915_address_space *vm,
vm_free_page(vm, p->page);
}
-#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)
+#define kmap_atomic_pt(pt) kmap_atomic((pt)->base.page)
-#define fill_px(px, v) fill_page_dma(px_base(px), (v))
-#define fill32_px(px, v) fill_page_dma_32(px_base(px), (v))
+#define fill_pt(pt, v) fill_page_dma(&(pt)->base, (v))
+#define fill32_pt(pt, v) fill_page_dma_32(&(pt)->base, (v))
static void fill_page_dma(struct i915_page_dma *p, const u64 val)
{
@@ -652,8 +652,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
if (unlikely(!IS_ALIGNED(addr, size)))
goto unmap_page;
- vm->scratch_page.page = page;
- vm->scratch_page.daddr = addr;
+ vm->scratch[0].page = page;
+ vm->scratch[0].daddr = addr;
vm->scratch_order = order;
return 0;
@@ -672,8 +672,8 @@ setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
static void cleanup_scratch_page(struct i915_address_space *vm)
{
- struct i915_page_dma *p = &vm->scratch_page;
- int order = vm->scratch_order;
+ struct i915_page_dma *p = &vm->scratch[0];
+ unsigned int order = vm->scratch_order;
dma_unmap_page(vm->dma, p->daddr, BIT(order) << PAGE_SHIFT,
PCI_DMA_BIDIRECTIONAL);
@@ -698,12 +698,6 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return pt;
}
-static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
-{
- cleanup_page_dma(vm, &pt->base);
- kfree(pt);
-}
-
static struct i915_page_directory *__alloc_pd(void)
{
struct i915_page_directory *pd;
@@ -734,18 +728,17 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return pd;
}
-static void free_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd)
+static void free_pd(struct i915_address_space *vm, struct i915_page_dma *pd)
{
- cleanup_page_dma(vm, px_base(pd));
+ cleanup_page_dma(vm, pd);
kfree(pd);
}
static void init_pd(struct i915_page_directory *pd,
struct i915_page_dma *scratch)
{
- fill_px(pd, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
- memset_p(pd->entry, scratch, 512);
+ fill_pt(&pd->pt, gen8_pde_encode(scratch->daddr, I915_CACHE_LLC));
+ memset_p(pd->entry, scratch, I915_PDES);
}
static inline void
@@ -765,7 +758,7 @@ __set_pd_entry(struct i915_page_directory * const pd,
struct i915_page_dma * const to,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
- GEM_BUG_ON(atomic_read(px_used(pd)) > 512);
+ GEM_BUG_ON(atomic_read(px_used(pd)) > I915_PDES);
atomic_inc(px_used(pd));
pd->entry[pde] = to;
@@ -778,7 +771,7 @@ __clear_pd_entry(struct i915_page_directory * const pd,
struct i915_page_dma * const to,
u64 (*encode)(const dma_addr_t, const enum i915_cache_level))
{
- GEM_BUG_ON(atomic_read(px_used(pd)) == 0);
+ GEM_BUG_ON(!atomic_read(px_used(pd)));
write_dma_entry(px_base(pd), pde, encode(to->daddr, I915_CACHE_LLC));
pd->entry[pde] = to;
@@ -786,7 +779,7 @@ __clear_pd_entry(struct i915_page_directory * const pd,
}
#define set_pd_entry(pd, pde, to) \
- __set_pd_entry((pd), (pde), px_base(to), gen8_pde_encode)
+ __set_pd_entry((pd), (pde), &(to)->base, gen8_pde_encode)
#define clear_pd_entry(pd, pde, to) \
__clear_pd_entry((pd), (pde), (to), gen8_pde_encode)
@@ -820,90 +813,50 @@ static void mark_tlbs_dirty(struct i915_ppgtt *ppgtt)
ppgtt->pd_dirty_engines = ALL_ENGINES;
}
-/* Removes entries from a single page table, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries.
- */
-static void gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
- struct i915_page_table *pt,
- u64 start, u64 length)
-{
- const unsigned int num_entries = gen8_pte_count(start, length);
- gen8_pte_t *vaddr;
-
- vaddr = kmap_atomic_px(pt);
- memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
- kunmap_atomic(vaddr);
+#define gen8_pd_shift(lvl) (GEN8_PTE_SHIFT + (lvl) * ilog2(I915_PDES))
- GEM_BUG_ON(num_entries > atomic_read(&pt->used));
+static void __gen8_ppgtt_clear(struct i915_address_space *vm,
+ struct i915_page_directory * const pd,
+ u64 start, u64 length, int lvl)
+{
+ unsigned int idx, len;
- atomic_sub(num_entries, &pt->used);
-}
+ idx = i915_pde_index(start, gen8_pd_shift(lvl));
+ len = i915_pde_length(length, gen8_pd_shift(lvl));
+ GEM_BUG_ON(!len);
-static void gen8_ppgtt_clear_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- u64 start, u64 length)
-{
- struct i915_page_table *pt;
- u32 pde;
+ if (!lvl) {
+ u64 *vaddr;
- gen8_for_each_pde(pt, pd, start, length, pde) {
- GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
+ GEM_BUG_ON(len > atomic_read(px_used(pd)));
+ atomic_sub(len, px_used(pd));
- atomic_inc(&pt->used);
- gen8_ppgtt_clear_pt(vm, pt, start, length);
- if (release_pd_entry(pd, pde, pt, &vm->scratch_pt))
- free_pt(vm, pt);
+ vaddr = kmap_atomic_pt(&pd->pt);
+ memset64(vaddr + idx, vm->scratch_pte, len);
+ kunmap_atomic(vaddr);
+ return;
}
-}
-/* Removes entries from a single page dir pointer, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
-static void gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
- struct i915_page_directory * const pdp,
- u64 start, u64 length)
-{
- struct i915_page_directory *pd;
- unsigned int pdpe;
+ do {
+ struct i915_page_directory *pde = pd->entry[idx];
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- GEM_BUG_ON(px_base(pd) == &vm->scratch_pd);
+ GEM_BUG_ON(pd->entry[idx] == &vm->scratch[lvl]);
atomic_inc(px_used(pd));
- gen8_ppgtt_clear_pd(vm, pd, start, length);
- if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
- free_pd(vm, pd);
- }
-}
+ __gen8_ppgtt_clear(vm, pde, start, length, lvl - 1);
+ if (release_pd_entry(pd, idx, &pde->pt, &vm->scratch[lvl]))
+ free_pd(vm, px_base(pde));
-static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- gen8_ppgtt_clear_pdp(vm, i915_vm_to_ppgtt(vm)->pd, start, length);
+ start += BIT_ULL(gen8_pd_shift(lvl));
+ length -= BIT_ULL(gen8_pd_shift(lvl));
+ } while (idx++, --len);
}
-/* Removes entries from a single pml4.
- * This is the top-level structure in 4-level page tables used on gen8+.
- * Empty entries are always scratch pml4e.
- */
-static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
- u64 start, u64 length)
+static void gen8_ppgtt_clear(struct i915_address_space *vm,
+ u64 start, u64 length)
{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct i915_page_directory * const pml4 = ppgtt->pd;
- struct i915_page_directory *pdp;
- unsigned int pml4e;
-
- GEM_BUG_ON(!i915_vm_is_4lvl(vm));
-
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- GEM_BUG_ON(px_base(pdp) == &vm->scratch_pdp);
-
- atomic_inc(px_used(pdp));
- gen8_ppgtt_clear_pdp(vm, pdp, start, length);
- if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
- free_pd(vm, pdp);
- }
+ __gen8_ppgtt_clear(vm, i915_vm_to_ppgtt(vm)->pd,
+ start, length, i915_vm_is_4lvl(vm) ? 3 : 2);
}
static inline struct sgt_dma {
@@ -925,10 +878,10 @@ struct gen8_insert_pte {
static __always_inline struct gen8_insert_pte gen8_insert_pte(u64 start)
{
return (struct gen8_insert_pte) {
- gen8_pml4e_index(start),
- gen8_pdpe_index(start),
- gen8_pde_index(start),
- gen8_pte_index(start),
+ i915_pde_index(start, gen8_pd_shift(3)),
+ i915_pde_index(start, gen8_pd_shift(1)),
+ i915_pde_index(start, gen8_pd_shift(2)),
+ i915_pde_index(start, gen8_pd_shift(0)),
};
}
@@ -945,9 +898,9 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
gen8_pte_t *vaddr;
bool ret;
- GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
+ GEM_BUG_ON(idx->pdpe >= I915_PDES);
pd = i915_pd_entry(pdp, idx->pdpe);
- vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+ vaddr = kmap_atomic_pt(i915_pt_entry(pd, idx->pde));
do {
vaddr[idx->pte] = pte_encode | iter->dma;
@@ -963,25 +916,24 @@ gen8_ppgtt_insert_pte_entries(struct i915_ppgtt *ppgtt,
iter->max = iter->dma + iter->sg->length;
}
- if (++idx->pte == GEN8_PTES) {
+ if (++idx->pte == I915_PDES) {
idx->pte = 0;
if (++idx->pde == I915_PDES) {
idx->pde = 0;
/* Limited by sg length for 3lvl */
- if (++idx->pdpe == GEN8_PML4ES_PER_PML4) {
+ if (++idx->pdpe == I915_PDES) {
idx->pdpe = 0;
ret = true;
break;
}
- GEM_BUG_ON(idx->pdpe >= i915_pdpes_per_pdp(&ppgtt->vm));
pd = pdp->entry[idx->pdpe];
}
kunmap_atomic(vaddr);
- vaddr = kmap_atomic_px(i915_pt_entry(pd, idx->pde));
+ vaddr = kmap_atomic_pt(i915_pt_entry(pd, idx->pde));
}
} while (1);
kunmap_atomic(vaddr);
@@ -1017,7 +969,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
do {
struct gen8_insert_pte idx = gen8_insert_pte(start);
struct i915_page_directory *pdp =
- i915_pdp_entry(pml4, idx.pml4e);
+ i915_pd_entry(pml4, idx.pml4e);
struct i915_page_directory *pd = i915_pd_entry(pdp, idx.pdpe);
unsigned int page_size;
bool maybe_64K = false;
@@ -1034,12 +986,12 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
encode |= GEN8_PDE_PS_2M;
- vaddr = kmap_atomic_px(pd);
+ vaddr = kmap_atomic_pt(&pd->pt);
} else {
struct i915_page_table *pt = i915_pt_entry(pd, idx.pde);
index = idx.pte;
- max = GEN8_PTES;
+ max = I915_PDES;
page_size = I915_GTT_PAGE_SIZE;
if (!index &&
@@ -1049,7 +1001,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
rem >= (max - index) * I915_GTT_PAGE_SIZE))
maybe_64K = true;
- vaddr = kmap_atomic_px(pt);
+ vaddr = kmap_atomic_pt(pt);
}
do {
@@ -1093,7 +1045,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
!iter->sg && IS_ALIGNED(vma->node.start +
vma->node.size,
I915_GTT_PAGE_SIZE_2M)))) {
- vaddr = kmap_atomic_px(pd);
+ vaddr = kmap_atomic_pt(&pd->pt);
vaddr[idx.pde] |= GEN8_PDE_IPS_64K;
kunmap_atomic(vaddr);
page_size = I915_GTT_PAGE_SIZE_64K;
@@ -1111,7 +1063,7 @@ static void gen8_ppgtt_insert_huge_entries(struct i915_vma *vma,
u16 i;
encode = vma->vm->scratch_pte;
- vaddr = kmap_atomic_px(i915_pt_entry(pd,
+ vaddr = kmap_atomic_pt(i915_pt_entry(pd,
idx.pde));
for (i = 1; i < index; i += 16)
@@ -1141,29 +1093,36 @@ static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
struct gen8_insert_pte idx = gen8_insert_pte(vma->node.start);
while (gen8_ppgtt_insert_pte_entries(ppgtt,
- i915_pdp_entry(pml4, idx.pml4e++),
+ i915_pd_entry(pml4, idx.pml4e++),
&iter, &idx, cache_level,
flags))
- GEM_BUG_ON(idx.pml4e >= GEN8_PML4ES_PER_PML4);
+ GEM_BUG_ON(idx.pml4e >= I915_PDES);
vma->page_sizes.gtt = I915_GTT_PAGE_SIZE;
}
}
-static void gen8_free_page_tables(struct i915_address_space *vm,
- struct i915_page_directory *pd)
+static void gen8_free_scratch(struct i915_address_space *vm)
{
int i;
- for (i = 0; i < I915_PDES; i++) {
- if (pd->entry[i] != &vm->scratch_pt)
- free_pt(vm, pd->entry[i]);
+ if (!vm->scratch[0].daddr) /* set to 0 on clones */
+ return;
+
+ for (i = ARRAY_SIZE(vm->scratch); --i; ) {
+ if (vm->scratch[i].daddr)
+ cleanup_page_dma(vm, &vm->scratch[i]);
}
+
+ cleanup_scratch_page(vm);
}
static int gen8_init_scratch(struct i915_address_space *vm)
{
+ u64 pte;
int ret;
+ int max;
+ int i;
/*
* If everybody agrees to not to write into the scratch page,
@@ -1177,10 +1136,8 @@ static int gen8_init_scratch(struct i915_address_space *vm)
GEM_BUG_ON(!clone->has_read_only);
vm->scratch_order = clone->scratch_order;
- vm->scratch_pte = clone->scratch_pte;
- vm->scratch_pt = clone->scratch_pt;
- vm->scratch_pd = clone->scratch_pd;
- vm->scratch_pdp = clone->scratch_pdp;
+ memcpy(vm->scratch, clone->scratch, sizeof(vm->scratch));
+ vm->scratch[0].daddr = 0;
return 0;
}
@@ -1189,43 +1146,25 @@ static int gen8_init_scratch(struct i915_address_space *vm)
return ret;
vm->scratch_pte =
- gen8_pte_encode(vm->scratch_page.daddr,
+ gen8_pte_encode(vm->scratch[0].daddr,
I915_CACHE_LLC,
vm->has_read_only);
- if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
- ret = -ENOMEM;
- goto free_scratch_page;
- }
- fill_page_dma(&vm->scratch_pt, vm->scratch_pte);
-
- if (unlikely(setup_page_dma(vm, &vm->scratch_pd))) {
- ret = -ENOMEM;
- goto free_pt;
- }
- fill_page_dma(&vm->scratch_pd,
- gen8_pde_encode(vm->scratch_pd.daddr, I915_CACHE_LLC));
+ max = i915_vm_is_4lvl(vm) ? 4 : 3;
+ pte = vm->scratch_pte;
+ for (i = 1; i < max; i++) {
+ if (unlikely(setup_page_dma(vm, &vm->scratch[i])))
+ goto free_scratch;
- if (i915_vm_is_4lvl(vm)) {
- if (unlikely(setup_page_dma(vm, &vm->scratch_pdp))) {
- ret = -ENOMEM;
- goto free_pd;
- }
- fill_page_dma(&vm->scratch_pdp,
- gen8_pde_encode(vm->scratch_pdp.daddr,
- I915_CACHE_LLC));
+ fill_page_dma(&vm->scratch[i], pte);
+ pte = gen8_pde_encode(vm->scratch[i].daddr, I915_CACHE_LLC);
}
return 0;
-free_pd:
- cleanup_page_dma(vm, &vm->scratch_pd);
-free_pt:
- cleanup_page_dma(vm, &vm->scratch_pt);
-free_scratch_page:
- cleanup_scratch_page(vm);
-
- return ret;
+free_scratch:
+ gen8_free_scratch(vm);
+ return -ENOMEM;
}
static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
@@ -1265,263 +1204,146 @@ static int gen8_ppgtt_notify_vgt(struct i915_ppgtt *ppgtt, bool create)
return 0;
}
-static void gen8_free_scratch(struct i915_address_space *vm)
-{
- if (!vm->scratch_page.daddr) /* set to 0 on clones */
- return;
-
- if (vm->scratch_pdp.daddr)
- cleanup_page_dma(vm, &vm->scratch_pdp);
- if (vm->scratch_pd.daddr)
- cleanup_page_dma(vm, &vm->scratch_pd);
- if (vm->scratch_pt.daddr)
- cleanup_page_dma(vm, &vm->scratch_pt);
-
- cleanup_scratch_page(vm);
-}
-
-static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
- struct i915_page_directory *pdp)
+static void __gen8_ppgtt_cleanup(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ int lvl)
{
- const unsigned int pdpes = i915_pdpes_per_pdp(vm);
int i;
- for (i = 0; i < pdpes; i++) {
- if (pdp->entry[i] == &vm->scratch_pd)
- continue;
-
- gen8_free_page_tables(vm, pdp->entry[i]);
- free_pd(vm, pdp->entry[i]);
- }
-
- free_pd(vm, pdp);
-}
-
-static void gen8_ppgtt_cleanup_4lvl(struct i915_ppgtt *ppgtt)
-{
- struct i915_page_directory * const pml4 = ppgtt->pd;
- int i;
-
- for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
- struct i915_page_directory *pdp = i915_pdp_entry(pml4, i);
+ for (i = 0; i < I915_PDES; i++) {
+ void *pde = pd->entry[i];
- if (px_base(pdp) == &ppgtt->vm.scratch_pdp)
+ if (pde == &vm->scratch[lvl])
continue;
- gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, pdp);
+ if (lvl == 1)
+ free_pd(vm, pde);
+ else
+ __gen8_ppgtt_cleanup(vm, pde, lvl - 1);
}
- free_pd(&ppgtt->vm, pml4);
+ free_pd(vm, px_base(pd));
}
static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
- struct drm_i915_private *i915 = vm->i915;
struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- if (intel_vgpu_active(i915))
+ if (intel_vgpu_active(vm->i915))
gen8_ppgtt_notify_vgt(ppgtt, false);
- if (i915_vm_is_4lvl(vm))
- gen8_ppgtt_cleanup_4lvl(ppgtt);
- else
- gen8_ppgtt_cleanup_3lvl(&ppgtt->vm, ppgtt->pd);
-
+ __gen8_ppgtt_cleanup(vm, ppgtt->pd, i915_vm_is_4lvl(vm) ? 3 : 2);
gen8_free_scratch(vm);
}
-static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- u64 start, u64 length)
+#define as_pd(x) container_of((x), typeof(struct i915_page_directory), pt)
+
+static int __gen8_ppgtt_alloc(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ u64 start, u64 length, int lvl)
{
struct i915_page_table *pt, *alloc = NULL;
+ unsigned int idx, len;
u64 from = start;
- unsigned int pde;
int ret = 0;
+ idx = i915_pde_index(start, gen8_pd_shift(lvl));
+ len = i915_pde_length(length, gen8_pd_shift(lvl));
+ GEM_BUG_ON(!len);
+
spin_lock(&pd->lock);
- gen8_for_each_pde(pt, pd, start, length, pde) {
- const int count = gen8_pte_count(start, length);
+ do {
+ pt = pd->entry[idx];
- if (px_base(pt) == &vm->scratch_pt) {
+ if (&pt->base == &vm->scratch[lvl]) {
spin_unlock(&pd->lock);
pt = fetch_and_zero(&alloc);
if (!pt)
- pt = alloc_pt(vm);
+ pt = (lvl > 1) ? &alloc_pd(vm)->pt : alloc_pt(vm);
if (IS_ERR(pt)) {
ret = PTR_ERR(pt);
goto unwind;
}
- if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
- fill_px(pt, vm->scratch_pte);
+ if (lvl > 1)
+ init_pd(as_pd(pt), &vm->scratch[lvl - 1]);
+ else if (len < I915_PDES || intel_vgpu_active(vm->i915))
+ fill_pt(pt, vm->scratch_pte);
spin_lock(&pd->lock);
- if (pd->entry[pde] == &vm->scratch_pt) {
- set_pd_entry(pd, pde, pt);
+ if (pd->entry[idx] == &vm->scratch[lvl]) {
+ set_pd_entry(pd, idx, pt);
} else {
alloc = pt;
- pt = pd->entry[pde];
+ pt = pd->entry[idx];
}
}
- atomic_add(count, &pt->used);
- }
- spin_unlock(&pd->lock);
- goto out;
-
-unwind:
- gen8_ppgtt_clear_pd(vm, pd, from, start - from);
-out:
- if (alloc)
- free_pt(vm, alloc);
- return ret;
-}
-
-static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
- struct i915_page_directory *pdp,
- u64 start, u64 length)
-{
- struct i915_page_directory *pd, *alloc = NULL;
- u64 from = start;
- unsigned int pdpe;
- int ret = 0;
-
- spin_lock(&pdp->lock);
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- if (px_base(pd) == &vm->scratch_pd) {
- spin_unlock(&pdp->lock);
-
- pd = fetch_and_zero(&alloc);
- if (!pd)
- pd = alloc_pd(vm);
- if (IS_ERR(pd)) {
- ret = PTR_ERR(pd);
- goto unwind;
- }
+ if (lvl > 1) {
+ atomic_inc(&pt->used);
+ spin_unlock(&pd->lock);
- init_pd(pd, &vm->scratch_pt);
+ ret = __gen8_ppgtt_alloc(vm, as_pd(pt),
+ start, length, lvl - 1);
+ if (unlikely(ret))
+ goto unwind_pt;
- spin_lock(&pdp->lock);
- if (pdp->entry[pdpe] == &vm->scratch_pd) {
- set_pd_entry(pdp, pdpe, pd);
- } else {
- alloc = pd;
- pd = pdp->entry[pdpe];
- }
+ spin_lock(&pd->lock);
+ atomic_dec(&pt->used);
+ GEM_BUG_ON(!atomic_read(&pt->used));
+ } else {
+ atomic_add(i915_pte_count(start, length,
+ gen8_pd_shift(1)),
+ &pt->used);
}
- atomic_inc(px_used(pd));
- spin_unlock(&pdp->lock);
- ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
- if (unlikely(ret))
- goto unwind_pd;
-
- spin_lock(&pdp->lock);
- atomic_dec(px_used(pd));
- }
- spin_unlock(&pdp->lock);
+ start += BIT_ULL(gen8_pd_shift(lvl));
+ length -= BIT_ULL(gen8_pd_shift(lvl));
+ } while (idx++, --len);
+ spin_unlock(&pd->lock);
goto out;
-unwind_pd:
- if (release_pd_entry(pdp, pdpe, &pd->pt, &vm->scratch_pd))
- free_pd(vm, pd);
+unwind_pt:
+ if (release_pd_entry(pd, idx, pt, &vm->scratch[lvl]))
+ free_pd(vm, px_base(pt));
unwind:
- gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
+ __gen8_ppgtt_clear(vm, pd, from, start - from, lvl);
out:
if (alloc)
- free_pd(vm, alloc);
+ free_pd(vm, &alloc->base);
return ret;
}
-static int gen8_ppgtt_alloc_3lvl(struct i915_address_space *vm,
- u64 start, u64 length)
-{
- return gen8_ppgtt_alloc_pdp(vm,
- i915_vm_to_ppgtt(vm)->pd, start, length);
-}
-
-static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
- u64 start, u64 length)
+static int gen8_ppgtt_alloc(struct i915_address_space *vm,
+ u64 start, u64 length)
{
- struct i915_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- struct i915_page_directory * const pml4 = ppgtt->pd;
- struct i915_page_directory *pdp, *alloc = NULL;
- u64 from = start;
- int ret = 0;
- u32 pml4e;
-
- spin_lock(&pml4->lock);
- gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- if (px_base(pdp) == &vm->scratch_pdp) {
- spin_unlock(&pml4->lock);
-
- pdp = fetch_and_zero(&alloc);
- if (!pdp)
- pdp = alloc_pd(vm);
- if (IS_ERR(pdp)) {
- ret = PTR_ERR(pdp);
- goto unwind;
- }
-
- init_pd(pdp, &vm->scratch_pd);
-
- spin_lock(&pml4->lock);
- if (pml4->entry[pml4e] == &vm->scratch_pdp) {
- set_pd_entry(pml4, pml4e, pdp);
- } else {
- alloc = pdp;
- pdp = pml4->entry[pml4e];
- }
- }
- atomic_inc(px_used(pdp));
- spin_unlock(&pml4->lock);
-
- ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
- if (unlikely(ret))
- goto unwind_pdp;
-
- spin_lock(&pml4->lock);
- atomic_dec(px_used(pdp));
- }
- spin_unlock(&pml4->lock);
- goto out;
-
-unwind_pdp:
- if (release_pd_entry(pml4, pml4e, &pdp->pt, &vm->scratch_pdp))
- free_pd(vm, pdp);
-unwind:
- gen8_ppgtt_clear_4lvl(vm, from, start - from);
-out:
- if (alloc)
- free_pd(vm, alloc);
- return ret;
+ return __gen8_ppgtt_alloc(vm, i915_vm_to_ppgtt(vm)->pd,
+ start, length, i915_vm_is_4lvl(vm) ? 3 : 2);
}
static int gen8_preallocate_top_level_pdp(struct i915_ppgtt *ppgtt)
{
struct i915_address_space *vm = &ppgtt->vm;
- struct i915_page_directory *pdp = ppgtt->pd;
- struct i915_page_directory *pd;
- u64 start = 0, length = ppgtt->vm.total;
- u64 from = start;
- unsigned int pdpe;
+ struct i915_page_directory *pd = ppgtt->pd;
+ unsigned int idx;
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- pd = alloc_pd(vm);
- if (IS_ERR(pd))
+ for (idx = 0; idx < ppgtt->vm.total >> 30; idx++) {
+ struct i915_page_directory *pde;
+
+ pde = alloc_pd(vm);
+ if (IS_ERR(pde))
goto unwind;
- init_pd(pd, &vm->scratch_pt);
- set_pd_entry(pdp, pdpe, pd);
+ init_pd(pde, &vm->scratch[1]);
+ set_pd_entry(pd, idx, &pde->pt);
}
return 0;
unwind:
- gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
- atomic_set(px_used(pdp), 0);
+ __gen8_ppgtt_clear(vm, pd, 0, idx << 30, 2);
+ atomic_set(px_used(pd), 0);
return -ENOMEM;
}
@@ -1542,47 +1364,16 @@ static void ppgtt_init(struct i915_ppgtt *ppgtt, struct intel_gt *gt)
ppgtt->vm.vma_ops.clear_pages = clear_pages;
}
-static void init_pd_n(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- struct i915_page_dma *to,
- const unsigned int entries)
-{
- const u64 daddr = gen8_pde_encode(to->daddr, I915_CACHE_LLC);
- u64 * const vaddr = kmap_atomic_px(pd);
-
- memset64(vaddr, daddr, entries);
- kunmap_atomic(vaddr);
-
- memset_p(pd->entry, to, entries);
-}
-
static struct i915_page_directory *
gen8_alloc_top_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
- if (i915_vm_is_4lvl(vm)) {
- pd = alloc_pd(vm);
- if (!IS_ERR(pd))
- init_pd(pd, &vm->scratch_pdp);
-
+ pd = alloc_pd(vm);
+ if (IS_ERR(pd))
return pd;
- }
-
- /* 3lvl */
- pd = __alloc_pd();
- if (!pd)
- return ERR_PTR(-ENOMEM);
-
- pd->entry[GEN8_3LVL_PDPES] = NULL;
-
- if (unlikely(setup_page_dma(vm, px_base(pd)))) {
- kfree(pd);
- return ERR_PTR(-ENOMEM);
- }
-
- init_pd_n(vm, pd, &vm->scratch_pd, GEN8_3LVL_PDPES);
+ init_pd(pd, &vm->scratch[i915_vm_is_4lvl(vm) ? 3 : 2]);
return pd;
}
@@ -1629,9 +1420,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
}
if (i915_vm_is_4lvl(&ppgtt->vm)) {
- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_4lvl;
ppgtt->vm.insert_entries = gen8_ppgtt_insert_4lvl;
- ppgtt->vm.clear_range = gen8_ppgtt_clear_4lvl;
} else {
if (intel_vgpu_active(i915)) {
err = gen8_preallocate_top_level_pdp(ppgtt);
@@ -1639,11 +1428,12 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
goto err_free_pd;
}
- ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc_3lvl;
ppgtt->vm.insert_entries = gen8_ppgtt_insert_3lvl;
- ppgtt->vm.clear_range = gen8_ppgtt_clear_3lvl;
}
+ ppgtt->vm.allocate_va_range = gen8_ppgtt_alloc;
+ ppgtt->vm.clear_range = gen8_ppgtt_clear;
+
if (intel_vgpu_active(i915))
gen8_ppgtt_notify_vgt(ppgtt, true);
@@ -1652,7 +1442,7 @@ static struct i915_ppgtt *gen8_ppgtt_create(struct drm_i915_private *i915)
return ppgtt;
err_free_pd:
- free_pd(&ppgtt->vm, ppgtt->pd);
+ free_pd(&ppgtt->vm, px_base(ppgtt->pd));
err_free_scratch:
gen8_free_scratch(&ppgtt->vm);
err_free:
@@ -1739,7 +1529,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
const unsigned int count = min(num_entries, GEN6_PTES - pte);
gen6_pte_t *vaddr;
- GEM_BUG_ON(px_base(pt) == &vm->scratch_pt);
+ GEM_BUG_ON(&pt->base == &vm->scratch[1]);
num_entries -= count;
@@ -1754,7 +1544,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
* entries back to scratch.
*/
- vaddr = kmap_atomic_px(pt);
+ vaddr = kmap_atomic_pt(pt);
memset32(vaddr + pte, scratch_pte, count);
kunmap_atomic(vaddr);
@@ -1776,9 +1566,9 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
struct sgt_dma iter = sgt_dma(vma);
gen6_pte_t *vaddr;
- GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch_pt);
+ GEM_BUG_ON(pd->entry[act_pt] == &vm->scratch[1]);
- vaddr = kmap_atomic_px(i915_pt_entry(pd, act_pt));
+ vaddr = kmap_atomic_pt(i915_pt_entry(pd, act_pt));
do {
vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
@@ -1794,7 +1584,7 @@ static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
if (++act_pte == GEN6_PTES) {
kunmap_atomic(vaddr);
- vaddr = kmap_atomic_px(i915_pt_entry(pd, ++act_pt));
+ vaddr = kmap_atomic_pt(i915_pt_entry(pd, ++act_pt));
act_pte = 0;
}
} while (1);
@@ -1821,7 +1611,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
gen6_for_each_pde(pt, pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
- if (px_base(pt) == &vm->scratch_pt) {
+ if (&pt->base == &vm->scratch[1]) {
spin_unlock(&pd->lock);
pt = fetch_and_zero(&alloc);
@@ -1832,10 +1622,10 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
goto unwind_out;
}
- fill32_px(pt, vm->scratch_pte);
+ fill32_pt(pt, vm->scratch_pte);
spin_lock(&pd->lock);
- if (pd->entry[pde] == &vm->scratch_pt) {
+ if (pd->entry[pde] == &vm->scratch[1]) {
pd->entry[pde] = pt;
if (i915_vma_is_bound(ppgtt->vma,
I915_VMA_GLOBAL_BIND)) {
@@ -1863,7 +1653,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
gen6_ppgtt_clear_range(vm, from, start - from);
out:
if (alloc)
- free_pt(vm, alloc);
+ free_pd(vm, &alloc->base);
intel_runtime_pm_put(&vm->i915->runtime_pm, wakeref);
return ret;
}
@@ -1872,33 +1662,29 @@ static int gen6_ppgtt_init_scratch(struct gen6_ppgtt *ppgtt)
{
struct i915_address_space * const vm = &ppgtt->base.vm;
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_table *unused;
- u32 pde;
int ret;
ret = setup_scratch_page(vm, __GFP_HIGHMEM);
if (ret)
return ret;
- vm->scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
+ vm->scratch_pte = vm->pte_encode(vm->scratch[0].daddr,
I915_CACHE_NONE,
PTE_READ_ONLY);
- if (unlikely(setup_page_dma(vm, &vm->scratch_pt))) {
+ if (unlikely(setup_page_dma(vm, &vm->scratch[1]))) {
cleanup_scratch_page(vm);
return -ENOMEM;
}
- fill_page_dma_32(&vm->scratch_pt, vm->scratch_pte);
-
- gen6_for_all_pdes(unused, pd, pde)
- pd->entry[pde] = &vm->scratch_pt;
+ fill_page_dma_32(&vm->scratch[1], vm->scratch_pte);
+ memset_p(pd->entry, &vm->scratch[1], I915_PDES);
return 0;
}
static void gen6_ppgtt_free_scratch(struct i915_address_space *vm)
{
- cleanup_page_dma(vm, &vm->scratch_pt);
+ cleanup_page_dma(vm, &vm->scratch[1]);
cleanup_scratch_page(vm);
}
@@ -1909,8 +1695,8 @@ static void gen6_ppgtt_free_pd(struct gen6_ppgtt *ppgtt)
u32 pde;
gen6_for_all_pdes(pt, pd, pde)
- if (px_base(pt) != &ppgtt->base.vm.scratch_pt)
- free_pt(&ppgtt->base.vm, pt);
+ if (&pt->base != &ppgtt->base.vm.scratch[1])
+ free_pd(&ppgtt->base.vm, &pt->base);
}
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
@@ -1967,7 +1753,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
{
struct gen6_ppgtt *ppgtt = vma->private;
struct i915_page_directory * const pd = ppgtt->base.pd;
- struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch_pt;
+ struct i915_page_dma * const scratch = &ppgtt->base.vm.scratch[1];
struct i915_page_table *pt;
unsigned int pde;
@@ -1979,7 +1765,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
if (atomic_read(&pt->used) || px_base(pt) == scratch)
continue;
- free_pt(&ppgtt->base.vm, pt);
+ free_pd(&ppgtt->base.vm, &pt->base);
pd->entry[pde] = scratch;
}
@@ -2973,7 +2759,7 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
}
ggtt->vm.scratch_pte =
- ggtt->vm.pte_encode(ggtt->vm.scratch_page.daddr,
+ ggtt->vm.pte_encode(ggtt->vm.scratch[0].daddr,
I915_CACHE_NONE, 0);
return 0;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 7019bb2b1941..2070d47ba3cf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -115,30 +115,19 @@ typedef u64 gen8_pte_t;
#define HSW_GTT_ADDR_ENCODE(addr) ((addr) | (((addr) >> 28) & 0x7f0))
#define HSW_PTE_ADDR_ENCODE(addr) HSW_GTT_ADDR_ENCODE(addr)
-/* GEN8 32b style address is defined as a 3 level page table:
+/*
+ * GEN8 32b style address is defined as a 3 level page table:
* 31:30 | 29:21 | 20:12 | 11:0
* PDPE | PDE | PTE | offset
* The difference as compared to normal x86 3 level page table is the PDPEs are
* programmed via register.
- */
-#define GEN8_3LVL_PDPES 4
-#define GEN8_PDE_SHIFT 21
-#define GEN8_PDE_MASK 0x1ff
-#define GEN8_PTE_SHIFT 12
-#define GEN8_PTE_MASK 0x1ff
-#define GEN8_PTES I915_PTES(sizeof(gen8_pte_t))
-
-/* GEN8 48b style address is defined as a 4 level page table:
+ *
+ * GEN8 48b style address is defined as a 4 level page table:
* 47:39 | 38:30 | 29:21 | 20:12 | 11:0
* PML4E | PDPE | PDE | PTE | offset
*/
-#define GEN8_PML4ES_PER_PML4 512
-#define GEN8_PML4E_SHIFT 39
-#define GEN8_PML4E_MASK (GEN8_PML4ES_PER_PML4 - 1)
-#define GEN8_PDPE_SHIFT 30
-/* NB: GEN8_PDPE_MASK is untrue for 32b platforms, but it has no impact on 32b page
- * tables */
-#define GEN8_PDPE_MASK 0x1ff
+#define GEN8_3LVL_PDPES 4
+#define GEN8_PTE_SHIFT 12
#define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD)
#define PPAT_CACHED_PDE 0 /* WB LLC */
@@ -317,12 +306,9 @@ struct i915_address_space {
#define VM_CLASS_GGTT 0
#define VM_CLASS_PPGTT 1
+ struct i915_page_dma scratch[4];
u64 scratch_pte;
int scratch_order;
- struct i915_page_dma scratch_page;
- struct i915_page_dma scratch_pt;
- struct i915_page_dma scratch_pd;
- struct i915_page_dma scratch_pdp; /* GEN8+ & 48b PPGTT */
/**
* List of vma currently bound.
@@ -504,6 +490,12 @@ static inline u32 i915_pde_index(u64 addr, u32 shift)
return (addr >> shift) & I915_PDE_MASK;
}
+static inline u32 i915_pde_length(u64 len, u32 shift)
+{
+ len = (len + BIT_ULL(shift) - 1) >> shift;
+ return min_t(unsigned int, len, I915_PDES);
+}
+
static inline u32 gen6_pte_index(u32 addr)
{
return i915_pte_index(addr, GEN6_PDE_SHIFT);
@@ -519,15 +511,6 @@ static inline u32 gen6_pde_index(u32 addr)
return i915_pde_index(addr, GEN6_PDE_SHIFT);
}
-static inline unsigned int
-i915_pdpes_per_pdp(const struct i915_address_space *vm)
-{
- if (i915_vm_is_4lvl(vm))
- return GEN8_PML4ES_PER_PML4;
-
- return GEN8_3LVL_PDPES;
-}
-
static inline struct i915_page_table *
i915_pt_entry(const struct i915_page_directory * const pd,
const unsigned short n)
@@ -542,70 +525,10 @@ i915_pd_entry(const struct i915_page_directory * const pdp,
return pdp->entry[n];
}
-static inline struct i915_page_directory *
-i915_pdp_entry(const struct i915_page_directory * const pml4,
- const unsigned short n)
-{
- return pml4->entry[n];
-}
-
-/* Equivalent to the gen6 version, For each pde iterates over every pde
- * between from start until start + length. On gen8+ it simply iterates
- * over every page directory entry in a page directory.
- */
-#define gen8_for_each_pde(pt, pd, start, length, iter) \
- for (iter = gen8_pde_index(start); \
- length > 0 && iter < I915_PDES && \
- (pt = i915_pt_entry(pd, iter), true); \
- ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDE_SHIFT); \
- temp = min(temp - start, length); \
- start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pdpe(pd, pdp, start, length, iter) \
- for (iter = gen8_pdpe_index(start); \
- length > 0 && iter < i915_pdpes_per_pdp(vm) && \
- (pd = i915_pd_entry(pdp, iter), true); \
- ({ u64 temp = ALIGN(start+1, 1 << GEN8_PDPE_SHIFT); \
- temp = min(temp - start, length); \
- start += temp, length -= temp; }), ++iter)
-
-#define gen8_for_each_pml4e(pdp, pml4, start, length, iter) \
- for (iter = gen8_pml4e_index(start); \
- length > 0 && iter < GEN8_PML4ES_PER_PML4 && \
- (pdp = i915_pdp_entry(pml4, iter), true); \
- ({ u64 temp = ALIGN(start+1, 1ULL << GEN8_PML4E_SHIFT); \
- temp = min(temp - start, length); \
- start += temp, length -= temp; }), ++iter)
-
-static inline u32 gen8_pte_index(u64 address)
-{
- return i915_pte_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pde_index(u64 address)
-{
- return i915_pde_index(address, GEN8_PDE_SHIFT);
-}
-
-static inline u32 gen8_pdpe_index(u64 address)
-{
- return (address >> GEN8_PDPE_SHIFT) & GEN8_PDPE_MASK;
-}
-
-static inline u32 gen8_pml4e_index(u64 address)
-{
- return (address >> GEN8_PML4E_SHIFT) & GEN8_PML4E_MASK;
-}
-
-static inline u64 gen8_pte_count(u64 address, u64 length)
-{
- return i915_pte_count(address, length, GEN8_PDE_SHIFT);
-}
-
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_ppgtt *ppgtt, const unsigned int n)
{
- return px_dma(i915_pdp_entry(ppgtt->pd, n));
+ return px_dma(i915_pt_entry(ppgtt->pd, n));
}
static inline struct i915_ggtt *
--
2.20.1
More information about the Intel-gfx-trybot
mailing list