[Intel-gfx] [PATCH 4/7] drm/i915/gtt: Replace struct_mutex serialisation for allocation
Chris Wilson
chris at chris-wilson.co.uk
Mon Jun 3 17:49:32 UTC 2019
Instead of relying on the caller holding struct_mutex across the
allocation, push the allocation under a tree of spinlocks stored inside
the page tables. Not only should this allow us to avoid struct_mutex
here, but it will also allow multiple users to lock independent ranges and
perform their allocations concurrently. This is vital for
pushing the GTT manipulation into a background thread where dependency
on struct_mutex is verboten, and for allowing other callers to avoid
struct_mutex altogether.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Matthew Auld <matthew.auld at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
---
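For review, the core of the change is the same optimistic install pattern
repeated at every level of the page-table tree: take the level's spinlock to
inspect the entry, drop it around the (sleeping) allocation, and publish the
new child with cmpxchg() so that a racing allocator either wins the install
or frees its duplicate and adopts the winner's entry. Below is a minimal
user-space sketch of that pattern, assuming C11 atomics and a pthread mutex
in place of the kernel's cmpxchg() and spinlock; install_pt(), alloc_child(),
struct pd and scratch_pt here are illustrative stand-ins, not code from this
patch.

#include <pthread.h>
#include <stdatomic.h>
#include <stdlib.h>

struct pt { _Atomic int used_ptes; };      /* stand-in for i915_page_table */

struct pd {                                /* stand-in for i915_page_directory */
	struct pt *_Atomic slot[512];      /* entries start at scratch_pt */
	pthread_mutex_t lock;              /* stand-in for pd->lock */
};

static struct pt scratch_pt;               /* shared "empty" placeholder */

static void pd_init(struct pd *pd)
{
	pthread_mutex_init(&pd->lock, NULL);
	for (unsigned int i = 0; i < 512; i++)
		atomic_init(&pd->slot[i], &scratch_pt);
}

/* Illustrative allocation helper; may sleep, so it runs outside the lock. */
static struct pt *alloc_child(void)
{
	return calloc(1, sizeof(struct pt));
}

/*
 * Sketch of the install pattern: check under the lock, allocate outside
 * it, then publish with a compare-and-swap so a racing thread's duplicate
 * is simply freed and the published entry is reused.
 */
static struct pt *install_pt(struct pd *pd, unsigned int idx)
{
	struct pt *pt, *old;

	pthread_mutex_lock(&pd->lock);
	pt = atomic_load(&pd->slot[idx]);
	pthread_mutex_unlock(&pd->lock);
	if (pt != &scratch_pt)             /* already populated */
		return pt;

	pt = alloc_child();                /* lock dropped across allocation */
	if (!pt)
		return NULL;

	old = &scratch_pt;
	if (!atomic_compare_exchange_strong(&pd->slot[idx], &old, pt)) {
		free(pt);                  /* lost the race: adopt the winner */
		pt = old;
	}
	return pt;
}

The point of the cmpxchg() step is that losing the race is harmless: the
duplicate is freed and both threads continue with whichever entry was
actually published, after which the caller bumps the entry's use count (as
the patch does with atomic_add() under the level's lock).
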
drivers/gpu/drm/i915/i915_gem_gtt.c | 212 +++++++++++++++++++---------
drivers/gpu/drm/i915/i915_gem_gtt.h | 9 +-
2 files changed, 152 insertions(+), 69 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ca8a69e8b098..5000a990ddf3 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -655,7 +655,7 @@ static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
return ERR_PTR(-ENOMEM);
}
- pt->used_ptes = 0;
+ atomic_set(&pt->used_ptes, 0);
return pt;
}
@@ -690,7 +690,8 @@ static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
return ERR_PTR(-ENOMEM);
}
- pd->used_pdes = 0;
+ atomic_set(&pd->used_pdes, 0);
+ spin_lock_init(&pd->lock);
return pd;
}
@@ -721,6 +722,8 @@ static int __pdp_init(struct i915_address_space *vm,
memset_p((void **)pdp->page_directory, vm->scratch_pd, pdpes);
+ atomic_set(&pdp->used_pdpes, 0);
+ spin_lock_init(&pdp->lock);
return 0;
}
@@ -775,11 +778,8 @@ static void free_pdp(struct i915_address_space *vm,
static void gen8_initialize_pdp(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp)
{
- gen8_ppgtt_pdpe_t scratch_pdpe;
-
- scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);
-
- fill_px(vm, pdp, scratch_pdpe);
+ fill_px(vm, pdp,
+ gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC));
}
static void gen8_initialize_pml4(struct i915_address_space *vm,
@@ -788,6 +788,7 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
fill_px(vm, pml4,
gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
memset_p((void **)pml4->pdps, vm->scratch_pdp, GEN8_PML4ES_PER_PML4);
+ spin_lock_init(&pml4->lock);
}
/*
@@ -811,17 +812,12 @@ static bool gen8_ppgtt_clear_pt(const struct i915_address_space *vm,
unsigned int num_entries = gen8_pte_count(start, length);
gen8_pte_t *vaddr;
- GEM_BUG_ON(num_entries > pt->used_ptes);
-
- pt->used_ptes -= num_entries;
- if (!pt->used_ptes)
- return true;
-
vaddr = kmap_atomic_px(pt);
memset64(vaddr + gen8_pte_index(start), vm->scratch_pte, num_entries);
kunmap_atomic(vaddr);
- return false;
+ GEM_BUG_ON(num_entries > atomic_read(&pt->used_ptes));
+ return !atomic_sub_return(num_entries, &pt->used_ptes);
}
static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
@@ -831,8 +827,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
{
gen8_pde_t *vaddr;
- pd->page_table[pde] = pt;
-
vaddr = kmap_atomic_px(pd);
vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
kunmap_atomic(vaddr);
@@ -846,19 +840,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
u32 pde;
gen8_for_each_pde(pt, pd, start, length, pde) {
+ bool free = false;
+
GEM_BUG_ON(pt == vm->scratch_pt);
if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
continue;
- gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
- GEM_BUG_ON(!pd->used_pdes);
- pd->used_pdes--;
+ spin_lock(&pd->lock);
+ if (!atomic_read(&pt->used_ptes)) {
+ gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+ pd->page_table[pde] = vm->scratch_pt;
- free_pt(vm, pt);
+ GEM_BUG_ON(!atomic_read(&pd->used_pdes));
+ atomic_dec(&pd->used_pdes);
+ free = true;
+ }
+ spin_unlock(&pd->lock);
+ if (free)
+ free_pt(vm, pt);
}
- return !pd->used_pdes;
+ return !atomic_read(&pd->used_pdes);
}
static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
@@ -868,7 +871,6 @@ static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
{
gen8_ppgtt_pdpe_t *vaddr;
- pdp->page_directory[pdpe] = pd;
if (!i915_vm_is_4lvl(vm))
return;
@@ -888,19 +890,28 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
unsigned int pdpe;
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
+ bool free = false;
+
GEM_BUG_ON(pd == vm->scratch_pd);
if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
continue;
- gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
- GEM_BUG_ON(!pdp->used_pdpes);
- pdp->used_pdpes--;
+ spin_lock(&pdp->lock);
+ if (!atomic_read(&pd->used_pdes)) {
+ gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+ pdp->page_directory[pdpe] = vm->scratch_pd;
- free_pd(vm, pd);
+ GEM_BUG_ON(!atomic_read(&pdp->used_pdpes));
+ atomic_dec(&pdp->used_pdpes);
+ free = true;
+ }
+ spin_unlock(&pdp->lock);
+ if (free)
+ free_pd(vm, pd);
}
- return !pdp->used_pdpes;
+ return !atomic_read(&pdp->used_pdpes);
}
static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
@@ -915,8 +926,6 @@ static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
{
gen8_ppgtt_pml4e_t *vaddr;
- pml4->pdps[pml4e] = pdp;
-
vaddr = kmap_atomic_px(pml4);
vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
kunmap_atomic(vaddr);
@@ -937,14 +946,21 @@ static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
GEM_BUG_ON(!i915_vm_is_4lvl(vm));
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
+ bool free = false;
GEM_BUG_ON(pdp == vm->scratch_pdp);
if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
continue;
- gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
-
- free_pdp(vm, pdp);
+ spin_lock(&pml4->lock);
+ if (!atomic_read(&pdp->used_pdpes)) {
+ gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
+ pml4->pdps[pml4e] = vm->scratch_pdp;
+ free = true;
+ }
+ spin_unlock(&pml4->lock);
+ if (free)
+ free_pdp(vm, pdp);
}
}
@@ -1369,27 +1385,38 @@ static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
u64 from = start;
unsigned int pde;
+ spin_lock(&pd->lock);
gen8_for_each_pde(pt, pd, start, length, pde) {
- int count = gen8_pte_count(start, length);
+ const int count = gen8_pte_count(start, length);
if (pt == vm->scratch_pt) {
- pd->used_pdes++;
+ struct i915_page_table *old;
+
+ spin_unlock(&pd->lock);
pt = alloc_pt(vm);
- if (IS_ERR(pt)) {
- pd->used_pdes--;
+ if (IS_ERR(pt))
goto unwind;
- }
if (count < GEN8_PTES || intel_vgpu_active(vm->i915))
gen8_initialize_pt(vm, pt);
- gen8_ppgtt_set_pde(vm, pd, pt, pde);
- GEM_BUG_ON(pd->used_pdes > I915_PDES);
+ old = cmpxchg(&pd->page_table[pde], vm->scratch_pt, pt);
+ if (old == vm->scratch_pt) {
+ gen8_ppgtt_set_pde(vm, pd, pt, pde);
+ atomic_inc(&pd->used_pdes);
+ } else {
+ free_pt(vm, pt);
+ pt = old;
+ }
+
+ spin_lock(&pd->lock);
}
- pt->used_ptes += count;
+ atomic_add(count, &pt->used_ptes);
}
+ spin_unlock(&pd->lock);
+
return 0;
unwind:
@@ -1406,35 +1433,54 @@ static int gen8_ppgtt_alloc_pdp(struct i915_address_space *vm,
unsigned int pdpe;
int ret;
+ spin_lock(&pdp->lock);
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
if (pd == vm->scratch_pd) {
- pdp->used_pdpes++;
+ struct i915_page_directory *old;
+
+ spin_unlock(&pdp->lock);
pd = alloc_pd(vm);
- if (IS_ERR(pd)) {
- pdp->used_pdpes--;
+ if (IS_ERR(pd))
goto unwind;
- }
gen8_initialize_pd(vm, pd);
- gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
- GEM_BUG_ON(pdp->used_pdpes > i915_pdpes_per_pdp(vm));
+
+ old = cmpxchg(&pdp->page_directory[pdpe],
+ vm->scratch_pd, pd);
+ if (old == vm->scratch_pd) {
+ gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+ atomic_inc(&pdp->used_pdpes);
+ } else {
+ free_pd(vm, pd);
+ pd = old;
+ }
+
+ spin_lock(&pdp->lock);
}
+ atomic_inc(&pd->used_pdes);
+ spin_unlock(&pdp->lock);
ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
if (unlikely(ret))
goto unwind_pd;
+
+ spin_lock(&pdp->lock);
+ atomic_dec(&pd->used_pdes);
}
+ spin_unlock(&pdp->lock);
return 0;
unwind_pd:
- if (!pd->used_pdes) {
+ spin_lock(&pdp->lock);
+ if (atomic_dec_and_test(&pd->used_pdes)) {
gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
- GEM_BUG_ON(!pdp->used_pdpes);
- pdp->used_pdpes--;
+ GEM_BUG_ON(!atomic_read(&pdp->used_pdpes));
+ atomic_dec(&pdp->used_pdpes);
free_pd(vm, pd);
}
+ spin_unlock(&pdp->lock);
unwind:
gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
return -ENOMEM;
@@ -1457,28 +1503,50 @@ static int gen8_ppgtt_alloc_4lvl(struct i915_address_space *vm,
u32 pml4e;
int ret;
+ spin_lock(&pml4->lock);
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- if (pml4->pdps[pml4e] == vm->scratch_pdp) {
+ if (pdp == vm->scratch_pdp) {
+ struct i915_page_directory_pointer *old;
+
+ spin_unlock(&pml4->lock);
+
pdp = alloc_pdp(vm);
if (IS_ERR(pdp))
goto unwind;
gen8_initialize_pdp(vm, pdp);
- gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+
+ old = cmpxchg(&pml4->pdps[pml4e], vm->scratch_pdp, pdp);
+ if (old == vm->scratch_pdp) {
+ gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
+ } else {
+ free_pdp(vm, pdp);
+ pdp = old;
+ }
+
+ spin_lock(&pml4->lock);
}
+ atomic_inc(&pdp->used_pdpes);
+ spin_unlock(&pml4->lock);
ret = gen8_ppgtt_alloc_pdp(vm, pdp, start, length);
if (unlikely(ret))
goto unwind_pdp;
+
+ spin_lock(&pml4->lock);
+ atomic_dec(&pdp->used_pdpes);
}
+ spin_unlock(&pml4->lock);
return 0;
unwind_pdp:
- if (!pdp->used_pdpes) {
+ spin_lock(&pml4->lock);
+ if (atomic_dec_and_test(&pdp->used_pdpes)) {
gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
free_pdp(vm, pdp);
}
+ spin_unlock(&pml4->lock);
unwind:
gen8_ppgtt_clear_4lvl(vm, from, start - from);
return -ENOMEM;
@@ -1500,10 +1568,10 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
gen8_initialize_pd(vm, pd);
gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
- pdp->used_pdpes++;
+ atomic_inc(&pdp->used_pdpes);
}
- pdp->used_pdpes++; /* never remove */
+ atomic_inc(&pdp->used_pdpes); /* never remove */
return 0;
unwind:
@@ -1512,7 +1580,7 @@ static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
free_pd(vm, pd);
}
- pdp->used_pdpes = 0;
+ atomic_set(&pdp->used_pdpes, 0);
return -ENOMEM;
}
@@ -1684,9 +1752,7 @@ static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
num_entries -= count;
- GEM_BUG_ON(count > pt->used_ptes);
- pt->used_ptes -= count;
- if (!pt->used_ptes)
+ if (!atomic_sub_return(count, &pt->used_ptes))
ppgtt->scan_for_unused_pt = true;
/*
@@ -1756,28 +1822,41 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
wakeref = intel_runtime_pm_get(vm->i915);
+ spin_lock(&ppgtt->base.pd.lock);
gen6_for_each_pde(pt, &ppgtt->base.pd, start, length, pde) {
const unsigned int count = gen6_pte_count(start, length);
if (pt == vm->scratch_pt) {
+ struct i915_page_table *old;
+
+ spin_unlock(&ppgtt->base.pd.lock);
+
pt = alloc_pt(vm);
if (IS_ERR(pt))
goto unwind_out;
gen6_initialize_pt(vm, pt);
- ppgtt->base.pd.page_table[pde] = pt;
- if (i915_vma_is_bound(ppgtt->vma,
- I915_VMA_GLOBAL_BIND)) {
- gen6_write_pde(ppgtt, pde, pt);
- flush = true;
+ old = cmpxchg(&ppgtt->base.pd.page_table[pde],
+ vm->scratch_pt, pt);
+ if (old == vm->scratch_pt) {
+ ppgtt->base.pd.page_table[pde] = pt;
+ if (i915_vma_is_bound(ppgtt->vma,
+ I915_VMA_GLOBAL_BIND)) {
+ gen6_write_pde(ppgtt, pde, pt);
+ flush = true;
+ }
+ } else {
+ free_pt(vm, pt);
+ pt = old;
}
- GEM_BUG_ON(pt->used_ptes);
+ spin_lock(&ppgtt->base.pd.lock);
}
- pt->used_ptes += count;
+ atomic_add(count, &pt->used_ptes);
}
+ spin_unlock(&ppgtt->base.pd.lock);
if (flush) {
mark_tlbs_dirty(&ppgtt->base);
@@ -1818,6 +1897,7 @@ static int gen6_ppgtt_init_scratch(struct gen6_hw_ppgtt *ppgtt)
gen6_initialize_pt(vm, vm->scratch_pt);
gen6_for_all_pdes(unused, &ppgtt->base.pd, pde)
ppgtt->base.pd.page_table[pde] = vm->scratch_pt;
+ spin_lock_init(&ppgtt->base.pd.lock);
return 0;
}
@@ -1946,7 +2026,7 @@ static void pd_vma_unbind(struct i915_vma *vma)
/* Free all no longer used page tables */
gen6_for_all_pdes(pt, &ppgtt->base.pd, pde) {
- if (pt->used_ptes || pt == scratch_pt)
+ if (atomic_read(&pt->used_ptes) || pt == scratch_pt)
continue;
free_pt(&ppgtt->base.vm, pt);
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 73b6608740f2..152a03560c22 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -248,25 +248,28 @@ struct i915_page_dma {
struct i915_page_table {
struct i915_page_dma base;
- unsigned int used_ptes;
+ atomic_t used_ptes;
};
struct i915_page_directory {
struct i915_page_dma base;
struct i915_page_table *page_table[I915_PDES]; /* PDEs */
- unsigned int used_pdes;
+ atomic_t used_pdes;
+ spinlock_t lock;
};
struct i915_page_directory_pointer {
struct i915_page_dma base;
struct i915_page_directory **page_directory;
- unsigned int used_pdpes;
+ atomic_t used_pdpes;
+ spinlock_t lock;
};
struct i915_pml4 {
struct i915_page_dma base;
struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
+ spinlock_t lock;
};
struct i915_vma_ops {
--
2.20.1