[Intel-gfx] [PATCH v2 13/22] drm/i915: Remove bitmap tracking for used-pdes
Chris Wilson
chris at chris-wilson.co.uk
Fri Feb 10 19:38:36 UTC 2017
We only operate on known extents (for both alloc and clear), so we can
combine our knowledge of the bind/unbind range with our knowledge of the
existing page table to avoid having to allocate temporary and auxiliary
bitmaps.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 246 ++++++++++++------------------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 6 +-
2 files changed, 84 insertions(+), 168 deletions(-)
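
[Not part of the patch: a minimal userspace sketch of the bookkeeping change,
using invented toy_* names and a fixed 512-entry directory, to illustrate how a
plain counter plus a scratch-entry comparison can replace the used_pdes bitmap.
All identifiers below are hypothetical and only approximate the driver's logic.]

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

#define N_PDES 512

struct toy_pt { int dummy; };

struct toy_pd {
	struct toy_pt *page_table[N_PDES];
	unsigned int used_pdes;		/* counter replaces the used_pdes bitmap */
};

static struct toy_pt scratch_pt;	/* stands in for vm->scratch_pt */

static void toy_pd_init(struct toy_pd *pd)
{
	/* An empty entry points at the scratch page table, never NULL. */
	for (int i = 0; i < N_PDES; i++)
		pd->page_table[i] = &scratch_pt;
	pd->used_pdes = 0;
}

/* Allocate a page table for @pde only if it still points at scratch. */
static int toy_pd_alloc(struct toy_pd *pd, unsigned int pde)
{
	if (pd->page_table[pde] != &scratch_pt)
		return 0;	/* already populated, nothing to do */

	pd->page_table[pde] = calloc(1, sizeof(struct toy_pt));
	if (!pd->page_table[pde]) {
		pd->page_table[pde] = &scratch_pt;
		return -1;
	}
	pd->used_pdes++;
	return 0;
}

/* Clear @pde; returns true once the directory is empty. */
static bool toy_pd_clear(struct toy_pd *pd, unsigned int pde)
{
	if (pd->page_table[pde] != &scratch_pt) {
		free(pd->page_table[pde]);
		pd->page_table[pde] = &scratch_pt;
		pd->used_pdes--;
	}
	return pd->used_pdes == 0;	/* no bitmap_empty() scan needed */
}

int main(void)
{
	struct toy_pd pd;

	toy_pd_init(&pd);
	toy_pd_alloc(&pd, 3);
	toy_pd_alloc(&pd, 3);	/* idempotent: the scratch check skips realloc */
	printf("used=%u empty=%d\n", pd.used_pdes, toy_pd_clear(&pd, 3));
	return 0;
}
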
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index afacf722b8b1..725709ff7f8b 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -494,39 +494,25 @@ static void gen6_initialize_pt(struct i915_address_space *vm,
static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
struct i915_page_directory *pd;
- int ret = -ENOMEM;
- pd = kzalloc(sizeof(*pd), GFP_KERNEL);
- if (!pd)
+ pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
+ if (unlikely(!pd))
return ERR_PTR(-ENOMEM);
- pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
- sizeof(*pd->used_pdes), GFP_KERNEL);
- if (!pd->used_pdes)
- goto fail_bitmap;
-
- ret = setup_px(vm, pd);
- if (ret)
- goto fail_page_m;
+ if (unlikely(setup_px(vm, pd))) {
+ kfree(pd);
+ return ERR_PTR(-ENOMEM);
+ }
+ pd->used_pdes = 0;
return pd;
-
-fail_page_m:
- kfree(pd->used_pdes);
-fail_bitmap:
- kfree(pd);
-
- return ERR_PTR(ret);
}
static void free_pd(struct i915_address_space *vm,
struct i915_page_directory *pd)
{
- if (px_page(pd)) {
- cleanup_px(vm, pd);
- kfree(pd->used_pdes);
- kfree(pd);
- }
+ cleanup_px(vm, pd);
+ kfree(pd);
}
static void gen8_initialize_pd(struct i915_address_space *vm,
@@ -540,10 +526,11 @@ static void gen8_initialize_pd(struct i915_address_space *vm,
pd->page_table[i] = vm->scratch_pt;
}
-static int __pdp_init(struct drm_i915_private *dev_priv,
+static int __pdp_init(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp)
{
- size_t pdpes = I915_PDPES_PER_PDP(dev_priv);
+ size_t pdpes = I915_PDPES_PER_PDP(vm->i915);
+ int i;
pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
sizeof(unsigned long),
@@ -551,8 +538,8 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
if (!pdp->used_pdpes)
return -ENOMEM;
- pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
- GFP_KERNEL);
+ pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
+ GFP_KERNEL);
if (!pdp->page_directory) {
kfree(pdp->used_pdpes);
/* the PDP might be the statically allocated top level. Keep it
@@ -561,6 +548,9 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
return -ENOMEM;
}
+ for (i = 0; i < pdpes; i++)
+ pdp->page_directory[i] = vm->scratch_pd;
+
return 0;
}
@@ -583,7 +573,7 @@ alloc_pdp(struct i915_address_space *vm)
if (!pdp)
return ERR_PTR(-ENOMEM);
- ret = __pdp_init(vm->i915, pdp);
+ ret = __pdp_init(vm, pdp);
if (ret)
goto fail_bitmap;
@@ -633,25 +623,9 @@ static void gen8_initialize_pml4(struct i915_address_space *vm,
}
static void
-gen8_setup_pdpe(struct i915_hw_ppgtt *ppgtt,
- struct i915_page_directory_pointer *pdp,
- struct i915_page_directory *pd,
- int index)
-{
- gen8_ppgtt_pdpe_t *page_directorypo;
-
- if (!USES_FULL_48BIT_PPGTT(to_i915(ppgtt->base.dev)))
- return;
-
- page_directorypo = kmap_atomic_px(pdp);
- page_directorypo[index] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
- kunmap_atomic(page_directorypo);
-}
-
-static void
-gen8_setup_pml4e(struct i915_pml4 *pml4,
- struct i915_page_directory_pointer *pdp,
- int index)
+gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
+ struct i915_page_directory_pointer *pdp,
+ int index)
{
gen8_ppgtt_pml4e_t *pagemap = kmap_atomic_px(pml4);
@@ -759,9 +733,6 @@ static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
kunmap_atomic(vaddr);
}
-/* Removes entries from a single page dir, releasing it if it's empty.
- * Caller can use the return value to update higher-level entries
- */
static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
u64 start, u64 length)
@@ -774,15 +745,28 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
continue;
gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
- __clear_bit(pde, pd->used_pdes);
+ pd->used_pdes--;
free_pt(vm, pt);
}
- if (bitmap_empty(pd->used_pdes, I915_PDES))
- return true;
+ return !pd->used_pdes;
+}
- return false;
+static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
+ struct i915_page_directory_pointer *pdp,
+ struct i915_page_directory *pd,
+ unsigned int pdpe)
+{
+ gen8_ppgtt_pdpe_t *vaddr;
+
+ pdp->page_directory[pdpe] = pd;
+ if (!USES_FULL_48BIT_PPGTT(vm->i915))
+ return;
+
+ vaddr = kmap_atomic_px(pdp);
+ vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
+ kunmap_atomic(vaddr);
}
/* Removes entries from a single page dir pointer, releasing it if it's empty.
@@ -790,25 +774,20 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
*/
static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
struct i915_page_directory_pointer *pdp,
- uint64_t start,
- uint64_t length)
+ u64 start, u64 length)
{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
struct i915_page_directory *pd;
- uint64_t pdpe;
+ unsigned int pdpe;
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- if (WARN_ON(!pdp->page_directory[pdpe]))
- break;
+ if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
+ continue;
- if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
- __clear_bit(pdpe, pdp->used_pdpes);
- gen8_setup_pdpe(ppgtt, pdp, vm->scratch_pd, pdpe);
- free_pd(vm, pd);
- }
- }
+ gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
+ __clear_bit(pdpe, pdp->used_pdpes);
- mark_tlbs_dirty(ppgtt);
+ free_pd(vm, pd);
+ }
if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
return true;
@@ -816,15 +795,21 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
return false;
}
+static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
+ u64 start, u64 length)
+{
+ gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
+}
+
/* Removes entries from a single pml4.
* This is the top-level structure in 4-level page tables used on gen8+.
* Empty entries are always scratch pml4e.
*/
-static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
- struct i915_pml4 *pml4,
- uint64_t start,
- uint64_t length)
+static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
+ u64 start, u64 length)
{
+ struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
+ struct i915_pml4 *pml4 = &ppgtt->pml4;
struct i915_page_directory_pointer *pdp;
uint64_t pml4e;
@@ -836,23 +821,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
__clear_bit(pml4e, pml4->used_pml4es);
- gen8_setup_pml4e(pml4, vm->scratch_pdp, pml4e);
+ gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
free_pdp(vm, pdp);
}
}
}
-static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
- uint64_t start, uint64_t length)
-{
- struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
-
- if (USES_FULL_48BIT_PPGTT(vm->i915))
- gen8_ppgtt_clear_pml4(vm, &ppgtt->pml4, start, length);
- else
- gen8_ppgtt_clear_pdp(vm, &ppgtt->pdp, start, length);
-}
-
struct sgt_dma {
struct scatterlist *sg;
dma_addr_t dma, max;
@@ -950,12 +924,9 @@ static void gen8_free_page_tables(struct i915_address_space *vm,
if (!px_page(pd))
return;
- for_each_set_bit(i, pd->used_pdes, I915_PDES) {
- if (WARN_ON(!pd->page_table[i]))
- continue;
-
- free_pt(vm, pd->page_table[i]);
- pd->page_table[i] = NULL;
+ for (i = 0; i < I915_PDES; i++) {
+ if (pd->page_table[i] != vm->scratch_pt)
+ free_pt(vm, pd->page_table[i]);
}
}
@@ -1050,7 +1021,7 @@ static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
int i;
for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(vm->i915)) {
- if (WARN_ON(!pdp->page_directory[i]))
+ if (pdp->page_directory[i] == vm->scratch_pd)
continue;
gen8_free_page_tables(vm, pdp->page_directory[i]);
@@ -1090,44 +1061,28 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
gen8_free_scratch(vm);
}
-/**
- * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
- * @vm: Master vm structure.
- * @pd: Page directory for this address range.
- * @start: Starting virtual address to begin allocations.
- * @length: Size of the allocations.
- *
- * Allocate the required number of page tables. Extremely similar to
- * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
- * the page directory boundary (instead of the page directory pointer). That
- * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
- * possible, and likely that the caller will need to use multiple calls of this
- * function to achieve the appropriate allocation.
- *
- * Return: 0 if success; negative error code otherwise.
- */
-static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
- struct i915_page_directory *pd,
- u64 start, u64 length)
+static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ u64 start, u64 length)
{
struct i915_page_table *pt;
u64 from = start;
- uint32_t pde;
+ unsigned int pde;
gen8_for_each_pde(pt, pd, start, length, pde) {
- /* Don't reallocate page tables */
- if (!test_bit(pde, pd->used_pdes)) {
+ if (pt == vm->scratch_pt) {
pt = alloc_pt(vm);
if (IS_ERR(pt))
goto unwind;
gen8_initialize_pt(vm, pt);
- pd->page_table[pde] = pt;
+
+ gen8_ppgtt_set_pde(vm, pd, pt, pde);
+ pd->used_pdes++;
}
+
pt->used_ptes += gen8_pte_count(start, length);
- trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
}
-
return 0;
unwind:
@@ -1204,7 +1159,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
* caller to free on error.
*
* Allocate the required number of page directory pointers. Extremely similar to
- * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pagetabs().
+ * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd().
* The main difference is here we are limited by the pml4 boundary (instead of
* the page directory pointer).
*
@@ -1256,9 +1211,8 @@ free_gen8_temp_bitmaps(unsigned long *new_pds)
/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
* of these are based on the number of PDPEs in the system.
*/
-static
-int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
- uint32_t pdpes)
+static int __must_check
+alloc_gen8_temp_bitmaps(unsigned long **new_pds, uint32_t pdpes)
{
unsigned long *pds;
@@ -1278,8 +1232,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
unsigned long *new_page_dirs;
struct i915_page_directory *pd;
- const uint64_t orig_start = start;
- const uint64_t orig_length = length;
uint32_t pdpe;
uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
int ret;
@@ -1298,51 +1250,16 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
/* For every page directory referenced, allocate page tables */
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
+ ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
if (ret)
goto err_out;
- }
- start = orig_start;
- length = orig_length;
+ if (test_and_set_bit(pdpe, pdp->used_pdpes))
+ gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
+ }
/* Allocations have completed successfully, so set the bitmaps, and do
* the mappings. */
- gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- gen8_pde_t *const page_directory = kmap_atomic_px(pd);
- struct i915_page_table *pt;
- uint64_t pd_len = length;
- uint64_t pd_start = start;
- uint32_t pde;
-
- /* Every pd should be allocated, we just did that above. */
- WARN_ON(!pd);
-
- gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
- /* Same reasoning as pd */
- WARN_ON(!pt);
- WARN_ON(!pd_len);
- WARN_ON(!gen8_pte_count(pd_start, pd_len));
-
- /* Our pde is now pointing to the pagetable, pt */
- __set_bit(pde, pd->used_pdes);
-
- /* Map the PDE to the page table */
- page_directory[pde] = gen8_pde_encode(px_dma(pt),
- I915_CACHE_LLC);
- trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
- gen8_pte_index(start),
- gen8_pte_count(start, length));
-
- /* NB: We haven't yet mapped ptes to pages. At this
- * point we're still relying on insert_entries() */
- }
-
- kunmap_atomic(page_directory);
- __set_bit(pdpe, pdp->used_pdpes);
- gen8_setup_pdpe(ppgtt, pdp, pd, pdpe);
- }
-
free_gen8_temp_bitmaps(new_page_dirs);
mark_tlbs_dirty(ppgtt);
return 0;
@@ -1385,7 +1302,7 @@ static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
if (ret)
goto err_out;
- gen8_setup_pml4e(pml4, pdp, pml4e);
+ gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
}
bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
@@ -1434,7 +1351,7 @@ static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
uint32_t pte;
gen8_pte_t *pt_vaddr;
- if (!test_bit(pde, pd->used_pdes))
+ if (pd->page_table[pde] == ppgtt->base.scratch_pt)
continue;
pt_vaddr = kmap_atomic_px(pt);
@@ -1537,7 +1454,6 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->base.start = 0;
ppgtt->base.cleanup = gen8_ppgtt_cleanup;
ppgtt->base.allocate_va_range = gen8_alloc_va_range;
- ppgtt->base.clear_range = gen8_ppgtt_clear_range;
ppgtt->base.unbind_vma = ppgtt_unbind_vma;
ppgtt->base.bind_vma = ppgtt_bind_vma;
ppgtt->debug_dump = gen8_dump_ppgtt;
@@ -1559,8 +1475,9 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
ppgtt->switch_mm = gen8_48b_mm_switch;
ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
+ ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
} else {
- ret = __pdp_init(dev_priv, &ppgtt->pdp);
+ ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
if (ret)
goto free_scratch;
@@ -1577,6 +1494,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
}
ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
+ ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
}
if (intel_vgpu_active(dev_priv))
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 5ad5b59a01b1..a62b0ef1f3fc 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -226,8 +226,8 @@ struct i915_page_table {
struct i915_page_directory {
struct i915_page_dma base;
- unsigned long *used_pdes;
struct i915_page_table *page_table[I915_PDES]; /* PDEs */
+ unsigned int used_pdes;
};
struct i915_page_directory_pointer {
@@ -520,9 +520,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
{
- return test_bit(n, ppgtt->pdp.used_pdpes) ?
- px_dma(ppgtt->pdp.page_directory[n]) :
- px_dma(ppgtt->base.scratch_pd);
+ return px_dma(ppgtt->pdp.page_directory[n]);
}
static inline struct i915_ggtt *
--
2.11.0