[Intel-gfx] [PATCH 7/8] drm/i915/gtt: Purge page tracking bitmaps
Michał Winiarski
michal.winiarski at intel.com
Mon Dec 12 11:48:44 UTC 2016
All of the page tracking structures contain a bitmap holding currently
used entries.
It's redundant, since we can just inspect the pointer.
The only actual place where we're taking advantage of bitmaps is during
ppgtt cleanup - since we're able to reduce the number of iterations.
Still, we can improve that in the following commits, and iterate over
the full range here.
Let's replace the bitmaps with a simple counter.
Cc: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Michel Thierry <michel.thierry at intel.com>
Cc: Mika Kuoppala <mika.kuoppala at intel.com>
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 138 +++++++++++++-----------------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 10 +--
2 files changed, 54 insertions(+), 94 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index a7d6f78..50d4861 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -426,38 +426,26 @@ static void cleanup_scratch_page(struct drm_i915_private *dev_priv,
static struct i915_page_table *alloc_pt(struct drm_i915_private *dev_priv)
{
struct i915_page_table *pt;
- const size_t count = INTEL_GEN(dev_priv) >= 8 ? GEN8_PTES : GEN6_PTES;
int ret = -ENOMEM;
pt = kzalloc(sizeof(*pt), GFP_KERNEL);
if (!pt)
return ERR_PTR(-ENOMEM);
- pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
- GFP_KERNEL);
-
- if (!pt->used_ptes)
- goto fail_bitmap;
-
ret = setup_px(dev_priv, pt);
- if (ret)
- goto fail_page_m;
+ if (ret) {
+ kfree(pt);
+ return ERR_PTR(ret);
+ }
return pt;
-fail_page_m:
- kfree(pt->used_ptes);
-fail_bitmap:
- kfree(pt);
-
- return ERR_PTR(ret);
}
static void free_pt(struct drm_i915_private *dev_priv,
struct i915_page_table *pt)
{
cleanup_px(dev_priv, pt);
- kfree(pt->used_ptes);
kfree(pt);
}
@@ -494,23 +482,13 @@ static struct i915_page_directory *alloc_pd(struct drm_i915_private *dev_priv)
if (!pd)
return ERR_PTR(-ENOMEM);
- pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
- sizeof(*pd->used_pdes), GFP_KERNEL);
- if (!pd->used_pdes)
- goto fail_bitmap;
-
ret = setup_px(dev_priv, pd);
- if (ret)
- goto fail_page_m;
+ if (ret) {
+ kfree(pd);
+ return ERR_PTR(ret);
+ }
return pd;
-
-fail_page_m:
- kfree(pd->used_pdes);
-fail_bitmap:
- kfree(pd);
-
- return ERR_PTR(ret);
}
static void free_pd(struct drm_i915_private *dev_priv,
@@ -518,7 +496,6 @@ static void free_pd(struct drm_i915_private *dev_priv,
{
if (px_page(pd)) {
cleanup_px(dev_priv, pd);
- kfree(pd->used_pdes);
kfree(pd);
}
}
@@ -538,28 +515,16 @@ static int __pdp_init(struct drm_i915_private *dev_priv,
{
size_t pdpes = I915_PDPES_PER_PDP(dev_priv);
- pdp->used_pdpes = kcalloc(BITS_TO_LONGS(pdpes),
- sizeof(unsigned long),
- GFP_KERNEL);
- if (!pdp->used_pdpes)
- return -ENOMEM;
-
pdp->page_directory = kcalloc(pdpes, sizeof(*pdp->page_directory),
GFP_KERNEL);
- if (!pdp->page_directory) {
- kfree(pdp->used_pdpes);
- /* the PDP might be the statically allocated top level. Keep it
- * as clean as possible */
- pdp->used_pdpes = NULL;
+ if (!pdp->page_directory)
return -ENOMEM;
- }
return 0;
}
static void __pdp_fini(struct i915_page_directory_pointer *pdp)
{
- kfree(pdp->used_pdpes);
kfree(pdp->page_directory);
pdp->page_directory = NULL;
}
@@ -733,9 +698,9 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
GEM_BUG_ON(pte_end > GEN8_PTES);
- bitmap_clear(pt->used_ptes, pte, num_entries);
+ pt->num_ptes -= num_entries;
- if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+ if (pt->num_ptes == 0)
return true;
pt_vaddr = kmap_px(pt);
@@ -768,15 +733,16 @@ static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
break;
if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
- __clear_bit(pde, pd->used_pdes);
+ pd->num_pdes--;
pde_vaddr = kmap_px(pd);
pde_vaddr[pde] = scratch_pde;
kunmap_px(ppgtt, pde_vaddr);
free_pt(vm->i915, pt);
+ pd->page_table[pde] = NULL;
}
}
- if (bitmap_empty(pd->used_pdes, I915_PDES))
+ if (pd->num_pdes == 0)
return true;
return false;
@@ -802,19 +768,20 @@ static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
break;
if (gen8_ppgtt_clear_pd(vm, pd, start, length)) {
- __clear_bit(pdpe, pdp->used_pdpes);
+ pdp->num_pdpes--;
if (USES_FULL_48BIT_PPGTT(dev_priv)) {
pdpe_vaddr = kmap_px(pdp);
pdpe_vaddr[pdpe] = scratch_pdpe;
kunmap_px(ppgtt, pdpe_vaddr);
}
free_pd(vm->i915, pd);
+ pdp->page_directory[pdpe] = NULL;
}
}
mark_tlbs_dirty(ppgtt);
- if (bitmap_empty(pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)))
+ if (pdp->num_pdpes == 0)
return true;
return false;
@@ -843,11 +810,12 @@ static void gen8_ppgtt_clear_pml4(struct i915_address_space *vm,
break;
if (gen8_ppgtt_clear_pdp(vm, pdp, start, length)) {
- __clear_bit(pml4e, pml4->used_pml4es);
+ pml4->num_pml4es--;
pml4e_vaddr = kmap_px(pml4);
pml4e_vaddr[pml4e] = scratch_pml4e;
kunmap_px(ppgtt, pml4e_vaddr);
free_pdp(vm->i915, pdp);
+ pml4->pdps[pml4e] = NULL;
}
}
}
@@ -938,12 +906,11 @@ static void gen8_free_page_tables(struct drm_i915_private *dev_priv,
if (!px_page(pd))
return;
- for_each_set_bit(i, pd->used_pdes, I915_PDES) {
- if (WARN_ON(!pd->page_table[i]))
- continue;
-
- free_pt(dev_priv, pd->page_table[i]);
- pd->page_table[i] = NULL;
+ for (i = 0; i < I915_PDES; i++) {
+ if (pd->page_table[i]) {
+ free_pt(dev_priv, pd->page_table[i]);
+ pd->page_table[i] = NULL;
+ }
}
}
@@ -1040,12 +1007,11 @@ static void gen8_ppgtt_cleanup_3lvl(struct drm_i915_private *dev_priv,
{
int i;
- for_each_set_bit(i, pdp->used_pdpes, I915_PDPES_PER_PDP(dev_priv)) {
- if (WARN_ON(!pdp->page_directory[i]))
- continue;
-
- gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
- free_pd(dev_priv, pdp->page_directory[i]);
+ for (i = 0; i < I915_PDPES_PER_PDP(dev_priv); i++) {
+ if (pdp->page_directory[i]) {
+ gen8_free_page_tables(dev_priv, pdp->page_directory[i]);
+ pdp->page_directory[i] = NULL;
+ }
}
free_pdp(dev_priv, pdp);
@@ -1056,11 +1022,11 @@ static void gen8_ppgtt_cleanup_4lvl(struct drm_i915_private *dev_priv,
{
int i;
- for_each_set_bit(i, pml4->used_pml4es, GEN8_PML4ES_PER_PML4) {
- if (WARN_ON(!pml4->pdps[i]))
- continue;
-
- gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]);
+ for (i = 0; i < GEN8_PML4ES_PER_PML4; i++) {
+ if (pml4->pdps[i]) {
+ gen8_ppgtt_cleanup_3lvl(dev_priv, pml4->pdps[i]);
+ pml4->pdps[i] = NULL;
+ }
}
cleanup_px(dev_priv, pml4);
@@ -1109,7 +1075,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
const uint64_t start_save = start;
gen8_for_each_pde(pt, pd, start, length, pde) {
- if (test_bit(pde, pd->used_pdes))
+ if (pd->page_table[pde])
continue;
pt = alloc_pt(dev_priv);
@@ -1121,7 +1087,7 @@ static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
gen8_initialize_pt(vm, pt);
pd->page_table[pde] = pt;
- __set_bit(pde, pd->used_pdes);
+ pd->num_pdes++;
trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
}
@@ -1160,7 +1126,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
const uint64_t start_save = start;
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- if (test_bit(pdpe, pdp->used_pdpes))
+ if (pdp->page_directory[pdpe])
continue;
pd = alloc_pd(dev_priv);
@@ -1172,7 +1138,7 @@ gen8_ppgtt_alloc_page_directories(struct i915_address_space *vm,
gen8_initialize_pd(vm, pd);
pdp->page_directory[pdpe] = pd;
- __set_bit(pdpe, pdp->used_pdpes);
+ pdp->num_pdpes++;
trace_i915_page_directory_entry_alloc(vm, pdpe, start, GEN8_PDPE_SHIFT);
}
@@ -1205,7 +1171,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
const uint64_t start_save = start;
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- if (test_bit(pml4e, pml4->used_pml4es))
+ if (pml4->pdps[pml4e])
continue;
pdp = alloc_pdp(dev_priv);
@@ -1217,7 +1183,7 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
gen8_initialize_pdp(vm, pdp);
pml4->pdps[pml4e] = pdp;
- __set_bit(pml4e, pml4->used_pml4es);
+ pml4->num_pml4es++;
trace_i915_page_directory_pointer_entry_alloc(vm,
pml4e,
start,
@@ -1286,9 +1252,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
WARN_ON(!gen8_pte_count(pd_start, pd_len));
/* Set our used ptes within the page table */
- bitmap_set(pt->used_ptes,
- gen8_pte_index(pd_start),
- gen8_pte_count(pd_start, pd_len));
+ pt->num_ptes += gen8_pte_count(pd_start, pd_len);
/* Map the PDE to the page table */
page_directory[pde] = gen8_pde_encode(px_dma(pt),
@@ -1296,7 +1260,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
gen8_pte_index(start),
gen8_pte_count(start, length),
- bitmap_weight(pt->used_ptes, GEN8_PTES));
+ pt->num_ptes);
/* NB: We haven't yet mapped ptes to pages. At this
* point we're still relying on insert_entries() */
@@ -1373,7 +1337,7 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
uint64_t pd_start = start;
uint32_t pde;
- if (!test_bit(pdpe, pdp->used_pdpes))
+ if (!pdp->page_directory[pdpe])
continue;
seq_printf(m, "\tPDPE #%d\n", pdpe);
@@ -1381,7 +1345,7 @@ static void gen8_dump_pdp(struct i915_page_directory_pointer *pdp,
uint32_t pte;
gen8_pte_t *pt_vaddr;
- if (!test_bit(pde, pd->used_pdes))
+ if (!pd->page_table[pde])
continue;
pt_vaddr = kmap_px(pt);
@@ -1432,7 +1396,7 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
struct i915_page_directory_pointer *pdp;
gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
- if (!test_bit(pml4e, pml4->used_pml4es))
+ if (!pml4->pdps[pml4e])
continue;
seq_printf(m, " PML4E #%llu\n", pml4e);
@@ -1452,9 +1416,6 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
*/
ret = gen8_ppgtt_alloc_page_directories(&ppgtt->base, &ppgtt->pdp,
start, length);
- if (!ret)
- bitmap_set(ppgtt->pdp.used_pdpes, gen8_pdpe_index(start),
- i915_pte_count(start, length, GEN8_PDPE_SHIFT));
return ret;
}
@@ -1848,12 +1809,12 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
*/
gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
if (pt != vm->scratch_pt) {
- WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
+ WARN_ON(pt->num_ptes == 0);
continue;
}
/* We've already allocated a page table */
- WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
+ WARN_ON(pt->num_ptes != 0);
pt = alloc_pt(dev_priv);
if (IS_ERR(pt)) {
@@ -1866,8 +1827,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
gen6_initialize_pt(vm, pt);
- bitmap_set(pt->used_ptes, gen6_pte_index(start),
- gen6_pte_count(start, length));
+ pt->num_ptes += gen6_pte_count(start, length);
ppgtt->pd.page_table[pde] = pt;
gen6_write_pde(&ppgtt->pd, pde, pt);
@@ -1875,7 +1835,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
trace_i915_page_table_entry_map(vm, pde, pt,
gen6_pte_index(start),
gen6_pte_count(start, length),
- bitmap_weight(pt->used_ptes, GEN6_PTES));
+ pt->num_ptes);
}
/* Make sure write is complete before other code can use this page
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 8965bbb..1fc65ac 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -193,27 +193,27 @@ struct i915_page_dma {
struct i915_page_table {
struct i915_page_dma base;
- unsigned long *used_ptes;
+ unsigned int num_ptes;
};
struct i915_page_directory {
struct i915_page_dma base;
- unsigned long *used_pdes;
+ unsigned int num_pdes;
struct i915_page_table *page_table[I915_PDES]; /* PDEs */
};
struct i915_page_directory_pointer {
struct i915_page_dma base;
- unsigned long *used_pdpes;
+ unsigned int num_pdpes;
struct i915_page_directory **page_directory;
};
struct i915_pml4 {
struct i915_page_dma base;
- DECLARE_BITMAP(used_pml4es, GEN8_PML4ES_PER_PML4);
+ unsigned int num_pml4es;
struct i915_page_directory_pointer *pdps[GEN8_PML4ES_PER_PML4];
};
@@ -477,7 +477,7 @@ static inline size_t gen8_pte_count(uint64_t address, uint64_t length)
static inline dma_addr_t
i915_page_dir_dma_addr(const struct i915_hw_ppgtt *ppgtt, const unsigned n)
{
- return test_bit(n, ppgtt->pdp.used_pdpes) ?
+ return ppgtt->pdp.page_directory[n] ?
px_dma(ppgtt->pdp.page_directory[n]) :
px_dma(ppgtt->base.scratch_pd);
}
--
2.7.4
More information about the Intel-gfx
mailing list