[PATCH 73/79] no-used-ptes: replace the per-page-table used_ptes bitmap with a plain counter
Chris Wilson
chris at chris-wilson.co.uk
Sun Jan 1 20:38:04 UTC 2017
---
drivers/gpu/drm/i915/i915_gem_gtt.c | 183 +++++++++++++-----------------------
drivers/gpu/drm/i915/i915_gem_gtt.h | 3 +-
drivers/gpu/drm/i915/i915_trace.h | 19 ++--
3 files changed, 70 insertions(+), 135 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index baa1f56fbeda..ec0afc8c9f13 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -429,37 +429,23 @@ static void cleanup_scratch_page(struct i915_address_space *vm)
static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
{
struct i915_page_table *pt;
- const size_t count = INTEL_GEN(vm->i915) >= 8 ? GEN8_PTES : GEN6_PTES;
- int ret = -ENOMEM;
- pt = kzalloc(sizeof(*pt), GFP_KERNEL);
- if (!pt)
+ pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
+ if (unlikely(!pt))
return ERR_PTR(-ENOMEM);
- pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
- GFP_KERNEL);
-
- if (!pt->used_ptes)
- goto fail_bitmap;
-
- ret = setup_px(vm, pt);
- if (ret)
- goto fail_page_m;
+ if (unlikely(setup_px(vm, pt))) {
+ kfree(pt);
+ return ERR_PTR(-ENOMEM);
+ }
+ pt->used_ptes = 0;
return pt;
-
-fail_page_m:
- kfree(pt->used_ptes);
-fail_bitmap:
- kfree(pt);
-
- return ERR_PTR(ret);
}
static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
{
cleanup_px(vm, pt);
- kfree(pt->used_ptes);
kfree(pt);
}
@@ -528,11 +514,12 @@ static void free_pd(struct i915_address_space *vm,
static void gen8_initialize_pd(struct i915_address_space *vm,
struct i915_page_directory *pd)
{
- gen8_pde_t scratch_pde;
-
- scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);
+ unsigned int i;
- fill_px(vm, pd, scratch_pde);
+ fill_px(vm, pd,
+ gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
+ for (i = 0; i < I915_PDES; i++)
+ pd->page_table[i] = vm->scratch_pt;
}
static int __pdp_init(struct drm_i915_private *dev_priv,
@@ -719,8 +706,7 @@ static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
*/
static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
struct i915_page_table *pt,
- uint64_t start,
- uint64_t length)
+ u64 start, u64 length)
{
unsigned int num_entries = gen8_pte_count(start, length);
unsigned int pte = gen8_pte_index(start);
@@ -729,14 +715,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
gen8_pte_t *vaddr;
- if (WARN_ON(!px_page(pt)))
- return false;
-
- GEM_BUG_ON(pte_end > GEN8_PTES);
-
- bitmap_clear(pt->used_ptes, pte, num_entries);
+ GEM_BUG_ON(num_entries > pt->used_ptes);
- if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+ pt->used_ptes -= num_entries;
+ if (!pt->used_ptes)
return true;
vaddr = kmap_atomic_px(pt);
@@ -747,31 +729,38 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
return false;
}
+static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
+ struct i915_page_directory *pd,
+ struct i915_page_table *pt,
+ unsigned int pde)
+{
+ gen8_pde_t *vaddr;
+
+ pd->page_table[pde] = pt;
+
+ vaddr = kmap_atomic_px(pd);
+ vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
+ kunmap_atomic(vaddr);
+}
+
/* Removes entries from a single page dir, releasing it if it's empty.
* Caller can use the return value to update higher-level entries
*/
static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
struct i915_page_directory *pd,
- uint64_t start,
- uint64_t length)
+ u64 start, u64 length)
{
struct i915_page_table *pt;
- uint64_t pde;
- gen8_pde_t *pde_vaddr;
- gen8_pde_t scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt),
- I915_CACHE_LLC);
+ u32 pde;
gen8_for_each_pde(pt, pd, start, length, pde) {
- if (WARN_ON(!pd->page_table[pde]))
- break;
+ if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
+ continue;
- if (gen8_ppgtt_clear_pt(vm, pt, start, length)) {
- __clear_bit(pde, pd->used_pdes);
- pde_vaddr = kmap_atomic_px(pd);
- pde_vaddr[pde] = scratch_pde;
- kunmap_atomic(pde_vaddr);
- free_pt(vm, pt);
- }
+ gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
+ __clear_bit(pde, pd->used_pdes);
+
+ free_pt(vm, pt);
}
if (bitmap_empty(pd->used_pdes, I915_PDES))
@@ -1103,8 +1092,6 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
* @pd: Page directory for this address range.
* @start: Starting virtual address to begin allocations.
* @length: Size of the allocations.
- * @new_pts: Bitmap set by function with new allocations. Likely used by the
- * caller to free on error.
*
* Allocate the required number of page tables. Extremely similar to
* gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
@@ -1117,37 +1104,30 @@ static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
*/
static int gen8_ppgtt_alloc_pagetabs(struct i915_address_space *vm,
struct i915_page_directory *pd,
- uint64_t start,
- uint64_t length,
- unsigned long *new_pts)
+ u64 start, u64 length)
{
struct i915_page_table *pt;
+ u64 from = start;
uint32_t pde;
gen8_for_each_pde(pt, pd, start, length, pde) {
/* Don't reallocate page tables */
- if (test_bit(pde, pd->used_pdes)) {
- /* Scratch is never allocated this way */
- WARN_ON(pt == vm->scratch_pt);
- continue;
- }
-
- pt = alloc_pt(vm);
- if (IS_ERR(pt))
- goto unwind_out;
+ if (!test_bit(pde, pd->used_pdes)) {
+ pt = alloc_pt(vm);
+ if (IS_ERR(pt))
+ goto unwind;
- gen8_initialize_pt(vm, pt);
- pd->page_table[pde] = pt;
- __set_bit(pde, new_pts);
+ gen8_initialize_pt(vm, pt);
+ pd->page_table[pde] = pt;
+ }
+ pt->used_ptes += gen8_pte_count(start, length);
trace_i915_page_table_entry_alloc(vm, pde, start, GEN8_PDE_SHIFT);
}
return 0;
-unwind_out:
- for_each_set_bit(pde, new_pts, I915_PDES)
- free_pt(vm, pd->page_table[pde]);
-
+unwind:
+ gen8_ppgtt_clear_pd(vm, pd, from, start - from);
return -ENOMEM;
}
@@ -1264,9 +1244,8 @@ gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
}
static void
-free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
+free_gen8_temp_bitmaps(unsigned long *new_pds)
{
- kfree(new_pts);
kfree(new_pds);
}
@@ -1275,29 +1254,16 @@ free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long *new_pts)
*/
static
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
- unsigned long **new_pts,
uint32_t pdpes)
{
unsigned long *pds;
- unsigned long *pts;
pds = kcalloc(BITS_TO_LONGS(pdpes), sizeof(unsigned long), GFP_TEMPORARY);
if (!pds)
return -ENOMEM;
- pts = kcalloc(pdpes, BITS_TO_LONGS(I915_PDES) * sizeof(unsigned long),
- GFP_TEMPORARY);
- if (!pts)
- goto err_out;
-
*new_pds = pds;
- *new_pts = pts;
-
return 0;
-
-err_out:
- free_gen8_temp_bitmaps(pds, pts);
- return -ENOMEM;
}
static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
@@ -1306,7 +1272,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
uint64_t length)
{
struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
- unsigned long *new_page_dirs, *new_page_tables;
+ unsigned long *new_page_dirs;
struct i915_page_directory *pd;
const uint64_t orig_start = start;
const uint64_t orig_length = length;
@@ -1314,7 +1280,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
uint32_t pdpes = I915_PDPES_PER_PDP(dev_priv);
int ret;
- ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+ ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
if (ret)
return ret;
@@ -1322,14 +1288,13 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
ret = gen8_ppgtt_alloc_page_directories(vm, pdp, start, length,
new_page_dirs);
if (ret) {
- free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ free_gen8_temp_bitmaps(new_page_dirs);
return ret;
}
/* For every page directory referenced, allocate page tables */
gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
- ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length,
- new_page_tables + pdpe * BITS_TO_LONGS(I915_PDES));
+ ret = gen8_ppgtt_alloc_pagetabs(vm, pd, start, length);
if (ret)
goto err_out;
}
@@ -1355,11 +1320,6 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
WARN_ON(!pd_len);
WARN_ON(!gen8_pte_count(pd_start, pd_len));
- /* Set our used ptes within the page table */
- bitmap_set(pt->used_ptes,
- gen8_pte_index(pd_start),
- gen8_pte_count(pd_start, pd_len));
-
/* Our pde is now pointing to the pagetable, pt */
__set_bit(pde, pd->used_pdes);
@@ -1368,8 +1328,7 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
I915_CACHE_LLC);
trace_i915_page_table_entry_map(&ppgtt->base, pde, pt,
gen8_pte_index(start),
- gen8_pte_count(start, length),
- GEN8_PTES);
+ gen8_pte_count(start, length));
/* NB: We haven't yet mapped ptes to pages. At this
* point we're still relying on insert_entries() */
@@ -1380,23 +1339,15 @@ static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
gen8_setup_page_directory(ppgtt, pdp, pd, pdpe);
}
- free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ free_gen8_temp_bitmaps(new_page_dirs);
mark_tlbs_dirty(ppgtt);
return 0;
err_out:
- while (pdpe--) {
- unsigned long temp;
-
- for_each_set_bit(temp, new_page_tables + pdpe *
- BITS_TO_LONGS(I915_PDES), I915_PDES)
- free_pt(vm, pdp->page_directory[pdpe]->page_table[temp]);
- }
-
for_each_set_bit(pdpe, new_page_dirs, pdpes)
free_pd(vm, pdp->page_directory[pdpe]);
- free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ free_gen8_temp_bitmaps(new_page_dirs);
mark_tlbs_dirty(ppgtt);
return ret;
}
@@ -1539,14 +1490,14 @@ static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
{
- unsigned long *new_page_dirs, *new_page_tables;
+ unsigned long *new_page_dirs;
uint32_t pdpes = I915_PDPES_PER_PDP(to_i915(ppgtt->base.dev));
int ret;
/* We allocate temp bitmap for page tables for no gain
* but as this is for init only, lets keep the things simple
*/
- ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables, pdpes);
+ ret = alloc_gen8_temp_bitmaps(&new_page_dirs, pdpes);
if (ret)
return ret;
@@ -1559,7 +1510,7 @@ static int gen8_preallocate_top_level_pdps(struct i915_hw_ppgtt *ppgtt)
if (!ret)
*ppgtt->pdp.used_pdpes = *new_page_dirs;
- free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
+ free_gen8_temp_bitmaps(new_page_dirs);
return ret;
}
@@ -1951,13 +1902,8 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
* tables.
*/
gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
- if (pt != vm->scratch_pt) {
- WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
+ if (pt != vm->scratch_pt)
continue;
- }
-
- /* We've already allocated a page table */
- WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));
pt = alloc_pt(vm);
if (IS_ERR(pt)) {
@@ -1987,10 +1933,7 @@ static int gen6_alloc_va_range(struct i915_address_space *vm,
trace_i915_page_table_entry_map(vm, pde, pt,
gen6_pte_index(start),
- gen6_pte_count(start, length),
- GEN6_PTES);
- bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
- GEN6_PTES);
+ gen6_pte_count(start, length));
}
WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
index 98268a59bb08..9cb1c0e6f194 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.h
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
@@ -216,8 +216,7 @@ struct i915_page_dma {
struct i915_page_table {
struct i915_page_dma base;
-
- unsigned long *used_ptes;
+ unsigned int used_ptes;
};
struct i915_page_directory {
diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
index 18ae37c411fd..ef2216242092 100644
--- a/drivers/gpu/drm/i915/i915_trace.h
+++ b/drivers/gpu/drm/i915/i915_trace.h
@@ -245,15 +245,14 @@ DEFINE_EVENT_PRINT(i915_px_entry, i915_page_directory_pointer_entry_alloc,
DECLARE_EVENT_CLASS(i915_page_table_entry_update,
TP_PROTO(struct i915_address_space *vm, u32 pde,
- struct i915_page_table *pt, u32 first, u32 count, u32 bits),
- TP_ARGS(vm, pde, pt, first, count, bits),
+ struct i915_page_table *pt, u32 first, u32 count),
+ TP_ARGS(vm, pde, pt, first, count),
TP_STRUCT__entry(
__field(struct i915_address_space *, vm)
__field(u32, pde)
__field(u32, first)
__field(u32, last)
- __dynamic_array(char, cur_ptes, TRACE_PT_SIZE(bits))
),
TP_fast_assign(
@@ -261,22 +260,16 @@ DECLARE_EVENT_CLASS(i915_page_table_entry_update,
__entry->pde = pde;
__entry->first = first;
__entry->last = first + count - 1;
- scnprintf(__get_str(cur_ptes),
- TRACE_PT_SIZE(bits),
- "%*pb",
- bits,
- pt->used_ptes);
),
- TP_printk("vm=%p, pde=%d, updating %u:%u\t%s",
- __entry->vm, __entry->pde, __entry->last, __entry->first,
- __get_str(cur_ptes))
+ TP_printk("vm=%p, pde=%d, updating %u:%u",
+ __entry->vm, __entry->pde, __entry->last, __entry->first)
);
DEFINE_EVENT(i915_page_table_entry_update, i915_page_table_entry_map,
TP_PROTO(struct i915_address_space *vm, u32 pde,
- struct i915_page_table *pt, u32 first, u32 count, u32 bits),
- TP_ARGS(vm, pde, pt, first, count, bits)
+ struct i915_page_table *pt, u32 first, u32 count),
+ TP_ARGS(vm, pde, pt, first, count)
);
TRACE_EVENT(i915_gem_object_change_domain,
--
2.11.0
More information about the Intel-gfx-trybot
mailing list