[PATCH 3/8] flat
Matthew Auld
matthew.auld at intel.com
Wed Jul 29 11:21:41 UTC 2020
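Map all of LMEM into the ppgtt in a flat manner, such that va == pa for
the whole region. The entries are written as 1G GTT pages, so a single
PDP covers up to 512G of LMEM and only the top levels of the page-table
tree need to be populated. With that in place the GPU can reach any part
of LMEM without having to dynamically bind anything first.

Add flat_lmem_ppgtt_init() and flat_lmem_ppgtt_fini() to set up and tear
down such a mapping, plus a live selftest which blits into an LMEM object
through the flat mapping and then verifies the result with the CPU.

A rough usage sketch, mirroring the new selftest (the vm and the lmem
memory-region pointer here just stand in for whatever the caller has):

  struct drm_mm_node flat = {};

  flat.start = lmem->region.start;
  flat.size = round_up(resource_size(&lmem->region), SZ_1G);
  flat.color = I915_COLOR_UNEVICTABLE;

  err = flat_lmem_ppgtt_init(vm, &flat);
  if (err)
          return err;
  ...
  flat_lmem_ppgtt_fini(vm, &flat);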
---
drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 91 +++++++++++
drivers/gpu/drm/i915/gt/gen8_ppgtt.h | 7 +
drivers/gpu/drm/i915/selftests/i915_gem_gtt.c | 145 ++++++++++++++++++
3 files changed, 243 insertions(+)
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
index e3afd250cd7f..4b6c8e639eca 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
@@ -672,6 +672,97 @@ gen8_alloc_top_pd(struct i915_address_space *vm)
return pd;
}
+#define GEN8_PDPE_PS_1G BIT(7)
+
+int flat_lmem_ppgtt_init(struct i915_address_space *vm,
+ struct drm_mm_node *node)
+{
+ struct i915_page_directory *pd = i915_vm_to_ppgtt(vm)->pd;
+ const gen8_pte_t encode = gen8_pte_encode(0, 0, /* PTE_LM */ 0);
+ u64 start, end, size;
+ unsigned int idx;
+ gen8_pte_t *vaddr;
+ u8 lvl;
+ int err;
+
+ GEM_BUG_ON(!i915_vm_is_4lvl(vm));
+ GEM_BUG_ON(!IS_ALIGNED(node->start, SZ_1G));
+ GEM_BUG_ON(!IS_ALIGNED(node->size, SZ_1G));
+ GEM_BUG_ON(node->size > SZ_1G * 512ULL);
+
+ /*
+ * Map all of LMEM in a kernel internal vm (which could perhaps be
+ * cloned?). This gives us the useful property that va == pa, which lets
+ * us touch any part of LMEM from the GPU without having to dynamically
+ * bind anything. We map the entries as 1G GTT entries, so we only need
+ * one PDPE for every 1G of LMEM, i.e. a single PDP can cover 512G of
+ * LMEM.
+ */
+
+ err = drm_mm_reserve_node(&vm->mm, node);
+ if (err)
+ return err;
+
+ start = node->start >> GEN8_PTE_SHIFT;
+ end = start + (node->size >> GEN8_PTE_SHIFT);
+
+ lvl = vm->top;
+ while (lvl >= 3) { /* allocate everything up to and including the pdp */
+ struct i915_page_directory *__pd;
+
+ gen8_pd_range(start, end, lvl, &idx);
+ __pd = pd->entry[idx];
+
+ if (!__pd) {
+ __pd = alloc_pd(vm);
+ if (IS_ERR(__pd)) {
+ err = PTR_ERR(__pd);
+ goto err_remove;
+ }
+
+ err = pin_pt_dma(vm, __pd->pt.base);
+ if (err) {
+ i915_gem_object_put(__pd->pt.base);
+ kfree(__pd);
+ goto err_remove;
+ }
+
+ atomic_inc(&__pd->pt.used); /* alive until vm is gone */
+ fill_px(__pd, vm->scratch[lvl - 1]->encode);
+ }
+
+ set_pd_entry(pd, idx, __pd);
+ pd = __pd;
+ lvl--;
+ }
+
+ gen8_pd_range(start, end, lvl, &idx);
+
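+ /* Write one 1G PDPE per 1G of the node, so that va == pa throughout */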
+ start = node->start;
+ size = node->size;
+ vaddr = kmap_atomic_px(pd);
+ do {
+ vaddr[idx++] = encode | GEN8_PDPE_PS_1G | start;
+ size -= min_t(u64, size, SZ_1G);
+ start += SZ_1G;
+ } while (size);
+
+ kunmap_atomic(vaddr);
+
+ return 0;
+
+err_remove:
+ drm_mm_remove_node(node);
+ return err;
+}
+
+void flat_lmem_ppgtt_fini(struct i915_address_space *vm,
+ struct drm_mm_node *node)
+{
+ drm_mm_remove_node(node);
+}
+
/*
* GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
* with a net effect resembling a 2-level page table in normal x86 terms. Each
diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
index 76a08b9c1f5c..b265b4b6da83 100644
--- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
+++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.h
@@ -7,7 +7,14 @@
#define __GEN8_PPGTT_H__
struct intel_gt;
+struct i915_address_space;
+struct drm_mm_node;
struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt);
+int flat_lmem_ppgtt_init(struct i915_address_space *vm,
+ struct drm_mm_node *node);
+void flat_lmem_ppgtt_fini(struct i915_address_space *vm,
+ struct drm_mm_node *node);
+
#endif
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index af8205a2bd8f..421e69153f47 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -26,8 +26,15 @@
#include <linux/prime_numbers.h>
#include "gem/i915_gem_context.h"
+#include "gem/i915_gem_lmem.h"
+#include "gem/i915_gem_object_blt.h"
#include "gem/selftests/mock_context.h"
#include "gt/intel_context.h"
+#include "gt/intel_gt_buffer_pool.h"
+#include "gt/gen8_ppgtt.h"
+#include "gt/intel_engine_pm.h"
+#include "gt/intel_gt.h"
+
#include "i915_random.h"
#include "i915_selftest.h"
@@ -1075,6 +1082,143 @@ static int igt_ppgtt_shrink_boom(void *arg)
return exercise_ppgtt(arg, shrink_boom);
}
+static int igt_ppgtt_flat(void *arg)
+{
+ struct drm_i915_private *i915 = arg;
+ struct intel_memory_region *mr = i915->mm.regions[INTEL_REGION_LMEM];
+ struct i915_gem_context *ctx;
+ struct i915_address_space *vm;
+ struct i915_vma vma, *batch;
+ struct drm_mm_node flat;
+ struct drm_i915_gem_object *obj;
+ struct intel_context *ce;
+ struct i915_request *rq;
+ I915_RND_STATE(prng);
+ struct file *file;
+ long timeout;
+ u32 *vaddr;
+ u32 val;
+ int ret;
+ u32 i;
+
+ if (!mr) {
+ pr_info("LMEM not available, skipping\n");
+ return 0;
+ }
+
+ file = mock_file(i915);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ctx = live_context(i915, file);
+ if (IS_ERR(ctx)) {
+ ret = PTR_ERR(ctx);
+ goto out_put;
+ }
+
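+ /* Reserve a node covering all of LMEM, rounded up to 1G, for the flat (va == pa) mapping */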
+ memset(&flat, 0, sizeof(flat));
+ flat.start = mr->region.start;
+ flat.size = round_up(resource_size(&mr->region), SZ_1G);
+ flat.color = I915_COLOR_UNEVICTABLE;
+
+ vm = i915_gem_context_get_vm_rcu(ctx);
+
+ ret = flat_lmem_ppgtt_init(vm, &flat);
+ if (ret)
+ goto out_vm;
+
+ obj = i915_gem_object_create_lmem(i915, SZ_64M,
+ I915_BO_ALLOC_CONTIGUOUS);
+ if (IS_ERR(obj)) {
+ ret = PTR_ERR(obj);
+ goto out_fini;
+ }
+
+ vaddr = i915_gem_object_pin_map(obj, I915_MAP_WC);
+ if (IS_ERR(vaddr)) {
+ ret = PTR_ERR(vaddr);
+ goto out_unpin;
+ }
+
+ val = prandom_u32_state(&prng);
+
+ memset32(vaddr, val ^ 0xdeadbeaf, obj->base.size / sizeof(u32));
+
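+ /* Dummy vma: with the flat mapping in place the GTT address is simply the object's dma address */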
+ memset(&vma, 0, sizeof(vma));
+ vma.node.start = sg_dma_address(obj->mm.pages->sgl); /* va == pa */
+ vma.size = obj->base.size;
+
+ ce = i915_gem_context_get_engine(ctx, BCS0);
+ GEM_BUG_ON(IS_ERR(ce));
+
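+ /* Build a blitter fill batch that writes the object through the flat mapping */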
+ batch = intel_emit_vma_fill_blt(ce, &vma, val);
+ if (IS_ERR(batch)) {
+ ret = PTR_ERR(batch);
+ goto out_ce;
+ }
+
+ rq = intel_context_create_request(ce);
+ if (IS_ERR(rq)) {
+ ret = PTR_ERR(rq);
+ goto out_batch;
+ }
+
+ ret = intel_emit_vma_mark_active(batch, rq);
+ if (unlikely(ret))
+ goto out_request;
+
+ if (ce->engine->emit_init_breadcrumb)
+ ret = ce->engine->emit_init_breadcrumb(rq);
+
+ if (likely(!ret))
+ ret = ce->engine->emit_bb_start(rq,
+ batch->node.start,
+ batch->node.size,
+ 0);
+out_request:
+ if (unlikely(ret))
+ i915_request_set_error_once(rq, ret);
+
+ i915_request_get(rq);
+ i915_request_add(rq);
+
+ timeout = i915_request_wait(rq, 0, HZ / 2);
+ i915_request_put(rq);
+ if (timeout < 0) {
+ ret = -EIO;
+ goto out_batch;
+ }
+
+ for (i = 0; i < obj->base.size / sizeof(u32); ++i) {
+ if (vaddr[i] != val) {
+ pr_err("vaddr[%u]=%08x, expected %08x\n", i, vaddr[i], val);
+ ret = -EINVAL;
+ break;
+ }
+ }
+
+out_batch:
+ intel_emit_vma_release(ce, batch);
+out_ce:
+ intel_context_put(ce);
+out_unpin:
+ i915_gem_object_put(obj);
+out_fini:
+ flat_lmem_ppgtt_fini(vm, &flat);
+out_vm:
+ i915_vm_put(vm);
+out_put:
+ fput(file);
+
+ if (igt_flush_test(i915))
+ ret = -EIO;
+
+ return ret;
+}
+
static int sort_holes(void *priv, struct list_head *A, struct list_head *B)
{
struct drm_mm_node *a = list_entry(A, typeof(*a), hole_stack);
@@ -2059,6 +2203,7 @@ int i915_gem_gtt_live_selftests(struct drm_i915_private *i915)
SUBTEST(igt_ppgtt_fill),
SUBTEST(igt_ppgtt_shrink),
SUBTEST(igt_ppgtt_shrink_boom),
+ SUBTEST(igt_ppgtt_flat),
SUBTEST(igt_ggtt_lowlevel),
SUBTEST(igt_ggtt_drunk),
SUBTEST(igt_ggtt_walk),
--
2.26.2