[Intel-gfx] [PATCH RFC 2/4] drm/i915: IOMMU based SVM implementation v13
Mika Kuoppala
mika.kuoppala at linux.intel.com
Mon Aug 15 11:48:05 UTC 2016
From: Jesse Barnes <jbarnes at virtuousgeek.org>
Use David's new IOMMU layer functions to support SVM (Shared Virtual Memory) in i915: an SVM context shares the CPU page tables of the task that created it, addressed via a PASID rather than a PPGTT.
TODO:
- error record collection for failing SVM contexts
- callback handling for fatal faults (see the sketch below)
- scheduling
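
As a rough illustration of the second item, the i915_svm_fault_cb() stub added
below in i915_gem_context.c is currently empty. A minimal sketch of what
fatal-fault handling might grow into (purely illustrative; only the callback
signature comes from this patch):

    static void i915_svm_fault_cb(struct device *dev, int pasid, u64 addr,
                                  u32 private, int rwxp, int response)
    {
            /* Illustrative only: log the faulting PASID and address so a
             * future error-record pass could attribute the fault to the
             * failing SVM context. */
            DRM_DEBUG_DRIVER("SVM fault: pasid %d addr 0x%llx rwxp 0x%x resp %d\n",
                             pasid, addr, rwxp, response);
    }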
v2: integrate David's core IOMMU support
    make sure we don't clobber the PASID in the context reg state
v3: fixup for intel-svm.h changes (David)
v4: use fault & halt for now (Jesse)
    fix ring free in error path on context alloc (Julia)
v5: update with new callback struct (Jesse)
v6: fix init svm check per new IOMMU code (Jesse)
v7: drop debug code and obsolete i915_svm.c file (Jesse)
v8: fix !CONFIG_INTEL_IOMMU_SVM init stub (Jesse)
v9: update to new execlist and reg handling bits (Jesse)
    context teardown fix (lrc deferred alloc vs teardown race?) (Jesse)
    check for SVM availability at context create (Jesse)
v10: intel_context_svm_init/fini & rebase
v11: move context specific stuff to i915_gem_context
v12: move addressing to context descriptor
v13: strip out workqueue and mm notifiers
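
For reference, creating an SVM context from userspace goes through the
context_create2 ioctl extended here. The ioctl name and struct layout below
come from the uapi patches elsewhere in this series, so treat them as
assumptions; only the I915_GEM_CONTEXT_ENABLE_SVM flag and the -ENODEV
failure mode are visible in this patch:

    /* Hypothetical userspace sketch (libdrm); uapi names assumed. */
    struct drm_i915_gem_context_create2 create = {
            .flags = I915_GEM_CONTEXT_ENABLE_SVM,
    };

    if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE2, &create))
            /* errno == ENODEV: SVM unavailable, i.e. no 48-bit PPGTT
             * or no IOMMU SVM support on this device */
            return -errno;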
Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: David Woodhouse <dwmw2 at infradead.org>
Signed-off-by: David Woodhouse <David.Woodhouse at intel.com> (v3)
Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org> (v9)
Signed-off-by: Mika Kuoppala <mika.kuoppala at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 32 ++++++++++
drivers/gpu/drm/i915/i915_gem.c | 7 +++
drivers/gpu/drm/i915/i915_gem_context.c | 104 +++++++++++++++++++++++++++++---
drivers/gpu/drm/i915/i915_reg.h | 18 ++++++
drivers/gpu/drm/i915/intel_lrc.c | 39 +++++-------
5 files changed, 167 insertions(+), 33 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 598e078418e3..64f3f0f18509 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -39,6 +39,7 @@
#include <linux/backlight.h>
#include <linux/hashtable.h>
#include <linux/intel-iommu.h>
+#include <linux/intel-svm.h>
#include <linux/kref.h>
#include <linux/pm_qos.h>
#include <linux/shmem_fs.h>
@@ -866,6 +867,8 @@ struct i915_ctx_hang_stats {
* @remap_slice: l3 row remapping information.
* @flags: context specific flags:
* CONTEXT_NO_ZEROMAP: do not allow mapping things to page 0.
+ * CONTEXT_NO_ERROR_CAPTURE: do not capture gpu state on hang.
+ * CONTEXT_SVM: context with 1:1 gpu vs cpu mapping of vm.
* @file_priv: filp associated with this context (NULL for global default
* context).
* @hang_stats: information about the role of this context in possible GPU
@@ -891,6 +894,8 @@ struct i915_gem_context {
unsigned long flags;
#define CONTEXT_NO_ZEROMAP BIT(0)
#define CONTEXT_NO_ERROR_CAPTURE BIT(1)
+#define CONTEXT_SVM BIT(2)
+
unsigned hw_id;
u32 user_handle;
@@ -909,6 +914,9 @@ struct i915_gem_context {
struct atomic_notifier_head status_notifier;
bool execlists_force_single_submission;
+ u32 pasid; /* svm, 20 bits */
+ struct task_struct *task;
+
struct list_head link;
u8 remap_slice;
@@ -2001,6 +2009,8 @@ struct drm_i915_private {
struct i915_runtime_pm pm;
+ bool svm_available;
+
/* Abstract the submission mechanism (legacy ringbuffer or execlists) away */
struct {
void (*cleanup_engine)(struct intel_engine_cs *engine);
@@ -3628,6 +3638,28 @@ extern void intel_set_memory_cxsr(struct drm_i915_private *dev_priv,
int i915_reg_read_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
+/* svm */
+#ifdef CONFIG_INTEL_IOMMU_SVM
+static inline bool intel_init_svm(struct drm_device *dev)
+{
+        struct drm_i915_private *dev_priv = to_i915(dev);
+
+        dev_priv->svm_available = USES_FULL_48BIT_PPGTT(dev_priv) &&
+                intel_svm_available(&dev->pdev->dev);
+
+        return dev_priv->svm_available;
+}
+#else
+static inline bool intel_init_svm(struct drm_device *dev)
+{
+        struct drm_i915_private *dev_priv = to_i915(dev);
+
+        dev_priv->svm_available = false;
+
+        return dev_priv->svm_available;
+}
+#endif
+
/* overlay */
extern struct intel_overlay_error_state *
intel_overlay_capture_error_state(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7e08c774a1aa..45d67b54c018 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -4304,6 +4304,13 @@ i915_gem_init_hw(struct drm_device *dev)
}
}
+        if (INTEL_GEN(dev) >= 8) {
+                if (intel_init_svm(dev))
+                        DRM_DEBUG_DRIVER("Initialized Intel SVM support\n");
+                else
+                        DRM_ERROR("Failed to enable Intel SVM support\n");
+        }
+
i915_gem_init_swizzling(dev);
/*
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 189a6c018b72..9ab6332f296b 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -134,6 +134,47 @@ static int get_context_size(struct drm_i915_private *dev_priv)
return ret;
}
+static void i915_svm_fault_cb(struct device *dev, int pasid, u64 addr,
+                              u32 private, int rwxp, int response)
+{
+}
+
+static struct svm_dev_ops i915_svm_ops = {
+        .fault_cb = i915_svm_fault_cb,
+};
+
+static int i915_gem_context_svm_init(struct i915_gem_context *ctx)
+{
+        struct device *dev = &ctx->i915->drm.pdev->dev;
+        int ret;
+
+        if (WARN_ON_ONCE(!ctx->i915->svm_available))
+                return -ENODEV;
+
+        get_task_struct(current);
+
+        ret = intel_svm_bind_mm(dev, &ctx->pasid, 0, &i915_svm_ops);
+        if (ret) {
+                DRM_DEBUG_DRIVER("pasid alloc fail: %d\n", ret);
+                put_task_struct(current);
+                return ret;
+        }
+
+        ctx->task = current;
+
+        return 0;
+}
+
+static void i915_gem_context_svm_fini(struct i915_gem_context *ctx)
+{
+        struct device *dev = &ctx->i915->drm.pdev->dev;
+
+        if (ctx->task) {
+                intel_svm_unbind_mm(dev, ctx->pasid);
+                put_task_struct(ctx->task);
+        }
+}
+
void i915_gem_context_free(struct kref *ctx_ref)
{
struct i915_gem_context *ctx = container_of(ctx_ref, typeof(*ctx), ref);
@@ -143,6 +184,9 @@ void i915_gem_context_free(struct kref *ctx_ref)
trace_i915_context_free(ctx);
GEM_BUG_ON(!ctx->closed);
+        if (ctx->flags & CONTEXT_SVM)
+                i915_gem_context_svm_fini(ctx);
+
i915_ppgtt_put(ctx->ppgtt);
for (i = 0; i < I915_NUM_ENGINES; i++) {
@@ -257,9 +301,34 @@ static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
return 0;
}
+static u32 __create_ctx_desc(struct drm_i915_private *dev_priv, u32 flags)
+{
+        u32 desc = GEN8_CTX_VALID;
+
+        if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+                desc |= INTEL_ADVANCED_CONTEXT <<
+                        GEN8_CTX_ADDRESSING_MODE_SHIFT;
+                /*
+                 * Switch to stream once we have a scheduler and can
+                 * re-submit contexts.
+                 */
+                desc |= FAULT_AND_HALT << GEN8_CTX_FAULT_SHIFT;
+        } else {
+                if (IS_GEN8(dev_priv))
+                        desc |= GEN8_CTX_L3LLC_COHERENT;
+
+                desc |= GEN8_CTX_PRIVILEGE;
+
+                desc |= GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
+                        GEN8_CTX_ADDRESSING_MODE_SHIFT;
+        }
+
+        return desc;
+}
+
static struct i915_gem_context *
__create_hw_context(struct drm_device *dev,
-                    struct drm_i915_file_private *file_priv)
+                    struct drm_i915_file_private *file_priv, u32 flags)
{
struct drm_i915_private *dev_priv = to_i915(dev);
struct i915_gem_context *ctx;
@@ -323,8 +392,8 @@ __create_hw_context(struct drm_device *dev,
ctx->hang_stats.ban_period_seconds = DRM_I915_CTX_BAN_PERIOD;
ctx->ring_size = 4 * PAGE_SIZE;
-        ctx->desc_template = GEN8_CTX_ADDRESSING_MODE(dev_priv) <<
-                             GEN8_CTX_ADDRESSING_MODE_SHIFT;
+        ctx->desc_template = __create_ctx_desc(dev_priv, flags);
+
ATOMIC_INIT_NOTIFIER_HEAD(&ctx->status_notifier);
return ctx;
@@ -345,13 +414,14 @@ i915_gem_create_context(struct drm_device *dev,
{
struct i915_gem_context *ctx;
bool create_vm = false;
+        int ret;
lockdep_assert_held(&dev->struct_mutex);
if (flags & (I915_GEM_CONTEXT_FULL_PPGTT | I915_GEM_CONTEXT_ENABLE_SVM))
create_vm = true;
-        ctx = __create_hw_context(dev, file_priv);
+        ctx = __create_hw_context(dev, file_priv, flags);
if (IS_ERR(ctx))
return ctx;
@@ -360,19 +430,31 @@ i915_gem_create_context(struct drm_device *dev,
i915_ppgtt_create(to_i915(dev), file_priv);
if (IS_ERR(ppgtt)) {
- DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
- PTR_ERR(ppgtt));
- idr_remove(&file_priv->context_idr, ctx->user_handle);
- context_close(ctx);
- return ERR_CAST(ppgtt);
+ ret = PTR_ERR(ppgtt);
+ DRM_DEBUG_DRIVER("PPGTT setup failed (%d)\n", ret);
+ goto free_ctx;
}
ctx->ppgtt = ppgtt;
}
+        if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+                ret = i915_gem_context_svm_init(ctx);
+                if (ret)
+                        goto free_ctx;
+
+                ctx->flags |= CONTEXT_SVM;
+        }
+
trace_i915_context_create(ctx);
return ctx;
+
+free_ctx:
+        idr_remove(&file_priv->context_idr, ctx->user_handle);
+        context_close(ctx);
+
+        return ERR_PTR(ret);
}
/**
@@ -987,6 +1069,7 @@ static bool contexts_enabled(struct drm_device *dev)
int i915_gem_context_create2_ioctl(struct drm_device *dev, void *data,
struct drm_file *file)
{
+ struct drm_i915_private *dev_priv = to_i915(dev);
struct drm_i915_gem_context_create2 *args = data;
struct drm_i915_file_private *file_priv = file->driver_priv;
struct i915_gem_context *ctx;
@@ -1007,7 +1090,8 @@ int i915_gem_context_create2_ioctl(struct drm_device *dev, void *data,
if (USES_FULL_PPGTT(dev))
flags |= I915_GEM_CONTEXT_FULL_PPGTT;
-        if (flags & I915_GEM_CONTEXT_ENABLE_SVM) {
+        if ((flags & I915_GEM_CONTEXT_ENABLE_SVM) &&
+            !dev_priv->svm_available) {
ret = -ENODEV;
goto unlock;
}
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index d4adf2806c50..8eebb038622b 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3062,6 +3062,24 @@ enum {
INTEL_LEGACY_64B_CONTEXT
};
+enum {
+        FAULT_AND_HANG = 0,
+        FAULT_AND_HALT, /* Debug only */
+        FAULT_AND_STREAM,
+        FAULT_AND_CONTINUE /* Unsupported */
+};
+
+#define GEN8_CTX_VALID (1<<0)
+#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
+#define GEN8_CTX_FORCE_RESTORE (1<<2)
+#define GEN8_CTX_L3LLC_COHERENT (1<<5)
+#define GEN8_CTX_PRIVILEGE (1<<8)
+
+#define GEN8_CTX_FAULT_SHIFT 6
+#define GEN8_CTX_ID_SHIFT 32
+#define GEN8_CTX_ID_WIDTH 21
+#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
+#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
#define GEN8_CTX_ADDRESSING_MODE(dev_priv) (USES_FULL_48BIT_PPGTT(dev_priv) ?\
INTEL_LEGACY_64B_CONTEXT : \
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 6b49df4316f4..6e27cc83aa43 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -185,12 +185,6 @@
#define CTX_R_PWR_CLK_STATE 0x42
#define CTX_GPGPU_CSR_BASE_ADDRESS 0x44
-#define GEN8_CTX_VALID (1<<0)
-#define GEN8_CTX_FORCE_PD_RESTORE (1<<1)
-#define GEN8_CTX_FORCE_RESTORE (1<<2)
-#define GEN8_CTX_L3LLC_COHERENT (1<<5)
-#define GEN8_CTX_PRIVILEGE (1<<8)
-
#define ASSIGN_CTX_REG(reg_state, pos, reg, val) do { \
(reg_state)[(pos)+0] = i915_mmio_reg_offset(reg); \
(reg_state)[(pos)+1] = (val); \
@@ -207,16 +201,13 @@
reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
} while (0)
-enum {
-        FAULT_AND_HANG = 0,
-        FAULT_AND_HALT, /* Debug only */
-        FAULT_AND_STREAM,
-        FAULT_AND_CONTINUE /* Unsupported */
-};
-#define GEN8_CTX_ID_SHIFT 32
-#define GEN8_CTX_ID_WIDTH 21
-#define GEN8_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
-#define GEN9_CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x26
+#define ASSIGN_CTX_SVM(reg_state, ctx, engine) do { \
+        ASSIGN_CTX_REG(reg_state, CTX_PDP0_UDW, \
+                       GEN8_RING_PDP_UDW((engine), 0), 0); \
+        ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, \
+                       GEN8_RING_PDP_LDW((engine), 0), (ctx)->pasid); \
+} while (0)
+
/* Typical size of the average request (2 pipecontrols and a MI_BB) */
#define EXECLISTS_REQUEST_SIZE 64 /* bytes */
@@ -270,10 +261,6 @@ logical_ring_init_platform_invariants(struct intel_engine_cs *engine)
IS_BXT_REVID(dev_priv, 0, BXT_REVID_A1)) &&
(engine->id == VCS || engine->id == VCS2);
-        engine->ctx_desc_template = GEN8_CTX_VALID;
-        if (IS_GEN8(dev_priv))
-                engine->ctx_desc_template |= GEN8_CTX_L3LLC_COHERENT;
-        engine->ctx_desc_template |= GEN8_CTX_PRIVILEGE;
/* TODO: WaDisableLiteRestore when we start using semaphore
* signalling between Command Streamers */
@@ -380,7 +367,9 @@ static void execlists_update_context(struct drm_i915_gem_request *rq)
* PML4 is allocated during ppgtt init, so this is not needed
* in 48-bit mode.
*/
-        if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
+        if (!(rq->ctx->flags & CONTEXT_SVM) &&
+            ppgtt &&
+            !USES_FULL_48BIT_PPGTT(ppgtt->base.dev))
execlists_update_context_pdps(ppgtt, reg_state);
}
@@ -1399,7 +1388,9 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
* it is unsafe in case of lite-restore (because the ctx is
* not idle). PML4 is allocated during ppgtt init so this is
* not needed in 48-bit.*/
-        if (req->ctx->ppgtt &&
+
+        if (!(req->ctx->flags & CONTEXT_SVM) &&
+            req->ctx->ppgtt &&
(intel_engine_flag(req->engine) & req->ctx->ppgtt->pd_dirty_rings)) {
if (!USES_FULL_48BIT_PPGTT(req->i915) &&
!intel_vgpu_active(req->i915)) {
@@ -2057,7 +2048,9 @@ populate_lr_context(struct i915_gem_context *ctx,
ASSIGN_CTX_REG(reg_state, CTX_PDP0_LDW, GEN8_RING_PDP_LDW(engine, 0),
0);
-        if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
+        if (ctx->flags & CONTEXT_SVM) {
+                ASSIGN_CTX_SVM(reg_state, ctx, engine);
+        } else if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
/* 64b PPGTT (48bit canonical)
* PDP0_DESCRIPTOR contains the base address to PML4 and
* other PDP Descriptors are ignored.
--
2.7.4