[Intel-gfx] [PATCH 6/9] drm/i915: driver based PASID handling
Chris Wilson
chris at chris-wilson.co.uk
Thu Oct 8 08:57:11 PDT 2015
On Fri, Sep 04, 2015 at 09:59:00AM -0700, Jesse Barnes wrote:
> New file with VT-d SVM and PASID handling functions and page table
> management. This belongs in the IOMMU code (along with some extra bits
> for waiting for invalidations and page faults to complete, flushing the
> device IOTLB, etc.)
>
> FIXME:
> need work queue for re-submitting contexts
> TE bit handling on SKL
> ---
> drivers/gpu/drm/i915/Makefile | 5 +-
> drivers/gpu/drm/i915/i915_drv.h | 43 ++
> drivers/gpu/drm/i915/i915_gem.c | 3 +
> drivers/gpu/drm/i915/i915_gem_context.c | 3 +
> drivers/gpu/drm/i915/i915_irq.c | 7 +
> drivers/gpu/drm/i915/i915_reg.h | 47 ++
> drivers/gpu/drm/i915/i915_svm.c | 1102 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/i915/intel_lrc.c | 120 +++-
> drivers/gpu/drm/i915/intel_lrc.h | 1 +
> 9 files changed, 1299 insertions(+), 32 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_svm.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 44d290a..e4883a7 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -38,7 +38,8 @@ i915-y += i915_cmd_parser.o \
> intel_lrc.o \
> intel_mocs.o \
> intel_ringbuffer.o \
> - intel_uncore.o
> + intel_uncore.o \
> + i915_svm.o
Correct me if I am wrong, but it looks like i915_svm implements the
lowlevel interface with the hardware, so by convention is intel_svm.c
> # general-purpose microcontroller (GuC) support
> i915-y += intel_guc_loader.o \
> @@ -93,6 +94,8 @@ i915-y += dvo_ch7017.o \
> # virtual gpu code
> i915-y += i915_vgpu.o
>
> +i915-$(CONFIG_MMU_NOTIFIER) += i915_svm.o
Added twice?
> +
> # legacy horrors
> i915-y += i915_dma.o
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 20beb51..ca38a7a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -47,6 +47,7 @@
> #include <drm/drm_gem.h>
> #include <linux/backlight.h>
> #include <linux/hashtable.h>
> +#include <linux/mmu_notifier.h>
> #include <linux/intel-iommu.h>
> #include <linux/kref.h>
> #include <linux/pm_qos.h>
> @@ -848,6 +849,13 @@ struct i915_ctx_hang_stats {
> bool banned;
> };
>
> +struct intel_mm_struct {
> + struct kref kref;
> + struct mmu_notifier notifier;
> + struct drm_i915_private *dev_priv;
> + struct list_head context_list;
> +};
Doesn't this look kind of familiar? struct i915_mm_struct perhaps?
> +
> /* This must match up with the value previously used for execbuf2.rsvd1. */
> #define DEFAULT_CONTEXT_HANDLE 0
>
> @@ -874,6 +882,9 @@ struct i915_ctx_hang_stats {
> struct intel_context {
> struct kref ref;
> int user_handle;
> + bool is_svm; /* shares x86 page tables */
> + u32 pasid; /* 20 bits */
> + struct intel_mm_struct *ims;
> uint8_t remap_slice;
> struct drm_i915_private *i915;
> int flags;
> @@ -895,6 +906,9 @@ struct intel_context {
> int pin_count;
> } engine[I915_NUM_RINGS];
>
> + struct list_head mm_list;
This is a link, name it so.
> + struct task_struct *tsk;
One task? A context can be passed by the device fd to another process.
Do we inherit the VM along with the context? I don't anything to prevent
such.
> +static void gpu_mm_segv(struct task_struct *tsk, unsigned long address,
> + int si_code)
> +{
> + siginfo_t info;
> +
> + /* Need specific signal info here */
> + info.si_signo = SIGSEGV;
> + info.si_errno = EIO;
> + info.si_code = si_code;
> + info.si_addr = (void __user *)address;
> +
> + force_sig_info(SIGSEGV, &info, tsk);
force_sig_info() is not exported, ah you builtin i915-svm.c
> +}
> +
> +/*
> + * Read the fault descriptor and handle the fault:
> + * get PML4 from PASID
> + * get mm struct
> + * get the vma
> + * verify the address is valid
> + * call handle_mm_fault after taking the mm->mmap_sem
> + */
> +void intel_gpu_fault_work(struct work_struct *work)
> +{
> + struct i915_svm_state *svm = container_of(work, struct i915_svm_state,
> + work);
> + struct drm_i915_private *dev_priv =
> + container_of(svm, struct drm_i915_private, svm);
> + struct drm_device *dev = dev_priv->dev;
> + struct intel_ringbuffer *ringbuf;
> + struct page_request_dsc desc;
> + struct page_group_response_dsc resp;
> + struct intel_context *ctx;
> + struct task_struct *tsk;
> + struct mm_struct *mm;
> + struct vm_area_struct *vma;
> + u64 address;
> + int ret;
> +
> + DRM_ERROR("PRQ updated, head 0x%08x, tail 0x%08x\n",
> + I915_READ(SVM_PRQ_HEAD), I915_READ(SVM_PRQ_TAIL));
> + prq_read_descriptor(dev, &desc);
> + DRM_ERROR("page fault on addr 0x%016llx, PASID %d, srr %d\n",
> + (u64)(desc.addr << PAGE_SHIFT), desc.pasid, desc.srr);
> +
> + spin_lock(&dev_priv->svm.lock);
> + ctx = dev_priv->svm.pasid_ctx[desc.pasid];
> + tsk = ctx->tsk;
> + mm = tsk->mm;
> + address = desc.addr << PAGE_SHIFT;
> + ringbuf = ctx->engine[RCS].ringbuf;
> + spin_unlock(&dev_priv->svm.lock);
All of the above can disappear at anytime after the unlock?
> +
> + down_read_trylock(&mm->mmap_sem);
> + vma = find_extend_vma(mm, address);
> + if (!vma || address < vma->vm_start) {
> + DRM_ERROR("bad VMA or address out of range\n");
> + gpu_mm_segv(tsk, address, SEGV_MAPERR);
> + goto out_unlock; /* need to kill process */
> + }
> +
> + ret = handle_mm_fault(mm, vma, address,
> + desc.wr_req ? FAULT_FLAG_WRITE : 0);
> + if (ret & VM_FAULT_ERROR) {
> + gpu_mm_segv(tsk, address, SEGV_ACCERR); /* ? */
> + goto out_unlock;
> + }
> +
> + if (ret & VM_FAULT_MAJOR)
> + tsk->maj_flt++;
> + else
> + tsk->min_flt++;
> +
> + if (desc.srr)
> + resp.dsc_type = PAGE_STREAM_RESP_DSC;
> + else
> + resp.dsc_type = PAGE_GRP_RESP_DSC;
> + resp.pasid = desc.pasid;
> + resp.pasid_present = 1;
> + resp.requestor_id = PCI_DEVID(0, PCI_DEVFN(2,0));
> + resp.resp_code = RESP_CODE_SUCCESS;
> + resp.prg_index = desc.prg_index;
> + resp.private = desc.private;
> + ivq_write_resp_descriptor(dev, &resp);
> +out_unlock:
> + up_read(&mm->mmap_sem);
> +
> + /* FIXME: wait for page response to be serviced */
> +
> + /* FIXME: queue context for re-submit */
> + /* execlists_context_queue(req); */
> +}
> +/* Make sure GPU writes can't hit the mm that's about to go away */
> +static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> +{
> + struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> + notifier);
> + struct drm_i915_private *dev_priv = ims->dev_priv;
> + struct drm_device *dev = dev_priv->dev;
> + struct intel_context *ctx;
> +
> + /*
> + * Wait for any outstanding activity and unbind the mm. Since
> + * each context has its own ring, we can simply wait for the ring
> + * to idle before invalidating the PASID and flushing the TLB.
> + */
> + mutex_lock(&dev->struct_mutex);
> + list_for_each_entry(ctx, &ims->context_list, mm_list) {
> + intel_ring_idle(ctx->engine[RCS].ringbuf->ring);
> + }
> +
> + intel_iommu_tlb_flush(dev_priv->dev);
> + mutex_unlock(&dev->struct_mutex);
Erm, what! So you halt the GPU everytime? But you've already invalidated
the shadow PTE -- ah, invalidate-range looks to be a wip.
> +static void intel_flush_page_locked(struct drm_device *dev, int pasid,
> + unsigned long address)
> +{
> + struct ext_iotlb_inv_dsc dsc = { 0 };
> +
> + dsc.dsc_type = EXT_IOTLB_INV_DSC;
> + dsc.g = EXT_IOTLB_INV_G_PASID_PAGE_SELECT;
> + dsc.pasid = pasid;
> + dsc.ih = 0;
> + dsc.addr = address;
> + dsc.am = 1;
> + ivq_write_ext_iotlb_inv_descriptor(dev, &dsc);
> +}
> +
> +static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
> + unsigned long address, pte_t pte)
> +{
> + struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> + notifier);
> + struct drm_i915_private *dev_priv = ims->dev_priv;
> + struct drm_device *dev = dev_priv->dev;
> +
> + struct intel_context *ctx;
> +
> + mutex_lock(&dev->struct_mutex);
> + list_for_each_entry(ctx, &ims->context_list, mm_list)
> + intel_flush_page_locked(dev, ctx->pasid, address);
> + mutex_unlock(&dev->struct_mutex);
Suggests you really want a ims->spinlock for context_list instead.
> +}
> +
> +static void intel_invalidate_page(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long address)
> +{
> + struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> + notifier);
> + struct drm_i915_private *dev_priv = ims->dev_priv;
> + struct drm_device *dev = dev_priv->dev;
> + struct intel_context *ctx;
> +
> + mutex_lock(&dev->struct_mutex);
> + list_for_each_entry(ctx, &ims->context_list, mm_list)
> + intel_flush_page_locked(dev, ctx->pasid, address);
> + mutex_unlock(&dev->struct_mutex);
> +}
> +
> +/* Need to unmap this range and make sure it doesn't get re-faulted */
> +static void intel_invalidate_range_start(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long start, unsigned long end)
> +{
> + struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> + notifier);
> + struct drm_i915_private *dev_priv = ims->dev_priv;
> + struct drm_device *dev = dev_priv->dev;
> +
> + /* FIXME: invalidate page only */
> + intel_iommu_tlb_flush(dev);
> +}
> +
> +/* Pages have been freed at this point */
> +static void intel_invalidate_range_end(struct mmu_notifier *mn,
> + struct mm_struct *mm,
> + unsigned long start, unsigned long end)
> +{
> + struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> + notifier);
> + struct drm_i915_private *dev_priv = ims->dev_priv;
> + struct drm_device *dev = dev_priv->dev;
> +
> + /* FIXME: invalidate page only */
> + intel_iommu_tlb_flush(dev);
> +}
> +
> +static const struct mmu_notifier_ops intel_mmuops = {
> + .release = intel_mm_release,
> + /* no clear_flush_young, we just share the x86 bits */
> + /* no test_young, we just share the x86 bits */
> + .change_pte = intel_change_pte,
> + .invalidate_page = intel_invalidate_page,
> + .invalidate_range_start = intel_invalidate_range_start,
> + .invalidate_range_end = intel_invalidate_range_end,
> +};
> +
> +struct intel_mm_struct *intel_bind_mm(struct drm_device *dev,
> + struct intel_context *ctx)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct intel_mm_struct *ims;
> + struct mmu_notifier *mn;
> + int ret;
> +
> + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> + mn = mmu_find_ops(current->mm, &intel_mmuops);
Magic function, I am missing its definition
> + if (mn) {
> + ims = container_of(mn, struct intel_mm_struct, notifier);
> + kref_get(&ims->kref);
> + goto out;
> + }
> +
> + ims = kzalloc(sizeof(*ims), GFP_KERNEL);
> + if (!ims) {
> + ret = -ENOMEM;
> + goto error;
> + }
> + INIT_LIST_HEAD(&ims->context_list);
> +
> + ims->notifier.ops = &intel_mmuops;
> +
> + ret = mmu_notifier_register(&ims->notifier, current->mm);
This has lock inversion between struct_mutex and mm->mmap_sem.
> + if (ret)
> + goto error;
> +
> + ims->dev_priv = dev->dev_private;
> +
> +out:
> + list_add(&ctx->mm_list, &ims->context_list);
> + return ims;
> +error:
> + kfree(ims);
> + return ERR_PTR(ret);
> +}
> +
> +static void intel_mm_free(struct kref *ims_ref)
> +{
> + struct intel_mm_struct *ims =
> + container_of(ims_ref, struct intel_mm_struct, kref);
> +
> + mmu_notifier_unregister(&ims->notifier, current->mm);
More lock inversion.
> + kfree(ims);
> +}
> +
> +void intel_unbind_mm(struct intel_context *ctx)
> +{
> + struct drm_i915_private *dev_priv = ctx->ims->dev_priv;
> +
> + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> + list_del(&ctx->mm_list);
> + kref_put(&ctx->ims->kref, intel_mm_free);
> +
> + return;
> +}
> +
> +int intel_exec_mm_ioctl(struct drm_device *dev, void *data,
> + struct drm_file *file)
> +{
> +// struct drm_i915_exec_mm *exec_mm = data;
> +// struct drm_i915_private *dev_priv = dev->dev_private;
> +
> + /* Load new context into context reg */
Ah, there is a modicum of user API here.
> + return 0;
> +}
> +
> +/*
> + * The PASID table has 32 entries in the current config, rotate through
> + * them as needed.
> + */
> +int intel_alloc_pasid(struct drm_device *dev, struct intel_context *ctx)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct pasid_table_entry *table;
> + int i;
> +
> + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> + spin_lock(&dev_priv->svm.lock);
> + table = dev_priv->svm.pasid_table;
> +
> + for (i = 0; i < PASID_COUNT; i++) {
> + if (!table[i].present)
> + goto found;
> + }
> +
> + spin_unlock(&dev_priv->svm.lock);
> + return -1;
> +
> +found:
> + table[i].pml4 = __pa(current->mm->pgd) >> PAGE_SHIFT;
> + table[i].present = 1;
> +
> + ctx->pasid = i;
> + dev_priv->svm.pasid_ctx[ctx->pasid] = NULL;
> + spin_unlock(&dev_priv->svm.lock);
> +
> + intel_iommu_tlb_flush(dev);
> +
> + return 0;
> +}
> +
> +void intel_free_pasid(struct drm_device *dev, struct intel_context *ctx)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct pasid_table_entry *table;
> +
> + WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> + if (ctx->pasid >= PASID_COUNT)
> + return;
> +
> + spin_lock(&dev_priv->svm.lock);
> + table = dev_priv->svm.pasid_table;
> + memset(&table[ctx->pasid], 0, sizeof(struct pasid_table_entry));
> + dev_priv->svm.pasid_ctx[ctx->pasid] = NULL;
> + ctx->pasid = -1;
> + spin_unlock(&dev_priv->svm.lock);
> +
> + intel_iommu_tlb_flush(dev);
> +}
> +
> +/*
> + * Each root table entry is 16 bytes wide. In legacy mode, only
> + * the lower 64 bits are used:
> + * Bits 38:12: context table pointer
> + * Bit 0: present
> + * all other bits reserved
> + * In extended mode (what we use for SVM):
> + * Bits 102:76: upper context table pointer
> + * Bit 64: upper present
> + * Bits 38:12: lower context table pointer
> + * Bit 0: lower present
> + * all other bits reserved
> + *
> + * The context entries are 128 bit in legacy mode:
> + * Bits 87:72: Domain ID
> + * Bits 70:67: Available
> + * Bits 66:64: Address width
> + * Bits 38:12: Page table pointer
> + * Bits 3:2: Translation type
> + * 00 - only untranslated DMA requests go through this table
> + * translated and translation requests are blocked
> + * 01 - untranslated, translated, and translation requests supported
> + * 10 - untranslated requests are treated as pass through (HPA == GPA),
> + * translated DMA requests and translation requests are blocked
> + * 11 - reserved
> + * Bit 1: fault disable
> + * Bit 0: Present
> + * and 256 bit in extended:
> + * Bits 230:204: PASID state table pointer
> + * Bits 166:140: PASID table pointer
> + * Bits 131:128: PASID table size
> + * Bits 127:96: Page table attribute (PAT)
> + * Bit 92: SL64KPE
> + * Bit 91: SLEE
> + * Bit 90: ERE
> + * Bit 89: SRE
> + * Bit 88: SMEP
> + * Bits 87:72: Domain ID
> + * Bit 71: Extended memory type enable
> + * Bit 70: cache disable (CD)
> + * Bit 69: write protect (WP)
> + * Bit 68: no execute enable (NXE)
> + * Bit 67: page global enable (PGE)
> + * Bits 66:64: address width
> + * Bits 38:12: 2nd level (VT-d) page table pointer
> + * Bit 11: PASID enable
> + * Bit 10: Nesting enable
> + * Bit 9: Page Request enable
> + * Bit 8: Lazy-Invalidate enable
> + * Bits 7:5: Extended Memory Type (VT-d)
> + * Bits 4:2: Translation type
> + * 000 - Only Untranslated DMA requests are translated through this page
> + * table. Translated DMA requests and Translation Requests are
> + * blocked. Untranslated requests-without-PASID are remapped using
> + * the second-level page-table referenced through SLPTPTR field.
> + * If PASIDE field is Set, Untranslated requests-with-PASID are
> + * remapped using the PASID Table referenced through PASIDPTPTR
> + * field. If PASIDE field is Clear, Untranslated requests-with-PASID
> + * are blocked. Translation requests (with or without PASID), and
> + * Translated Requests are blocked.
> + * 001 - Un-translated and Translation requests without PASID supported
> + * (and with PASID supported, if PASID Enable Set); Translate
> + * requests bypass address translation. Untranslated
> + * requests-without-PASID and Translation requests-without-PASID are
> + * remapped using the second level page-table referenced through
> + * SLPTPTR field. If PASIDE field is Set, Untranslated
> + * requests-with-PASID and Translation requests-with-PASID are
> + * remapped using the PASID Table referenced through PASIDPTPTR
> + * field. If PASIDE field is Clear, Untranslated requests-with-PASID,
> + * and Translation requests-with-PASID, are blocked. Translated
> + * requests bypass address translation.
> + * 010 - If Pass-through Supported (GT supports pass-through),
> + * Un-translated requests without PASID bypass address translation;
> + * All other requests (with or without PASID) blocked. Untranslated
> + * requests-without-PASID bypass address translation and are
> + * processed as passthrough. SLPTPTR field is ignored by hardware.
> + * Untranslated requests-with-PASID, Translation requests (with or
> + * without PASID), and Translated requests are blocked.
> + * 011 - Reserved.
> + * 100 - Un-translated requests without PASID bypass address translation;
> + * Un-translated requests with PASID supported, if PASID Enable Set;
> + * All other requests blocked. Untranslated requests-without-PASID
> + * bypass address translation and are processed as passthrough.
> + * SLPTPTR field is ignored by hardware. Untranslated
> + * requests-with-PASID are remapped using the PASID Table referenced
> + * through PASIDPTPTR field. Translation requests (with or without
> + * PASID) and Translated requests are blocked.
> + * 101 - Un-translated and Translation requests without PASID bypass
> + * address translation; Un-translated and Translation requests with
> + * PASID supported, if PASID Enable Set; Translated requests bypass
> + * address translation. Untranslated requests-without-PASID bypass
> + * address translation and are processed as passthrough. SLPTPTR
> + * field is ignored by hardware. Translation requests-without-PASID
> + * are responded with Untranslated access only bit Set (U=1) along
> + * with read and write permissions (R=W=1). SLPTPTR field is ignored
> + * by hardware. Untranslated requests-with-PASID, and Translation
> + * requests-with-PASID are remapped using the PASID Table referenced
> + * through PASIDPTPTR field. Translated requests bypass address
> + * translation.
> + * 110 - Un-translated requests without PASID are blocked; Un-translated
> + * requests with PASID supported, if PASID Enable Set; All other
> + * requests blocked – Not applicable to GFX, GT should treat this as
> + * reserved.
> + * 111 - Un-translated and Translation requests without PASID blocked;
> + * Un-translated and Translation requests with PASID supported, if
> + * PASID Enable Set; Translated requests bypass address translation.
> + * Note: Not applicable to GFX, GT should treat this as reserved.
> + * Bit 1: Fault disable
> + * Bit 0: Present
> + *
> + * Page walks for graphics addresses can go through one or two levels of
> + * translation, depending on whether VT-d is enabled.
> + *
> + * If we're in driver mode (currently the only supported mode), we always
> + * use a single level of translation, meaning the second level page table
> + * pointer (if present) is ignored.
> + *
> + * The full walk starts at the root table, which indexes into the upper
> + * and lower context tables. Those tables point to PASID mapping and state
> + * tables and potentially a second level page table for VT-d (which, as noted
> + * above, is unused currently). The PASID mapping table points to a PML4
> + * (x86 compatible) page table, while the state table indicates other
> + * information about the PASID involved in the request, which ultimately comes
> + * from the execlist port submission of the context descriptor.
> + *
> + * To enable a shared CPU/GPU address space, we can use a couple of different
> + * translation types, either 101 or 01 w/o nesting. The main requirement
> + * is that requests with PASID are translated through the page tables provided,
> + * potentially with nesting if we're running in a VT-d context (which we
> + * don't currently support).
> + */
> +#define CONTEXT_OFFSET (PAGE_SIZE * 1)
> +#define PASID_OFFSET (PAGE_SIZE * 2)
> +#define PASID_STATE_OFFSET (PAGE_SIZE * 3)
> +#define PRQ_OFFSET (PAGE_SIZE * 4)
> +#define IVQ_OFFSET (PAGE_SIZE * 5)
> +static void intel_init_svm_root_table(struct drm_device *dev,
> + drm_dma_handle_t *tables)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + struct extended_root_table_entry *root_table;
> + struct extended_context_table_entry *context;
> + struct pasid_table_entry *pasid_table;
> + struct pasid_state_table_entry *pasid_state_table;
> + u64 *tmp;
> +
> + root_table = tables->vaddr;
> + context = tables->vaddr + CONTEXT_OFFSET;
> + pasid_table = tables->vaddr + PASID_OFFSET;
> + pasid_state_table = tables->vaddr + PASID_STATE_OFFSET;
> +
> + DRM_ERROR("programmed PASID table, vaddr %p, busaddr 0x%16llx\n",
> + pasid_table, tables->busaddr + PASID_OFFSET);
> +
> + /* Context entry for gfx device */
> + context[16].pat = 0x66666666;
> + context[16].ere = 1;
> + context[16].sre = 1;
> + context[16].smep = 1;
> + context[16].domain_id = 1;
> + context[16].addr_width = AGAW_48; /* full x86 walk */
> + context[16].pasid_en = 1;
> + context[16].nesting_en = 0; /* not yet */
> + context[16].pg_req_en = 1;
> + context[16].lazy_invalidate_en = 1;
> + context[16].ext_mem_type = EXTENDED_MTYPE_WB;
> + context[16].translation_type = EXTENDED_TTYPE_UT_TR_PASID_PT;
> + context[16].fault_disable = 0;
> + context[16].present = 1;
> + context[16].pasid_state_table_addr = (tables->busaddr + PASID_STATE_OFFSET) >> PAGE_SHIFT;
> + context[16].pasid_table_addr = (tables->busaddr + PASID_OFFSET) >>
> + PAGE_SHIFT;
> + context[16].pasid_table_size = 0; /* 2^(5+x) */
> +
> + tmp = (u64 *)&context[16];
> + DRM_ERROR("root entry: 0x%016llx%016llx\n", tmp[1], tmp[0]);
> +
> + DRM_ERROR("programmed context table, vaddr %p, busaddr 0x%16llx\n",
> + context, tables->busaddr + CONTEXT_OFFSET);
> +
> + /* Root table */
> + root_table[0].lo_ctx_addr = (tables->busaddr + CONTEXT_OFFSET) >>
> + PAGE_SHIFT;
> + root_table[0].lo_present = 1;
> + root_table[0].hi_present = 0;
> +
> + tmp = (u64 *)&root_table[0];
> + DRM_ERROR("root entry: 0x%016llx%016llx\n", tmp[1], tmp[0]);
> +
> + dev_priv->svm.root_table = root_table;
> + dev_priv->svm.context = context;
> + dev_priv->svm.pasid_table = pasid_table;
> + dev_priv->svm.pasid_state_table = pasid_state_table;
> + dev_priv->svm.prq_ring = tables->vaddr + PRQ_OFFSET;
> + dev_priv->svm.ivq_ring = tables->vaddr + IVQ_OFFSET;
> +
> + /* Enable the page request queue */
> + I915_WRITE64(SVM_PRQA, tables->busaddr + PRQ_OFFSET);
> + I915_WRITE(SVM_PRQ_HEAD, 0);
> + I915_WRITE(SVM_PRQ_TAIL, 0);
> + I915_WRITE(SVM_PRECTL, 0);
> +
> + /* Set up the invalidation request queue */
> + I915_WRITE64(SVM_IQA, tables->busaddr + IVQ_OFFSET);
> + I915_WRITE(SVM_IVQ_HEAD, 0);
> + I915_WRITE(SVM_IVQ_TAIL, 0);
> + I915_WRITE(SVM_IECTL, 0);
> +
> + I915_WRITE(SVM_GCMD, GCMD_QIE);
> + if (wait_for(I915_READ(SVM_GSTS) & GSTS_QIES, 500))
> + DRM_ERROR("timed out waiting for queued invalidation enable\n");
> +
> + /* All set, program the root */
> + I915_WRITE(SVM_RTADDR, tables->busaddr | SVM_RTT_TYPE_EXT);
> +
> + I915_WRITE(SVM_GCMD, GCMD_SRTP);
> + if (wait_for(I915_READ(SVM_GSTS) & GSTS_RTPS, 500))
> + DRM_ERROR("timed out waiting for root table to load\n");
> +
> + DRM_ERROR("programmed SVM root, vaddr %p, busaddr 0x%16llx\n",
> + tables->vaddr, tables->busaddr);
> +
> + intel_iommu_tlb_flush(dev);
> +}
> +
> +/*
> + * Probe for SVM capability. If found:
> + * - try to switch to driver mode
> + * - set up root PASID table
> + * - enable page fault and error handling interrupts
> + * - allow SVM ioctls
> + */
> +void intel_init_svm(struct drm_device *dev)
> +{
> + struct drm_i915_private *dev_priv = dev->dev_private;
> + drm_dma_handle_t *tables;
> + u32 dev_mode;
> + int num_tables = 6;
> +
> + dev_mode = I915_READ(BDW_SVM_DEV_MODE_CNFG);
> + I915_WRITE(BDW_SVM_DEV_MODE_CNFG, dev_mode | BDW_SVM_MODE_DRIVER);
> + dev_mode = I915_READ(BDW_SVM_DEV_MODE_CNFG);
> +#if defined(CONFIG_INTEL_IOMMU) || defined(IOMMU_SUPPORT)
> +#error must disable IOMMU support
> +#endif
> + if (!dev_mode & BDW_SVM_MODE_DRIVER) {
> + DRM_ERROR("driver mode not available, disabling SVM\n");
> + goto err;
> + }
> +
> + tables = drm_pci_alloc(dev, PAGE_SIZE*num_tables, PAGE_SIZE);
> + if (!tables) {
> + DRM_ERROR("table alloc failed, disabling SVM\n");
> + goto err;
> + }
> +
> + memset(tables->vaddr, 0, PAGE_SIZE*num_tables);
> +
> + intel_init_svm_root_table(dev, tables);
> +
> + spin_lock_init(&dev_priv->svm.lock);
> +
> +#if 0
> + I915_WRITE(SVM_GCMD, GCMD_TE);
> + if (wait_for(I915_READ(SVM_GSTS) & GSTS_TES, 500))
> + DRM_ERROR("timed out waiting for translation enable\n");
> +#endif
> + INIT_WORK(&dev_priv->svm.work, intel_gpu_fault_work);
> +
> + DRM_ERROR("SVM driver mode enabled\n");
> + dev_priv->svm.svm_available = true;
> + return;
> +
> +err:
> + dev_priv->svm.svm_available = false;
> + return;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 40cbba4..1450491 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -217,6 +217,7 @@ enum {
> FAULT_AND_STREAM,
> FAULT_AND_CONTINUE /* Unsupported */
> };
> +#define GEN8_CTX_FAULT_SHIFT 6
> #define GEN8_CTX_ID_SHIFT 32
> #define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
>
> @@ -289,12 +290,21 @@ uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
> WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
>
> desc = GEN8_CTX_VALID;
> - desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> - if (IS_GEN8(ctx_obj->base.dev))
> - desc |= GEN8_CTX_L3LLC_COHERENT;
> - desc |= GEN8_CTX_PRIVILEGE;
> - desc |= lrca;
> - desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
> + if (ctx->is_svm) {
> + desc |= ADVANCED_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> + desc |= FAULT_AND_STREAM << GEN8_CTX_FAULT_SHIFT;
> + desc |= lrca;
> + desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
> + } else {
> + desc |= GEN8_CTX_ADDRESSING_MODE(dev) <<
> + GEN8_CTX_ADDRESSING_MODE_SHIFT;
> + if (IS_GEN8(ctx_obj->base.dev))
> + desc |= GEN8_CTX_L3LLC_COHERENT;
> + desc |= GEN8_CTX_PRIVILEGE;
> + desc |= lrca;
> + desc |= (u64)intel_execlists_ctx_id(ctx_obj) <<
> + GEN8_CTX_ID_SHIFT;
> + }
>
> /* TODO: WaDisableLiteRestore when we start using semaphore
> * signalling between Command Streamers */
> @@ -545,7 +555,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
> _MASKED_FIELD(0x07 << 8, ((u32)ring->next_context_status_buffer & 0x07) << 8));
> }
>
> -static int execlists_context_queue(struct drm_i915_gem_request *request)
> +int execlists_context_queue(struct drm_i915_gem_request *request)
> {
> struct intel_engine_cs *ring = request->ring;
> struct drm_i915_gem_request *cursor;
> @@ -2273,31 +2283,40 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
> reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
> reg_state[CTX_CTX_TIMESTAMP+1] = 0;
> - reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> - reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> - reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> - reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> - reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> - reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> - reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> - reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> -
> - if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> - /* 64b PPGTT (48bit canonical)
> - * PDP0_DESCRIPTOR contains the base address to PML4 and
> - * other PDP Descriptors are ignored.
> - */
> - ASSIGN_CTX_PML4(ppgtt, reg_state);
> +
> + if (ctx->is_svm) {
> + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> + reg_state[CTX_PDP0_UDW+1] = 0;
> + reg_state[CTX_PDP0_LDW+1] = ctx->pasid;
> } else {
> - /* 32b PPGTT
> - * PDP*_DESCRIPTOR contains the base address of space supported.
> - * With dynamic page allocation, PDPs may not be allocated at
> - * this point. Point the unallocated PDPs to the scratch page
> - */
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> + reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> + reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> + reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> + reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> + reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> + reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> + reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> + reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> +
> + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> + /* 64b PPGTT (48bit canonical)
> + * PDP0_DESCRIPTOR contains the base address to PML4 and
> + * other PDP Descriptors are ignored.
> + */
> + ASSIGN_CTX_PML4(ppgtt, reg_state);
> + } else {
> + /* 32b PPGTT
> + * PDP*_DESCRIPTOR contains the base address of space
> + * supported. With dynamic page allocation, PDPs may
> + * not be allocated at this point. Point the
> + * unallocated PDPs to the scratch page
> + */
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> + }
> }
>
> if (ring->id == RCS) {
> @@ -2327,6 +2346,12 @@ void intel_lr_context_free(struct intel_context *ctx)
> {
> int i;
>
> + if (ctx->is_svm) {
> + intel_free_pasid(ctx->ims->dev_priv->dev, ctx);
> + intel_unbind_mm(ctx);
> + put_task_struct(ctx->tsk);
> + }
> +
> for (i = 0; i < I915_NUM_RINGS; i++) {
> struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>
> @@ -2480,6 +2505,37 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>
> }
>
> + if (ctx->is_svm) {
> + /* FIXME: just skip here, don't bail and trash the ctx */
> + if (ring->id != RCS) {
> + DRM_DEBUG_DRIVER("svm context only allowed on RCS\n");
That's fairly useless then :)
-Chris
--
Chris Wilson, Intel Open Source Technology Centre
More information about the Intel-gfx
mailing list