[Intel-gfx] [PATCH 5/7] drm/i915: Implement GGTT update method with MI_UPDATE_GTT
Nirmoy Das
nirmoy.das at intel.com
Mon Sep 18 10:25:41 UTC 2023
On 9/15/2023 5:56 PM, Zeng, Oak wrote:
>
> Thanks,
> Oak
>
>> -----Original Message-----
>> From: Das, Nirmoy <nirmoy.das at intel.com>
>> Sent: Friday, September 15, 2023 4:34 AM
>> To: intel-gfx at lists.freedesktop.org
>> Cc: Zeng, Oak <oak.zeng at intel.com>; chris.p.wilson at linux.intel.com; Piorkowski,
>> Piotr <piotr.piorkowski at intel.com>; Shyti, Andi <andi.shyti at intel.com>; Mun,
>> Gwan-gyeong <gwan-gyeong.mun at intel.com>; Roper, Matthew D
>> <matthew.d.roper at intel.com>; Das, Nirmoy <nirmoy.das at intel.com>
>> Subject: [PATCH 5/7] drm/i915: Implement GGTT update method with MI_UPDATE_GTT
>>
>> Implement a GGTT update method using the blitter command MI_UPDATE_GTT
>> and install those handlers on platforms that require it.
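(For context, each batch emitted by the new bind path has the shape sketched
here; this is just a condensation of gen8_ggtt_bind_ptes() from this patch,
where 'offset' is a GGTT page index and 'n_ptes' the number of entries
updated in one batch, not additional code.)

    cs[0] = MI_UPDATE_GTT | (2 * n_ptes); /* two dwords of payload per PTE */
    cs[1] = offset << 12;                 /* GGTT offset of the first entry to update */
    /* ... followed by n_ptes 64-bit PTE values, lower dword first ... */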
>>
>> v2: Make sure we hold the GT wakeref and blitter engine wakeref before
>> we call mutex_lock/intel_context_enter below. When the GT/engine are not
>> awake, intel_context_enter calls into a runtime PM function which can
>> end up in kmalloc/fs_reclaim. Triggering fs_reclaim while holding a
>> mutex lock is not allowed, because the shrinker may try to take the same
>> mutex, creating a circular lock dependency. So take the GT/blitter engine
>> wakeref before calling mutex_lock to avoid the circular lock.
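(In code terms, the ordering v2 enforces is roughly the following; this is a
sketch condensed from gen8_ggtt_bind_get_ce() and gen8_ggtt_bind_ptes() in
the patch below, not separate code.)

    /* Take the wakerefs first; if the GT is not already awake, bail out so
     * the caller falls back to the CPU/PCI GGTT update path instead of
     * resuming here (resume may allocate and hit fs_reclaim).
     */
    if (!intel_gt_pm_get_if_awake(gt))
            return NULL;
    intel_engine_pm_get(ce->engine);

    /* Only now take the timeline mutex and enter the context. */
    mutex_lock_interruptible(&ce->timeline->mutex);
    intel_context_enter(ce);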
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at intel.com>
>> Signed-off-by: Oak Zeng <oak.zeng at intel.com>
>> ---
>> drivers/gpu/drm/i915/gt/intel_ggtt.c | 235 +++++++++++++++++++++++++++
>> 1 file changed, 235 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> index dd0ed941441a..b94de7cebfce 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
>> @@ -15,18 +15,23 @@
>> #include "display/intel_display.h"
>> #include "gem/i915_gem_lmem.h"
>>
>> +#include "intel_context.h"
>> #include "intel_ggtt_gmch.h"
>> +#include "intel_gpu_commands.h"
>> #include "intel_gt.h"
>> #include "intel_gt_regs.h"
>> #include "intel_pci_config.h"
>> +#include "intel_ring.h"
>> #include "i915_drv.h"
>> #include "i915_pci.h"
>> +#include "i915_request.h"
>> #include "i915_scatterlist.h"
>> #include "i915_utils.h"
>> #include "i915_vgpu.h"
>>
>> #include "intel_gtt.h"
>> #include "gen8_ppgtt.h"
>> +#include "intel_engine_pm.h"
>>
>> static void i915_ggtt_color_adjust(const struct drm_mm_node *node,
>> unsigned long color,
>> @@ -252,6 +257,145 @@ u64 gen8_ggtt_pte_encode(dma_addr_t addr,
>> return pte;
>> }
>>
>> +static bool should_update_ggtt_with_bind(struct i915_ggtt *ggtt)
>> +{
>> + struct intel_gt *gt = ggtt->vm.gt;
>> +
>> + return intel_gt_is_bind_context_ready(gt);
>> +}
>> +
>> +static struct intel_context *gen8_ggtt_bind_get_ce(struct i915_ggtt *ggtt)
>> +{
>> + struct intel_context *ce;
>> + struct intel_gt *gt = ggtt->vm.gt;
>> +
>> + if (intel_gt_is_wedged(gt))
>> + return NULL;
>> +
>> + ce = gt->engine[BCS0]->bind_context;
>> + GEM_BUG_ON(!ce);
>> +
>> + /*
>> + * If the GT is not awake already at this stage then fallback
>> + * to pci based GGTT update otherwise __intel_wakeref_get_first()
>> + * would conflict with fs_reclaim trying to allocate memory while
>> + * doing rpm_resume().
>> + */
>> + if (!intel_gt_pm_get_if_awake(gt))
>> + return NULL;
>> +
>> + intel_engine_pm_get(ce->engine);
>> +
>> + return ce;
>> +}
>> +
>> +static void gen8_ggtt_bind_put_ce(struct intel_context *ce)
>> +{
>> + intel_engine_pm_put(ce->engine);
>> + intel_gt_pm_put(ce->engine->gt);
>> +}
>> +
>> +static bool gen8_ggtt_bind_ptes(struct i915_ggtt *ggtt, u32 offset,
>> + struct sg_table *pages, u32 num_entries,
>> + const gen8_pte_t pte)
>> +{
>> + struct i915_sched_attr attr = {};
>> + struct intel_gt *gt = ggtt->vm.gt;
>> + const gen8_pte_t scratch_pte = ggtt->vm.scratch[0]->encode;
>> + struct sgt_iter iter;
>> + struct i915_request *rq;
>> + struct intel_context *ce;
>> + u32 *cs;
>> +
>> + if (!num_entries)
>> + return true;
>> +
>> + ce = gen8_ggtt_bind_get_ce(ggtt);
>> + if (!ce)
>> + return false;
>> +
>> + if (pages)
>> + iter = __sgt_iter(pages->sgl, true);
>> +
>> + while (num_entries) {
>> + int count = 0;
>> + dma_addr_t addr;
>> + /*
>> + * MI_UPDATE_GTT can update 512 entries in a single command but
>> + * that ends up triggering an engine reset; 511 works.
>> + */
>> + u32 n_ptes = min_t(u32, 511, num_entries);
>> +
>> + if (mutex_lock_interruptible(&ce->timeline->mutex))
>> + goto put_ce;
>> +
>> + intel_context_enter(ce);
>> + rq = __i915_request_create(ce, GFP_NOWAIT | GFP_ATOMIC);
>> + intel_context_exit(ce);
>> + if (IS_ERR(rq)) {
>> + GT_TRACE(gt, "Failed to get bind request\n");
>> + mutex_unlock(&ce->timeline->mutex);
>> + goto put_ce;
>> + }
>> +
>> + cs = intel_ring_begin(rq, 2 * n_ptes + 2);
>> + if (IS_ERR(cs)) {
>> + GT_TRACE(gt, "Failed to get ring space for GGTT bind\n");
>> + i915_request_set_error_once(rq, PTR_ERR(cs));
>> + /* once a request is created, it must be queued */
>> + goto queue_err_rq;
>> + }
>> +
>> + *cs++ = MI_UPDATE_GTT | (2 * n_ptes);
>> + *cs++ = offset << 12;
>> +
>> + if (pages) {
>> + for_each_sgt_daddr_next(addr, iter) {
>> + if (count == n_ptes)
>> + break;
>> + *cs++ = lower_32_bits(pte | addr);
>> + *cs++ = upper_32_bits(pte | addr);
>> + count++;
>> + }
>> + /* fill remaining with scratch pte, if any */
>> + if (count < n_ptes) {
>> + memset64((u64 *)cs, scratch_pte,
>> + n_ptes - count);
>> + cs += (n_ptes - count) * 2;
>> + }
> Should we return an error instead of silently filling the PTEs with scratch? Or maybe even GEM_BUG_ON in this case? If the caller didn't allocate enough pages, something has gone wrong...
We already do the same in gen8_ggtt_insert_entries(), so this isn't adding
anything new here; the comment just makes it obvious :) I don't know the
exact use case where this happens, but I saw tons of pipe errors without it.
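For reference, the existing PCI/MMIO path does roughly the following
(paraphrased from gen8_ggtt_insert_entries(); treat the exact lines as
approximate):

    for_each_sgt_daddr(addr, iter, vma_res->bi.pages)
            gen8_set_pte(gte++, pte_encode | addr);

    /* Fill the allocated but "unused" space beyond the end of the buffer
     * with scratch PTEs.
     */
    while (gte < end)
            gen8_set_pte(gte++, vm->scratch[0]->encode);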
Regards,
Nirmoy
>
> Oak
>
>> + } else {
>> + memset64((u64 *)cs, pte, n_ptes);
>> + cs += n_ptes * 2;
>> + }
>> +
>> + intel_ring_advance(rq, cs);
>> +queue_err_rq:
>> + i915_request_get(rq);
>> + __i915_request_commit(rq);
>> + __i915_request_queue(rq, &attr);
>> +
>> + mutex_unlock(&ce->timeline->mutex);
>> + /* This will break if the request is complete or after engine reset */
>> + i915_request_wait(rq, 0, MAX_SCHEDULE_TIMEOUT);
>> + if (rq->fence.error)
>> + goto err_rq;
>> +
>> + i915_request_put(rq);
>> +
>> + num_entries -= n_ptes;
>> + offset += n_ptes;
>> + }
>> +
>> + gen8_ggtt_bind_put_ce(ce);
>> + return true;
>> +
>> +err_rq:
>> + i915_request_put(rq);
>> +put_ce:
>> + gen8_ggtt_bind_put_ce(ce);
>> + return false;
>> +}
>> +
>> static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
>> {
>> writeq(pte, addr);
>> @@ -272,6 +416,21 @@ static void gen8_ggtt_insert_page(struct i915_address_space *vm,
>> ggtt->invalidate(ggtt);
>> }
>>
>> +static void gen8_ggtt_insert_page_bind(struct i915_address_space *vm,
>> + dma_addr_t addr, u64 offset,
>> + unsigned int pat_index, u32 flags)
>> +{
>> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> + gen8_pte_t pte;
>> +
>> + pte = ggtt->vm.pte_encode(addr, pat_index, flags);
>> + if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
>> + gen8_ggtt_bind_ptes(ggtt, offset, NULL, 1, pte))
>> + return ggtt->invalidate(ggtt);
>> +
>> + gen8_ggtt_insert_page(vm, addr, offset, pat_index, flags);
>> +}
>> +
>> static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>> struct i915_vma_resource *vma_res,
>> unsigned int pat_index,
>> @@ -311,6 +470,50 @@ static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
>> ggtt->invalidate(ggtt);
>> }
>>
>> +static bool __gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
>> + struct i915_vma_resource *vma_res,
>> + unsigned int pat_index, u32 flags)
>> +{
>> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> + gen8_pte_t scratch_pte = vm->scratch[0]->encode;
>> + gen8_pte_t pte_encode;
>> + u64 start, end;
>> +
>> + pte_encode = ggtt->vm.pte_encode(0, pat_index, flags);
>> + start = (vma_res->start - vma_res->guard) / I915_GTT_PAGE_SIZE;
>> + end = start + vma_res->guard / I915_GTT_PAGE_SIZE;
>> + if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
>> + goto err;
>> +
>> + start = end;
>> + end += (vma_res->node_size + vma_res->guard) / I915_GTT_PAGE_SIZE;
>> + if (!gen8_ggtt_bind_ptes(ggtt, start, vma_res->bi.pages,
>> + vma_res->node_size / I915_GTT_PAGE_SIZE, pte_encode))
>> + goto err;
>> +
>> + start += vma_res->node_size / I915_GTT_PAGE_SIZE;
>> + if (!gen8_ggtt_bind_ptes(ggtt, start, NULL, end - start, scratch_pte))
>> + goto err;
>> +
>> + return true;
>> +
>> +err:
>> + return false;
>> +}
>> +
>> +static void gen8_ggtt_insert_entries_bind(struct i915_address_space *vm,
>> + struct i915_vma_resource *vma_res,
>> + unsigned int pat_index, u32 flags)
>> +{
>> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> +
>> + if (should_update_ggtt_with_bind(i915_vm_to_ggtt(vm)) &&
>> + __gen8_ggtt_insert_entries_bind(vm, vma_res, pat_index, flags))
>> + return ggtt->invalidate(ggtt);
>> +
>> + gen8_ggtt_insert_entries(vm, vma_res, pat_index, flags);
>> +}
>> +
>> static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>> u64 start, u64 length)
>> {
>> @@ -332,6 +535,27 @@ static void gen8_ggtt_clear_range(struct i915_address_space *vm,
>> gen8_set_pte(&gtt_base[i], scratch_pte);
>> }
>>
>> +static void gen8_ggtt_scratch_range_bind(struct i915_address_space *vm,
>> + u64 start, u64 length)
>> +{
>> + struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
>> + unsigned int first_entry = start / I915_GTT_PAGE_SIZE;
>> + unsigned int num_entries = length / I915_GTT_PAGE_SIZE;
>> + const gen8_pte_t scratch_pte = vm->scratch[0]->encode;
>> + const int max_entries = ggtt_total_entries(ggtt) - first_entry;
>> +
>> + if (WARN(num_entries > max_entries,
>> + "First entry = %d; Num entries = %d (max=%d)\n",
>> + first_entry, num_entries, max_entries))
>> + num_entries = max_entries;
>> +
>> + if (should_update_ggtt_with_bind(ggtt) && gen8_ggtt_bind_ptes(ggtt, first_entry,
>> + NULL, num_entries, scratch_pte))
>> + return ggtt->invalidate(ggtt);
>> +
>> + gen8_ggtt_clear_range(vm, start, length);
>> +}
>> +
>> static void gen6_ggtt_insert_page(struct i915_address_space *vm,
>> dma_addr_t addr,
>> u64 offset,
>> @@ -997,6 +1221,17 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>> I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;
>> }
>>
>> + if (i915_ggtt_require_binder(i915)) {
>> + ggtt->vm.scratch_range = gen8_ggtt_scratch_range_bind;
>> + ggtt->vm.insert_page = gen8_ggtt_insert_page_bind;
>> + ggtt->vm.insert_entries = gen8_ggtt_insert_entries_bind;
>> + /*
>> + * When the GPU is hung, we might bind VMAs for error capture.
>> + * Fall back to CPU GGTT updates in that case.
>> + */
>> + ggtt->vm.raw_insert_page = gen8_ggtt_insert_page;
>> + }
>> +
>> if (intel_uc_wants_guc(&ggtt->vm.gt->uc))
>> ggtt->invalidate = guc_ggtt_invalidate;
>> else
>> --
>> 2.41.0