[Intel-gfx] [PATCH 3/4] drm/i915/gtt/xehpsdv: move scratch page to system memory
Thomas Hellström
thomas.hellstrom at linux.intel.com
Wed Dec 8 13:26:11 UTC 2021
On 12/7/21 17:51, Ramalingam C wrote:
> From: Matthew Auld <matthew.auld at intel.com>
>
> On some platforms the hw has dropped support for 4K GTT pages when
> dealing with LMEM, and due to the design of 64K GTT pages in the hw, we
> can only mark the *entire* page-table as operating in 64K GTT mode,
> since the enable bit is still on the pde, and not the pte. And since we
> still need to allow 4K GTT pages for SMEM objects, we can't have a
> "normal" 4K page-table with scratch pointing to LMEM, since that's
> undefined from the hw pov. The simplest solution is to just move the 64K
> scratch page to SMEM on such platforms and call it a day, since that
> should work for all configurations.
>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Signed-off-by: Ramalingam C <ramalingam.c at intel.com>
LGTM.
Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> ---
> drivers/gpu/drm/i915/gt/gen6_ppgtt.c | 1 +
> drivers/gpu/drm/i915/gt/gen8_ppgtt.c | 23 +++++++++++++++++++++--
> drivers/gpu/drm/i915/gt/intel_ggtt.c | 3 +++
> drivers/gpu/drm/i915/gt/intel_gtt.c | 2 +-
> drivers/gpu/drm/i915/gt/intel_gtt.h | 2 ++
> drivers/gpu/drm/i915/selftests/mock_gtt.c | 2 ++
> 6 files changed, 30 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index 4a166d25fe60..c0d149f04949 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -454,6 +454,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
> ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
>
> ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
> + ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
> ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
>
> err = gen6_ppgtt_init_scratch(ppgtt);
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 95c02096a61b..b012c50f7ce7 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -776,10 +776,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
> */
> ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
>
> - if (HAS_LMEM(gt->i915))
> + if (HAS_LMEM(gt->i915)) {
> ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
> - else
> +
> + /*
> + * On some platforms the hw has dropped support for 4K GTT pages
> + * when dealing with LMEM, and due to the design of 64K GTT
> + * pages in the hw, we can only mark the *entire* page-table as
> + * operating in 64K GTT mode, since the enable bit is still on
> + * the pde, and not the pte. And since we still need to allow
> + * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
> + * page-table with scratch pointing to LMEM, since that's
> + * undefined from the hw pov. The simplest solution is to just
> + * move the 64K scratch page to SMEM on such platforms and call
> + * it a day, since that should work for all configurations.
> + */
> + if (HAS_64K_PAGES(gt->i915))
> + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
> + else
> + ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
> + } else {
> ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
> + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
> + }
>
> err = gen8_init_scratch(&ppgtt->vm);
> if (err)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index cbc6d2b1fd9e..d85a1050f4a8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -941,6 +941,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
> size = gen8_get_total_gtt_size(snb_gmch_ctl);
>
> ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> + ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
> ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
>
> ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
> @@ -1094,6 +1095,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
> ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
>
> ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> + ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>
> ggtt->vm.clear_range = nop_clear_range;
> if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
> @@ -1146,6 +1148,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
> (struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
>
> ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> + ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>
> if (needs_idle_maps(i915)) {
> drm_notice(&i915->drm,
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index 0dd254cb1f69..1428e2b9075a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -301,7 +301,7 @@ int setup_scratch_page(struct i915_address_space *vm)
> do {
> struct drm_i915_gem_object *obj;
>
> - obj = vm->alloc_pt_dma(vm, size);
> + obj = vm->alloc_scratch_dma(vm, size);
> if (IS_ERR(obj))
> goto skip;
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 51afe66d00f2..15b98321e89a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -268,6 +268,8 @@ struct i915_address_space {
>
> struct drm_i915_gem_object *
> (*alloc_pt_dma)(struct i915_address_space *vm, int sz);
> + struct drm_i915_gem_object *
> + (*alloc_scratch_dma)(struct i915_address_space *vm, int sz);
>
> u64 (*pte_encode)(dma_addr_t addr,
> enum i915_cache_level level,
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index cc047ec594f9..32ca8962d0ab 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -78,6 +78,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
> i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
>
> ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
> + ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>
> ppgtt->vm.clear_range = mock_clear_range;
> ppgtt->vm.insert_page = mock_insert_page;
> @@ -118,6 +119,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
> ggtt->vm.total = 4096 * PAGE_SIZE;
>
> ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> + ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>
> ggtt->vm.clear_range = mock_clear_range;
> ggtt->vm.insert_page = mock_insert_page;
More information about the Intel-gfx
mailing list