[Intel-gfx] [PATCH 3/4] drm/i915/gtt/xehpsdv: move scratch page to system memory

Wed Dec 8 13:26:11 UTC 2021

On 12/7/21 17:51, Ramalingam C wrote:
> From: Matthew Auld <matthew.auld at intel.com>
>
> On some platforms the hw has dropped support for 4K GTT pages when
> dealing with LMEM, and due to the design of 64K GTT pages in the hw, we
> can only mark the *entire* page-table as operating in 64K GTT mode,
> since the enable bit is still on the pde, and not the pte. And since we
> still need to allow 4K GTT pages for SMEM objects, we can't have a
> "normal" 4K page-table with scratch pointing to LMEM, since that's
> undefined from the hw pov. The simplest solution is to just move the 64K
> scratch page to SMEM on such platforms and call it a day, since that
> should work for all configurations.
>
> Signed-off-by: Matthew Auld <matthew.auld at intel.com>
> Signed-off-by: Ramalingam C <ramalingam.c at intel.com>

LGTM.

Reviewed-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

> ---
>   drivers/gpu/drm/i915/gt/gen6_ppgtt.c      |  1 +
>   drivers/gpu/drm/i915/gt/gen8_ppgtt.c      | 23 +++++++++++++++++++++--
>   drivers/gpu/drm/i915/gt/intel_ggtt.c      |  3 +++
>   drivers/gpu/drm/i915/gt/intel_gtt.c       |  2 +-
>   drivers/gpu/drm/i915/gt/intel_gtt.h       |  2 ++
>   drivers/gpu/drm/i915/selftests/mock_gtt.c |  2 ++
>   6 files changed, 30 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> index 4a166d25fe60..c0d149f04949 100644
> --- a/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen6_ppgtt.c
> @@ -454,6 +454,7 @@ struct i915_ppgtt *gen6_ppgtt_create(struct intel_gt *gt)
>   	ppgtt->base.vm.cleanup = gen6_ppgtt_cleanup;
>   
>   	ppgtt->base.vm.alloc_pt_dma = alloc_pt_dma;
> +	ppgtt->base.vm.alloc_scratch_dma = alloc_pt_dma;
>   	ppgtt->base.vm.pte_encode = ggtt->vm.pte_encode;
>   
>   	err = gen6_ppgtt_init_scratch(ppgtt);
> diff --git a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> index 95c02096a61b..b012c50f7ce7 100644
> --- a/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> +++ b/drivers/gpu/drm/i915/gt/gen8_ppgtt.c
> @@ -776,10 +776,29 @@ struct i915_ppgtt *gen8_ppgtt_create(struct intel_gt *gt,
>   	 */
>   	ppgtt->vm.has_read_only = !IS_GRAPHICS_VER(gt->i915, 11, 12);
>   
> -	if (HAS_LMEM(gt->i915))
> +	if (HAS_LMEM(gt->i915)) {
>   		ppgtt->vm.alloc_pt_dma = alloc_pt_lmem;
> -	else
> +
> +		/*
> +		 * On some platforms the hw has dropped support for 4K GTT pages
> +		 * when dealing with LMEM, and due to the design of 64K GTT
> +		 * pages in the hw, we can only mark the *entire* page-table as
> +		 * operating in 64K GTT mode, since the enable bit is still on
> +		 * the pde, and not the pte. And since we still need to allow
> +		 * 4K GTT pages for SMEM objects, we can't have a "normal" 4K
> +		 * page-table with scratch pointing to LMEM, since that's
> +		 * undefined from the hw pov. The simplest solution is to just
> +		 * move the 64K scratch page to SMEM on such platforms and call
> +		 * it a day, since that should work for all configurations.
> +		 */
> +		if (HAS_64K_PAGES(gt->i915))
> +			ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
> +		else
> +			ppgtt->vm.alloc_scratch_dma = alloc_pt_lmem;
> +	} else {
>   		ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
> +		ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
> +	}
>   
>   	err = gen8_init_scratch(&ppgtt->vm);
>   	if (err)
> diff --git a/drivers/gpu/drm/i915/gt/intel_ggtt.c b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> index cbc6d2b1fd9e..d85a1050f4a8 100644
> --- a/drivers/gpu/drm/i915/gt/intel_ggtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_ggtt.c
> @@ -941,6 +941,7 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt)
>   		size = gen8_get_total_gtt_size(snb_gmch_ctl);
>   
>   	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> +	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   	ggtt->vm.lmem_pt_obj_flags = I915_BO_ALLOC_PM_EARLY;
>   
>   	ggtt->vm.total = (size / sizeof(gen8_pte_t)) * I915_GTT_PAGE_SIZE;
> @@ -1094,6 +1095,7 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt)
>   	ggtt->vm.total = (size / sizeof(gen6_pte_t)) * I915_GTT_PAGE_SIZE;
>   
>   	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> +	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   
>   	ggtt->vm.clear_range = nop_clear_range;
>   	if (!HAS_FULL_PPGTT(i915) || intel_scanout_needs_vtd_wa(i915))
> @@ -1146,6 +1148,7 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt)
>   		(struct resource)DEFINE_RES_MEM(gmadr_base, ggtt->mappable_end);
>   
>   	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> +	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   
>   	if (needs_idle_maps(i915)) {
>   		drm_notice(&i915->drm,
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.c b/drivers/gpu/drm/i915/gt/intel_gtt.c
> index 0dd254cb1f69..1428e2b9075a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.c
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.c
> @@ -301,7 +301,7 @@ int setup_scratch_page(struct i915_address_space *vm)
>   	do {
>   		struct drm_i915_gem_object *obj;
>   
> -		obj = vm->alloc_pt_dma(vm, size);
> +		obj = vm->alloc_scratch_dma(vm, size);
>   		if (IS_ERR(obj))
>   			goto skip;
>   
> diff --git a/drivers/gpu/drm/i915/gt/intel_gtt.h b/drivers/gpu/drm/i915/gt/intel_gtt.h
> index 51afe66d00f2..15b98321e89a 100644
> --- a/drivers/gpu/drm/i915/gt/intel_gtt.h
> +++ b/drivers/gpu/drm/i915/gt/intel_gtt.h
> @@ -268,6 +268,8 @@ struct i915_address_space {
>   
>   	struct drm_i915_gem_object *
>   		(*alloc_pt_dma)(struct i915_address_space *vm, int sz);
> +	struct drm_i915_gem_object *
> +		(*alloc_scratch_dma)(struct i915_address_space *vm, int sz);
>   
>   	u64 (*pte_encode)(dma_addr_t addr,
>   			  enum i915_cache_level level,
> diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> index cc047ec594f9..32ca8962d0ab 100644
> --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c
> +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c
> @@ -78,6 +78,7 @@ struct i915_ppgtt *mock_ppgtt(struct drm_i915_private *i915, const char *name)
>   	i915_address_space_init(&ppgtt->vm, VM_CLASS_PPGTT);
>   
>   	ppgtt->vm.alloc_pt_dma = alloc_pt_dma;
> +	ppgtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   
>   	ppgtt->vm.clear_range = mock_clear_range;
>   	ppgtt->vm.insert_page = mock_insert_page;
> @@ -118,6 +119,7 @@ void mock_init_ggtt(struct drm_i915_private *i915, struct i915_ggtt *ggtt)
>   	ggtt->vm.total = 4096 * PAGE_SIZE;
>   
>   	ggtt->vm.alloc_pt_dma = alloc_pt_dma;
> +	ggtt->vm.alloc_scratch_dma = alloc_pt_dma;
>   
>   	ggtt->vm.clear_range = mock_clear_range;
>   	ggtt->vm.insert_page = mock_insert_page;