[Intel-gfx] [PATCH v7 08/19] drm/i915/gen8: Add 4 level switching infrastructure and lrc support

Goel, Akash akash.goel at intel.com
Thu Jul 30 21:23:14 PDT 2015


Reviewed the patch & it looks fine.
Reviewed-by: "Akash Goel <akash.goel at intel.com>"


On 7/30/2015 3:36 PM, Michel Thierry wrote:
> In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
> the base address to PML4, while the other PDP registers are ignored.
>
> In LRC, the addressing mode must be specified in every context
> descriptor, and the base address to PML4 is stored in the reg state.
>
> v2: PML4 update in legacy context switch is left for historic reasons,
> the preferred mode of operation is with lrc context based submission.
> v3: s/gen8_map_page_directory/gen8_setup_page_directory and
> s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
> Also, clflush will be needed for bxt. (Akash)
> v4: Squashed lrc-specific code and use a macro to set PML4 register.
> v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
> PDP update in bb_start is only for legacy 32b mode.
> v6: Rebase after final merged version of Mika's ppgtt/scratch
> patches.
> v7: There is no need to update the pml4 register value in
> execlists_update_context. (Akash)
> v8: Move pd and pdp setup functions to a previous patch, they do not
> belong here. (Akash)
> v9: Check USES_FULL_48BIT_PPGTT instead of GEN8_CTX_ADDRESSING_MODE in
> gen8_emit_bb_start to check if emit pdps is needed. (Akash)
>
> Cc: Akash Goel <akash.goel at intel.com>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v2+)
> ---
>   drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++----
>   drivers/gpu/drm/i915/i915_reg.h     |  1 +
>   drivers/gpu/drm/i915/intel_lrc.c    | 60 ++++++++++++++++++++++++++-----------
>   3 files changed, 55 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c498eaa..ae2e082 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -656,8 +656,8 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
>   	return 0;
>   }
>
> -static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> -			  struct drm_i915_gem_request *req)
> +static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
> +				 struct drm_i915_gem_request *req)
>   {
>   	int i, ret;
>
> @@ -672,6 +672,12 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
>   	return 0;
>   }
>
> +static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
> +			      struct drm_i915_gem_request *req)
> +{
> +	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
> +}
> +
>   static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
>   				       struct i915_page_directory_pointer *pdp,
>   				       uint64_t start,
> @@ -1321,14 +1327,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>   	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
>   	ppgtt->base.bind_vma = ppgtt_bind_vma;
>
> -	ppgtt->switch_mm = gen8_mm_switch;
> -
>   	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
>   		ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
>   		if (ret)
>   			goto free_scratch;
>
>   		ppgtt->base.total = 1ULL << 48;
> +		ppgtt->switch_mm = gen8_48b_mm_switch;
>   	} else {
>   		ret = __pdp_init(false, &ppgtt->pdp);
>   		if (ret)
> @@ -1343,6 +1348,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
>   			 */
>   			ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
>
> +		ppgtt->switch_mm = gen8_legacy_mm_switch;
>   		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
>   							      0, 0,
>   							      GEN8_PML4E_SHIFT);
> @@ -1540,8 +1546,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
>   	int j;
>
>   	for_each_ring(ring, dev_priv, j) {
> +		u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
>   		I915_WRITE(RING_MODE_GEN7(ring),
> -			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
> +			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
>   	}
>   }
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3a77678..5bd1b6a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1670,6 +1670,7 @@ enum skl_disp_power_wells {
>   #define   GFX_REPLAY_MODE		(1<<11)
>   #define   GFX_PSMI_GRANULARITY		(1<<10)
>   #define   GFX_PPGTT_ENABLE		(1<<9)
> +#define   GEN8_GFX_PPGTT_48B		(1<<7)
>
>   #define VLV_DISPLAY_BASE 0x180000
>   #define VLV_MIPI_BASE VLV_DISPLAY_BASE
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 99bba8e..4c40614 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -196,13 +196,21 @@
>   	reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
>   }
>
> +#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
> +	reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
> +	reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
> +}
> +
>   enum {
>   	ADVANCED_CONTEXT = 0,
> -	LEGACY_CONTEXT,
> +	LEGACY_32B_CONTEXT,
>   	ADVANCED_AD_CONTEXT,
>   	LEGACY_64B_CONTEXT
>   };
> -#define GEN8_CTX_MODE_SHIFT 3
> +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
> +#define GEN8_CTX_ADDRESSING_MODE(dev)  (USES_FULL_48BIT_PPGTT(dev) ?\
> +		LEGACY_64B_CONTEXT :\
> +		LEGACY_32B_CONTEXT)
>   enum {
>   	FAULT_AND_HANG = 0,
>   	FAULT_AND_HALT, /* Debug only */
> @@ -273,7 +281,7 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
>   	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
>
>   	desc = GEN8_CTX_VALID;
> -	desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
> +	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
>   	if (IS_GEN8(ctx_obj->base.dev))
>   		desc |= GEN8_CTX_L3LLC_COHERENT;
>   	desc |= GEN8_CTX_PRIVILEGE;
> @@ -348,10 +356,12 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
>   	reg_state[CTX_RING_TAIL+1] = rq->tail;
>   	reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
>
> -	/* True PPGTT with dynamic page allocation: update PDP registers and
> -	 * point the unallocated PDPs to the scratch page
> -	 */
> -	if (ppgtt) {
> +	if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +		/* True 32b PPGTT with dynamic page allocation: update PDP
> +		 * registers and point the unallocated PDPs to scratch page.
> +		 * PML4 is allocated during ppgtt init, so this is not needed
> +		 * in 48-bit mode.
> +		 */
>   		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
>   		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
>   		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> @@ -1512,12 +1522,15 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
>   	 * Ideally, we should set Force PD Restore in ctx descriptor,
>   	 * but we can't. Force Restore would be a second option, but
>   	 * it is unsafe in case of lite-restore (because the ctx is
> -	 * not idle). */
> +	 * not idle). PML4 is allocated during ppgtt init so this is
> +	 * not needed in 48-bit.*/
>   	if (req->ctx->ppgtt &&
>   	    (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
> -		ret = intel_logical_ring_emit_pdps(req);
> -		if (ret)
> -			return ret;
> +		if (!USES_FULL_48BIT_PPGTT(req->i915)) {
> +			ret = intel_logical_ring_emit_pdps(req);
> +			if (ret)
> +				return ret;
> +		}
>
>   		req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
>   	}
> @@ -2198,13 +2211,24 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>   	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
>   	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
>
> -	/* With dynamic page allocation, PDPs may not be allocated at this point,
> -	 * Point the unallocated PDPs to the scratch page
> -	 */
> -	ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> -	ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> -	ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> -	ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> +	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +		/* 64b PPGTT (48bit canonical)
> +		 * PDP0_DESCRIPTOR contains the base address to PML4 and
> +		 * other PDP Descriptors are ignored.
> +		 */
> +		ASSIGN_CTX_PML4(ppgtt, reg_state);
> +	} else {
> +		/* 32b PPGTT
> +		 * PDP*_DESCRIPTOR contains the base address of space supported.
> +		 * With dynamic page allocation, PDPs may not be allocated at
> +		 * this point. Point the unallocated PDPs to the scratch page
> +		 */
> +		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> +		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> +		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> +		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> +	}
> +
>   	if (ring->id == RCS) {
>   		reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
>   		reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
>


More information about the Intel-gfx mailing list