[Intel-gfx] [PATCH v7 08/19] drm/i915/gen8: Add 4 level switching infrastructure and lrc support
Goel, Akash
akash.goel at intel.com
Thu Jul 30 21:23:14 PDT 2015
Reviewed the patch & it looks fine.
Reviewed-by: "Akash Goel <akash.goel at intel.com>"
On 7/30/2015 3:36 PM, Michel Thierry wrote:
> In 64b (48bit canonical) PPGTT addressing, the PDP0 register contains
> the base address to PML4, while the other PDP registers are ignored.
>
> In LRC, the addressing mode must be specified in every context
> descriptor, and the base address to PML4 is stored in the reg state.
>
> v2: PML4 update in legacy context switch is left for historic reasons,
> the preferred mode of operation is with lrc context based submission.
> v3: s/gen8_map_page_directory/gen8_setup_page_directory and
> s/gen8_map_page_directory_pointer/gen8_setup_page_directory_pointer.
> Also, clflush will be needed for bxt. (Akash)
> v4: Squashed lrc-specific code and use a macro to set PML4 register.
> v5: Rebase after Mika's ppgtt cleanup / scratch merge patch series.
> PDP update in bb_start is only for legacy 32b mode.
> v6: Rebase after final merged version of Mika's ppgtt/scratch
> patches.
> v7: There is no need to update the pml4 register value in
> execlists_update_context. (Akash)
> v8: Move pd and pdp setup functions to a previous patch, they do not
> belong here. (Akash)
> v9: Check USES_FULL_48BIT_PPGTT instead of GEN8_CTX_ADDRESSING_MODE in
> gen8_emit_bb_start to check if emit pdps is needed. (Akash)
>
> Cc: Akash Goel <akash.goel at intel.com>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
> Signed-off-by: Michel Thierry <michel.thierry at intel.com> (v2+)
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 17 +++++++----
> drivers/gpu/drm/i915/i915_reg.h | 1 +
> drivers/gpu/drm/i915/intel_lrc.c | 60 ++++++++++++++++++++++++++-----------
> 3 files changed, 55 insertions(+), 23 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index c498eaa..ae2e082 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -656,8 +656,8 @@ static int gen8_write_pdp(struct drm_i915_gem_request *req,
> return 0;
> }
>
> -static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> - struct drm_i915_gem_request *req)
> +static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
> + struct drm_i915_gem_request *req)
> {
> int i, ret;
>
> @@ -672,6 +672,12 @@ static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
> return 0;
> }
>
> +static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
> + struct drm_i915_gem_request *req)
> +{
> + return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
> +}
> +
> static void gen8_ppgtt_clear_pte_range(struct i915_address_space *vm,
> struct i915_page_directory_pointer *pdp,
> uint64_t start,
> @@ -1321,14 +1327,13 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> ppgtt->base.unbind_vma = ppgtt_unbind_vma;
> ppgtt->base.bind_vma = ppgtt_bind_vma;
>
> - ppgtt->switch_mm = gen8_mm_switch;
> -
> if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> ret = setup_px(ppgtt->base.dev, &ppgtt->pml4);
> if (ret)
> goto free_scratch;
>
> ppgtt->base.total = 1ULL << 48;
> + ppgtt->switch_mm = gen8_48b_mm_switch;
> } else {
> ret = __pdp_init(false, &ppgtt->pdp);
> if (ret)
> @@ -1343,6 +1348,7 @@ static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
> */
> ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
>
> + ppgtt->switch_mm = gen8_legacy_mm_switch;
> trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
> 0, 0,
> GEN8_PML4E_SHIFT);
> @@ -1540,8 +1546,9 @@ static void gen8_ppgtt_enable(struct drm_device *dev)
> int j;
>
> for_each_ring(ring, dev_priv, j) {
> + u32 four_level = USES_FULL_48BIT_PPGTT(dev) ? GEN8_GFX_PPGTT_48B : 0;
> I915_WRITE(RING_MODE_GEN7(ring),
> - _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
> + _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
> }
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index 3a77678..5bd1b6a 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -1670,6 +1670,7 @@ enum skl_disp_power_wells {
> #define GFX_REPLAY_MODE (1<<11)
> #define GFX_PSMI_GRANULARITY (1<<10)
> #define GFX_PPGTT_ENABLE (1<<9)
> +#define GEN8_GFX_PPGTT_48B (1<<7)
>
> #define VLV_DISPLAY_BASE 0x180000
> #define VLV_MIPI_BASE VLV_DISPLAY_BASE
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 99bba8e..4c40614 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -196,13 +196,21 @@
> reg_state[CTX_PDP ## n ## _LDW+1] = lower_32_bits(_addr); \
> }
>
> +#define ASSIGN_CTX_PML4(ppgtt, reg_state) { \
> + reg_state[CTX_PDP0_UDW + 1] = upper_32_bits(px_dma(&ppgtt->pml4)); \
> + reg_state[CTX_PDP0_LDW + 1] = lower_32_bits(px_dma(&ppgtt->pml4)); \
> +}
> +
> enum {
> ADVANCED_CONTEXT = 0,
> - LEGACY_CONTEXT,
> + LEGACY_32B_CONTEXT,
> ADVANCED_AD_CONTEXT,
> LEGACY_64B_CONTEXT
> };
> -#define GEN8_CTX_MODE_SHIFT 3
> +#define GEN8_CTX_ADDRESSING_MODE_SHIFT 3
> +#define GEN8_CTX_ADDRESSING_MODE(dev) (USES_FULL_48BIT_PPGTT(dev) ?\
> + LEGACY_64B_CONTEXT :\
> + LEGACY_32B_CONTEXT)
> enum {
> FAULT_AND_HANG = 0,
> FAULT_AND_HALT, /* Debug only */
> @@ -273,7 +281,7 @@ static uint64_t execlists_ctx_descriptor(struct drm_i915_gem_request *rq)
> WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
>
> desc = GEN8_CTX_VALID;
> - desc |= LEGACY_CONTEXT << GEN8_CTX_MODE_SHIFT;
> + desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> if (IS_GEN8(ctx_obj->base.dev))
> desc |= GEN8_CTX_L3LLC_COHERENT;
> desc |= GEN8_CTX_PRIVILEGE;
> @@ -348,10 +356,12 @@ static int execlists_update_context(struct drm_i915_gem_request *rq)
> reg_state[CTX_RING_TAIL+1] = rq->tail;
> reg_state[CTX_RING_BUFFER_START+1] = i915_gem_obj_ggtt_offset(rb_obj);
>
> - /* True PPGTT with dynamic page allocation: update PDP registers and
> - * point the unallocated PDPs to the scratch page
> - */
> - if (ppgtt) {
> + if (ppgtt && !USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> + /* True 32b PPGTT with dynamic page allocation: update PDP
> + * registers and point the unallocated PDPs to scratch page.
> + * PML4 is allocated during ppgtt init, so this is not needed
> + * in 48-bit mode.
> + */
> ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> @@ -1512,12 +1522,15 @@ static int gen8_emit_bb_start(struct drm_i915_gem_request *req,
> * Ideally, we should set Force PD Restore in ctx descriptor,
> * but we can't. Force Restore would be a second option, but
> * it is unsafe in case of lite-restore (because the ctx is
> - * not idle). */
> + * not idle). PML4 is allocated during ppgtt init so this is
> + * not needed in 48-bit.*/
> if (req->ctx->ppgtt &&
> (intel_ring_flag(req->ring) & req->ctx->ppgtt->pd_dirty_rings)) {
> - ret = intel_logical_ring_emit_pdps(req);
> - if (ret)
> - return ret;
> + if (!USES_FULL_48BIT_PPGTT(req->i915)) {
> + ret = intel_logical_ring_emit_pdps(req);
> + if (ret)
> + return ret;
> + }
>
> req->ctx->ppgtt->pd_dirty_rings &= ~intel_ring_flag(req->ring);
> }
> @@ -2198,13 +2211,24 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
> reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
>
> - /* With dynamic page allocation, PDPs may not be allocated at this point,
> - * Point the unallocated PDPs to the scratch page
> - */
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> - ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> + if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> + /* 64b PPGTT (48bit canonical)
> + * PDP0_DESCRIPTOR contains the base address to PML4 and
> + * other PDP Descriptors are ignored.
> + */
> + ASSIGN_CTX_PML4(ppgtt, reg_state);
> + } else {
> + /* 32b PPGTT
> + * PDP*_DESCRIPTOR contains the base address of space supported.
> + * With dynamic page allocation, PDPs may not be allocated at
> + * this point. Point the unallocated PDPs to the scratch page
> + */
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> + ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> + }
> +
> if (ring->id == RCS) {
> reg_state[CTX_LRI_HEADER_2] = MI_LOAD_REGISTER_IMM(1);
> reg_state[CTX_R_PWR_CLK_STATE] = GEN8_R_PWR_CLK_STATE;
>
More information about the Intel-gfx
mailing list