[PATCH 2/2] drm/i915/gvt: Optimize ring switch 2x faster again by lightweight mmio access wrapper

Zhenyu Wang zhenyuw at linux.intel.com
Fri Jun 23 06:38:54 UTC 2017


On 2017.06.21 16:57:22 +0800, changbin.du at intel.com wrote:
> From: Changbin Du <changbin.du at intel.com>
> 
> The I915_READ/WRITE is not only an mmio read/write; it also contains
> debug checking and Forcewake domain lookup. This is too heavy for the
> GVT ring switch case, which accesses a batch of mmio registers on each
> ring switch. We can handle Forcewake manually and use the raw
> i915_read/write instead. The benefit of this is 2x faster mmio
> switch performance.
>          Before       After
> cycles  ~550000      ~250000
> 
> Signed-off-by: Changbin Du <changbin.du at intel.com>
> ---
>  drivers/gpu/drm/i915/gvt/render.c | 42 +++++++++++++++++++++++++++------------
>  1 file changed, 29 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
> index 5a08bcd..609b0f9 100644
> --- a/drivers/gpu/drm/i915/gvt/render.c
> +++ b/drivers/gpu/drm/i915/gvt/render.c
> @@ -37,6 +37,9 @@
>  #include "gvt.h"
>  #include "trace.h"
>  
> +#define	I915_RAW_WRITE(reg, val)	__raw_i915_write32(dev_priv, reg, val)
> +#define	I915_RAW_READ(reg)		__raw_i915_read32(dev_priv, reg)
> +

Use the existing I915_READ_FW/I915_WRITE_FW instead.

>  struct render_mmio {
>  	int ring_id;
>  	i915_reg_t reg;
> @@ -207,7 +210,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
>  
>  	offset.reg = regs[ring_id];
>  	for (i = 0; i < 64; i++) {
> -		gen9_render_mocs[ring_id][i] = I915_READ(offset);
> +		gen9_render_mocs[ring_id][i] = I915_RAW_READ(offset);
>  		I915_WRITE(offset, vgpu_vreg(vgpu, offset));
>  		offset.reg += 4;
>  	}
> @@ -215,8 +218,8 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
>  	if (ring_id == RCS) {
>  		l3_offset.reg = 0xb020;
>  		for (i = 0; i < 32; i++) {
> -			gen9_render_mocs_L3[i] = I915_READ(l3_offset);
> -			I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
> +			gen9_render_mocs_L3[i] = I915_RAW_READ(l3_offset);
> +			I915_RAW_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
>  			l3_offset.reg += 4;
>  		}
>  	}
> @@ -240,16 +243,16 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
>  
>  	offset.reg = regs[ring_id];
>  	for (i = 0; i < 64; i++) {
> -		vgpu_vreg(vgpu, offset) = I915_READ(offset);
> -		I915_WRITE(offset, gen9_render_mocs[ring_id][i]);
> +		vgpu_vreg(vgpu, offset) = I915_RAW_READ(offset);
> +		I915_RAW_WRITE(offset, gen9_render_mocs[ring_id][i]);
>  		offset.reg += 4;
>  	}
>  
>  	if (ring_id == RCS) {
>  		l3_offset.reg = 0xb020;
>  		for (i = 0; i < 32; i++) {
> -			vgpu_vreg(vgpu, l3_offset) = I915_READ(l3_offset);
> -			I915_WRITE(l3_offset, gen9_render_mocs_L3[i]);
> +			vgpu_vreg(vgpu, l3_offset) = I915_RAW_READ(l3_offset);
> +			I915_RAW_WRITE(l3_offset, gen9_render_mocs_L3[i]);
>  			l3_offset.reg += 4;
>  		}
>  	}
> @@ -284,7 +287,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
>  		if (mmio->ring_id != ring_id)
>  			continue;
>  
> -		mmio->value = I915_READ(mmio->reg);
> +		mmio->value = I915_RAW_READ(mmio->reg);
>  
>  		/*
>  		 * if it is an inhibit context, load in_context mmio
> @@ -301,7 +304,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
>  		else
>  			v = vgpu_vreg(vgpu, mmio->reg);
>  
> -		I915_WRITE(mmio->reg, v);
> +		I915_RAW_WRITE(mmio->reg, v);
>  		last_reg = mmio->reg;
>  
>  		trace_render_mmio(vgpu->id, "load",
> @@ -311,7 +314,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
>  
>  	/* Make sure the swiched MMIOs has taken effect. */
>  	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
> -		POSTING_READ(last_reg);
> +		I915_RAW_READ(last_reg);
>  
>  	handle_tlb_pending_event(vgpu, ring_id);
>  }
> @@ -338,7 +341,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
>  		if (mmio->ring_id != ring_id)
>  			continue;
>  
> -		vgpu_vreg(vgpu, mmio->reg) = I915_READ(mmio->reg);
> +		vgpu_vreg(vgpu, mmio->reg) = I915_RAW_READ(mmio->reg);
>  
>  		if (mmio->mask) {
>  			vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
> @@ -349,7 +352,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
>  		if (mmio->in_context)
>  			continue;
>  
> -		I915_WRITE(mmio->reg, v);
> +		I915_RAW_WRITE(mmio->reg, v);
>  		last_reg = mmio->reg;
>  
>  		trace_render_mmio(vgpu->id, "restore",
> @@ -359,7 +362,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
>  
>  	/* Make sure the swiched MMIOs has taken effect. */
>  	if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
> -		POSTING_READ(last_reg);
> +		I915_RAW_READ(last_reg);
>  }
>  
>  /**
> @@ -374,12 +377,23 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
>  void intel_gvt_switch_mmio(struct intel_vgpu *pre,
>  			   struct intel_vgpu *next, int ring_id)
>  {
> +	struct drm_i915_private *dev_priv;
> +
>  	if (WARN_ON(!pre && !next))
>  		return;
>  
>  	gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
>  		       pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
>  
> +	dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
> +
> +	/**
> +	 * We are using raw mmio access wrapper to improve the
> +	 * performance for batch mmio read/write, so we need to
> +	 * handle forcewake manually.
> +	 */
> +	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
>  	/**
>  	 * TODO: Optimize for vGPU to vGPU switch by merging
>  	 * switch_mmio_to_host() and switch_mmio_to_vgpu().
> @@ -389,4 +403,6 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
>  
>  	if (next)
>  		switch_mmio_to_vgpu(next, ring_id);
> +
> +	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
>  }
> -- 
> 2.7.4
> 
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev

-- 
Open Source Technology Center, Intel ltd.

$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170623/0728b017/attachment.sig>


More information about the intel-gvt-dev mailing list