[PATCH 2/2] drm/i915/gvt: Optimize ring switch 2x faster again by light weight mmio access wrapper
Zhenyu Wang
zhenyuw at linux.intel.com
Fri Jun 23 06:38:54 UTC 2017
On 2017.06.21 16:57:22 +0800, changbin.du at intel.com wrote:
> From: Changbin Du <changbin.du at intel.com>
>
> I915_READ/WRITE is not only an mmio read/write; it also contains
> debug checking and a Forcewake domain lookup. This is too heavy for
> the GVT ring switch case, which accesses a batch of mmio registers
> on each ring switch. We can handle Forcewake manually and use the
> raw i915_read/write instead. The benefit from this is 2x faster
> mmio switch performance.
> Before After
> cycles ~550000 ~250000
>
> Signed-off-by: Changbin Du <changbin.du at intel.com>
> ---
> drivers/gpu/drm/i915/gvt/render.c | 42 +++++++++++++++++++++++++++------------
> 1 file changed, 29 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
> index 5a08bcd..609b0f9 100644
> --- a/drivers/gpu/drm/i915/gvt/render.c
> +++ b/drivers/gpu/drm/i915/gvt/render.c
> @@ -37,6 +37,9 @@
> #include "gvt.h"
> #include "trace.h"
>
> +#define I915_RAW_WRITE(reg, val) __raw_i915_write32(dev_priv, reg, val)
> +#define I915_RAW_READ(reg) __raw_i915_read32(dev_priv, reg)
> +
I915_READ_FW/I915_WRITE_FW
> struct render_mmio {
> int ring_id;
> i915_reg_t reg;
> @@ -207,7 +210,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
>
> offset.reg = regs[ring_id];
> for (i = 0; i < 64; i++) {
> - gen9_render_mocs[ring_id][i] = I915_READ(offset);
> + gen9_render_mocs[ring_id][i] = I915_RAW_READ(offset);
> I915_WRITE(offset, vgpu_vreg(vgpu, offset));
> offset.reg += 4;
> }
> @@ -215,8 +218,8 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
> if (ring_id == RCS) {
> l3_offset.reg = 0xb020;
> for (i = 0; i < 32; i++) {
> - gen9_render_mocs_L3[i] = I915_READ(l3_offset);
> - I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
> + gen9_render_mocs_L3[i] = I915_RAW_READ(l3_offset);
> + I915_RAW_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
> l3_offset.reg += 4;
> }
> }
> @@ -240,16 +243,16 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
>
> offset.reg = regs[ring_id];
> for (i = 0; i < 64; i++) {
> - vgpu_vreg(vgpu, offset) = I915_READ(offset);
> - I915_WRITE(offset, gen9_render_mocs[ring_id][i]);
> + vgpu_vreg(vgpu, offset) = I915_RAW_READ(offset);
> + I915_RAW_WRITE(offset, gen9_render_mocs[ring_id][i]);
> offset.reg += 4;
> }
>
> if (ring_id == RCS) {
> l3_offset.reg = 0xb020;
> for (i = 0; i < 32; i++) {
> - vgpu_vreg(vgpu, l3_offset) = I915_READ(l3_offset);
> - I915_WRITE(l3_offset, gen9_render_mocs_L3[i]);
> + vgpu_vreg(vgpu, l3_offset) = I915_RAW_READ(l3_offset);
> + I915_RAW_WRITE(l3_offset, gen9_render_mocs_L3[i]);
> l3_offset.reg += 4;
> }
> }
> @@ -284,7 +287,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
> if (mmio->ring_id != ring_id)
> continue;
>
> - mmio->value = I915_READ(mmio->reg);
> + mmio->value = I915_RAW_READ(mmio->reg);
>
> /*
> * if it is an inhibit context, load in_context mmio
> @@ -301,7 +304,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
> else
> v = vgpu_vreg(vgpu, mmio->reg);
>
> - I915_WRITE(mmio->reg, v);
> + I915_RAW_WRITE(mmio->reg, v);
> last_reg = mmio->reg;
>
> trace_render_mmio(vgpu->id, "load",
> @@ -311,7 +314,7 @@ static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id)
>
> /* Make sure the swiched MMIOs has taken effect. */
> if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
> - POSTING_READ(last_reg);
> + I915_RAW_READ(last_reg);
>
> handle_tlb_pending_event(vgpu, ring_id);
> }
> @@ -338,7 +341,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
> if (mmio->ring_id != ring_id)
> continue;
>
> - vgpu_vreg(vgpu, mmio->reg) = I915_READ(mmio->reg);
> + vgpu_vreg(vgpu, mmio->reg) = I915_RAW_READ(mmio->reg);
>
> if (mmio->mask) {
> vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16);
> @@ -349,7 +352,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
> if (mmio->in_context)
> continue;
>
> - I915_WRITE(mmio->reg, v);
> + I915_RAW_WRITE(mmio->reg, v);
> last_reg = mmio->reg;
>
> trace_render_mmio(vgpu->id, "restore",
> @@ -359,7 +362,7 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
>
> /* Make sure the swiched MMIOs has taken effect. */
> if (likely(INTEL_GVT_MMIO_OFFSET(last_reg)))
> - POSTING_READ(last_reg);
> + I915_RAW_READ(last_reg);
> }
>
> /**
> @@ -374,12 +377,23 @@ static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id)
> void intel_gvt_switch_mmio(struct intel_vgpu *pre,
> struct intel_vgpu *next, int ring_id)
> {
> + struct drm_i915_private *dev_priv;
> +
> if (WARN_ON(!pre && !next))
> return;
>
> gvt_dbg_render("switch ring %d from %s to %s\n", ring_id,
> pre ? "vGPU" : "host", next ? "vGPU" : "HOST");
>
> + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv;
> +
> + /**
> + * We are using raw mmio access wrapper to improve the
> + * performace for batch mmio read/write, so we need
> + * handle forcewake mannually.
> + */
> + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
> +
> /**
> * TODO: Optimize for vGPU to vGPU switch by merging
> * switch_mmio_to_host() and switch_mmio_to_vgpu().
> @@ -389,4 +403,6 @@ void intel_gvt_switch_mmio(struct intel_vgpu *pre,
>
> if (next)
> switch_mmio_to_vgpu(next, ring_id);
> +
> + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> }
> --
> 2.7.4
>
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 195 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gvt-dev/attachments/20170623/0728b017/attachment.sig>
More information about the intel-gvt-dev
mailing list