[Intel-gfx] [PATCH] drm/i915: Enable eLLC caching of display buffers for SKL+
Eero Tamminen
eero.t.tamminen at intel.com
Tue Apr 16 14:28:57 UTC 2019
Hi,
Based on quick tests with the patch:
* Results in GfxBench and Unigine (Valley/Heaven) tests were within
daily variation on the tested SKL machines
* SKL GT4e (128MB eLLC) / Wayland / Weston:
+15-20% SynMark TexMem512 (512MB of textures)
+4-6% SynMark TerrainFly*, CSCloth, ShMapVsm
-5-10% SynMark TexMem128 (128MB of textures)
* SKL GT3e (64MB eLLC) / Xorg / Unity:
+4-8% GpuTest Triangle fullscreen (FullHD)
-5-10% GpuTest Triangle windowed (1/2 screen)
* SKL GT2 (no eLLC) / Xorg / Unity:
* Some of the higher FPS SynMark pixel and vertex shader tests
are a few percent higher, more than daily variance
=> Do you see any reason why this machine would be impacted
although it doesn't have eLLC?
(I built it against drm-tip and compared results against previous and
next day unpatched drm-tip results that I had otherwise.)
- Eero
On 15.4.2019 17.16, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>
> Since SKL the eLLC has been sitting on the far side of the system
> agent, meaning the display engine can utilize it. Let's enable that.
>
> I chose WB for the caching mode, because my numbers are indicating
> that WT might actually be WB and WC might actually be UC. I'm not
> 100% sure that is indeed the case but at least my simple rendercopy
> based benchmark didn't see any difference in performance.
>
> Also if I configure things to do LLCeLLC+WT I still get cache dirt
> on my screen, suggesting that it is in fact operating in WB mode
> anyway. This is also the reason I had to fix the MOCS target cache
> to really say PTE rather than LLC+eLLC.
>
> Caveat: I've not benchmarked any real workloads. IIRC Eero did
> benchmark an earlier version, but that didn't have the PTE vs.
> LLC+eLLC MOCS fix so it wasn't actually doing the right thing
> most likely.
>
> Cc: Eero Tamminen <eero.t.tamminen at intel.com>
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 3 +--
> drivers/gpu/drm/i915/i915_gem_gtt.c | 7 +++++--
> drivers/gpu/drm/i915/i915_gem_gtt.h | 2 +-
> drivers/gpu/drm/i915/intel_mocs.c | 2 +-
> 4 files changed, 8 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 35d0782c077e..2a4f33fa2bba 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2517,8 +2517,7 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915,
> #define HAS_LLC(dev_priv) (INTEL_INFO(dev_priv)->has_llc)
> #define HAS_SNOOP(dev_priv) (INTEL_INFO(dev_priv)->has_snoop)
> #define HAS_EDRAM(dev_priv) ((dev_priv)->edram_size_mb)
> -#define HAS_WT(dev_priv) ((IS_HASWELL(dev_priv) || \
> - IS_BROADWELL(dev_priv)) && HAS_EDRAM(dev_priv))
> +#define HAS_WT(dev_priv) HAS_EDRAM(dev_priv)
>
> #define HWS_NEEDS_PHYSICAL(dev_priv) (INTEL_INFO(dev_priv)->hws_needs_physical)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8f460cc4cc1f..038fbf52a997 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -3071,7 +3071,7 @@ static void cnl_setup_private_ppat(struct intel_ppat *ppat)
>
> __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
> __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
> - __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
> + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE);
> __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
> __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> @@ -3109,7 +3109,10 @@ static void bdw_setup_private_ppat(struct intel_ppat *ppat)
>
> __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */
> __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */
> - __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
> + if (INTEL_GEN(ppat->i915) >= 9)
> + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WB | GEN8_PPAT_ELLC_OVERRIDE); /* for scanout with eLLC */
> + else
> + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
> __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */
> __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index f597f35b109b..47adc7268867 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -139,7 +139,7 @@ typedef u64 gen8_ppgtt_pml4e_t;
> #define PPAT_UNCACHED (_PAGE_PWT | _PAGE_PCD)
> #define PPAT_CACHED_PDE 0 /* WB LLC */
> #define PPAT_CACHED _PAGE_PAT /* WB LLCeLLC */
> -#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT eLLC */
> +#define PPAT_DISPLAY_ELLC _PAGE_PCD /* WT LLCeLLC (HSW/BDW) or WB eLLC (SKL+) */
>
> #define CHV_PPAT_SNOOP (1<<6)
> #define GEN8_PPAT_AGE(x) ((x)<<4)
> diff --git a/drivers/gpu/drm/i915/intel_mocs.c b/drivers/gpu/drm/i915/intel_mocs.c
> index 274ba78500c0..d984ccff94ef 100644
> --- a/drivers/gpu/drm/i915/intel_mocs.c
> +++ b/drivers/gpu/drm/i915/intel_mocs.c
> @@ -115,7 +115,7 @@ struct drm_i915_mocs_table {
> LE_1_UC | LE_TC_2_LLC_ELLC, \
> L3_1_UC), \
> MOCS_ENTRY(I915_MOCS_PTE, \
> - LE_0_PAGETABLE | LE_TC_2_LLC_ELLC | LE_LRUM(3), \
> + LE_0_PAGETABLE | LE_TC_0_PAGETABLE | LE_LRUM(3), \
> L3_3_WB)
>
> static const struct drm_i915_mocs_entry skylake_mocs_table[] = {
>
More information about the Intel-gfx
mailing list