[Intel-gfx] [RFCv5 2/2] drm/i915: Introduce private PAT management

Wang, Zhi A zhi.a.wang at intel.com
Tue Aug 29 11:13:26 UTC 2017


Hi Chris:
    There is mapping between i915 cache level -> PPAT index. Currently it's:

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
                                  enum i915_cache_level level)
{
...
        switch (level) {
        case I915_CACHE_NONE:
                pte |= PPAT_UNCACHED_INDEX;
                break;
        case I915_CACHE_WT:
                pte |= PPAT_DISPLAY_ELLC_INDEX;
                break;
        default:
                pte |= PPAT_CACHED_INDEX;
                break;
        }
...

According to bspec, the PPAT index filled in the page table is calculated as:

PPAT index = 4 * PAT + 2 * PCD + PWT

In the i915_gem_gtt.c

#define PPAT_UNCACHED_INDEX             (_PAGE_PWT | _PAGE_PCD)     // PPAT INDEX =  1 + 2 * 1 = 3
#define PPAT_CACHED_PDE_INDEX           0 /* WB LLC */			//  PPAT INDEX = 0
#define PPAT_CACHED_INDEX               _PAGE_PAT /* WB LLCeLLC */	// PPAT INDEX = 4 * 1 = 4
#define PPAT_DISPLAY_ELLC_INDEX         _PAGE_PCD /* WT eLLC */           // PPAT INDEX = 2 * 1 = 2

Actually the PPAT index used by i915 are: 0 2 3 4, which has already been reserved in the RFC patches.

The reason I want to reserve that PPAT entries for host is to keep the mapping between I915_CACHE_* and PPAT_*_INDEX unchanged.

Thanks,
Zhi.

-----Original Message-----
From: Chris Wilson [mailto:chris at chris-wilson.co.uk] 
Sent: Tuesday, August 29, 2017 1:40 PM
To: Wang, Zhi A <zhi.a.wang at intel.com>; intel-gfx at lists.freedesktop.org; intel-gvt-dev at lists.freedesktop.org
Cc: joonas.lahtinen at linux.intel.com; zhenyuw at linux.intel.com; Wang, Zhi A <zhi.a.wang at intel.com>; Widawsky, Benjamin <benjamin.widawsky at intel.com>; Vivi, Rodrigo <rodrigo.vivi at intel.com>
Subject: Re: [RFCv5 2/2] drm/i915: Introduce private PAT management

Quoting Chris Wilson (2017-08-29 11:05:21)
> Quoting Zhi Wang (2017-08-29 09:00:51)
> > +static void cnl_setup_private_ppat(struct intel_ppat *ppat) {
> > +       ppat->max_entries = 8;
> > +       ppat->update = cnl_private_pat_update;
> > +       ppat->match = bdw_private_pat_match;
> > +       ppat->dummy_value = GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3);
> > +
> >         /* XXX: spec is unclear if this is still needed for CNL+ */
> > -       if (!USES_PPGTT(dev_priv)) {
> > -               I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_UC);
> > +       if (!USES_PPGTT(ppat->i915)) {
> > +               alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
> >                 return;
> >         }
> >  
> > -       I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
> > -       I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
> > -       I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
> > -       I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
> > -       I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> > -       I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> > -       I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
> > -       I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> > +       alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
> > +       alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
> > +       alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
> > +       alloc_ppat_entry(ppat, 4, GEN8_PPAT_LLCELLC | 
> > + GEN8_PPAT_AGE(0));
> >  }
> >  
> >  /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
> >   * bits. When using advanced contexts each context stores its own PAT, but
> >   * writing this data shouldn't be harmful even in those cases. */ 
> > -static void bdw_setup_private_ppat(struct drm_i915_private 
> > *dev_priv)
> > +static void bdw_setup_private_ppat(struct intel_ppat *ppat)
> >  {
> > -       u64 pat;
> > -
> > -       pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
> > -             GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
> > -             GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
> > -             GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
> > -             GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
> > -             GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
> > -             GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
> > -             GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> > +       ppat->max_entries = 8;
> > +       ppat->update = bdw_private_pat_update;
> > +       ppat->match = bdw_private_pat_match;
> > +       ppat->dummy_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | 
> > + GEN8_PPAT_AGE(3);
> >  
> > -       if (!USES_PPGTT(dev_priv))
> > +       if (!USES_PPGTT(dev_priv)) {
> >                 /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
> >                  * so RTL will always use the value corresponding to
> >                  * pat_sel = 000".
> > @@ -2864,17 +3038,22 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
> >                  * So we can still hold onto all our assumptions wrt cpu
> >                  * clflushing on LLC machines.
> >                  */
> > -               pat = GEN8_PPAT(0, GEN8_PPAT_UC);
> > +               alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
> > +               return;
> > +       }
> >  
> > -       /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
> > -        * write would work. */
> > -       I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
> > -       I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
> > +       alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);     /* for normal objects, no eLLC */
> > +       alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
> > +       alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);                     /* Uncached objects, mostly for scanout */
> > +       alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | 
> > + GEN8_PPAT_AGE(0));
> >  }
> >  
> > -static void chv_setup_private_ppat(struct drm_i915_private 
> > *dev_priv)
> > +static void chv_setup_private_ppat(struct intel_ppat *ppat)
> >  {
> > -       u64 pat;
> > +       ppat->max_entries = 8;
> > +       ppat->update = bdw_private_pat_update;
> > +       ppat->match = chv_private_pat_match;
> > +       ppat->dummy_value = CHV_PPAT_SNOOP;
> >  
> >         /*
> >          * Map WB on BDW to snooped on CHV.
> > @@ -2894,17 +3073,11 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
> >          * Which means we must set the snoop bit in PAT entry 0
> >          * in order to keep the global status page working.
> >          */
> > -       pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
> > -             GEN8_PPAT(1, 0) |
> > -             GEN8_PPAT(2, 0) |
> > -             GEN8_PPAT(3, 0) |
> > -             GEN8_PPAT(4, CHV_PPAT_SNOOP) |
> > -             GEN8_PPAT(5, CHV_PPAT_SNOOP) |
> > -             GEN8_PPAT(6, CHV_PPAT_SNOOP) |
> > -             GEN8_PPAT(7, CHV_PPAT_SNOOP);
> >  
> > -       I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
> > -       I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
> > +       alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
> > +       alloc_ppat_entry(ppat, 2, 0);
> > +       alloc_ppat_entry(ppat, 3, 0);
> > +       alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
> >  }
> 
> 1 is dropped in all cases?
> 
> The current ABI is that we reserve (0, 1, 2) for userspace. 1 means 
> follow-PTE and is especially important.

Ignore the ABI concern, getting my tables confused. But the question still remains why do we need to reserve these in particular?
-Chris


More information about the Intel-gfx mailing list