[Intel-gfx] [PATCH v11 2/5] drm/i915: Introduce private PAT management
Wang, Zhi A
zhi.a.wang at intel.com
Mon Sep 11 04:03:56 UTC 2017
Thanks for the review! :)
-----Original Message-----
From: Zhenyu Wang [mailto:zhenyuw at linux.intel.com]
Sent: Monday, September 11, 2017 6:11 AM
To: Wang, Zhi A <zhi.a.wang at intel.com>
Cc: intel-gfx at lists.freedesktop.org; intel-gvt-dev at lists.freedesktop.org; joonas.lahtinen at linux.intel.com; Vivi, Rodrigo <rodrigo.vivi at intel.com>; chris at chris-wilson.co.uk; Widawsky, Benjamin <benjamin.widawsky at intel.com>
Subject: Re: [PATCH v11 2/5] drm/i915: Introduce private PAT management
On 2017.09.09 17:03:29 +0800, Zhi Wang wrote:
> +/**
> + * intel_ppat_get - get a usable PPAT entry
> + * @i915: i915 device instance
> + * @value: the PPAT value required by the caller
> + *
> + * The function tries to search if there is an existing PPAT entry
> +which
> + * matches with the required value. If perfectly matched, the
> +existing PPAT
> + * entry will be used. If only partially matched, it will try to
> +check if
> + * there is any available PPAT index. If yes, it will allocate a new
> +PPAT
> + * index for the required entry and update the HW. If not, the
> +partially
> + * matched entry will be used.
> + */
> +const struct intel_ppat_entry *
> +intel_ppat_get(struct drm_i915_private *i915, u8 value) {
> + struct intel_ppat *ppat = &i915->ppat;
> + struct intel_ppat_entry *entry;
> + unsigned int scanned, best_score;
> + int i;
> +
> + GEM_BUG_ON(!ppat->max_entries);
> +
> + scanned = best_score = 0;
> +
> + for_each_set_bit(i, ppat->used, ppat->max_entries) {
> + unsigned int score;
> +
> + entry = &ppat->entries[i];
> + score = ppat->match(entry->value, value);
> + if (score > best_score) {
> + if (score == INTEL_PPAT_PERFECT_MATCH) {
> + kref_get(&entry->ref);
> + return entry;
> + }
> + best_score = score;
> + }
> + scanned++;
> + }
> +
> + if (scanned == ppat->max_entries) {
> + if (!best_score)
> + return ERR_PTR(-ENOSPC);
> +
> + kref_get(&entry->ref);
> + return entry;
> + }
This returns last fully scanned entry instead of the real best_score one?
> +
> + i = find_first_zero_bit(ppat->used, ppat->max_entries);
> + entry = __alloc_ppat_entry(ppat, i, value);
> + ppat->update_hw(i915);
> + return entry;
> +}
> +
> +static void release_ppat(struct kref *kref) {
> + struct intel_ppat_entry *entry =
> + container_of(kref, struct intel_ppat_entry, ref);
> + struct drm_i915_private *i915 = entry->ppat->i915;
> +
> + __free_ppat_entry(entry);
> + entry->ppat->update_hw(i915);
> +}
> +
> +/**
> + * intel_ppat_put - put back the PPAT entry got from intel_ppat_get()
> + * @entry: an intel PPAT entry
> + *
> + * Put back the PPAT entry got from intel_ppat_get(). If the PPAT
> +index of the
> + * entry is dynamically allocated, its reference count will be
> +decreased. Once
> + * the reference count becomes into zero, the PPAT index becomes free again.
> + */
> +void intel_ppat_put(const struct intel_ppat_entry *entry) {
> + struct intel_ppat *ppat = entry->ppat;
> + unsigned int index = entry - ppat->entries;
> +
> + GEM_BUG_ON(!ppat->max_entries);
> +
> + kref_put(&ppat->entries[index].ref, release_ppat); }
> +
> +static void cnl_private_pat_update_hw(struct drm_i915_private
> +*dev_priv) {
> + struct intel_ppat *ppat = &dev_priv->ppat;
> + int i;
> +
> + for_each_set_bit(i, ppat->dirty, ppat->max_entries) {
> + I915_WRITE(GEN10_PAT_INDEX(i), ppat->entries[i].value);
> + clear_bit(i, ppat->dirty);
> + }
> +}
> +
> +static void bdw_private_pat_update_hw(struct drm_i915_private
> +*dev_priv) {
> + struct intel_ppat *ppat = &dev_priv->ppat;
> + u64 pat = 0;
> + int i;
> +
> + for (i = 0; i < ppat->max_entries; i++)
> + pat |= GEN8_PPAT(i, ppat->entries[i].value);
> +
> + bitmap_clear(ppat->dirty, 0, ppat->max_entries);
> +
> + I915_WRITE(GEN8_PRIVATE_PAT_LO, lower_32_bits(pat));
> + I915_WRITE(GEN8_PRIVATE_PAT_HI, upper_32_bits(pat)); }
> +
> +static unsigned int bdw_private_pat_match(u8 src, u8 dst) {
> + unsigned int score = 0;
> +
> + /* Cache attribute has to be matched. */
> + if (GEN8_PPAT_GET_CA(src) != GEN8_PPAT_GET_CA(dst))
> + return 0;
> +
> + if (GEN8_PPAT_GET_TC(src) == GEN8_PPAT_GET_TC(dst))
> + score += 2;
> +
> + if (GEN8_PPAT_GET_AGE(src) == GEN8_PPAT_GET_AGE(dst))
> + score += 1;
> +
> + if (score == 3)
> + return INTEL_PPAT_PERFECT_MATCH;
> +
> + return score;
> +}
> +
> +static unsigned int chv_private_pat_match(u8 src, u8 dst) {
> + return (CHV_PPAT_GET_SNOOP(src) == CHV_PPAT_GET_SNOOP(dst)) ?
> + INTEL_PPAT_PERFECT_MATCH : 0;
> +}
> +
> +static void cnl_setup_private_ppat(struct intel_ppat *ppat) {
> + ppat->max_entries = 8;
> + ppat->update_hw = cnl_private_pat_update_hw;
> + ppat->match = bdw_private_pat_match;
> + ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC |
> +GEN8_PPAT_AGE(3);
> +
> /* XXX: spec is unclear if this is still needed for CNL+ */
> - if (!USES_PPGTT(dev_priv)) {
> - I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_UC);
> + if (!USES_PPGTT(ppat->i915)) {
> + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
> return;
> }
>
> - I915_WRITE(GEN10_PAT_INDEX(0), GEN8_PPAT_WB | GEN8_PPAT_LLC);
> - I915_WRITE(GEN10_PAT_INDEX(1), GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
> - I915_WRITE(GEN10_PAT_INDEX(2), GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
> - I915_WRITE(GEN10_PAT_INDEX(3), GEN8_PPAT_UC);
> - I915_WRITE(GEN10_PAT_INDEX(4), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> - I915_WRITE(GEN10_PAT_INDEX(5), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> - I915_WRITE(GEN10_PAT_INDEX(6), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
> - I915_WRITE(GEN10_PAT_INDEX(7), GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC);
> + __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC);
> + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC);
> + __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC);
> + __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> + __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> + __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
> + __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC |
> +GEN8_PPAT_AGE(3));
> }
>
> /* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
> * bits. When using advanced contexts each context stores its own PAT, but
> * writing this data shouldn't be harmful even in those cases. */
> -static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
> +static void bdw_setup_private_ppat(struct intel_ppat *ppat)
> {
> - u64 pat;
> + ppat->max_entries = 8;
> + ppat->update_hw = bdw_private_pat_update_hw;
> + ppat->match = bdw_private_pat_match;
> + ppat->clear_value = GEN8_PPAT_WB | GEN8_PPAT_LLCELLC |
> +GEN8_PPAT_AGE(3);
>
> - pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC) | /* for normal objects, no eLLC */
> - GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
> - GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
> - GEN8_PPAT(3, GEN8_PPAT_UC) | /* Uncached objects, mostly for scanout */
> - GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
> - GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
> - GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
> - GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));
> -
> - if (!USES_PPGTT(dev_priv))
> + if (!USES_PPGTT(ppat->i915)) {
> /* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
> * so RTL will always use the value corresponding to
> * pat_sel = 000".
> @@ -2790,17 +2952,26 @@ static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
> * So we can still hold onto all our assumptions wrt cpu
> * clflushing on LLC machines.
> */
> - pat = GEN8_PPAT(0, GEN8_PPAT_UC);
> + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_UC);
> + return;
> + }
>
> - /* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
> - * write would work. */
> - I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
> - I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
> + __alloc_ppat_entry(ppat, 0, GEN8_PPAT_WB | GEN8_PPAT_LLC); /* for normal objects, no eLLC */
> + __alloc_ppat_entry(ppat, 1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC); /* for something pointing to ptes? */
> + __alloc_ppat_entry(ppat, 2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC); /* for scanout with eLLC */
> + __alloc_ppat_entry(ppat, 3, GEN8_PPAT_UC); /* Uncached objects, mostly for scanout */
> + __alloc_ppat_entry(ppat, 4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0));
> + __alloc_ppat_entry(ppat, 5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1));
> + __alloc_ppat_entry(ppat, 6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2));
> + __alloc_ppat_entry(ppat, 7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC |
> +GEN8_PPAT_AGE(3));
> }
>
> -static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
> +static void chv_setup_private_ppat(struct intel_ppat *ppat)
> {
> - u64 pat;
> + ppat->max_entries = 8;
> + ppat->update_hw = bdw_private_pat_update_hw;
> + ppat->match = chv_private_pat_match;
> + ppat->clear_value = CHV_PPAT_SNOOP;
>
> /*
> * Map WB on BDW to snooped on CHV.
> @@ -2820,17 +2991,15 @@ static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
> * Which means we must set the snoop bit in PAT entry 0
> * in order to keep the global status page working.
> */
> - pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
> - GEN8_PPAT(1, 0) |
> - GEN8_PPAT(2, 0) |
> - GEN8_PPAT(3, 0) |
> - GEN8_PPAT(4, CHV_PPAT_SNOOP) |
> - GEN8_PPAT(5, CHV_PPAT_SNOOP) |
> - GEN8_PPAT(6, CHV_PPAT_SNOOP) |
> - GEN8_PPAT(7, CHV_PPAT_SNOOP);
>
> - I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
> - I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
> + __alloc_ppat_entry(ppat, 0, CHV_PPAT_SNOOP);
> + __alloc_ppat_entry(ppat, 1, 0);
> + __alloc_ppat_entry(ppat, 2, 0);
> + __alloc_ppat_entry(ppat, 3, 0);
> + __alloc_ppat_entry(ppat, 4, CHV_PPAT_SNOOP);
> + __alloc_ppat_entry(ppat, 5, CHV_PPAT_SNOOP);
> + __alloc_ppat_entry(ppat, 6, CHV_PPAT_SNOOP);
> + __alloc_ppat_entry(ppat, 7, CHV_PPAT_SNOOP);
> }
>
> static void gen6_gmch_remove(struct i915_address_space *vm) @@
> -2843,12 +3012,27 @@ static void gen6_gmch_remove(struct
> i915_address_space *vm)
>
> static void setup_private_pat(struct drm_i915_private *dev_priv) {
> + struct intel_ppat *ppat = &dev_priv->ppat;
> + int i;
> +
> + ppat->i915 = dev_priv;
> +
> if (INTEL_GEN(dev_priv) >= 10)
> - cnl_setup_private_ppat(dev_priv);
> + cnl_setup_private_ppat(ppat);
> else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> - chv_setup_private_ppat(dev_priv);
> + chv_setup_private_ppat(ppat);
> else
> - bdw_setup_private_ppat(dev_priv);
> + bdw_setup_private_ppat(ppat);
> +
> + GEM_BUG_ON(ppat->max_entries > INTEL_MAX_PPAT_ENTRIES);
> +
> + for_each_clear_bit(i, ppat->used, ppat->max_entries) {
> + ppat->entries[i].value = ppat->clear_value;
> + ppat->entries[i].ppat = ppat;
> + set_bit(i, ppat->dirty);
> + }
> +
> + ppat->update_hw(dev_priv);
> }
>
> static int gen8_gmch_probe(struct i915_ggtt *ggtt) @@ -3162,13
> +3346,10 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
> ggtt->base.closed = false;
>
> if (INTEL_GEN(dev_priv) >= 8) {
> - if (INTEL_GEN(dev_priv) >= 10)
> - cnl_setup_private_ppat(dev_priv);
> - else if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
> - chv_setup_private_ppat(dev_priv);
> - else
> - bdw_setup_private_ppat(dev_priv);
> + struct intel_ppat *ppat = &dev_priv->ppat;
>
> + bitmap_set(ppat->dirty, 0, ppat->max_entries);
> + dev_priv->ppat.update_hw(dev_priv);
> return;
> }
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h
> b/drivers/gpu/drm/i915/i915_gem_gtt.h
> index b4e3aa7..e10ca89 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.h
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h
> @@ -143,6 +143,11 @@ typedef u64 gen8_ppgtt_pml4e_t;
> #define GEN8_PPAT_ELLC_OVERRIDE (0<<2)
> #define GEN8_PPAT(i, x) ((u64)(x) << ((i) * 8))
>
> +#define GEN8_PPAT_GET_CA(x) ((x) & 3) #define GEN8_PPAT_GET_TC(x)
> +((x) & (3 << 2)) #define GEN8_PPAT_GET_AGE(x) ((x) & (3 << 4))
> +#define CHV_PPAT_GET_SNOOP(x) ((x) & (1 << 6))
> +
> struct sg_table;
>
> struct intel_rotation_info {
> @@ -536,6 +541,37 @@ i915_vm_to_ggtt(struct i915_address_space *vm)
> return container_of(vm, struct i915_ggtt, base); }
>
> +#define INTEL_MAX_PPAT_ENTRIES 8
> +#define INTEL_PPAT_PERFECT_MATCH (~0U)
> +
> +struct intel_ppat;
> +
> +struct intel_ppat_entry {
> + struct intel_ppat *ppat;
> + struct kref ref;
> + u8 value;
> +};
> +
> +struct intel_ppat {
> + struct intel_ppat_entry entries[INTEL_MAX_PPAT_ENTRIES];
> + DECLARE_BITMAP(used, INTEL_MAX_PPAT_ENTRIES);
> + DECLARE_BITMAP(dirty, INTEL_MAX_PPAT_ENTRIES);
> + unsigned int max_entries;
> + u8 clear_value;
> + /*
> + * Return a score to show how two PPAT values match,
> + * a INTEL_PPAT_PERFECT_MATCH indicates a perfect match
> + */
> + unsigned int (*match)(u8 src, u8 dst);
> + void (*update_hw)(struct drm_i915_private *i915);
> +
> + struct drm_i915_private *i915;
> +};
> +
> +const struct intel_ppat_entry *
> +intel_ppat_get(struct drm_i915_private *i915, u8 value); void
> +intel_ppat_put(const struct intel_ppat_entry *entry);
> +
> int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915);
> void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915);
>
> --
> 2.7.4
>
> _______________________________________________
> intel-gvt-dev mailing list
> intel-gvt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/intel-gvt-dev
--
Open Source Technology Center, Intel ltd.
$gpg --keyserver wwwkeys.pgp.net --recv-keys 4D781827
More information about the Intel-gfx
mailing list