[Intel-gfx] [PATCH 20/62] drm/i915/bdw: Add GTT functions
Imre Deak
imre.deak at intel.com
Mon Nov 4 23:22:06 CET 2013
On Sat, 2013-11-02 at 21:07 -0700, Ben Widawsky wrote:
> With the PTE clarifications, the bind and clear functions can now be
> added for gen8.
>
> v2: Use for_each_sg_page in gen8_ggtt_insert_entries.
>
> v3: Drop dev argument to pte encode functions, upstream lost it. Also
> rebase on top of the scratch page movement.
>
> v4: Rebase on top of the new address space vfuncs.
>
> v5: Add the bool use_scratch argument to clear_range and the bool valid argument
> to the PTE encode function to follow upstream changes.
>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net> (v1)
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
Reviewed-by: Imre Deak <imre.deak at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gem_gtt.c | 88 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 85 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
> index 8bf2184..df992dc 100644
> --- a/drivers/gpu/drm/i915/i915_gem_gtt.c
> +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
> @@ -58,6 +58,15 @@ typedef uint64_t gen8_gtt_pte_t;
> #define HSW_WB_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0xb)
> #define HSW_WT_ELLC_LLC_AGE0 HSW_CACHEABILITY_CONTROL(0x6)
>
> +static inline gen8_gtt_pte_t gen8_pte_encode(dma_addr_t addr,
> + enum i915_cache_level level,
> + bool valid)
> +{
> + gen8_gtt_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
> + pte |= addr;
> + return pte;
> +}
> +
> static gen6_gtt_pte_t snb_pte_encode(dma_addr_t addr,
> enum i915_cache_level level,
> bool valid)
> @@ -576,6 +585,56 @@ int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
> return 0;
> }
>
> +static inline void gen8_set_pte(void __iomem *addr, gen8_gtt_pte_t pte)
> +{
> +#ifdef writeq
> + writeq(pte, addr);
> +#else
> + iowrite32((u32)pte, addr);
> + iowrite32(pte >> 32, addr + 4);
> +#endif
> +}
> +
> +static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
> + struct sg_table *st,
> + unsigned int first_entry,
> + enum i915_cache_level level)
> +{
> + struct drm_i915_private *dev_priv = vm->dev->dev_private;
> + gen8_gtt_pte_t __iomem *gtt_entries =
> + (gen8_gtt_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
> + int i = 0;
> + struct sg_page_iter sg_iter;
> + dma_addr_t addr;
> +
> + for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
> + addr = sg_dma_address(sg_iter.sg) +
> + (sg_iter.sg_pgoffset << PAGE_SHIFT);
> + gen8_set_pte(&gtt_entries[i],
> + gen8_pte_encode(addr, level, true));
> + i++;
> + }
> +
> + /* XXX: This serves as a posting read to make sure that the PTE has
> + * actually been updated. There is some concern that even though
> + * registers and PTEs are within the same BAR that they are potentially
> + * of NUMA access patterns. Therefore, even with the way we assume
> + * hardware should work, we must keep this posting read for paranoia.
> + */
> + if (i != 0)
> + WARN_ON(readl(&gtt_entries[i-1])
> + != gen8_pte_encode(addr, level, true));
> +
> +#if 0 /* TODO: Still needed on GEN8? */
> + /* This next bit makes the above posting read even more important. We
> + * want to flush the TLBs only after we're certain all the PTE updates
> + * have finished.
> + */
> + I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
> + POSTING_READ(GFX_FLSH_CNTL_GEN6);
> +#endif
> +}
> +
> /*
> * Binds an object into the global gtt with the specified cache level. The object
> * will be accessible to the GPU via commands whose operands reference offsets
> @@ -618,6 +677,30 @@ static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
> POSTING_READ(GFX_FLSH_CNTL_GEN6);
> }
>
> +static void gen8_ggtt_clear_range(struct i915_address_space *vm,
> + unsigned int first_entry,
> + unsigned int num_entries,
> + bool use_scratch)
> +{
> + struct drm_i915_private *dev_priv = vm->dev->dev_private;
> + gen8_gtt_pte_t scratch_pte, __iomem *gtt_base =
> + (gen8_gtt_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
> + const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
> + int i;
> +
> + if (WARN(num_entries > max_entries,
> + "First entry = %d; Num entries = %d (max=%d)\n",
> + first_entry, num_entries, max_entries))
> + num_entries = max_entries;
> +
> + scratch_pte = gen8_pte_encode(vm->scratch.addr,
> + I915_CACHE_LLC,
> + use_scratch);
> + for (i = 0; i < num_entries; i++)
> + gen8_set_pte(&gtt_base[i], scratch_pte);
> + readl(gtt_base);
> +}
> +
> static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> unsigned int first_entry,
> unsigned int num_entries,
> @@ -641,7 +724,6 @@ static void gen6_ggtt_clear_range(struct i915_address_space *vm,
> readl(gtt_base);
> }
>
> -
> static void i915_ggtt_insert_entries(struct i915_address_space *vm,
> struct sg_table *st,
> unsigned int pg_start,
> @@ -947,8 +1029,8 @@ static int gen8_gmch_probe(struct drm_device *dev,
>
> ret = ggtt_probe_common(dev, gtt_size);
>
> - dev_priv->gtt.base.clear_range = NULL;
> - dev_priv->gtt.base.insert_entries = NULL;
> + dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
> + dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
>
> return ret;
> }
More information about the Intel-gfx
mailing list