[Intel-gfx] [PATCH 6/9] drm/i915: add frontbuffer tracking to FBC

Paulo Zanoni przanoni at gmail.com
Tue Feb 10 04:19:10 PST 2015


2015-02-09 17:05 GMT-02:00 Daniel Vetter <daniel at ffwll.ch>:
> On Mon, Feb 09, 2015 at 02:46:32PM -0200, Paulo Zanoni wrote:
>> From: Paulo Zanoni <paulo.r.zanoni at intel.com>
>>
>> Kill the blt/render tracking we currently have and use the frontbuffer
>> tracking infrastructure.
>>
>> Don't enable things by default yet.
>>
>> v2: (Rodrigo) Fix small conflict on rebase and typo at subject.
>> v3: (Paulo) Rebase on RENDER_CS change.
>> v4: (Paulo) Rebase.
>>
>> Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com> (v2)
>> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
>> Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
>> ---
>>  drivers/gpu/drm/i915/i915_drv.h          |   9 +--
>>  drivers/gpu/drm/i915/intel_drv.h         |   6 +-
>>  drivers/gpu/drm/i915/intel_fbc.c         | 107 ++++++++++++++++++++-----------
>>  drivers/gpu/drm/i915/intel_frontbuffer.c |  14 +---
>>  drivers/gpu/drm/i915/intel_ringbuffer.c  |  41 +-----------
>>  drivers/gpu/drm/i915/intel_ringbuffer.h  |   1 -
>>  6 files changed, 80 insertions(+), 98 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index d578211..dc002df 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -782,6 +782,7 @@ struct i915_fbc {
>>       unsigned long uncompressed_size;
>>       unsigned threshold;
>>       unsigned int fb_id;
>> +     unsigned int possible_framebuffer_bits;
>>       struct intel_crtc *crtc;
>>       int y;
>>
>> @@ -794,14 +795,6 @@ struct i915_fbc {
>>        * possible. */
>>       bool enabled;
>>
>> -     /* On gen8 some rings cannont perform fbc clean operation so for now
>> -      * we are doing this on SW with mmio.
>> -      * This variable works in the opposite information direction
>> -      * of ring->fbc_dirty telling software on frontbuffer tracking
>> -      * to perform the cache clean on sw side.
>> -      */
>> -     bool need_sw_cache_clean;
>> -
>>       struct intel_fbc_work {
>>               struct delayed_work work;
>>               struct drm_crtc *crtc;
>> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
>> index a73d091..ed85df8 100644
>> --- a/drivers/gpu/drm/i915/intel_drv.h
>> +++ b/drivers/gpu/drm/i915/intel_drv.h
>> @@ -1101,7 +1101,11 @@ bool intel_fbc_enabled(struct drm_device *dev);
>>  void intel_fbc_update(struct drm_device *dev);
>>  void intel_fbc_init(struct drm_i915_private *dev_priv);
>>  void intel_fbc_disable(struct drm_device *dev);
>> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
>> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
>> +                       unsigned int frontbuffer_bits,
>> +                       enum fb_op_origin origin);
>> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
>> +                  unsigned int frontbuffer_bits, enum fb_op_origin origin);
>>
>>  /* intel_hdmi.c */
>>  void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
>> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
>> index 7341e87..26c4f9c 100644
>> --- a/drivers/gpu/drm/i915/intel_fbc.c
>> +++ b/drivers/gpu/drm/i915/intel_fbc.c
>> @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev)
>>       return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
>>  }
>>
>> -static void snb_fbc_blit_update(struct drm_device *dev)
>> +static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
>>  {
>> -     struct drm_i915_private *dev_priv = dev->dev_private;
>> -     u32 blt_ecoskpd;
>> -
>> -     /* Make sure blitter notifies FBC of writes */
>> -
>> -     /* Blitter is part of Media powerwell on VLV. No impact of
>> -      * his param in other platforms for now */
>> -     intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
>> -
>> -     blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
>> -     blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
>> -             GEN6_BLITTER_LOCK_SHIFT;
>> -     I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
>> -     blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
>> -     I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
>> -     blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
>> -                      GEN6_BLITTER_LOCK_SHIFT);
>> -     I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
>> -     POSTING_READ(GEN6_BLITTER_ECOSKPD);
>> -
>> -     intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
>> +     I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
>> +     POSTING_READ(MSG_FBC_REND_STATE);
>>  }
>>
>>  static void ilk_fbc_enable(struct drm_crtc *crtc)
>> @@ -239,9 +220,10 @@ static void ilk_fbc_enable(struct drm_crtc *crtc)
>>               I915_WRITE(SNB_DPFC_CTL_SA,
>>                          SNB_CPU_FENCE_ENABLE | obj->fence_reg);
>>               I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
>> -             snb_fbc_blit_update(dev);
>>       }
>>
>> +     intel_fbc_nuke(dev_priv);
>> +
>>       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
>>  }
>>
>> @@ -320,7 +302,7 @@ static void gen7_fbc_enable(struct drm_crtc *crtc)
>>                  SNB_CPU_FENCE_ENABLE | obj->fence_reg);
>>       I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
>>
>> -     snb_fbc_blit_update(dev);
>> +     intel_fbc_nuke(dev_priv);
>>
>>       DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
>>  }
>> @@ -340,19 +322,6 @@ bool intel_fbc_enabled(struct drm_device *dev)
>>       return dev_priv->fbc.enabled;
>>  }
>>
>> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)
>> -{
>> -     struct drm_i915_private *dev_priv = dev->dev_private;
>> -
>> -     if (!IS_GEN8(dev))
>> -             return;
>> -
>> -     if (!intel_fbc_enabled(dev))
>> -             return;
>> -
>> -     I915_WRITE(MSG_FBC_REND_STATE, value);
>> -}
>> -
>>  static void intel_fbc_work_fn(struct work_struct *__work)
>>  {
>>       struct intel_fbc_work *work =
>> @@ -658,6 +627,63 @@ out_disable:
>>       i915_gem_stolen_cleanup_compression(dev);
>>  }
>>
>> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
>> +                       unsigned int frontbuffer_bits,
>> +                       enum fb_op_origin origin)
>> +{
>> +     struct drm_device *dev = dev_priv->dev;
>> +     unsigned int fbc_bits;
>> +
>> +     if (origin == ORIGIN_GTT)
>> +             return;
>> +
>> +     if (dev_priv->fbc.enabled)
>> +             fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
>> +     else if (dev_priv->fbc.fbc_work)
>> +             fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
>> +                     to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe);
>> +     else
>> +             fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
>> +
>> +     if ((fbc_bits & frontbuffer_bits) == 0)
>> +             return;
>
> The fbc code should store all the currently invalidate fb_bits somewhere,
> so that it knows when it can reenable fbc. Copypasting from the psr code:
>
> dev_priv->fbc.busy_frontbuffer_bits |= fbc_bits & frontbuffer_bits;
>
> Of course there should only ever be one bit (for the primary plane on the
> crtc fbc is enabled on), so a boolean should be enough. But we could use
> the additional bits for a bit of consistency checks.
>
>> +
>> +     intel_fbc_disable(dev);
>> +}
>> +
>> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
>> +                  unsigned int frontbuffer_bits,
>> +                  enum fb_op_origin origin)
>> +{
>> +     struct drm_device *dev = dev_priv->dev;
>> +     unsigned int fbc_bits = 0;
>> +     bool fbc_enabled;
>> +
>> +     if (dev_priv->fbc.enabled)
>> +             fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
>> +     else
>> +             fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
>> +
>> +     /* These are not the planes you are looking for. */
>> +     if ((frontbuffer_bits & fbc_bits) == 0)
>> +             return;
>> +
>> +     fbc_enabled = intel_fbc_enabled(dev);
>> +
>> +     if ((dev_priv->fb_tracking.busy_bits & frontbuffer_bits &
>> +          fbc_bits) == 0) {
>> +             if (fbc_enabled) {
>> +                     if (origin == ORIGIN_CS || origin == ORIGIN_CPU)
>> +                             intel_fbc_nuke(dev_priv);
>> +             } else {
>> +                     intel_fbc_update(dev);
>> +             }
>
> Ok, here comes the gist of what needs to be changed.
> - You need to check the fbc-private copy of busy bits, otherwise the
>   filtering for the GTT origin won't work. We want to keep fbc enabled (or
>   renable) even when gtt is still invalidated somehow.
> - That means when you decide to reenable fbc (i.e.
>   dev_priv->fbc.busy_frontbuffer_bits == 0) then it's guranteed that fbc
>   is already disabled, and a plain intel_fbc_enable is good enough.
> - nuke in flush is not enough and too late - the frontbuffer rendering
>   could have been going on for a while since you've seen the invalidate,
>   the flush only happens when everything is quiet again. A nuke at the end
>   is hence too late and not enough.
>
> This way you don't need the origin information in the flush helper.
>
> One bit I'm not yet sure off is where to put intel_fbc_update to
> reallocate buffers over pageflips. The complication since you've written
> these patches is the atomic flips, which can now asynchronously update the
> primary plane (and change the pixel format), but different from the
> pageflip code the atomic paths don't disable/enable fbc over a flip.
>
> Which means that doing the intel_fbc_update just somewhere in between the
> disable and enable won't be enough to keep everything in sync.
>
> I think what we ultimately need is a bit of logic in the atomic_check part
> which decides whether fbc must be disabled/enabled (e.g. when we need to
> realloc the cfb) around the update. And then we'll place explicit
> intel_fbc_disable/enable calls in the atomic_begin/flush callbacks.
>
> The other case is just updating the gtt fence data. At least on g4x+ we
> should try not to disable fbc but keep it going over pageflips. Which
> means there could be interesting races between the flip and gtt writes
> right afterwards (which aren't synchronized in any way).
>
> I see two options:
> - We update the fence register whenever we want, but while a pageflip is
>   pending we don't filter out gtt invalidates (since we kinda don't know
>   whether the hw invalidate will hit before/after the flip happens). This
>   one is a bit tricky, we'd need to add fbc callbacks to
>   intel_frontbuffer_flip_prepare/complete.
> - We do a manual nuke after each flip by adding calls to intel_fbc_flip to
>   intel_frontbuffer_flip and flip_complete. This one has the downside that
>   proper userspace which doesn't do gtt writes will get hurt by nuking the
>   fbc twice: Once for the flip and once right afterwards.

Thanks for the review!

If you can think of any way to write a test case to reproduce the
problems you pointed above, please give me ideas! I'll try to do this
while rewriting the patches, but maybe you already have something in
your mind? I guess we need to try to find a way to get an invalidate
but only get the corresponding flush a long time (> 1 frame) after
that? Maybe keep the frame busy with tons of operations that don't
change the CRC?


>
> Cheers, Daniel
>> +     } else {
>> +             if (fbc_enabled)
>> +                     intel_fbc_disable(dev);
>> +     }
>> +}
>> +
>>  /**
>>   * intel_fbc_init - Initialize FBC
>>   * @dev_priv: the i915 device
>> @@ -666,12 +692,19 @@ out_disable:
>>   */
>>  void intel_fbc_init(struct drm_i915_private *dev_priv)
>>  {
>> +     enum pipe pipe;
>> +
>>       if (!HAS_FBC(dev_priv)) {
>>               dev_priv->fbc.enabled = false;
>>               dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
>>               return;
>>       }
>>
>> +     /* TODO: some platforms have FBC tied to a specific plane! */
>> +     for_each_pipe(dev_priv, pipe)
>> +             dev_priv->fbc.possible_framebuffer_bits |=
>> +                             INTEL_FRONTBUFFER_PRIMARY(pipe);
>> +
>>       if (INTEL_INFO(dev_priv)->gen >= 7) {
>>               dev_priv->display.fbc_enabled = ilk_fbc_enabled;
>>               dev_priv->display.enable_fbc = gen7_fbc_enable;
>> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
>> index 0a773981..2fc059c 100644
>> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
>> @@ -118,8 +118,6 @@ static void intel_mark_fb_busy(struct drm_device *dev,
>>                       continue;
>>
>>               intel_increase_pllclock(dev, pipe);
>> -             if (ring && intel_fbc_enabled(dev))
>> -                     ring->fbc_dirty = true;
>>       }
>>  }
>>
>> @@ -160,6 +158,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
>>
>>       intel_psr_invalidate(dev, obj->frontbuffer_bits);
>>       intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
>> +     intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
>>  }
>>
>>  /**
>> @@ -189,16 +188,7 @@ void intel_frontbuffer_flush(struct drm_device *dev,
>>
>>       intel_edp_drrs_flush(dev, frontbuffer_bits);
>>       intel_psr_flush(dev, frontbuffer_bits);
>> -
>> -     /*
>> -      * FIXME: Unconditional fbc flushing here is a rather gross hack and
>> -      * needs to be reworked into a proper frontbuffer tracking scheme like
>> -      * psr employs.
>> -      */
>> -     if (dev_priv->fbc.need_sw_cache_clean) {
>> -             dev_priv->fbc.need_sw_cache_clean = false;
>> -             bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN);
>> -     }
>> +     intel_fbc_flush(dev_priv, frontbuffer_bits, origin);
>>  }
>>
>>  /**
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 573b80f..48dd8a2 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
>>       return 0;
>>  }
>>
>> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
>> -{
>> -     int ret;
>> -
>> -     if (!ring->fbc_dirty)
>> -             return 0;
>> -
>> -     ret = intel_ring_begin(ring, 6);
>> -     if (ret)
>> -             return ret;
>> -     /* WaFbcNukeOn3DBlt:ivb/hsw */
>> -     intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
>> -     intel_ring_emit(ring, MSG_FBC_REND_STATE);
>> -     intel_ring_emit(ring, value);
>> -     intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
>> -     intel_ring_emit(ring, MSG_FBC_REND_STATE);
>> -     intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
>> -     intel_ring_advance(ring);
>> -
>> -     ring->fbc_dirty = false;
>> -     return 0;
>> -}
>> -
>>  static int
>>  gen7_render_ring_flush(struct intel_engine_cs *ring,
>>                      u32 invalidate_domains, u32 flush_domains)
>> @@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
>>       intel_ring_emit(ring, 0);
>>       intel_ring_advance(ring);
>>
>> -     if (!invalidate_domains && flush_domains)
>> -             return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
>> -
>>       return 0;
>>  }
>>
>> @@ -462,9 +436,6 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
>>       if (ret)
>>               return ret;
>>
>> -     if (!invalidate_domains && flush_domains)
>> -             return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
>> -
>>       return 0;
>>  }
>>
>> @@ -2382,7 +2353,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>                          u32 invalidate, u32 flush)
>>  {
>>       struct drm_device *dev = ring->dev;
>> -     struct drm_i915_private *dev_priv = dev->dev_private;
>>       uint32_t cmd;
>>       int ret;
>>
>> @@ -2391,7 +2361,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>               return ret;
>>
>>       cmd = MI_FLUSH_DW;
>> -     if (INTEL_INFO(ring->dev)->gen >= 8)
>> +     if (INTEL_INFO(dev)->gen >= 8)
>>               cmd += 1;
>>       /*
>>        * Bspec vol 1c.3 - blitter engine command streamer:
>> @@ -2404,7 +2374,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>                       MI_FLUSH_DW_OP_STOREDW;
>>       intel_ring_emit(ring, cmd);
>>       intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
>> -     if (INTEL_INFO(ring->dev)->gen >= 8) {
>> +     if (INTEL_INFO(dev)->gen >= 8) {
>>               intel_ring_emit(ring, 0); /* upper addr */
>>               intel_ring_emit(ring, 0); /* value */
>>       } else  {
>> @@ -2413,13 +2383,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
>>       }
>>       intel_ring_advance(ring);
>>
>> -     if (!invalidate && flush) {
>> -             if (IS_GEN7(dev))
>> -                     return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
>> -             else if (IS_BROADWELL(dev))
>> -                     dev_priv->fbc.need_sw_cache_clean = true;
>> -     }
>> -
>>       return 0;
>>  }
>>
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 714f3fd..f8dc0c6 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -267,7 +267,6 @@ struct  intel_engine_cs {
>>        */
>>       struct drm_i915_gem_request *outstanding_lazy_request;
>>       bool gpu_caches_dirty;
>> -     bool fbc_dirty;
>>
>>       wait_queue_head_t irq_queue;
>>
>> --
>> 2.1.4
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
> --
> Daniel Vetter
> Software Engineer, Intel Corporation
> +41 (0) 79 365 57 48 - http://blog.ffwll.ch



-- 
Paulo Zanoni


More information about the Intel-gfx mailing list