[Intel-gfx] [PATCH 6/7] drm/i915: add frontbuffer tracking to FBC

Vivi, Rodrigo rodrigo.vivi at intel.com
Wed Mar 4 12:54:21 PST 2015


On Wed, 2015-03-04 at 15:03 -0300, Paulo Zanoni wrote:
> 2015-03-03 21:57 GMT-03:00 Rodrigo Vivi <rodrigo.vivi at gmail.com>:
> > On Fri, Feb 13, 2015 at 11:23 AM, Paulo Zanoni <przanoni at gmail.com> wrote:
> >> From: Paulo Zanoni <paulo.r.zanoni at intel.com>
> >>
> >> Kill the blt/render tracking we currently have and use the frontbuffer
> >> tracking infrastructure.
> >>
> >> Don't enable things by default yet.
> >>
> >> v2: (Rodrigo) Fix small conflict on rebase and typo at subject.
> >> v3: (Paulo) Rebase on RENDER_CS change.
> >> v4: (Paulo) Rebase.
> >> v5: (Paulo) Simplify: flushes don't have origin (Daniel).
> >>             Also rebase due to patch order changes.
> >>
> >> Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
> >> Signed-off-by: Paulo Zanoni <paulo.r.zanoni at intel.com>
> >> ---
> >>  drivers/gpu/drm/i915/i915_drv.h          | 10 +---
> >>  drivers/gpu/drm/i915/intel_drv.h         |  6 ++-
> >>  drivers/gpu/drm/i915/intel_fbc.c         | 91 +++++++++++++++++++-------------
> >>  drivers/gpu/drm/i915/intel_frontbuffer.c | 14 +----
> >>  drivers/gpu/drm/i915/intel_ringbuffer.c  | 41 +-------------
> >>  drivers/gpu/drm/i915/intel_ringbuffer.h  |  1 -
> >>  6 files changed, 65 insertions(+), 98 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> >> index 30aaa8e..7680ca0 100644
> >> --- a/drivers/gpu/drm/i915/i915_drv.h
> >> +++ b/drivers/gpu/drm/i915/i915_drv.h
> >> @@ -783,6 +783,8 @@ struct i915_fbc {
> >>         unsigned long uncompressed_size;
> >>         unsigned threshold;
> >>         unsigned int fb_id;
> >> +       unsigned int possible_framebuffer_bits;
> >> +       unsigned int busy_bits;
> >
> > I'm not sure if I understood why you keep 2 variables here?
> 
> After Daniel's last suggestion, the possible_framebuffer_bits could
> probably be removed and left as a local variable at intel_fbc_validate
> now: it's just a matter of coding style. Some platforms support FBC on
> more than just pipe A, so this variable is used to simplify checks
> related to this.
> 
> The busy_bits was also part of Daniel's last request: we need to store
> which bits triggered intel_fbc_invalidate calls, and then check them
> when intel_fbc_flush is called.

Got it.
Thanks for explaining it.

> 
> > Is it because we keep enabling/disabling fbc with updates all over the code?
> > In this case it is ok we just need to kill it when we kill update_fbc...
> 
> I don't understand what you mean here. Please clarify.

I had missunderstood the reason of possible_frontbuffer bits, sorry.
But about killing update_fbc I believe that after frontbuffer rework is
in place we don't need to use that update_fbc function that disable and
enable fbc on the cases we knew it could fail. With frontbuffer bits we
could let it always enabled and kicking with frontbuffer bits.

> 
> >
> >>         struct intel_crtc *crtc;
> >>         int y;
> >>
> >> @@ -795,14 +797,6 @@ struct i915_fbc {
> >>          * possible. */
> >>         bool enabled;
> >>
> >> -       /* On gen8 some rings cannont perform fbc clean operation so for now
> >> -        * we are doing this on SW with mmio.
> >> -        * This variable works in the opposite information direction
> >> -        * of ring->fbc_dirty telling software on frontbuffer tracking
> >> -        * to perform the cache clean on sw side.
> >> -        */
> >> -       bool need_sw_cache_clean;
> >> -
> >>         struct intel_fbc_work {
> >>                 struct delayed_work work;
> >>                 struct drm_crtc *crtc;
> >> diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
> >> index 05d0a43f..571174f 100644
> >> --- a/drivers/gpu/drm/i915/intel_drv.h
> >> +++ b/drivers/gpu/drm/i915/intel_drv.h
> >> @@ -1091,7 +1091,11 @@ bool intel_fbc_enabled(struct drm_device *dev);
> >>  void intel_fbc_update(struct drm_device *dev);
> >>  void intel_fbc_init(struct drm_i915_private *dev_priv);
> >>  void intel_fbc_disable(struct drm_device *dev);
> >> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value);
> >> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
> >> +                         unsigned int frontbuffer_bits,
> >> +                         enum fb_op_origin origin);
> >> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
> >> +                    unsigned int frontbuffer_bits);
> >>
> >>  /* intel_hdmi.c */
> >>  void intel_hdmi_init(struct drm_device *dev, int hdmi_reg, enum port port);
> >> diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c
> >> index 618f7bd..9fcf446 100644
> >> --- a/drivers/gpu/drm/i915/intel_fbc.c
> >> +++ b/drivers/gpu/drm/i915/intel_fbc.c
> >> @@ -174,29 +174,10 @@ static bool g4x_fbc_enabled(struct drm_device *dev)
> >>         return I915_READ(DPFC_CONTROL) & DPFC_CTL_EN;
> >>  }
> >>
> >> -static void snb_fbc_blit_update(struct drm_device *dev)
> >> +static void intel_fbc_nuke(struct drm_i915_private *dev_priv)
> >>  {
> >> -       struct drm_i915_private *dev_priv = dev->dev_private;
> >> -       u32 blt_ecoskpd;
> >> -
> >> -       /* Make sure blitter notifies FBC of writes */
> >> -
> >> -       /* Blitter is part of Media powerwell on VLV. No impact of
> >> -        * his param in other platforms for now */
> >> -       intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
> >> -
> >> -       blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
> >> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
> >> -               GEN6_BLITTER_LOCK_SHIFT;
> >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> >> -       blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY;
> >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> >> -       blt_ecoskpd &= ~(GEN6_BLITTER_FBC_NOTIFY <<
> >> -                        GEN6_BLITTER_LOCK_SHIFT);
> >> -       I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
> >> -       POSTING_READ(GEN6_BLITTER_ECOSKPD);
> >> -
> >> -       intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
> >
> > Are you sure this continue working on snb? or should we fully remove
> > fbc support for snb and older?
> >
> > Anyway I believe this could be in a different patch.
> 
> Well, the whole idea of using the sofware-based frontbuffer tracking
> mechanism is to get rid of all the hardware-based stuff.

Hm, I still have a concern here. We would need to test on those
platforms to be certain. The reason for that is that HW tracking still
works so it can let the buffer in a stage that compression is unable to
start... same reason that we still need both lines below on BDW.

Anyway this wouldn't break anything and it this rework was a great
improvement so feel free to use

Reviewed-by: Rodrigo Vivi <rodrigo.vivi at intel.com>

and we continue any other necessary improvement or fix on top.
> 
> >
> >> +       I915_WRITE(MSG_FBC_REND_STATE, FBC_REND_NUKE);
> >> +       POSTING_READ(MSG_FBC_REND_STATE);
> >>  }
> >>
> >>  static void ilk_fbc_enable(struct drm_crtc *crtc)
> >> @@ -239,9 +220,10 @@ static void ilk_fbc_enable(struct drm_crtc *crtc)
> >>                 I915_WRITE(SNB_DPFC_CTL_SA,
> >>                            SNB_CPU_FENCE_ENABLE | obj->fence_reg);
> >>                 I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
> >> -               snb_fbc_blit_update(dev);
> >>         }
> >>
> >> +       intel_fbc_nuke(dev_priv);
> >> +
> >>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
> >>  }
> >>
> >> @@ -320,7 +302,7 @@ static void gen7_fbc_enable(struct drm_crtc *crtc)
> >>                    SNB_CPU_FENCE_ENABLE | obj->fence_reg);
> >>         I915_WRITE(DPFC_CPU_FENCE_OFFSET, crtc->y);
> >>
> >> -       snb_fbc_blit_update(dev);
> >> +       intel_fbc_nuke(dev_priv);
> >>
> >>         DRM_DEBUG_KMS("enabled fbc on plane %c\n", plane_name(intel_crtc->plane));
> >>  }
> >> @@ -340,19 +322,6 @@ bool intel_fbc_enabled(struct drm_device *dev)
> >>         return dev_priv->fbc.enabled;
> >>  }
> >>
> >> -void bdw_fbc_sw_flush(struct drm_device *dev, u32 value)
> >> -{
> >> -       struct drm_i915_private *dev_priv = dev->dev_private;
> >> -
> >> -       if (!IS_GEN8(dev))
> >> -               return;
> >> -
> >> -       if (!intel_fbc_enabled(dev))
> >> -               return;
> >> -
> >> -       I915_WRITE(MSG_FBC_REND_STATE, value);
> >> -}
> >> -
> >>  static void intel_fbc_work_fn(struct work_struct *__work)
> >>  {
> >>         struct intel_fbc_work *work =
> >> @@ -685,6 +654,44 @@ out_disable:
> >>         i915_gem_stolen_cleanup_compression(dev);
> >>  }
> >>
> >> +void intel_fbc_invalidate(struct drm_i915_private *dev_priv,
> >> +                         unsigned int frontbuffer_bits,
> >> +                         enum fb_op_origin origin)
> >> +{
> >> +       struct drm_device *dev = dev_priv->dev;
> >> +       unsigned int fbc_bits;
> >> +
> >> +       if (origin == ORIGIN_GTT)
> >> +               return;
> >> +
> >> +       if (dev_priv->fbc.enabled)
> >> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(dev_priv->fbc.crtc->pipe);
> >> +       else if (dev_priv->fbc.fbc_work)
> >> +               fbc_bits = INTEL_FRONTBUFFER_PRIMARY(
> >> +                       to_intel_crtc(dev_priv->fbc.fbc_work->crtc)->pipe);
> >> +       else
> >> +               fbc_bits = dev_priv->fbc.possible_framebuffer_bits;
> >> +
> >> +       dev_priv->fbc.busy_bits |= (fbc_bits & frontbuffer_bits);
> >> +
> >> +       if (dev_priv->fbc.busy_bits)
> >> +               intel_fbc_disable(dev);
> >> +}
> >> +
> >> +void intel_fbc_flush(struct drm_i915_private *dev_priv,
> >> +                    unsigned int frontbuffer_bits)
> >> +{
> >> +       struct drm_device *dev = dev_priv->dev;
> >> +
> >> +       if (!dev_priv->fbc.busy_bits)
> >> +               return;
> >> +
> >> +       dev_priv->fbc.busy_bits &= ~frontbuffer_bits;
> >> +
> >> +       if (!dev_priv->fbc.busy_bits)
> >> +               intel_fbc_update(dev);
> >> +}
> >> +
> >>  /**
> >>   * intel_fbc_init - Initialize FBC
> >>   * @dev_priv: the i915 device
> >> @@ -693,12 +700,22 @@ out_disable:
> >>   */
> >>  void intel_fbc_init(struct drm_i915_private *dev_priv)
> >>  {
> >> +       enum pipe pipe;
> >> +
> >>         if (!HAS_FBC(dev_priv)) {
> >>                 dev_priv->fbc.enabled = false;
> >>                 dev_priv->fbc.no_fbc_reason = FBC_UNSUPPORTED;
> >>                 return;
> >>         }
> >>
> >> +       for_each_pipe(dev_priv, pipe) {
> >> +               dev_priv->fbc.possible_framebuffer_bits |=
> >> +                               INTEL_FRONTBUFFER_PRIMARY(pipe);
> >> +
> >> +               if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
> >> +                       break;
> >> +       }
> >> +
> >>         if (INTEL_INFO(dev_priv)->gen >= 7) {
> >>                 dev_priv->display.fbc_enabled = ilk_fbc_enabled;
> >>                 dev_priv->display.enable_fbc = gen7_fbc_enable;
> >> diff --git a/drivers/gpu/drm/i915/intel_frontbuffer.c b/drivers/gpu/drm/i915/intel_frontbuffer.c
> >> index 5da73f0..0a1bac8 100644
> >> --- a/drivers/gpu/drm/i915/intel_frontbuffer.c
> >> +++ b/drivers/gpu/drm/i915/intel_frontbuffer.c
> >> @@ -118,8 +118,6 @@ static void intel_mark_fb_busy(struct drm_device *dev,
> >>                         continue;
> >>
> >>                 intel_increase_pllclock(dev, pipe);
> >> -               if (ring && intel_fbc_enabled(dev))
> >> -                       ring->fbc_dirty = true;
> >>         }
> >>  }
> >>
> >> @@ -160,6 +158,7 @@ void intel_fb_obj_invalidate(struct drm_i915_gem_object *obj,
> >>
> >>         intel_psr_invalidate(dev, obj->frontbuffer_bits);
> >>         intel_edp_drrs_invalidate(dev, obj->frontbuffer_bits);
> >> +       intel_fbc_invalidate(dev_priv, obj->frontbuffer_bits, origin);
> >>  }
> >>
> >>  /**
> >> @@ -187,16 +186,7 @@ void intel_frontbuffer_flush(struct drm_device *dev,
> >>
> >>         intel_edp_drrs_flush(dev, frontbuffer_bits);
> >>         intel_psr_flush(dev, frontbuffer_bits);
> >> -
> >> -       /*
> >> -        * FIXME: Unconditional fbc flushing here is a rather gross hack and
> >> -        * needs to be reworked into a proper frontbuffer tracking scheme like
> >> -        * psr employs.
> >> -        */
> >> -       if (dev_priv->fbc.need_sw_cache_clean) {
> >> -               dev_priv->fbc.need_sw_cache_clean = false;
> >> -               bdw_fbc_sw_flush(dev, FBC_REND_CACHE_CLEAN);
> >> -       }
> >> +       intel_fbc_flush(dev_priv, frontbuffer_bits);
> >>  }
> >>
> >>  /**
> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> index d17e76d..fed6284 100644
> >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> >> @@ -317,29 +317,6 @@ gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
> >>         return 0;
> >>  }
> >>
> >> -static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
> >> -{
> >> -       int ret;
> >> -
> >> -       if (!ring->fbc_dirty)
> >> -               return 0;
> >> -
> >> -       ret = intel_ring_begin(ring, 6);
> >> -       if (ret)
> >> -               return ret;
> >> -       /* WaFbcNukeOn3DBlt:ivb/hsw */
> >> -       intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
> >> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);
> >> -       intel_ring_emit(ring, value);
> >> -       intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
> >> -       intel_ring_emit(ring, MSG_FBC_REND_STATE);
> >> -       intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
> >> -       intel_ring_advance(ring);
> >> -
> >> -       ring->fbc_dirty = false;
> >> -       return 0;
> >> -}
> >> -
> >>  static int
> >>  gen7_render_ring_flush(struct intel_engine_cs *ring,
> >>                        u32 invalidate_domains, u32 flush_domains)
> >> @@ -398,9 +375,6 @@ gen7_render_ring_flush(struct intel_engine_cs *ring,
> >>         intel_ring_emit(ring, 0);
> >>         intel_ring_advance(ring);
> >>
> >> -       if (!invalidate_domains && flush_domains)
> >> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> >> -
> >>         return 0;
> >>  }
> >>
> >> @@ -462,9 +436,6 @@ gen8_render_ring_flush(struct intel_engine_cs *ring,
> >>         if (ret)
> >>                 return ret;
> >>
> >> -       if (!invalidate_domains && flush_domains)
> >> -               return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);
> >> -
> >>         return 0;
> >>  }
> >>
> >> @@ -2420,7 +2391,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
> >>                            u32 invalidate, u32 flush)
> >>  {
> >>         struct drm_device *dev = ring->dev;
> >> -       struct drm_i915_private *dev_priv = dev->dev_private;
> >>         uint32_t cmd;
> >>         int ret;
> >>
> >> @@ -2429,7 +2399,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
> >>                 return ret;
> >>
> >>         cmd = MI_FLUSH_DW;
> >> -       if (INTEL_INFO(ring->dev)->gen >= 8)
> >> +       if (INTEL_INFO(dev)->gen >= 8)
> >>                 cmd += 1;
> >>
> >>         /* We always require a command barrier so that subsequent
> >> @@ -2449,7 +2419,7 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
> >>                 cmd |= MI_INVALIDATE_TLB;
> >>         intel_ring_emit(ring, cmd);
> >>         intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
> >> -       if (INTEL_INFO(ring->dev)->gen >= 8) {
> >> +       if (INTEL_INFO(dev)->gen >= 8) {
> >>                 intel_ring_emit(ring, 0); /* upper addr */
> >>                 intel_ring_emit(ring, 0); /* value */
> >>         } else  {
> >> @@ -2458,13 +2428,6 @@ static int gen6_ring_flush(struct intel_engine_cs *ring,
> >>         }
> >>         intel_ring_advance(ring);
> >>
> >> -       if (!invalidate && flush) {
> >> -               if (IS_GEN7(dev))
> >> -                       return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
> >> -               else if (IS_BROADWELL(dev))
> >> -                       dev_priv->fbc.need_sw_cache_clean = true;
> >> -       }
> >> -
> >>         return 0;
> >>  }
> >>
> >> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >> index b6c484f..2ac2de2 100644
> >> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> >> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> >> @@ -267,7 +267,6 @@ struct  intel_engine_cs {
> >>          */
> >>         struct drm_i915_gem_request *outstanding_lazy_request;
> >>         bool gpu_caches_dirty;
> >> -       bool fbc_dirty;
> >>
> >>         wait_queue_head_t irq_queue;
> >>
> >> --
> >> 2.1.4
> >>
> >> _______________________________________________
> >> Intel-gfx mailing list
> >> Intel-gfx at lists.freedesktop.org
> >> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
> >
> > Thanks,
> >
> > --
> > Rodrigo Vivi
> > Blog: http://blog.vivi.eng.br
> 
> 
> 



More information about the Intel-gfx mailing list