[Intel-gfx] [PATCH 2/6] drm/i915: Don't use split gamma when we don't have to
Ville Syrjälä
ville.syrjala at linux.intel.com
Fri Mar 29 10:47:02 UTC 2019
On Thu, Mar 28, 2019 at 05:16:03PM -0700, Matt Roper wrote:
> On Thu, Mar 28, 2019 at 11:05:01PM +0200, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> >
> > Using the split gamma mode when we don't have to has the annoying
> > requirement of loading a linear LUT to the unused half. Instead
> > let's make life simpler by switching to the 10bit gamma mode
> > and duplicating each entry.
> >
> > This also allows us to load the software gamma LUT into the
> > hardware degamma LUT, thus removing some of the buggy
> > configurations we currently allow (YCbCr/limited range RGB
> > + gamma LUT). We do still have other configurations that are
> > also buggy, but those will need more complicated fixes
> > or they just need to be rejected. Sadly GLK doesn't have
> > this flexibility anymore and the degamma and gamma LUTs
> > are very different so no help there.
> >
> > Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > ---
> > drivers/gpu/drm/i915/i915_reg.h | 1 +
> > drivers/gpu/drm/i915/intel_color.c | 159 +++++++++++++++--------------
> > 2 files changed, 86 insertions(+), 74 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> > index c866379a521b..eb7e93354cfe 100644
> > --- a/drivers/gpu/drm/i915/i915_reg.h
> > +++ b/drivers/gpu/drm/i915/i915_reg.h
> > @@ -10127,6 +10127,7 @@ enum skl_power_gate {
> > #define PAL_PREC_SPLIT_MODE (1 << 31)
> > #define PAL_PREC_AUTO_INCREMENT (1 << 15)
> > #define PAL_PREC_INDEX_VALUE_MASK (0x3ff << 0)
> > +#define PAL_PREC_INDEX_VALUE(x) ((x) << 0)
> > #define _PAL_PREC_DATA_A 0x4A404
> > #define _PAL_PREC_DATA_B 0x4AC04
> > #define _PAL_PREC_DATA_C 0x4B404
> > diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> > index d7c38a2bbd8f..ed4bd9bd15f5 100644
> > --- a/drivers/gpu/drm/i915/intel_color.c
> > +++ b/drivers/gpu/drm/i915/intel_color.c
> > @@ -466,72 +466,32 @@ static void skl_color_commit(const struct intel_crtc_state *crtc_state)
> > ilk_load_csc_matrix(crtc_state);
> > }
> >
> > -static void bdw_load_degamma_lut(const struct intel_crtc_state *crtc_state)
> > +static void bdw_load_lut_10(struct intel_crtc *crtc,
> > + const struct drm_property_blob *blob,
> > + u32 prec_index, bool duplicate)
> > {
> > - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> > - const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
> > - u32 i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
> > + const struct drm_color_lut *lut = blob->data;
> > + int i, lut_size = drm_color_lut_size(blob);
> > enum pipe pipe = crtc->pipe;
> >
> > - I915_WRITE(PREC_PAL_INDEX(pipe),
> > - PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT);
> > -
> > - if (degamma_lut) {
> > - const struct drm_color_lut *lut = degamma_lut->data;
> > + I915_WRITE(PREC_PAL_INDEX(pipe), prec_index |
> > + PAL_PREC_AUTO_INCREMENT);
> >
> > - for (i = 0; i < lut_size; i++)
> > - I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
> > - } else {
> > + /*
> > + * We advertize the split gamma sizes. When not using split
> > + * gamma we just duplicate each entry.
> > + *
> > + * TODO: expose the full LUT to userspace
>
> Any reason not to just do this immediately? Throwing away half the
> table entries if we decide we need split mode doesn't seem any harder
> than duplicating the entries when we decide we don't. The color
> management kerneldoc already explicitly recommends this approach for
> hardware that can support multiple gamma modes, so I don't think we need
> any new ABI to handle it.
Hmm. I guess that apporach could be doable. It might be a bit annoying
for userspace though if it expects a direct color visual. But at least
for X we won't use degamma/ctm anyway so seems like it should work out
just fine.
>
> > + */
> > + if (duplicate) {
> > for (i = 0; i < lut_size; i++) {
> > - u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1);
> > -
> > - I915_WRITE(PREC_PAL_DATA(pipe),
> > - (v << 20) | (v << 10) | v);
> > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
> > + I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
> > }
> > - }
> > -}
> > -
> > -static void bdw_load_gamma_lut(const struct intel_crtc_state *crtc_state, u32 offset)
> > -{
> > - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > - struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> > - const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> > - u32 i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
> > - enum pipe pipe = crtc->pipe;
> > -
> > - WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK);
> > -
> > - I915_WRITE(PREC_PAL_INDEX(pipe),
> > - (offset ? PAL_PREC_SPLIT_MODE : 0) |
> > - PAL_PREC_AUTO_INCREMENT |
> > - offset);
> > -
> > - if (gamma_lut) {
> > - const struct drm_color_lut *lut = gamma_lut->data;
> > -
> > + } else {
> > for (i = 0; i < lut_size; i++)
> > I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
> > -
> > - /* Program the max register to clamp values > 1.0. */
> > - i = lut_size - 1;
> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
> > - drm_color_lut_extract(lut[i].red, 16));
> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
> > - drm_color_lut_extract(lut[i].green, 16));
> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2),
> > - drm_color_lut_extract(lut[i].blue, 16));
> > - } else {
> > - for (i = 0; i < lut_size; i++) {
> > - u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1);
> > -
> > - I915_WRITE(PREC_PAL_DATA(pipe),
> > - (v << 20) | (v << 10) | v);
> > - }
> > -
> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16) - 1);
> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1);
> > - I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1);
> > }
> >
> > /*
> > @@ -541,18 +501,43 @@ static void bdw_load_gamma_lut(const struct intel_crtc_state *crtc_state, u32 of
> > I915_WRITE(PREC_PAL_INDEX(pipe), 0);
> > }
> >
> > -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */
> > -static void broadwell_load_luts(const struct intel_crtc_state *crtc_state)
> > +static void bdw_load_lut_10_max(struct intel_crtc *crtc,
> > + const struct drm_property_blob *blob)
> > {
> > - struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> > + const struct drm_color_lut *lut = blob->data;
> > + int i = drm_color_lut_size(blob) - 1;
> > + enum pipe pipe = crtc->pipe;
> >
> > - if (crtc_state_is_legacy_gamma(crtc_state)) {
> > + /* Program the max register to clamp values > 1.0. */
> > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
> > + drm_color_lut_extract(lut[i].red, 16));
> > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
> > + drm_color_lut_extract(lut[i].green, 16));
> > + I915_WRITE(PREC_PAL_GC_MAX(pipe, 2),
> > + drm_color_lut_extract(lut[i].blue, 16));
>
> Are these the right registers to use? The "Pipe Palette and Gamma" page
> of the bspec indicates we should be using PAL_EXT_GC_MAX (e.g., 0x4A420
> instead of 0x4A410) for both the 10-bit and split gamma modes. I only
> see PAL_GC_MAX mentioned for the interpolated gamma mode.
Yeah, these aren't the registers we're looking for. Maybe Uma will send
a patch to fix this? I'm also thinking we should just program these to
1.0. There's also EXT_GC_MAX2 on some more recent platforms.
>
>
> Matt
>
> > +}
> > +
> > +static void bdw_load_luts(const struct intel_crtc_state *crtc_state)
> > +{
> > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> > + const struct drm_property_blob *degamma_lut = crtc_state->base.degamma_lut;
> > +
> > + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
> > i9xx_load_luts(crtc_state);
> > + } else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) {
> > + bdw_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE |
> > + PAL_PREC_INDEX_VALUE(0), false);
> > + bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE |
> > + PAL_PREC_INDEX_VALUE(512), false);
> > + bdw_load_lut_10_max(crtc, gamma_lut);
> > } else {
> > - bdw_load_degamma_lut(crtc_state);
> > - bdw_load_gamma_lut(crtc_state,
> > - INTEL_INFO(dev_priv)->color.degamma_lut_size);
> > + const struct drm_property_blob *blob = gamma_lut ?: degamma_lut;
> > +
> > + bdw_load_lut_10(crtc, blob,
> > + PAL_PREC_INDEX_VALUE(0), true);
> > + bdw_load_lut_10_max(crtc, blob);
> > }
> > }
> >
> > @@ -624,6 +609,9 @@ static void glk_load_degamma_lut_linear(const struct intel_crtc_state *crtc_stat
> >
> > static void glk_load_luts(const struct intel_crtc_state *crtc_state)
> > {
> > + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +
> > /*
> > * On GLK+ both pipe CSC and degamma LUT are controlled
> > * by csc_enable. Hence for the cases where the CSC is
> > @@ -637,22 +625,28 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
> > else
> > glk_load_degamma_lut_linear(crtc_state);
> >
> > - if (crtc_state_is_legacy_gamma(crtc_state))
> > + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
> > i9xx_load_luts(crtc_state);
> > - else
> > - bdw_load_gamma_lut(crtc_state, 0);
> > + } else {
> > + bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0), false);
> > + bdw_load_lut_10_max(crtc, gamma_lut);
> > + }
> > }
> >
> > static void icl_load_luts(const struct intel_crtc_state *crtc_state)
> > {
> > + const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> > + struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> > +
> > if (crtc_state->base.degamma_lut)
> > glk_load_degamma_lut(crtc_state);
> >
> > - if (crtc_state_is_legacy_gamma(crtc_state))
> > + if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
> > i9xx_load_luts(crtc_state);
> > - else
> > - /* ToDo: Add support for multi segment gamma LUT */
> > - bdw_load_gamma_lut(crtc_state, 0);
> > + } else {
> > + bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0), false);
> > + bdw_load_lut_10_max(crtc, gamma_lut);
> > + }
> > }
> >
> > static void cherryview_load_luts(const struct intel_crtc_state *crtc_state)
> > @@ -937,8 +931,25 @@ static u32 bdw_gamma_mode(const struct intel_crtc_state *crtc_state)
> > if (!crtc_state->gamma_enable ||
> > crtc_state_is_legacy_gamma(crtc_state))
> > return GAMMA_MODE_MODE_8BIT;
> > - else
> > + else if (crtc_state->base.gamma_lut &&
> > + crtc_state->base.degamma_lut)
> > return GAMMA_MODE_MODE_SPLIT;
> > + else
> > + return GAMMA_MODE_MODE_10BIT;
> > +}
> > +
> > +static u32 bdw_csc_mode(const struct intel_crtc_state *crtc_state)
> > +{
> > + /*
> > + * CSC comes after the LUT in degamma, RGB->YCbCr,
> > + * and RGB full->limited range mode.
> > + */
> > + if (crtc_state->base.degamma_lut ||
> > + crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
> > + crtc_state->limited_color_range)
> > + return 0;
> > +
> > + return CSC_POSITION_BEFORE_GAMMA;
> > }
> >
> > static int bdw_color_check(struct intel_crtc_state *crtc_state)
> > @@ -960,7 +971,7 @@ static int bdw_color_check(struct intel_crtc_state *crtc_state)
> >
> > crtc_state->gamma_mode = bdw_gamma_mode(crtc_state);
> >
> > - crtc_state->csc_mode = 0;
> > + crtc_state->csc_mode = bdw_csc_mode(crtc_state);
> >
> > ret = intel_color_add_affected_planes(crtc_state);
> > if (ret)
> > @@ -1094,7 +1105,7 @@ void intel_color_init(struct intel_crtc *crtc)
> > else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
> > dev_priv->display.load_luts = glk_load_luts;
> > else if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
> > - dev_priv->display.load_luts = broadwell_load_luts;
> > + dev_priv->display.load_luts = bdw_load_luts;
> > else
> > dev_priv->display.load_luts = i9xx_load_luts;
> > }
> > --
> > 2.19.2
> >
>
> --
> Matt Roper
> Graphics Software Engineer
> IoTG Platform Enabling & Development
> Intel Corporation
> (916) 356-2795
--
Ville Syrjälä
Intel
More information about the Intel-gfx
mailing list