[Intel-gfx] [PATCH 2/6] drm/i915: Don't use split gamma when we don't have to

Shankar, Uma uma.shankar at intel.com
Fri Mar 29 12:25:02 UTC 2019



>-----Original Message-----
>From: Ville Syrjälä [mailto:ville.syrjala at linux.intel.com]
>Sent: Friday, March 29, 2019 4:17 PM
>To: Roper, Matthew D <matthew.d.roper at intel.com>
>Cc: intel-gfx at lists.freedesktop.org; Shankar, Uma <uma.shankar at intel.com>
>Subject: Re: [PATCH 2/6] drm/i915: Don't use split gamma when we don't have to
>
>On Thu, Mar 28, 2019 at 05:16:03PM -0700, Matt Roper wrote:
>> On Thu, Mar 28, 2019 at 11:05:01PM +0200, Ville Syrjala wrote:
>> > From: Ville Syrjälä <ville.syrjala at linux.intel.com>
>> >
>> > Using the split gamma mode when we don't have to has the annoying
>> > requirement of loading a linear LUT to the unused half. Instead
>> > let's make life simpler by switching to the 10bit gamma mode and
>> > duplicating each entry.
>> >
>> > This also allows us to load the software gamma LUT into the hardware
>> > degamma LUT, thus removing some of the buggy configurations we
>> > currently allow (YCbCr/limited range RGB
>> > + gamma LUT). We do still have other configurations that are
>> > also buggy, but those will need more complicated fixes or they just
>> > need to be rejected. Sadly GLK doesn't have this flexibility anymore
>> > and the degamma and gamma LUTs are very different so no help there.
>> >
>> > Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
>> > ---
>> >  drivers/gpu/drm/i915/i915_reg.h    |   1 +
>> >  drivers/gpu/drm/i915/intel_color.c | 159
>> > +++++++++++++++--------------
>> >  2 files changed, 86 insertions(+), 74 deletions(-)
>> >
>> > diff --git a/drivers/gpu/drm/i915/i915_reg.h
>> > b/drivers/gpu/drm/i915/i915_reg.h index c866379a521b..eb7e93354cfe
>> > 100644
>> > --- a/drivers/gpu/drm/i915/i915_reg.h
>> > +++ b/drivers/gpu/drm/i915/i915_reg.h
>> > @@ -10127,6 +10127,7 @@ enum skl_power_gate {
>> >  #define   PAL_PREC_SPLIT_MODE		(1 << 31)
>> >  #define   PAL_PREC_AUTO_INCREMENT	(1 << 15)
>> >  #define   PAL_PREC_INDEX_VALUE_MASK	(0x3ff << 0)
>> > +#define   PAL_PREC_INDEX_VALUE(x)	((x) << 0)
>> >  #define _PAL_PREC_DATA_A	0x4A404
>> >  #define _PAL_PREC_DATA_B	0x4AC04
>> >  #define _PAL_PREC_DATA_C	0x4B404
>> > diff --git a/drivers/gpu/drm/i915/intel_color.c
>> > b/drivers/gpu/drm/i915/intel_color.c
>> > index d7c38a2bbd8f..ed4bd9bd15f5 100644
>> > --- a/drivers/gpu/drm/i915/intel_color.c
>> > +++ b/drivers/gpu/drm/i915/intel_color.c
>> > @@ -466,72 +466,32 @@ static void skl_color_commit(const struct
>intel_crtc_state *crtc_state)
>> >  		ilk_load_csc_matrix(crtc_state);
>> >  }
>> >
>> > -static void bdw_load_degamma_lut(const struct intel_crtc_state
>> > *crtc_state)
>> > +static void bdw_load_lut_10(struct intel_crtc *crtc,
>> > +			    const struct drm_property_blob *blob,
>> > +			    u32 prec_index, bool duplicate)
>> >  {
>> > -	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> >  	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> > -	const struct drm_property_blob *degamma_lut = crtc_state-
>>base.degamma_lut;
>> > -	u32 i, lut_size = INTEL_INFO(dev_priv)->color.degamma_lut_size;
>> > +	const struct drm_color_lut *lut = blob->data;
>> > +	int i, lut_size = drm_color_lut_size(blob);
>> >  	enum pipe pipe = crtc->pipe;
>> >
>> > -	I915_WRITE(PREC_PAL_INDEX(pipe),
>> > -		   PAL_PREC_SPLIT_MODE | PAL_PREC_AUTO_INCREMENT);
>> > -
>> > -	if (degamma_lut) {
>> > -		const struct drm_color_lut *lut = degamma_lut->data;
>> > +	I915_WRITE(PREC_PAL_INDEX(pipe), prec_index |
>> > +		   PAL_PREC_AUTO_INCREMENT);
>> >
>> > -		for (i = 0; i < lut_size; i++)
>> > -			I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
>> > -	} else {
>> > +	/*
>> > +	 * We advertize the split gamma sizes. When not using split
>> > +	 * gamma we just duplicate each entry.
>> > +	 *
>> > +	 * TODO: expose the full LUT to userspace
>>
>> Any reason not to just do this immediately?  Throwing away half the
>> table entries if we decide we need split mode doesn't seem any harder
>> than duplicating the entries when we decide we don't.  The color
>> management kerneldoc already explicitly recommends this approach for
>> hardware that can support multiple gamma modes, so I don't think we
>> need any new ABI to handle it.
>
>Hmm. I guess that apporach could be doable. It might be a bit annoying for userspace
>though if it expects a direct color visual. But at least for X we won't use
>degamma/ctm anyway so seems like it should work out just fine.
>
>>
>> > +	 */
>> > +	if (duplicate) {
>> >  		for (i = 0; i < lut_size; i++) {
>> > -			u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1);
>> > -
>> > -			I915_WRITE(PREC_PAL_DATA(pipe),
>> > -				   (v << 20) | (v << 10) | v);
>> > +			I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
>> > +			I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
>> >  		}
>> > -	}
>> > -}
>> > -
>> > -static void bdw_load_gamma_lut(const struct intel_crtc_state
>> > *crtc_state, u32 offset) -{
>> > -	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> > -	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> > -	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> > -	u32 i, lut_size = INTEL_INFO(dev_priv)->color.gamma_lut_size;
>> > -	enum pipe pipe = crtc->pipe;
>> > -
>> > -	WARN_ON(offset & ~PAL_PREC_INDEX_VALUE_MASK);
>> > -
>> > -	I915_WRITE(PREC_PAL_INDEX(pipe),
>> > -		   (offset ? PAL_PREC_SPLIT_MODE : 0) |
>> > -		   PAL_PREC_AUTO_INCREMENT |
>> > -		   offset);
>> > -
>> > -	if (gamma_lut) {
>> > -		const struct drm_color_lut *lut = gamma_lut->data;
>> > -
>> > +	} else {
>> >  		for (i = 0; i < lut_size; i++)
>> >  			I915_WRITE(PREC_PAL_DATA(pipe), ilk_lut_10(&lut[i]));
>> > -
>> > -		/* Program the max register to clamp values > 1.0. */
>> > -		i = lut_size - 1;
>> > -		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
>> > -			   drm_color_lut_extract(lut[i].red, 16));
>> > -		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
>> > -			   drm_color_lut_extract(lut[i].green, 16));
>> > -		I915_WRITE(PREC_PAL_GC_MAX(pipe, 2),
>> > -			   drm_color_lut_extract(lut[i].blue, 16));
>> > -	} else {
>> > -		for (i = 0; i < lut_size; i++) {
>> > -			u32 v = (i * ((1 << 10) - 1)) / (lut_size - 1);
>> > -
>> > -			I915_WRITE(PREC_PAL_DATA(pipe),
>> > -				   (v << 20) | (v << 10) | v);
>> > -		}
>> > -
>> > -		I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16) - 1);
>> > -		I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16) - 1);
>> > -		I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16) - 1);
>> >  	}
>> >
>> >  	/*
>> > @@ -541,18 +501,43 @@ static void bdw_load_gamma_lut(const struct
>intel_crtc_state *crtc_state, u32 of
>> >  	I915_WRITE(PREC_PAL_INDEX(pipe), 0);  }
>> >
>> > -/* Loads the palette/gamma unit for the CRTC on Broadwell+. */
>> > -static void broadwell_load_luts(const struct intel_crtc_state
>> > *crtc_state)
>> > +static void bdw_load_lut_10_max(struct intel_crtc *crtc,
>> > +				const struct drm_property_blob *blob)
>> >  {
>> > -	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> >  	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> > +	const struct drm_color_lut *lut = blob->data;
>> > +	int i = drm_color_lut_size(blob) - 1;
>> > +	enum pipe pipe = crtc->pipe;
>> >
>> > -	if (crtc_state_is_legacy_gamma(crtc_state)) {
>> > +	/* Program the max register to clamp values > 1.0. */
>> > +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0),
>> > +		   drm_color_lut_extract(lut[i].red, 16));
>> > +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1),
>> > +		   drm_color_lut_extract(lut[i].green, 16));
>> > +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2),
>> > +		   drm_color_lut_extract(lut[i].blue, 16));
>>
>> Are these the right registers to use?  The "Pipe Palette and Gamma"
>> page of the bspec indicates we should be using PAL_EXT_GC_MAX (e.g.,
>> 0x4A420 instead of 0x4A410) for both the 10-bit and split gamma modes.
>> I only see PAL_GC_MAX mentioned for the interpolated gamma mode.
>
>Yeah, these aren't the registers we're looking for. Maybe Uma will send a patch to fix
>this? I'm also thinking we should just program these to 1.0. There's also
>EXT_GC_MAX2 on some more recent platforms.

Yes, sent now to list :). Please review.

Regards,
Uma Shankar

>>
>>
>> Matt
>>
>> > +}
>> > +
>> > +static void bdw_load_luts(const struct intel_crtc_state
>> > +*crtc_state) {
>> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> > +	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> > +	const struct drm_property_blob *degamma_lut =
>> > +crtc_state->base.degamma_lut;
>> > +
>> > +	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
>> >  		i9xx_load_luts(crtc_state);
>> > +	} else if (crtc_state->gamma_mode == GAMMA_MODE_MODE_SPLIT) {
>> > +		bdw_load_lut_10(crtc, degamma_lut, PAL_PREC_SPLIT_MODE |
>> > +				PAL_PREC_INDEX_VALUE(0), false);
>> > +		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_SPLIT_MODE |
>> > +				PAL_PREC_INDEX_VALUE(512),  false);
>> > +		bdw_load_lut_10_max(crtc, gamma_lut);
>> >  	} else {
>> > -		bdw_load_degamma_lut(crtc_state);
>> > -		bdw_load_gamma_lut(crtc_state,
>> > -				   INTEL_INFO(dev_priv)->color.degamma_lut_size);
>> > +		const struct drm_property_blob *blob = gamma_lut ?: degamma_lut;
>> > +
>> > +		bdw_load_lut_10(crtc, blob,
>> > +				PAL_PREC_INDEX_VALUE(0), true);
>> > +		bdw_load_lut_10_max(crtc, blob);
>> >  	}
>> >  }
>> >
>> > @@ -624,6 +609,9 @@ static void glk_load_degamma_lut_linear(const
>> > struct intel_crtc_state *crtc_stat
>> >
>> >  static void glk_load_luts(const struct intel_crtc_state
>> > *crtc_state)  {
>> > +	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> > +
>> >  	/*
>> >  	 * On GLK+ both pipe CSC and degamma LUT are controlled
>> >  	 * by csc_enable. Hence for the cases where the CSC is @@ -637,22
>> > +625,28 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>> >  	else
>> >  		glk_load_degamma_lut_linear(crtc_state);
>> >
>> > -	if (crtc_state_is_legacy_gamma(crtc_state))
>> > +	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
>> >  		i9xx_load_luts(crtc_state);
>> > -	else
>> > -		bdw_load_gamma_lut(crtc_state, 0);
>> > +	} else {
>> > +		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0),
>false);
>> > +		bdw_load_lut_10_max(crtc, gamma_lut);
>> > +	}
>> >  }
>> >
>> >  static void icl_load_luts(const struct intel_crtc_state
>> > *crtc_state)  {
>> > +	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> > +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> > +
>> >  	if (crtc_state->base.degamma_lut)
>> >  		glk_load_degamma_lut(crtc_state);
>> >
>> > -	if (crtc_state_is_legacy_gamma(crtc_state))
>> > +	if (crtc_state->gamma_mode == GAMMA_MODE_MODE_8BIT) {
>> >  		i9xx_load_luts(crtc_state);
>> > -	else
>> > -		/* ToDo: Add support for multi segment gamma LUT */
>> > -		bdw_load_gamma_lut(crtc_state, 0);
>> > +	} else {
>> > +		bdw_load_lut_10(crtc, gamma_lut, PAL_PREC_INDEX_VALUE(0),
>false);
>> > +		bdw_load_lut_10_max(crtc, gamma_lut);
>> > +	}
>> >  }
>> >
>> >  static void cherryview_load_luts(const struct intel_crtc_state
>> > *crtc_state) @@ -937,8 +931,25 @@ static u32 bdw_gamma_mode(const struct
>intel_crtc_state *crtc_state)
>> >  	if (!crtc_state->gamma_enable ||
>> >  	    crtc_state_is_legacy_gamma(crtc_state))
>> >  		return GAMMA_MODE_MODE_8BIT;
>> > -	else
>> > +	else if (crtc_state->base.gamma_lut &&
>> > +		 crtc_state->base.degamma_lut)
>> >  		return GAMMA_MODE_MODE_SPLIT;
>> > +	else
>> > +		return GAMMA_MODE_MODE_10BIT;
>> > +}
>> > +
>> > +static u32 bdw_csc_mode(const struct intel_crtc_state *crtc_state)
>> > +{
>> > +	/*
>> > +	 * CSC comes after the LUT in degamma, RGB->YCbCr,
>> > +	 * and RGB full->limited range mode.
>> > +	 */
>> > +	if (crtc_state->base.degamma_lut ||
>> > +	    crtc_state->output_format != INTEL_OUTPUT_FORMAT_RGB ||
>> > +	    crtc_state->limited_color_range)
>> > +		return 0;
>> > +
>> > +	return CSC_POSITION_BEFORE_GAMMA;
>> >  }
>> >
>> >  static int bdw_color_check(struct intel_crtc_state *crtc_state) @@
>> > -960,7 +971,7 @@ static int bdw_color_check(struct intel_crtc_state
>> > *crtc_state)
>> >
>> >  	crtc_state->gamma_mode = bdw_gamma_mode(crtc_state);
>> >
>> > -	crtc_state->csc_mode = 0;
>> > +	crtc_state->csc_mode = bdw_csc_mode(crtc_state);
>> >
>> >  	ret = intel_color_add_affected_planes(crtc_state);
>> >  	if (ret)
>> > @@ -1094,7 +1105,7 @@ void intel_color_init(struct intel_crtc *crtc)
>> >  		else if (IS_CANNONLAKE(dev_priv) || IS_GEMINILAKE(dev_priv))
>> >  			dev_priv->display.load_luts = glk_load_luts;
>> >  		else if (INTEL_GEN(dev_priv) >= 9 || IS_BROADWELL(dev_priv))
>> > -			dev_priv->display.load_luts = broadwell_load_luts;
>> > +			dev_priv->display.load_luts = bdw_load_luts;
>> >  		else
>> >  			dev_priv->display.load_luts = i9xx_load_luts;
>> >  	}
>> > --
>> > 2.19.2
>> >
>>
>> --
>> Matt Roper
>> Graphics Software Engineer
>> IoTG Platform Enabling & Development
>> Intel Corporation
>> (916) 356-2795
>
>--
>Ville Syrjälä
>Intel


More information about the Intel-gfx mailing list