[Intel-gfx] [PATCH 3/3] drm/i915/icl: Add Multi-segmented gamma support

Fri Apr 26 18:29:03 UTC 2019

On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote:
> ICL introduces a new gamma correction mode in display engine, called
> multi-segmented-gamma mode. This mode allows users to program the
> darker region of the gamma curve with sueprfine precision. An
> example use case for this is HDR curves (like PQ ST-2084).
> 
> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> ICL's multi-segment has 3 different sections:
> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> - fine segment: 257 values, ranges between 0 - 1/(128)
> - corase segment: 257 values, ranges between 0 - 1
> 
> This patch:
> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>   so that userspace can program with highest precision supported.
> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> - Adds functions to program/detect multi-segment gamma.
> 
> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> 
> Suggested-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at intel.com>
> Signed-off-by: Uma Shankar <uma.shankar at intel.com>
> ---
>  drivers/gpu/drm/i915/i915_pci.c    |   3 +-
>  drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
>  2 files changed, 156 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> index ffa2ee70a03d..83698951760b 100644
> --- a/drivers/gpu/drm/i915/i915_pci.c
> +++ b/drivers/gpu/drm/i915/i915_pci.c
> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>  	GEN(11), \
>  	.ddb_size = 2048, \
>  	.has_logical_ring_elsq = 1, \
> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
> +
>  
>  static const struct intel_device_info intel_icelake_11_info = {
>  	GEN11_FEATURES,
> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> index ca341a9e47e6..d1fb79a5d764 100644
> --- a/drivers/gpu/drm/i915/intel_color.c
> +++ b/drivers/gpu/drm/i915/intel_color.c
> @@ -41,6 +41,7 @@
>  #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>  
>  #define LEGACY_LUT_LENGTH		256
> +#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
>  /*
>   * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>   * format). This macro takes the coefficient we want transformed and the
> @@ -58,6 +59,12 @@
>  
>  #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
>  
> +enum icl_ms_gamma_segments {
> +	ICL_MS_GAMMA_SEG_SUPERFINE,
> +	ICL_MS_GAMMA_SEG_FINE,
> +	ICL_MS_GAMMA_SEG_COARSE,
> +};
> +
>  static const u16 ilk_csc_off_zero[3] = {};
>  
>  static const u16 ilk_csc_coeff_identity[9] = {
> @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>  	}
>  }
>  
> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> +{
> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> +		(color->blue >> 6);
> +}
> +
> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> +{
> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> +		(color->blue & 0x3f);
> +}
> +
> +static void
> +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	enum pipe pipe = crtc->pipe;
> +
> +	/*
> +	 * Program the max register to clamp values > 1.0.
> +	 * ToDo: Extend the ABI to be able to program values
> +	 * from 1.0
> +	 */
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));

This one I think we want to program based on the provide LUT. It's the
last entry that still gets used in interpolation for <1.0 values.
Or at least that's the way it works with the 12p4 mode IIRC. I don't
actually remember how it goes with the multi segment mode.

> +
> +	/*
> +	 * Program the max register to clamp values > 1.0.
> +	 * ToDo: Extend the ABI to be able to program values
> +	 * from 1.0 to 3.0
> +	 */
> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), (1 << 16));
> +
> +	/*
> +	 * Program the gc max 2 register to clamp values > 1.0.
> +	 * ToDo: Extend the ABI to be able to program values
> +	 * from 3.0 to 7.0
> +	 */
> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), (1 << 16));
> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), (1 << 16));

And these are just ivb_load_lut_10_max(). Might need to just
s/10/ext/ in the name since I guess they apply to the 12p4 and
multi segmment modes too.

> +}
> +
> +static void
> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state,
> +				const struct drm_property_blob *blob,
> +				enum icl_ms_gamma_segments segment)
> +{
> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> +	const struct drm_color_lut *lut = blob->data;
> +	enum pipe pipe = crtc->pipe;
> +	u32 i, word, seg_size, seg_step, seg_start = 0;
> +	i915_reg_t index_reg, data_reg;
> +
> +	if (!lut || (drm_color_lut_size(blob) < ICL_MULTISEG_LUT_LENGTH)) {
> +		DRM_DEBUG_KMS("Not programming gamma segment, invalid input\n");
> +		return;
> +	}
> +
> +	/*
> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
> +	 * segment step which is 1/(8*128*256).
> +	 *
> +	 * Superfine segment has 9 entries, corresponding to values
> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
> +	 *
> +	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
> +	 * ... 256/(128*256). So in order to program fine segment of LUT we
> +	 * need to pick every 8'th entry in LUT, and program 256 indexes.
> +	 * Fine segment's index 0 is programmed in HW, and it starts from
> +	 * index 1.
> +	 *
> +	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
> +	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
> +	 * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
> +	 * program 256 of those.
> +	 */
> +
> +	switch (segment) {
> +	case ICL_MS_GAMMA_SEG_SUPERFINE:
> +		seg_size = 9;
> +		seg_step = 1;
> +		index_reg = PREC_PAL_MULTI_SEG_INDEX(pipe);
> +		data_reg = PREC_PAL_MULTI_SEG_DATA(pipe);
> +		break;
> +
> +	case ICL_MS_GAMMA_SEG_FINE:
> +		seg_start = 1;
> +		seg_size = 256;
> +		seg_step = 8;
> +		index_reg = PREC_PAL_INDEX(pipe);
> +		data_reg = PREC_PAL_DATA(pipe);
> +		break;
> +
> +	case ICL_MS_GAMMA_SEG_COARSE:
> +		seg_size = 256;
> +		seg_step = 1024;
> +		index_reg = PREC_PAL_INDEX(pipe);
> +		data_reg = PREC_PAL_DATA(pipe);
> +		break;

I'm not convinced this this enum is actually helpful. I would
probably just try to provide separate functions for the segments.
Not sure if we could share the same code for the fine/coarse
segments and just pass in the differences as parameters.

> +
> +	default:
> +		DRM_ERROR("Invalid gamma segment %d to program\n", segment);
> +		return;
> +	}
> +
> +	I915_WRITE(index_reg, PAL_PREC_AUTO_INCREMENT);
> +
> +	for (i = seg_start; i < seg_size; i++) {
> +		/* Even Index */

These comments don't really provide any value.

> +		word = ilk_lut_12p4_udw(&lut[i * seg_step]);

and the 'word' variable isn't particularly useful either.
We might want a local varianble for the lut entry like we
have in the ivb/bdw functions, if only to avoid repeating the
arithmetic.

> +		I915_WRITE(data_reg, word);
> +
> +		/* Odd index */
> +		word = ilk_lut_12p4_ldw(&lut[i * seg_step]);
> +		I915_WRITE(data_reg, word);
> +	}
> +}
> +
> +static void
> +icl_load_gamma_multi_seg_lut(const struct intel_crtc_state *crtc_state)
> +{
> +	const struct drm_property_blob *lut_blob = crtc_state->base.gamma_lut;
> +
> +	if (!lut_blob)
> +		return;
> +
> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
> +					ICL_MS_GAMMA_SEG_SUPERFINE);
> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
> +					ICL_MS_GAMMA_SEG_FINE);
> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
> +					ICL_MS_GAMMA_SEG_COARSE);
> +	icl_program_gamma_gcmax(crtc_state);
> +}

And this I would just inline into icl_load_luts() just like the rest.

> +
>  static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  {
>  	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
> @@ -776,6 +926,9 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>  		glk_load_degamma_lut(crtc_state);
>  
>  	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
> +		GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) {
> +		icl_load_gamma_multi_seg_lut(crtc_state);

I'd put this last to keep things in consistent order. And probably want
to use a switch statement now that we have three options.

> +	} else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>  	    GAMMA_MODE_MODE_8BIT) {
>  		i9xx_load_luts(crtc_state);
>  	} else {
> @@ -1187,7 +1340,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
>  	    crtc_state_is_legacy_gamma(crtc_state))
>  		gamma_mode |= GAMMA_MODE_MODE_8BIT;
>  	else
> -		gamma_mode |= GAMMA_MODE_MODE_10BIT;
> +		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>  
>  	return gamma_mode;
>  }
> -- 
> 2.17.1

-- 
Ville Syrjälä
Intel