[Intel-gfx] [PATCH v2 4/4] drm/i915/icl: Add Multi-segmented gamma support

Mon May 6 13:26:11 UTC 2019

On Mon, May 06, 2019 at 06:44:43PM +0530, Sharma, Shashank wrote:
> Regards
> 
> Shashank
> 
> On 5/6/2019 6:41 PM, Ville Syrjälä wrote:
> > On Mon, May 06, 2019 at 06:25:19PM +0530, Sharma, Shashank wrote:
> >> On 5/6/2019 5:55 PM, Ville Syrjälä wrote:
> >>> On Mon, May 06, 2019 at 04:09:33PM +0530, Sharma, Shashank wrote:
> >>>> Regards
> >>>>
> >>>> Shashank
> >>>>
> >>>> On 5/3/2019 9:20 PM, Ville Syrjälä wrote:
> >>>>> On Tue, Apr 30, 2019 at 08:51:08PM +0530, Shashank Sharma wrote:
> >>>>>> ICL introduces a new gamma correction mode in display engine, called
> >>>>>> multi-segmented-gamma mode. This mode allows users to program the
> >>>>>> darker region of the gamma curve with sueprfine precision. An
> >>>>>> example use case for this is HDR curves (like PQ ST-2084).
> >>>>>>
> >>>>>> If we plot a gamma correction curve from value range between 0.0 to 1.0,
> >>>>>> ICL's multi-segment has 3 different sections:
> >>>>>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
> >>>>>> - fine segment: 257 values, ranges between 0 - 1/(128)
> >>>>>> - corase segment: 257 values, ranges between 0 - 1
> >>>>>>
> >>>>>> This patch:
> >>>>>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
> >>>>>>      so that userspace can program with highest precision supported.
> >>>>>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
> >>>>>> - Adds functions to program/detect multi-segment gamma.
> >>>>>>
> >>>>>> V2: Addressed review comments from Ville
> >>>>>>        - separate function for superfine and fine segments.
> >>>>>>        - remove enum for segments.
> >>>>>>        - reuse last entry of the LUT as gc_max value.
> >>>>>>        - replace if() ....cond with switch...case in icl_load_luts.
> >>>>>>        - add an entry variable, instead of 'word'
> >>>>>>
> >>>>>> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
> >>>>>> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
> >>>>>> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
> >>>>>>
> >>>>>> Suggested-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> >>>>>> Signed-off-by: Shashank Sharma <shashank.sharma at intel.com>
> >>>>>> Signed-off-by: Uma Shankar <uma.shankar at intel.com>
> >>>>>> ---
> >>>>>>     drivers/gpu/drm/i915/i915_pci.c    |   3 +-
> >>>>>>     drivers/gpu/drm/i915/intel_color.c | 125 ++++++++++++++++++++++++++++-
> >>>>>>     2 files changed, 123 insertions(+), 5 deletions(-)
> >>>>>>
> >>>>>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
> >>>>>> index ffa2ee70a03d..83698951760b 100644
> >>>>>> --- a/drivers/gpu/drm/i915/i915_pci.c
> >>>>>> +++ b/drivers/gpu/drm/i915/i915_pci.c
> >>>>>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
> >>>>>>     	GEN(11), \
> >>>>>>     	.ddb_size = 2048, \
> >>>>>>     	.has_logical_ring_elsq = 1, \
> >>>>>> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
> >>>>>> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
> >>>>> Ugh. Thats one big LUT. But looks correct.
> >>>>>
> >>>>>> +
> >>>>> Bogus newline.
> >>>> Got it.
> >>>>>>     
> >>>>>>     static const struct intel_device_info intel_icelake_11_info = {
> >>>>>>     	GEN11_FEATURES,
> >>>>>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
> >>>>>> index 6c341bea514c..49831e8d02fb 100644
> >>>>>> --- a/drivers/gpu/drm/i915/intel_color.c
> >>>>>> +++ b/drivers/gpu/drm/i915/intel_color.c
> >>>>>> @@ -41,6 +41,9 @@
> >>>>>>     #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
> >>>>>>     
> >>>>>>     #define LEGACY_LUT_LENGTH		256
> >>>>>> +#define ICL_GAMMA_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
> >>>>>> +#define ICL_GAMMA_SUPERFINE_SEG_LENGTH	9
> >>>>>> +
> >>>>>>     /*
> >>>>>>      * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
> >>>>>>      * format). This macro takes the coefficient we want transformed and the
> >>>>>> @@ -767,6 +770,113 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
> >>>>>>     	}
> >>>>>>     }
> >>>>>>     
> >>>>>> +/* ilk+ "12.4" interpolated format (high 10 bits) */
> >>>>>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
> >>>>>> +{
> >>>>>> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
> >>>>>> +		(color->blue >> 6);
> >>>>>> +}
> >>>>>> +
> >>>>>> +/* ilk+ "12.4" interpolated format (low 6 bits) */
> >>>>>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
> >>>>>> +{
> >>>>>> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
> >>>>>> +		(color->blue & 0x3f);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void
> >>>>>> +icl_load_gcmax(const struct intel_crtc_state *crtc_state,
> >>>>>> +			const struct drm_color_lut *entry)
> >>>>> Indentation looks off. Also s/entry/color/ to match the other similarish
> >>>>> funcs maybe?
> >>>> Sure.
> >>>>>> +{
> >>>>>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> >>>>>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> >>>>>> +	enum pipe pipe = crtc->pipe;
> >>>>>> +
> >>>>>> +	/* Fixme: LUT entries are 16 bit only, so we can prog 0xFFFF max */
> >>>>>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), entry->red);
> >>>>>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), entry->green);
> >>>>>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), entry->blue);
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void
> >>>>>> +icl_program_gamma_superfine_segment(const struct intel_crtc_state *crtc_state)
> >>>>>> +{
> >>>>>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> >>>>>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> >>>>>> +	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
> >>>>>> +	const struct drm_color_lut *lut = blob->data;
> >>>>>> +	enum pipe pipe = crtc->pipe;
> >>>>>> +	u32 i;
> >>>>>> +
> >>>>>> +	if (!lut || drm_color_lut_size(blob) < ICL_GAMMA_SUPERFINE_SEG_LENGTH)
> >>>>>> +		return;
> >>>>> These checks aren't needed. Just dead code.
> >>>> Will remove this and similars.
> >>>>>> +
> >>>>>> +	/*
> >>>>>> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
> >>>>>> +	 * segment step which is 1/(8 * 128 * 256).
> >>>>>> +	 *
> >>>>>> +	 * Superfine segment has 9 entries, corresponding to values
> >>>>>> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
> >>>>>> +	 */
> >>>>>> +	I915_WRITE(PREC_PAL_MULTI_SEG_INDEX(pipe), PAL_PREC_AUTO_INCREMENT);
> >>>>>> +
> >>>>>> +	for (i = 0; i < 9; i++) {
> >>>>>> +		const struct drm_color_lut *entry = &lut[i];
> >>>>>> +
> >>>>>> +		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
> >>>>>> +			   ilk_lut_12p4_udw(entry));
> >>>>> ldw should come before udw.
> >>>> Got it.
> >>>>>> +		I915_WRITE(PREC_PAL_MULTI_SEG_DATA(pipe),
> >>>>>> +			   ilk_lut_12p4_ldw(entry));
> >>>>>> +	}
> >>>>>> +}
> >>>>>> +
> >>>>>> +static void
> >>>>>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state)
> >>>>>> +{
> >>>>>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
> >>>>>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
> >>>>>> +	const struct drm_property_blob *blob = crtc_state->base.gamma_lut;
> >>>>>> +	const struct drm_color_lut *lut = blob->data;
> >>>>>> +	const struct drm_color_lut *entry;
> >>>>> 'entry' declaration can be moved into the loops.
> >>>> Its being used in multiple loops,
> >>> IMO still better to move into the loops. We don't want to pass any
> >>> information between the loops.
> >>>
> >>>> also being used for GCMax outside the
> >>>> loop.
> >>> Hmm. That might be an arguemnt for keeping it out. But the current
> >>> gcmax usage looks broken. You're programming the same value into
> >>> the last PAL_PREC index and GCMAX.
> >> Isn't this what you wanted ? IIRC, your recommendation was to program
> >> the highest values user wants to program in GCMAX reg. We also had a
> >> discussion on how user cant program 1.0, as we have LUT depth on 16 bit
> >> only, and we decided to use the last value of the LUT as GCMAX. Did I
> >> misunderstand anything there ?
> > It should be:
> > PAL_PREC[max] = lut[size-2];
> > GCMAX = lut[size-1];
> >
> > Oh, we also need to increase the LUT size by one to make that
> > work correctly.
> 
> Yep, that's why I was not so sure about this, and thought programming a 
> 1.0 is a better idea.
> 
> So now the new size would be 257 * 256 * 128 = 263,168 entries.I will do 
> changes and send V3.

Should be just 256*128*8+1=262145

-- 
Ville Syrjälä
Intel