[Intel-gfx] [PATCH 3/3] drm/i915/icl: Add Multi-segmented gamma support

Sharma, Shashank shashank.sharma at intel.com
Mon Apr 29 11:30:21 UTC 2019


On 4/26/2019 11:59 PM, Ville Syrjälä wrote:
> On Fri, Apr 26, 2019 at 11:31:51PM +0530, Shashank Sharma wrote:
>> ICL introduces a new gamma correction mode in display engine, called
>> multi-segmented-gamma mode. This mode allows users to program the
>> darker region of the gamma curve with sueprfine precision. An
>> example use case for this is HDR curves (like PQ ST-2084).
>>
>> If we plot a gamma correction curve from value range between 0.0 to 1.0,
>> ICL's multi-segment has 3 different sections:
>> - superfine segment: 9 values, ranges between 0 - 1/(128 * 256)
>> - fine segment: 257 values, ranges between 0 - 1/(128)
>> - corase segment: 257 values, ranges between 0 - 1
>>
>> This patch:
>> - Changes gamma LUTs size for ICL/GEN11 to 262144 entries (8 * 128 * 256),
>>    so that userspace can program with highest precision supported.
>> - Changes default gamma mode (non-legacy) to multi-segmented-gamma mode.
>> - Adds functions to program/detect multi-segment gamma.
>>
>> Cc: Ville Syrjälä <ville.syrjala at linux.intel.com>
>> Cc: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
>> Cc: Daniel Vetter <daniel.vetter at ffwll.ch>
>>
>> Suggested-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma at intel.com>
>> Signed-off-by: Uma Shankar <uma.shankar at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_pci.c    |   3 +-
>>   drivers/gpu/drm/i915/intel_color.c | 155 ++++++++++++++++++++++++++++-
>>   2 files changed, 156 insertions(+), 2 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
>> index ffa2ee70a03d..83698951760b 100644
>> --- a/drivers/gpu/drm/i915/i915_pci.c
>> +++ b/drivers/gpu/drm/i915/i915_pci.c
>> @@ -749,7 +749,8 @@ static const struct intel_device_info intel_cannonlake_info = {
>>   	GEN(11), \
>>   	.ddb_size = 2048, \
>>   	.has_logical_ring_elsq = 1, \
>> -	.color = { .degamma_lut_size = 33, .gamma_lut_size = 1024 }
>> +	.color = { .degamma_lut_size = 33, .gamma_lut_size = 262144 }
>> +
>>   
>>   static const struct intel_device_info intel_icelake_11_info = {
>>   	GEN11_FEATURES,
>> diff --git a/drivers/gpu/drm/i915/intel_color.c b/drivers/gpu/drm/i915/intel_color.c
>> index ca341a9e47e6..d1fb79a5d764 100644
>> --- a/drivers/gpu/drm/i915/intel_color.c
>> +++ b/drivers/gpu/drm/i915/intel_color.c
>> @@ -41,6 +41,7 @@
>>   #define CTM_COEFF_ABS(coeff)		((coeff) & (CTM_COEFF_SIGN - 1))
>>   
>>   #define LEGACY_LUT_LENGTH		256
>> +#define ICL_MULTISEG_LUT_LENGTH		(256 * 128 * 8)
>>   /*
>>    * Extract the CSC coefficient from a CTM coefficient (in U32.32 fixed point
>>    * format). This macro takes the coefficient we want transformed and the
>> @@ -58,6 +59,12 @@
>>   
>>   #define ILK_CSC_POSTOFF_LIMITED_RANGE (16 * (1 << 12) / 255)
>>   
>> +enum icl_ms_gamma_segments {
>> +	ICL_MS_GAMMA_SEG_SUPERFINE,
>> +	ICL_MS_GAMMA_SEG_FINE,
>> +	ICL_MS_GAMMA_SEG_COARSE,
>> +};
>> +
>>   static const u16 ilk_csc_off_zero[3] = {};
>>   
>>   static const u16 ilk_csc_coeff_identity[9] = {
>> @@ -767,6 +774,149 @@ static void glk_load_luts(const struct intel_crtc_state *crtc_state)
>>   	}
>>   }
>>   
>> +/* ilk+ "12.4" interpolated format (high 10 bits) */
>> +static u32 ilk_lut_12p4_ldw(const struct drm_color_lut *color)
>> +{
>> +	return (color->red >> 6) << 20 | (color->green >> 6) << 10 |
>> +		(color->blue >> 6);
>> +}
>> +
>> +/* ilk+ "12.4" interpolated format (low 6 bits) */
>> +static u32 ilk_lut_12p4_udw(const struct drm_color_lut *color)
>> +{
>> +	return (color->red & 0x3f) << 24 | (color->green & 0x3f) << 14 |
>> +		(color->blue & 0x3f);
>> +}
>> +
>> +static void
>> +icl_program_gamma_gcmax(const struct intel_crtc_state *crtc_state)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	enum pipe pipe = crtc->pipe;
>> +
>> +	/*
>> +	 * Program the max register to clamp values > 1.0.
>> +	 * ToDo: Extend the ABI to be able to program values
>> +	 * from 1.0
>> +	 */
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 0), (1 << 16));
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 1), (1 << 16));
>> +	I915_WRITE(PREC_PAL_GC_MAX(pipe, 2), (1 << 16));
> This one I think we want to program based on the provide LUT. It's the
> last entry that still gets used in interpolation for <1.0 values.
> Or at least that's the way it works with the 12p4 mode IIRC. I don't
> actually remember how it goes with the multi segment mode.
I am a bit skeptical if its a good idea to program from the LUT, as you 
know, the drm_color_lut elements are 16bit per channel, and in order to 
program 1.0 we need 17 bits. So even if userspace wants, it cant set the 
value of 1.0 as of now. Also, wouldn't it would be good for the 
interpolation to have the accurate max value (1.0) of the curve ?
>> +
>> +	/*
>> +	 * Program the max register to clamp values > 1.0.
>> +	 * ToDo: Extend the ABI to be able to program values
>> +	 * from 1.0 to 3.0
>> +	 */
>> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 0), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 1), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT_GC_MAX(pipe, 2), (1 << 16));
>> +
>> +	/*
>> +	 * Program the gc max 2 register to clamp values > 1.0.
>> +	 * ToDo: Extend the ABI to be able to program values
>> +	 * from 3.0 to 7.0
>> +	 */
>> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 0), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 1), (1 << 16));
>> +	I915_WRITE(PREC_PAL_EXT2_GC_MAX(pipe, 2), (1 << 16));
> And these are just ivb_load_lut_10_max(). Might need to just
> s/10/ext/ in the name since I guess they apply to the 12p4 and
> multi segmment modes too.
Got it !
>
>> +}
>> +
>> +static void
>> +icl_program_gamma_multi_segment(const struct intel_crtc_state *crtc_state,
>> +				const struct drm_property_blob *blob,
>> +				enum icl_ms_gamma_segments segment)
>> +{
>> +	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
>> +	struct drm_i915_private *dev_priv = to_i915(crtc->base.dev);
>> +	const struct drm_color_lut *lut = blob->data;
>> +	enum pipe pipe = crtc->pipe;
>> +	u32 i, word, seg_size, seg_step, seg_start = 0;
>> +	i915_reg_t index_reg, data_reg;
>> +
>> +	if (!lut || (drm_color_lut_size(blob) < ICL_MULTISEG_LUT_LENGTH)) {
>> +		DRM_DEBUG_KMS("Not programming gamma segment, invalid input\n");
>> +		return;
>> +	}
>> +
>> +	/*
>> +	 * Every entry in the multi-segment LUT is corresponding to a superfine
>> +	 * segment step which is 1/(8*128*256).
>> +	 *
>> +	 * Superfine segment has 9 entries, corresponding to values
>> +	 * 0, 1/(8 * 128 * 256), 2/(8 * 128 * 256) .... 8/(8 * 128 * 256).
>> +	 *
>> +	 * Fine segment's step is 1/(128 * 256) ie 1/(128 * 256),  2/(128*256)
>> +	 * ... 256/(128*256). So in order to program fine segment of LUT we
>> +	 * need to pick every 8'th entry in LUT, and program 256 indexes.
>> +	 * Fine segment's index 0 is programmed in HW, and it starts from
>> +	 * index 1.
>> +	 *
>> +	 * Coarse segment's starts from index 0 and it's step is 1/256 ie 0,
>> +	 * 1/256, 2/256 ...256/256. As per the description of each entry in LUT
>> +	 * above, we need to pick every 8 * 128 = 1024th entry in LUT, and
>> +	 * program 256 of those.
>> +	 */
>> +
>> +	switch (segment) {
>> +	case ICL_MS_GAMMA_SEG_SUPERFINE:
>> +		seg_size = 9;
>> +		seg_step = 1;
>> +		index_reg = PREC_PAL_MULTI_SEG_INDEX(pipe);
>> +		data_reg = PREC_PAL_MULTI_SEG_DATA(pipe);
>> +		break;
>> +
>> +	case ICL_MS_GAMMA_SEG_FINE:
>> +		seg_start = 1;
>> +		seg_size = 256;
>> +		seg_step = 8;
>> +		index_reg = PREC_PAL_INDEX(pipe);
>> +		data_reg = PREC_PAL_DATA(pipe);
>> +		break;
>> +
>> +	case ICL_MS_GAMMA_SEG_COARSE:
>> +		seg_size = 256;
>> +		seg_step = 1024;
>> +		index_reg = PREC_PAL_INDEX(pipe);
>> +		data_reg = PREC_PAL_DATA(pipe);
>> +		break;
> I'm not convinced this this enum is actually helpful. I would
> probably just try to provide separate functions for the segments.
> Not sure if we could share the same code for the fine/coarse
> segments and just pass in the differences as parameters.
Ah, I started just like that, having two different functions, one for 
fine + coarse segment, other one for superfile segment, and then I saw 
much of code duplication, so I changed it this way. But if you think it 
would have been better that way, I can switch to the previous version.
>> +
>> +	default:
>> +		DRM_ERROR("Invalid gamma segment %d to program\n", segment);
>> +		return;
>> +	}
>> +
>> +	I915_WRITE(index_reg, PAL_PREC_AUTO_INCREMENT);
>> +
>> +	for (i = seg_start; i < seg_size; i++) {
>> +		/* Even Index */
> These comments don't really provide any value.
Ok
>
>> +		word = ilk_lut_12p4_udw(&lut[i * seg_step]);
> and the 'word' variable isn't particularly useful either.
> We might want a local varianble for the lut entry like we
> have in the ivb/bdw functions, if only to avoid repeating the
> arithmetic.
Sure, will do that.
>> +		I915_WRITE(data_reg, word);
>> +
>> +		/* Odd index */
>> +		word = ilk_lut_12p4_ldw(&lut[i * seg_step]);
>> +		I915_WRITE(data_reg, word);
>> +	}
>> +}
>> +
>> +static void
>> +icl_load_gamma_multi_seg_lut(const struct intel_crtc_state *crtc_state)
>> +{
>> +	const struct drm_property_blob *lut_blob = crtc_state->base.gamma_lut;
>> +
>> +	if (!lut_blob)
>> +		return;
>> +
>> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
>> +					ICL_MS_GAMMA_SEG_SUPERFINE);
>> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
>> +					ICL_MS_GAMMA_SEG_FINE);
>> +	icl_program_gamma_multi_segment(crtc_state, lut_blob,
>> +					ICL_MS_GAMMA_SEG_COARSE);
>> +	icl_program_gamma_gcmax(crtc_state);
>> +}
> And this I would just inline into icl_load_luts() just like the rest.
Got it !
>> +
>>   static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>   {
>>   	const struct drm_property_blob *gamma_lut = crtc_state->base.gamma_lut;
>> @@ -776,6 +926,9 @@ static void icl_load_luts(const struct intel_crtc_state *crtc_state)
>>   		glk_load_degamma_lut(crtc_state);
>>   
>>   	if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>> +		GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED) {
>> +		icl_load_gamma_multi_seg_lut(crtc_state);
> I'd put this last to keep things in consistent order. And probably want
> to use a switch statement now that we have three options.

Sure !

- Shashank

>> +	} else if ((crtc_state->gamma_mode & GAMMA_MODE_MODE_MASK) ==
>>   	    GAMMA_MODE_MODE_8BIT) {
>>   		i9xx_load_luts(crtc_state);
>>   	} else {
>> @@ -1187,7 +1340,7 @@ static u32 icl_gamma_mode(const struct intel_crtc_state *crtc_state)
>>   	    crtc_state_is_legacy_gamma(crtc_state))
>>   		gamma_mode |= GAMMA_MODE_MODE_8BIT;
>>   	else
>> -		gamma_mode |= GAMMA_MODE_MODE_10BIT;
>> +		gamma_mode |= GAMMA_MODE_MODE_12BIT_MULTI_SEGMENTED;
>>   
>>   	return gamma_mode;
>>   }
>> -- 
>> 2.17.1


More information about the Intel-gfx mailing list