[PATCH v2] drm/vkms: Add support to 1D gamma LUT

Pekka Paalanen ppaalanen at gmail.com
Wed Jun 7 09:20:26 UTC 2023


On Tue,  6 Jun 2023 17:53:52 -0300
Arthur Grillo <arthurgrillo at riseup.net> wrote:

> Support a 1D gamma LUT with interpolation for each color channel on the
> VKMS driver. Add a check for the LUT length by creating
> vkms_atomic_check().
> 
> Tested with:
> igt at kms_color@gamma
> igt at kms_color@legacy-gamma
> igt at kms_color@invalid-gamma-lut-sizes
> 
> v2:
>     - Add interpolation between the values of the LUT (Simon Ser)
> 
> Signed-off-by: Arthur Grillo <arthurgrillo at riseup.net>
> ---
>  drivers/gpu/drm/vkms/vkms_composer.c | 97 ++++++++++++++++++++++++++++
>  drivers/gpu/drm/vkms/vkms_crtc.c     |  3 +
>  drivers/gpu/drm/vkms/vkms_drv.c      | 20 +++++-
>  drivers/gpu/drm/vkms/vkms_drv.h      |  2 +
>  4 files changed, 121 insertions(+), 1 deletion(-)
> 

Hi,

here are some casual suggestions that I hope are helpful, nothing more.

> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> index 906d3df40cdb..24bffd98ba49 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -6,6 +6,7 @@
>  #include <drm/drm_atomic_helper.h>
>  #include <drm/drm_blend.h>
>  #include <drm/drm_fourcc.h>
> +#include <drm/drm_fixed.h>
>  #include <drm/drm_gem_framebuffer_helper.h>
>  #include <drm/drm_vblank.h>
>  #include <linux/minmax.h>
> @@ -89,6 +90,100 @@ static void fill_background(const struct pixel_argb_u16 *background_color,
>  		output_buffer->pixels[i] = *background_color;
>  }
>  
> +// lerp(a, b, t) = a + (b - a) * t
> +static u16 lerp_u16(u16 a, u16 b, s64 t)
> +{
> +	s64 a_fp = drm_int2fixp(a);
> +	s64 b_fp = drm_int2fixp(b);
> +
> +	s64 ratio = drm_fixp_mul(b_fp - a_fp,  t);

This is more like a delta than a ratio. A ratio would be a unitless
x/y, but delta has the same units as a_fp.

> +
> +	return drm_fixp2int(a_fp + ratio);
> +}
> +
> +static s64 get_lut_index(u16 color_channel, size_t lut_length)
> +{
> +	const s64 max_lut_index_fp = drm_int2fixp(lut_length  - 1);
> +	const s64 u16_max_fp = drm_int2fixp(0xffff);
> +
> +	s64 ratio = drm_fixp_div(max_lut_index_fp, u16_max_fp);

This division is a constant for a specific LUT. Are you sure it won't
be calculated repeatedly for every transformed pixel component?

> +
> +	s64 color_channel_fp = drm_int2fixp(color_channel);
> +
> +	return drm_fixp_mul(color_channel_fp, ratio);

First this multiplication looked really strange, because "color
channel" is one of R, G or B, so likely 0, 1, or 2. But it's not color
channel, it is channel value.

> +}
> +
> +enum lut_area {
> +	LUT_RED,
> +	LUT_GREEN,
> +	LUT_BLUE,
> +	LUT_RESERVED
> +};
> +
> +static void apply_lut_to_color_channel(u16 *color_channel, enum lut_area area,
> +				       struct drm_color_lut *lut, size_t lut_length)

"u16 *color_channel" sounds like it is a pointer to a whole row of a
specific component of many pixels. I got confused, because I think
everything around here uses packed and not planar pixel layout, so it
looked really weird.

If you have nothing to return from a function otherwise, return the
computation result. In this case, pass the input value by-value, since
the indirection is not necessary. That makes the function easier to
read. Ideally the compiler will inline it anyway - a real function call
in innermost loop would be costly.

I suspect struct drm_color_lut and lut_length should be stored together
in a struct, so that you can also pre-compute and store 'ratio' in it.

> +{
> +	s64 ratio;
> +
> +	s64 lut_index = get_lut_index(*color_channel, lut_length);
> +
> +	size_t floor_index = drm_fixp2int(lut_index);
> +	size_t ceil_index = drm_fixp2int_ceil(lut_index);
> +
> +	struct drm_color_lut floor_lut_value = lut[floor_index];
> +	struct drm_color_lut ceil_lut_value = lut[ceil_index];
> +
> +	u16 floor_color_channel;
> +	u16 ceil_color_channel;
> +
> +	switch (area) {

Is it a good idea from performance perspective to do a switch like this
three times per pixel? I cannot guess what the compiler would do.

It would be possible to reinterpret drm_color_lut as a __u16[4], so you
could simply index into it instead of using 'if'. Or maybe the compiler
already does that, or something even smarter? Only benchmarking would
tell which form is better.

The reason I'm paying so much attention to performance here is that
while VKMS is expected to be a slow software implementation, it could
still be smarter instead of even slower than necessary. That usually
translates to structuring code such that the innermost loops will do
only the absolute minimum required work, and everything possible is
pre-computed. I don't think it would make the code too complicated,
either.


Thanks,
pq

> +	case LUT_RED:
> +		floor_color_channel = floor_lut_value.red;
> +		ceil_color_channel = ceil_lut_value.red;
> +		break;
> +	case LUT_GREEN:
> +		floor_color_channel = floor_lut_value.green;
> +		ceil_color_channel = ceil_lut_value.green;
> +		break;
> +	case LUT_BLUE:
> +		floor_color_channel = floor_lut_value.blue;
> +		ceil_color_channel = ceil_lut_value.blue;
> +		break;
> +	case LUT_RESERVED:
> +		floor_color_channel = floor_lut_value.reserved;
> +		ceil_color_channel = ceil_lut_value.reserved;
> +		break;
> +	}
> +
> +	ratio = lut_index - drm_int2fixp(floor_index);
> +
> +	*color_channel = lerp_u16(floor_color_channel, ceil_color_channel, ratio);
> +}
> +
> +static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buffer *output_buffer)
> +{
> +	struct drm_color_lut *lut;
> +	size_t lut_length;
> +
> +	if (!crtc_state->base.gamma_lut)
> +		return;
> +
> +	lut = (struct drm_color_lut *)crtc_state->base.gamma_lut->data;
> +
> +	lut_length = crtc_state->base.gamma_lut->length / sizeof(*lut);
> +
> +	if (!lut_length)
> +		return;
> +
> +	for (size_t x = 0; x < output_buffer->n_pixels; x++) {
> +		struct pixel_argb_u16 *pixel = &output_buffer->pixels[x];
> +
> +		apply_lut_to_color_channel(&pixel->r, LUT_RED, lut, lut_length);
> +		apply_lut_to_color_channel(&pixel->g, LUT_GREEN, lut, lut_length);
> +		apply_lut_to_color_channel(&pixel->b, LUT_BLUE, lut, lut_length);
> +	}
> +}
> +
>  /**
>   * @wb_frame_info: The writeback frame buffer metadata
>   * @crtc_state: The crtc state
> @@ -128,6 +223,8 @@ static void blend(struct vkms_writeback_job *wb,
>  					    output_buffer);
>  		}
>  
> +		apply_lut(crtc_state, output_buffer);
> +
>  		*crc32 = crc32_le(*crc32, (void *)output_buffer->pixels, row_size);
>  
>  		if (wb)
> diff --git a/drivers/gpu/drm/vkms/vkms_crtc.c b/drivers/gpu/drm/vkms/vkms_crtc.c
> index 515f6772b866..61e500b8c9da 100644
> --- a/drivers/gpu/drm/vkms/vkms_crtc.c
> +++ b/drivers/gpu/drm/vkms/vkms_crtc.c
> @@ -290,6 +290,9 @@ int vkms_crtc_init(struct drm_device *dev, struct drm_crtc *crtc,
>  
>  	drm_crtc_helper_add(crtc, &vkms_crtc_helper_funcs);
>  
> +	drm_mode_crtc_set_gamma_size(crtc, VKMS_LUT_SIZE);
> +	drm_crtc_enable_color_mgmt(crtc, 0, false, VKMS_LUT_SIZE);
> +
>  	spin_lock_init(&vkms_out->lock);
>  	spin_lock_init(&vkms_out->composer_lock);
>  
> diff --git a/drivers/gpu/drm/vkms/vkms_drv.c b/drivers/gpu/drm/vkms/vkms_drv.c
> index e3c9c9571c8d..dd0af086e7fa 100644
> --- a/drivers/gpu/drm/vkms/vkms_drv.c
> +++ b/drivers/gpu/drm/vkms/vkms_drv.c
> @@ -120,9 +120,27 @@ static const struct drm_driver vkms_driver = {
>  	.minor			= DRIVER_MINOR,
>  };
>  
> +static int vkms_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
> +{
> +	struct drm_crtc *crtc;
> +	struct drm_crtc_state *new_crtc_state;
> +	int i;
> +
> +	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
> +		if (!new_crtc_state->gamma_lut || !new_crtc_state->color_mgmt_changed)
> +			continue;
> +
> +		if (new_crtc_state->gamma_lut->length / sizeof(struct drm_color_lut *)
> +		    > VKMS_LUT_SIZE)
> +			return -EINVAL;
> +	}
> +
> +	return drm_atomic_helper_check(dev, state);
> +}
> +
>  static const struct drm_mode_config_funcs vkms_mode_funcs = {
>  	.fb_create = drm_gem_fb_create,
> -	.atomic_check = drm_atomic_helper_check,
> +	.atomic_check = vkms_atomic_check,
>  	.atomic_commit = drm_atomic_helper_commit,
>  };
>  
> diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
> index 5f1a0a44a78c..a3b7025c1b9a 100644
> --- a/drivers/gpu/drm/vkms/vkms_drv.h
> +++ b/drivers/gpu/drm/vkms/vkms_drv.h
> @@ -23,6 +23,8 @@
>  
>  #define NUM_OVERLAY_PLANES 8
>  
> +#define VKMS_LUT_SIZE 256
> +
>  struct vkms_frame_info {
>  	struct drm_framebuffer *fb;
>  	struct drm_rect src, dst;

-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/dri-devel/attachments/20230607/f7743a35/attachment.sig>


More information about the dri-devel mailing list