[RFC PATCH v4 22/42] drm/vkms: Use s32 for internal color pipeline precision
Pekka Paalanen
pekka.paalanen at collabora.com
Tue Mar 12 15:50:09 UTC 2024
On Mon, 26 Feb 2024 16:10:36 -0500
Harry Wentland <harry.wentland at amd.com> wrote:
> Certain operations require us to preserve values below 0.0 and
> above 1.0 (0x0 and 0xffff respectively in 16 bpc unorm). One
> such operation is a BT709 encoding operation followed by its
> decoding operation, or the reverse.
>
> We'll use s32 values as intermediate in and outputs of our
> color operations, for the operations where it matters.
>
> For now this won't apply to LUT operations. We might want to
> update those to work on s32 as well, but it's unclear how
> that should work for unorm LUT definitions. We'll revisit
> that once we add LUT + CTM tests.
>
> In order to allow for this we'll also invert the nesting of our
> colorop processing loops. We now use the pixel iteration loop
> on the outside and the colorop iteration on the inside.
I always go through the same thought process:
- putting the pixel iteration loop outermost is going to be horrible
for performance
- maybe should turn the temporary line buffer to argb_s32 for all of
VKMS
- that's going to be a lot of memory traffic... maybe your way is not
horrible for performance
- maybe processing pixel by pixel is good, if you can prepare the
operation in advance, so you don't need to analyze colorops again and
again on each pixel
- eh, maybe it's not a gain after all, needs benchmarking
My comment on patch 17 was like the step 0 of this train of thought. I
just always get the same comments in my mind when seeing the same code
again.
Thanks,
pq
>
> v4:
> - Clarify that we're packing 16-bit UNORM into s32, not
> converting values to a different representation (Pekka)
>
> v3:
> - Use new colorop->next pointer
>
> Signed-off-by: Harry Wentland <harry.wentland at amd.com>
> ---
> drivers/gpu/drm/vkms/vkms_composer.c | 57 +++++++++++++++++++++-------
> drivers/gpu/drm/vkms/vkms_drv.h | 4 ++
> 2 files changed, 48 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/vkms/vkms_composer.c b/drivers/gpu/drm/vkms/vkms_composer.c
> index 25b786b49db0..d2101fa55aa3 100644
> --- a/drivers/gpu/drm/vkms/vkms_composer.c
> +++ b/drivers/gpu/drm/vkms/vkms_composer.c
> @@ -164,7 +164,7 @@ static void apply_lut(const struct vkms_crtc_state *crtc_state, struct line_buff
> }
> }
>
> -static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop *colorop)
> +static void apply_colorop(struct pixel_argb_s32 *pixel, struct drm_colorop *colorop)
> {
> /* TODO is this right? */
> struct drm_colorop_state *colorop_state = colorop->state;
> @@ -191,25 +191,56 @@ static void apply_colorop(struct pixel_argb_u16 *pixel, struct drm_colorop *colo
>
> static void pre_blend_color_transform(const struct vkms_plane_state *plane_state, struct line_buffer *output_buffer)
> {
> - struct drm_colorop *colorop = plane_state->base.base.color_pipeline;
> + struct drm_colorop *colorop;
> + struct pixel_argb_s32 pixel;
>
> - while (colorop) {
> - struct drm_colorop_state *colorop_state;
> + for (size_t x = 0; x < output_buffer->n_pixels; x++) {
>
> - if (!colorop)
> - return;
> + /*
> + * Some operations, such as applying a BT709 encoding matrix,
> + * followed by a decoding matrix, require that we preserve
> + * values above 1.0 and below 0.0 until the end of the pipeline.
> + *
> + * Pack the 16-bit UNORM values into s32 to give us head-room to
> + * avoid clipping until we're at the end of the pipeline. Clip
> + * intentionally at the end of the pipeline before packing
> + * UNORM values back into u16.
> + */
> + pixel.a = output_buffer->pixels[x].a;
> + pixel.r = output_buffer->pixels[x].r;
> + pixel.g = output_buffer->pixels[x].g;
> + pixel.b = output_buffer->pixels[x].b;
>
> - /* TODO this is probably wrong */
> - colorop_state = colorop->state;
> + colorop = plane_state->base.base.color_pipeline;
> + while (colorop) {
> + struct drm_colorop_state *colorop_state;
>
> - if (!colorop_state)
> - return;
> + if (!colorop)
> + return;
> +
> + /* TODO this is probably wrong */
> + colorop_state = colorop->state;
> +
> + if (!colorop_state)
> + return;
>
> - for (size_t x = 0; x < output_buffer->n_pixels; x++)
> if (!colorop_state->bypass)
> - apply_colorop(&output_buffer->pixels[x], colorop);
> + apply_colorop(&pixel, colorop);
>
> - colorop = colorop->next;
> + colorop = colorop->next;
> + }
> +
> + /* clamp pixel */
> + pixel.a = max(min(pixel.a, 0xffff), 0x0);
> + pixel.r = max(min(pixel.r, 0xffff), 0x0);
> + pixel.g = max(min(pixel.g, 0xffff), 0x0);
> + pixel.b = max(min(pixel.b, 0xffff), 0x0);
> +
> + /* put back to output_buffer */
> + output_buffer->pixels[x].a = pixel.a;
> + output_buffer->pixels[x].r = pixel.r;
> + output_buffer->pixels[x].g = pixel.g;
> + output_buffer->pixels[x].b = pixel.b;
> }
> }
>
> diff --git a/drivers/gpu/drm/vkms/vkms_drv.h b/drivers/gpu/drm/vkms/vkms_drv.h
> index 2bcc24c196a2..fadb7685a360 100644
> --- a/drivers/gpu/drm/vkms/vkms_drv.h
> +++ b/drivers/gpu/drm/vkms/vkms_drv.h
> @@ -36,6 +36,10 @@ struct vkms_frame_info {
> unsigned int cpp;
> };
>
> +struct pixel_argb_s32 {
> + s32 a, r, g, b;
> +};
> +
> struct pixel_argb_u16 {
> u16 a, r, g, b;
> };
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 833 bytes
Desc: OpenPGP digital signature
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20240312/eeb490d5/attachment.sig>
More information about the amd-gfx
mailing list