[Mesa-dev] [RFC PATCH 3/7] gallium/vl: Incorporate the bicubic filter in the compositor v2
Thomas Hellstrom
thellstrom at vmware.com
Thu Mar 2 20:00:07 UTC 2017
Import the bicubic filter fragment shader into the compositor, and modify
it to use shader uniform input instead of hardcoding video- and
destination dimensions. This will help enable correct handling of compositor
features such as blending, rotation and additional overlays. It will also save
a bunch of duplicated vertex setup code once the original implementation
is removed.
v2: Addressed review comments by Sinclair Yeh.
Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
Reviewed-by: Sinclair Yeh <syeh at vmware.com>
---
src/gallium/auxiliary/vl/vl_compositor.c | 355 ++++++++++++++++++++++++++++++-
src/gallium/auxiliary/vl/vl_compositor.h | 24 +++
2 files changed, 378 insertions(+), 1 deletion(-)
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 693d685..c9e1613 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1,6 +1,8 @@
/**************************************************************************
*
* Copyright 2009 Younes Manton.
+ * Copyright 2016 Nayan Deshmukh.
+ * Copyright 2016 VMWare Inc.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -43,6 +45,8 @@
#define MIN_DIRTY (0)
#define MAX_DIRTY (1 << 15)
+/*
+ * Constant buffer index for the bicubic scaler: the index the bicubic
+ * fragment shader addresses its constants through, and the index
+ * bind_constants() binds the bicubic constant buffer to.
+ */
+#define VL_BICUBIC_CB 1
enum VS_OUTPUT
{
@@ -424,9 +428,215 @@ create_frag_shader_rgba(struct vl_compositor *c)
return ureg_create_shader_and_destroy(shader, c->pipe);
}
+/**
+ * \brief Emit the instructions for one cubic interpolation
+ *
+ * Appends code to \p shader computing
+ *
+ *    o_fragment = 0.5 * (p0 + p1 * t + p2 * t^2 + p3 * t^3)
+ *
+ * where p0..p3 are the weighted sums of the four inputs given by the weight
+ * table in the body (the Catmull-Rom basis).
+ *
+ * \param shader[in,out] The program the instructions are appended to.
+ * \param tex_a[in] First of the four values interpolated between.
+ * \param tex_b[in] Second value.
+ * \param tex_c[in] Third value.
+ * \param tex_d[in] Fourth value.
+ * \param t[in] The interpolation parameter.
+ * \param o_fragment[out] Destination register of the result.
+ */
+static void
+create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a,
+                                      struct ureg_src tex_b, struct ureg_src tex_c,
+                                      struct ureg_src tex_d, struct ureg_src t,
+                                      struct ureg_dst o_fragment)
+{
+   /*
+    * |p0|   |  0  2  0  0 |   |tex_a|
+    * |p1| = | -1  0  1  0 | * |tex_b|
+    * |p2|   |  2 -5  4 -1 |   |tex_c|
+    * |p3|   | -1  3 -3  1 |   |tex_d|
+    */
+   static const float weights[4][4] = {
+      {  0.0f,  2.0f,  0.0f,  0.0f },
+      { -1.0f,  0.0f,  1.0f,  0.0f },
+      {  2.0f, -5.0f,  4.0f, -1.0f },
+      { -1.0f,  3.0f, -3.0f,  1.0f },
+   };
+   struct ureg_src texels[4];
+   struct ureg_dst scratch[11];
+   struct ureg_dst t_squared;
+   unsigned row, col, n;
+
+   texels[0] = tex_a;
+   texels[1] = tex_b;
+   texels[2] = tex_c;
+   texels[3] = tex_d;
+
+   for (n = 0; n < 11; ++n)
+      scratch[n] = ureg_DECL_temporary(shader);
+   t_squared = ureg_DECL_temporary(shader);
+
+   /*
+    * scratch[row] = p<row>. Zero weights emit nothing; the first non-zero
+    * weight of a row starts the sum with a MUL and the rest accumulate
+    * with MADs.
+    */
+   for (row = 0; row < 4; ++row) {
+      bool started = false;
+
+      for (col = 0; col < 4; ++col) {
+         const float w = weights[row][col];
+
+         if (w == 0.0f)
+            continue;
+
+         if (!started) {
+            ureg_MUL(shader, scratch[row], texels[col],
+                     ureg_imm1f(shader, w));
+            started = true;
+         } else {
+            ureg_MAD(shader, scratch[row], texels[col],
+                     ureg_imm1f(shader, w), ureg_src(scratch[row]));
+         }
+      }
+   }
+
+   /* t_squared = t^2, scratch[4] = t^3 */
+   ureg_MUL(shader, t_squared, t, t);
+   ureg_MUL(shader, scratch[4], ureg_src(t_squared), t);
+
+   /* The four terms of the polynomial: scratch[4..7] = p3*t^3, p2*t^2, p1*t, p0 */
+   ureg_MUL(shader, scratch[4], ureg_src(scratch[4]), ureg_src(scratch[3]));
+   ureg_MUL(shader, scratch[5], ureg_src(t_squared), ureg_src(scratch[2]));
+   ureg_MUL(shader, scratch[6], t, ureg_src(scratch[1]));
+   ureg_MUL(shader, scratch[7], ureg_imm1f(shader, 1.0f), ureg_src(scratch[0]));
+
+   /* Sum the terms pairwise, then apply the common factor 0.5 */
+   ureg_ADD(shader, scratch[8], ureg_src(scratch[4]), ureg_src(scratch[5]));
+   ureg_ADD(shader, scratch[9], ureg_src(scratch[6]), ureg_src(scratch[7]));
+   ureg_ADD(shader, scratch[10], ureg_src(scratch[8]), ureg_src(scratch[9]));
+   ureg_MUL(shader, o_fragment, ureg_src(scratch[10]),
+            ureg_imm1f(shader, 0.5f));
+
+   for (n = 0; n < 11; ++n)
+      ureg_release_temporary(shader, scratch[n]);
+   ureg_release_temporary(shader, t_squared);
+}
+
+/**
+ * \brief Create the bicubic interpolation fragment shader
+ *
+ * The shader reads its constants through constant buffer index VL_BICUBIC_CB:
+ * element 0 carries half_pixel in .xy and the video size in .zw, and each of
+ * the following VL_BICUBIC_OFFSETS / 2 elements carries two texel offsets,
+ * one in .xy and one in .zw.
+ *
+ * \param c[in,out] The compositor.
+ * \return A pointer to the shader, or NULL if creation failed.
+ */
+static void *
+create_frag_shader_bicubic_rgba(struct vl_compositor *c)
+{
+   struct pipe_screen *screen = c->pipe->screen;
+   struct ureg_program *shader;
+   struct ureg_src i_vtex, vtex;
+   struct ureg_src sampler;
+   struct ureg_src half_pixel;
+   struct ureg_src video_size;
+   struct ureg_dst t_array[23];
+   struct ureg_dst o_fragment;
+   struct ureg_dst t;
+   unsigned i;
+
+   /*
+    * NOTE(review): this function declares 24 temporaries (t_array[] and t)
+    * and every cubic interpolater call declares 12 more while it runs, so
+    * 23 looks like an under-estimate of what the shader needs. Confirm
+    * before tightening: init_shaders() fails when this returns NULL.
+    */
+   if (screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
+                                PIPE_SHADER_CAP_MAX_TEMPS) < 23)
+      return NULL;
+
+   shader = ureg_create(PIPE_SHADER_FRAGMENT);
+   if (!shader)
+      return NULL;
+
+   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX,
+                               TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT);
+
+   for (i = 0; i < 23; ++i)
+      t_array[i] = ureg_DECL_temporary(shader);
+   t = ureg_DECL_temporary(shader);
+
+   /* All VL_BICUBIC_FLOATS constants, four floats per element. */
+   ureg_DECL_constant2D(shader, 0, VL_BICUBIC_FLOATS / 4 - 1, VL_BICUBIC_CB);
+   half_pixel = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
+                                   VL_BICUBIC_CB);
+   /* video_size must be taken before half_pixel is narrowed to .xy below. */
+   video_size = ureg_swizzle(half_pixel, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W);
+   half_pixel = ureg_swizzle(half_pixel, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y);
+
+   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * temp = (i_vtex - 0.5/dst_size) * i_size
+    * t = frac(temp)
+    * vtex = floor(i_vtex)/i_size
+    * (FIXME: The code actually computes
+    * vtex = floor(temp)/i_size + 0.5/dst_size
+    * Need to figure out why.)
+    */
+   ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY),
+            i_vtex, ureg_negate(half_pixel));
+   ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[21]), video_size);
+   ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]));
+
+   ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]));
+   ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]), video_size);
+   ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]), half_pixel);
+
+   /*
+    * t_array[0..*] = vtex + offset[0..*]
+    * t_array[0..*] = tex(t_array[0..*], sampler)
+    * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
+    * o_fragment = cubic_interpolate(t_array[16..19], t_y)
+    */
+   vtex = ureg_src(t_array[22]);
+   for (i = 0; i < VL_BICUBIC_OFFSETS; i += 2) {
+      /*
+       * Offsets start after the VL_BICUBIC_OTHER leading two-float slots;
+       * each constant element packs offset i in .xy and offset i + 1 in .zw.
+       */
+      struct ureg_src offset =
+         ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT,
+                                              VL_BICUBIC_OTHER / 2 + i / 2),
+                            VL_BICUBIC_CB);
+
+      ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
+               vtex, ureg_swizzle(offset, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
+               ureg_imm1f(shader, 0.0f));
+      ureg_ADD(shader, ureg_writemask(t_array[i + 1], TGSI_WRITEMASK_XY),
+               vtex, ureg_swizzle(offset, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                                  TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W));
+      ureg_MOV(shader, ureg_writemask(t_array[i + 1], TGSI_WRITEMASK_ZW),
+               ureg_imm1f(shader, 0.0f));
+   }
+
+   /* Replace each coordinate with the texel sampled at it. */
+   for (i = 0; i < VL_BICUBIC_OFFSETS; ++i) {
+      ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler);
+   }
+
+   /* Interpolate each group of four texels using t.x ... */
+   for (i = 0; i < 4; ++i)
+      create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]),
+                                            ureg_src(t_array[4*i+1]),
+                                            ureg_src(t_array[4*i+2]),
+                                            ureg_src(t_array[4*i+3]),
+                                            ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X),
+                                            t_array[16+i]);
+
+   /* ... then interpolate the four intermediate results using t.y. */
+   create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]),
+                                         ureg_src(t_array[17]),
+                                         ureg_src(t_array[18]),
+                                         ureg_src(t_array[19]),
+                                         ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y),
+                                         o_fragment);
+
+   for (i = 0; i < 23; ++i)
+      ureg_release_temporary(shader, t_array[i]);
+   ureg_release_temporary(shader, t);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+
static bool
init_shaders(struct vl_compositor *c)
{
+ struct vertex2f *offsets;
assert(c);
c->vs = create_vert_shader(c);
@@ -472,6 +682,33 @@ init_shaders(struct vl_compositor *c)
return false;
}
+ c->bicubic.fs_rgba = create_frag_shader_bicubic_rgba(c);
+ if (!c->bicubic.fs_rgba) {
+ debug_printf("Unable to create bicubic fragment shader.\n");
+ return false;
+ }
+
+ offsets = c->bicubic.offsets;
+ offsets[0].x = -1.0f; offsets[0].y = -1.0f;
+ offsets[1].x = 0.0f; offsets[1].y = -1.0f;
+ offsets[2].x = 1.0f; offsets[2].y = -1.0f;
+ offsets[3].x = 2.0f; offsets[3].y = -1.0f;
+
+ offsets[4].x = -1.0f; offsets[4].y = 0.0f;
+ offsets[5].x = 0.0f; offsets[5].y = 0.0f;
+ offsets[6].x = 1.0f; offsets[6].y = 0.0f;
+ offsets[7].x = 2.0f; offsets[7].y = 0.0f;
+
+ offsets[8].x = -1.0f; offsets[8].y = 1.0f;
+ offsets[9].x = 0.0f; offsets[9].y = 1.0f;
+ offsets[10].x = 1.0f; offsets[10].y = 1.0f;
+ offsets[11].x = 2.0f; offsets[11].y = 1.0f;
+
+ offsets[12].x = -1.0f; offsets[12].y = 2.0f;
+ offsets[13].x = 0.0f; offsets[13].y = 2.0f;
+ offsets[14].x = 1.0f; offsets[14].y = 2.0f;
+ offsets[15].x = 2.0f; offsets[15].y = 2.0f;
+
return true;
}
@@ -487,6 +724,7 @@ static void cleanup_shaders(struct vl_compositor *c)
c->pipe->delete_fs_state(c->pipe, c->fs_palette.yuv);
c->pipe->delete_fs_state(c->pipe, c->fs_palette.rgb);
c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
+ c->pipe->delete_fs_state(c->pipe, c->bicubic.fs_rgba);
}
static bool
@@ -847,6 +1085,66 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u
u_upload_unmap(c->pipe->stream_uploader);
}
+/**
+ * \brief Update and bind constant- / uniform buffers before rendering a layer
+ *
+ * Does nothing unless the layer uses the bicubic fragment shader. For such a
+ * layer the bicubic constants are recomputed, uploaded if they differ from
+ * the shadow copy in the state bucket, and the buffer is bound at
+ * VL_BICUBIC_CB for the fragment shader stage.
+ *
+ * \param c[in,out] The compositor.
+ * \param s[in,out] The compositor state bucket.
+ * \param layer[in,out] The layer about to be rendered. If the upload fails,
+ *                      its shader and first sampler are switched to the
+ *                      plain RGBA shader with linear filtering.
+ */
+static void
+bind_constants(struct vl_compositor *c, struct vl_compositor_state *s,
+               struct vl_compositor_layer *layer)
+{
+   if (layer->fs == c->bicubic.fs_rgba) {
+      unsigned i;
+      float constants[VL_BICUBIC_FLOATS];
+      float width = layer->sampler_views[0]->texture->width0;
+      float height = layer->sampler_views[0]->texture->height0;
+
+      memset(constants, 0, sizeof(constants));
+
+      /* Leading slots: half pixel in x / y, then the source texture size. */
+      constants[0] = 0.5f / layer->viewport.scale[0];
+      constants[1] = 0.5f / layer->viewport.scale[1];
+      constants[2] = width;
+      constants[3] = height;
+
+      /* Per-texel offsets, divided by the source texture size. */
+      for (i = 0; i < VL_BICUBIC_OFFSETS; ++i) {
+         constants[2 * (VL_BICUBIC_OTHER + i) + 0] =
+            c->bicubic.offsets[i].x / width;
+         constants[2 * (VL_BICUBIC_OTHER + i) + 1] =
+            c->bicubic.offsets[i].y / height;
+      }
+
+      /*
+       * Update the constant buffer only if it changed. The whole array must
+       * be compared: the "!= 0" belongs outside the memcmp() call, otherwise
+       * the size argument collapses to 1 and stale constants get reused.
+       */
+      if (memcmp(constants, s->bicubic.constants, sizeof(constants)) != 0) {
+         struct pipe_context *pipe = s->pipe;
+         struct pipe_box box;
+         struct pipe_transfer *transfer;
+         float *map;
+
+         u_box_1d(0, sizeof(constants), &box);
+         map = pipe->transfer_map(pipe, s->bicubic.constbuf, 0,
+                                  PIPE_TRANSFER_WRITE |
+                                  PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+                                  &box, &transfer);
+         if (map) {
+            memcpy(map, constants, sizeof(constants));
+            /* Refresh the shadow copy only once the upload has happened. */
+            memcpy(s->bicubic.constants, constants, sizeof(constants));
+            pipe_transfer_unmap(pipe, transfer);
+         } else {
+            /*
+             * Upload fail will corrupt the output.
+             * Revert back to linear scaling
+             */
+            layer->fs = c->fs_rgba;
+            layer->samplers[0] = c->sampler_linear;
+         }
+      }
+      pipe_set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, VL_BICUBIC_CB,
+                               s->bicubic.constbuf);
+   }
+}
+
static void
draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rect *dirty)
{
@@ -860,7 +1158,7 @@ draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rec
struct pipe_sampler_view **samplers = &layer->sampler_views[0];
unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
void *blend = layer->blend ? layer->blend : i ? c->blend_add : c->blend_clear;
-
+ bind_constants(c, s, layer);
c->pipe->bind_blend_state(c->pipe, blend);
c->pipe->set_viewport_states(c->pipe, 0, 1, &layer->viewport);
c->pipe->bind_fs_state(c->pipe, layer->fs);
@@ -1131,6 +1429,42 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *s,
s->layers[layer].colors[i] = colors[i];
}
+/**
+ * \brief Set an RGBA layer which uses bicubic scaling.
+ *
+ * The layer is marked as used, gets the bicubic fragment shader and the
+ * nearest sampler for its first sampler slot, and takes a reference on
+ * \p rgba as its only sampler view.
+ *
+ * \param s[in,out] The compositor state bucket.
+ * \param c[in,out] The compositor.
+ * \param layer[in] The layer number.
+ * \param rgba[in] The sampler view of the RGBA texture to be sampled.
+ * \param src_rect[in] The source area rectangle, in source coordinates,
+ * of the area to be composited, or NULL if whole area.
+ * \param dst_rect[in] The destination rectangle, or NULL to use the layer's
+ * default rectangle.
+ */
+void
+vl_compositor_set_bicubic_rgba_layer(struct vl_compositor_state *s,
+                                     struct vl_compositor *c,
+                                     unsigned layer,
+                                     struct pipe_sampler_view *rgba,
+                                     struct u_rect *src_rect,
+                                     struct u_rect *dst_rect)
+{
+   struct vl_compositor_layer *target;
+
+   assert(s && c && rgba);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+
+   target = &s->layers[layer];
+   s->used_layers |= 1 << layer;
+
+   /* The shader does the filtering itself, so texels are fetched unfiltered. */
+   target->fs = c->bicubic.fs_rgba;
+   target->samplers[0] = c->sampler_nearest;
+   target->samplers[1] = NULL;
+   target->samplers[2] = NULL;
+
+   /* Only the first view is used; drop whatever the other two referenced. */
+   pipe_sampler_view_reference(&target->sampler_views[0], rgba);
+   pipe_sampler_view_reference(&target->sampler_views[1], NULL);
+   pipe_sampler_view_reference(&target->sampler_views[2], NULL);
+
+   calc_src_and_dst(target,
+                    rgba->texture->width0, rgba->texture->height0,
+                    src_rect ? *src_rect : default_rect(target),
+                    dst_rect ? *dst_rect : default_rect(target));
+}
+
void
vl_compositor_set_layer_rotation(struct vl_compositor_state *s,
unsigned layer,
@@ -1267,6 +1601,8 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
PIPE_USAGE_DEFAULT,
sizeof(csc_matrix) + 2*sizeof(float)
);
+ if (!s->csc_matrix)
+ return false;
if (!s->csc_matrix)
return false;
@@ -1277,7 +1613,23 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
if (!vl_compositor_set_csc_matrix(s, (const vl_csc_matrix *)&csc_matrix, 1.0f, 0.0f))
return false;
+ s->bicubic.constbuf = pipe_buffer_create(pipe->screen,
+ PIPE_BIND_CONSTANT_BUFFER,
+ PIPE_USAGE_DEFAULT,
+ VL_BICUBIC_FLOATS * sizeof(float));
+ if (!s->bicubic.constbuf)
+ goto out_bicubic;
+
+ memset(s->bicubic.constants, 0, sizeof(s->bicubic.constants));
+ pipe_buffer_write(s->pipe, s->bicubic.constbuf, 0,
+ sizeof(s->bicubic.constants), s->bicubic.constants);
+
return true;
+
+out_bicubic:
+ pipe_resource_reference(&s->csc_matrix, NULL);
+
+ return false;
}
void
@@ -1287,4 +1639,5 @@ vl_compositor_cleanup_state(struct vl_compositor_state *s)
vl_compositor_clear_layers(s);
pipe_resource_reference(&s->csc_matrix, NULL);
+ pipe_resource_reference(&s->bicubic.constbuf, NULL);
}
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 535abb7..65acad1 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -44,6 +44,9 @@ struct pipe_context;
*/
#define VL_COMPOSITOR_MAX_LAYERS 16
+/* number of texel offsets the bicubic scaler samples at (a 4x4 neighbourhood) */
+#define VL_BICUBIC_OFFSETS 16
+/* number of two-float slots preceding the offsets in the bicubic constants */
+#define VL_BICUBIC_OTHER 2
+/* total number of floats in the bicubic constants, two per slot / offset */
+#define VL_BICUBIC_FLOATS ((VL_BICUBIC_OTHER + VL_BICUBIC_OFFSETS) * 2)
/* deinterlace allgorithem */
enum vl_compositor_deinterlace
@@ -94,6 +97,12 @@ struct vl_compositor_state
unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS;
struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
+ struct {
+ /** \brief Shadow copy of bicubic scaler fragment shader constants */
+ float constants[VL_BICUBIC_FLOATS];
+ /** \brief The bicubic scaler constant buffer */
+ struct pipe_resource *constbuf;
+ } bicubic;
};
struct vl_compositor
@@ -114,6 +123,12 @@ struct vl_compositor
void *fs_video_buffer;
void *fs_weave_rgb;
void *fs_rgba;
+ struct {
+ /** \brief Pixel offsets for bicubic scaler */
+ struct vertex2f offsets[VL_BICUBIC_OFFSETS];
+ /** \brief Bicubic scaler fragment shader */
+ void *fs_rgba;
+ } bicubic;
struct {
void *y;
@@ -232,6 +247,15 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *state,
struct u_rect *dst_rect,
struct vertex4f *colors);
+/**
+ * set a layer to render an RGBA sampler view with bicubic scaling;
+ * src_rect and dst_rect may be NULL to use the layer's default rectangle
+ */
+void
+vl_compositor_set_bicubic_rgba_layer(struct vl_compositor_state *s,
+ struct vl_compositor *c,
+ unsigned layer,
+ struct pipe_sampler_view *rgba,
+ struct u_rect *src_rect,
+ struct u_rect *dst_rect);
+
+
/**
* set the layer rotation
*/
--
2.4.11
More information about the mesa-dev
mailing list