[Mesa-dev] [RFC PATCH 3/7] gallium/vl: Incorporate the bicubic filter in the compositor v2

Thomas Hellstrom thellstrom at vmware.com
Thu Mar 2 20:00:07 UTC 2017


Import the bicubic filter fragment shader into the compositor, and modify
it to use shader uniform input instead of hardcoding video- and
destination dimensions. This will help enable correct handling of compositor
features such as blending, rotation and additional overlays. It will also save
a bunch of duplicated vertex setup code once the original implementation
is removed.

v2: Addressed review comments by Sinclair Yeh.

Signed-off-by: Thomas Hellstrom <thellstrom at vmware.com>
Reviewed-by: Sinclair Yeh <syeh at vmware.com>
---
 src/gallium/auxiliary/vl/vl_compositor.c | 355 ++++++++++++++++++++++++++++++-
 src/gallium/auxiliary/vl/vl_compositor.h |  24 +++
 2 files changed, 378 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index 693d685..c9e1613 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -1,6 +1,8 @@
 /**************************************************************************
  *
  * Copyright 2009 Younes Manton.
+ * Copyright 2016 Nayan Deshmukh.
+ * Copyright 2016 VMWare Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -43,6 +45,8 @@
 
 #define MIN_DIRTY (0)
 #define MAX_DIRTY (1 << 15)
+/* Constant buffer index for the bicubic scaler */
+#define VL_BICUBIC_CB 1
 
 enum VS_OUTPUT
 {
@@ -424,9 +428,215 @@ create_frag_shader_rgba(struct vl_compositor *c)
    return ureg_create_shader_and_destroy(shader, c->pipe);
 }
 
+static void
+create_frag_shader_cubic_interpolater(struct ureg_program *shader, struct ureg_src tex_a,
+                                      struct ureg_src tex_b, struct ureg_src tex_c,
+                                      struct ureg_src tex_d, struct ureg_src t,
+                                      struct ureg_dst o_fragment)
+{
+   struct ureg_dst temp[11];
+   struct ureg_dst t_2;
+   unsigned i;
+   /* Emit code writing the cubic blend of tex_a..tex_d at t to o_fragment. */
+   for(i = 0; i < 11; ++i)
+      temp[i] = ureg_DECL_temporary(shader);
+   t_2 = ureg_DECL_temporary(shader);
+
+   /* Rows of the Catmull-Rom basis matrix applied to the four samples:
+    * |temp[0]|   |  0  2  0  0 |  |tex_a|
+    * |temp[1]| = | -1  0  1  0 |* |tex_b|
+    * |temp[2]|   |  2 -5  4 -1 |  |tex_c|
+    * |temp[3]|   | -1  3 -3  1 |  |tex_d|
+    */
+   ureg_MUL(shader, temp[0], tex_b, ureg_imm1f(shader, 2.0f));
+
+   ureg_MUL(shader, temp[1], tex_a, ureg_imm1f(shader, -1.0f));
+   ureg_MAD(shader, temp[1], tex_c, ureg_imm1f(shader, 1.0f),
+            ureg_src(temp[1]));
+
+   ureg_MUL(shader, temp[2], tex_a, ureg_imm1f(shader, 2.0f));
+   ureg_MAD(shader, temp[2], tex_b, ureg_imm1f(shader, -5.0f),
+            ureg_src(temp[2]));
+   ureg_MAD(shader, temp[2], tex_c, ureg_imm1f(shader, 4.0f),
+            ureg_src(temp[2]));
+   ureg_MAD(shader, temp[2], tex_d, ureg_imm1f(shader, -1.0f),
+            ureg_src(temp[2]));
+
+   ureg_MUL(shader, temp[3], tex_a, ureg_imm1f(shader, -1.0f));
+   ureg_MAD(shader, temp[3], tex_b, ureg_imm1f(shader, 3.0f),
+            ureg_src(temp[3]));
+   ureg_MAD(shader, temp[3], tex_c, ureg_imm1f(shader, -3.0f),
+            ureg_src(temp[3]));
+   ureg_MAD(shader, temp[3], tex_d, ureg_imm1f(shader, 1.0f),
+            ureg_src(temp[3]));
+
+   /* Evaluate the polynomial in t; temp[4..10] hold terms and partial sums:
+    * t_2 = t*t
+    * o_fragment = 0.5*|1  t  t^2  t^3|*|temp[0]|
+    *                                   |temp[1]|
+    *                                   |temp[2]|
+    *                                   |temp[3]|
+    */
+
+   ureg_MUL(shader, t_2, t, t);
+   ureg_MUL(shader, temp[4], ureg_src(t_2), t);
+
+   ureg_MUL(shader, temp[4], ureg_src(temp[4]),
+            ureg_src(temp[3]));
+   ureg_MUL(shader, temp[5], ureg_src(t_2),
+            ureg_src(temp[2]));
+   ureg_MUL(shader, temp[6], t,
+            ureg_src(temp[1]));
+   ureg_MUL(shader, temp[7], ureg_imm1f(shader, 1.0f),
+            ureg_src(temp[0]));
+   ureg_ADD(shader, temp[8], ureg_src(temp[4]),
+            ureg_src(temp[5]));
+   ureg_ADD(shader, temp[9], ureg_src(temp[6]),
+            ureg_src(temp[7]));
+
+   ureg_ADD(shader, temp[10], ureg_src(temp[8]),
+            ureg_src(temp[9]));
+   ureg_MUL(shader, o_fragment, ureg_src(temp[10]),
+            ureg_imm1f(shader, 0.5f));
+
+   /* Release every temporary declared by this helper. */
+   for(i = 0; i < 11; ++i)
+      ureg_release_temporary(shader, temp[i]);
+   ureg_release_temporary(shader, t_2);
+}
+
+/**
+ * \brief Create the bicubic interpolation fragment shader for RGBA layers.
+ *
+ * \param[in] c  The compositor; its pipe context creates the shader.
+ * \return  The shader, or NULL on failure or if too few temporaries exist.
+ */
+static void *
+create_frag_shader_bicubic_rgba(struct vl_compositor *c)
+{
+   struct pipe_screen *screen = c->pipe->screen;
+   struct ureg_program *shader;
+   struct ureg_src i_vtex, vtex;
+   struct ureg_src sampler;
+   struct ureg_src half_pixel;
+   struct ureg_src video_size;
+   struct ureg_dst t_array[23];
+   struct ureg_dst o_fragment;
+   struct ureg_dst t;
+   unsigned i;
+
+   if (screen->get_shader_param(
+          screen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_TEMPS) < 23) {
+      /* NOTE(review): 23 covers t_array only; "t" and helper temps add more. */
+      return NULL;
+   }
+
+   shader = ureg_create(PIPE_SHADER_FRAGMENT);
+   if (!shader) {
+      return NULL;
+   }
+
+   i_vtex = ureg_DECL_fs_input(shader, TGSI_SEMANTIC_GENERIC, VS_O_VTEX,
+                               TGSI_INTERPOLATE_LINEAR);
+   sampler = ureg_DECL_sampler(shader, 0);
+   ureg_DECL_sampler_view(shader, 0, TGSI_TEXTURE_2D,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT,
+                          TGSI_RETURN_TYPE_FLOAT);
+
+   for (i = 0; i < 23; ++i)
+      t_array[i] = ureg_DECL_temporary(shader);
+   t = ureg_DECL_temporary(shader);
+   /* c[0] = (half_pixel.xy, video_size.xy); c[1..8] = two tap offsets each. */
+   ureg_DECL_constant2D(shader, 0, VL_BICUBIC_FLOATS / 4 - 1, VL_BICUBIC_CB);
+   half_pixel = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 0),
+                                   VL_BICUBIC_CB);
+   video_size = ureg_swizzle(half_pixel, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                             TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W);
+   half_pixel = ureg_swizzle(half_pixel, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                             TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y);
+
+   o_fragment = ureg_DECL_output(shader, TGSI_SEMANTIC_COLOR, 0);
+
+   /*
+    * temp = (i_vtex - (0.5/dst_size)) * i_size
+    * t = frac(temp)
+    * vtex = floor(i_vtex)/i_size
+    * (FIXME: The code actually computes
+    *  vtex = floor(temp)/i_size + 0.5/dst_size
+    *  Need to figure out why.)
+    */
+   ureg_ADD(shader, ureg_writemask(t_array[21], TGSI_WRITEMASK_XY),
+            i_vtex, ureg_negate(half_pixel));
+   ureg_MUL(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[21]), video_size);
+   ureg_FRC(shader, ureg_writemask(t, TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]));
+
+   ureg_FLR(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]));
+   ureg_DIV(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]), video_size);
+   ureg_ADD(shader, ureg_writemask(t_array[22], TGSI_WRITEMASK_XY),
+            ureg_src(t_array[22]), half_pixel);
+
+   /*
+    * t_array[0..*] = vtex + offset[0..*]
+    * t_array[0..*] = tex(t_array[0..*], sampler)
+    * t_array[16+i] = cubic_interpolate(t_array[4*i..4*i+3], t_x)
+    * o_fragment = cubic_interpolate(t_array[16..19], t_y)
+    */
+   vtex = ureg_src(t_array[22]);
+   for (i = 0; i < VL_BICUBIC_OFFSETS; i += 2) {
+      struct ureg_src offset =
+         ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, 1 + i/2),
+                            VL_BICUBIC_CB);
+      ureg_ADD(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_XY),
+               vtex, ureg_swizzle(offset, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y,
+                                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y));
+      ureg_MOV(shader, ureg_writemask(t_array[i], TGSI_WRITEMASK_ZW),
+               ureg_imm1f(shader, 0.0f));
+      ureg_ADD(shader, ureg_writemask(t_array[i + 1], TGSI_WRITEMASK_XY),
+               vtex, ureg_swizzle(offset, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
+                                  TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W));
+      ureg_MOV(shader, ureg_writemask(t_array[i + 1], TGSI_WRITEMASK_ZW),
+               ureg_imm1f(shader, 0.0f));
+   }
+
+   for (i = 0; i < VL_BICUBIC_OFFSETS; ++i) {
+      ureg_TEX(shader, t_array[i], TGSI_TEXTURE_2D, ureg_src(t_array[i]), sampler);
+   }
+
+   for(i = 0; i < 4; ++i)
+      create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[4*i]),
+                                            ureg_src(t_array[4*i+1]),
+                                            ureg_src(t_array[4*i+2]),
+                                            ureg_src(t_array[4*i+3]),
+                                            ureg_scalar(ureg_src(t), TGSI_SWIZZLE_X),
+                                            t_array[16+i]);
+
+   create_frag_shader_cubic_interpolater(shader, ureg_src(t_array[16]),
+                                         ureg_src(t_array[17]),
+                                         ureg_src(t_array[18]),
+                                         ureg_src(t_array[19]),
+                                         ureg_scalar(ureg_src(t), TGSI_SWIZZLE_Y),
+                                         o_fragment);
+
+   for(i = 0; i < 23; ++i)
+      ureg_release_temporary(shader, t_array[i]);
+   ureg_release_temporary(shader, t);
+
+   ureg_END(shader);
+
+   return ureg_create_shader_and_destroy(shader, c->pipe);
+}
+
+
 static bool
 init_shaders(struct vl_compositor *c)
 {
+   struct vertex2f *offsets;
    assert(c);
 
    c->vs = create_vert_shader(c);
@@ -472,6 +682,33 @@ init_shaders(struct vl_compositor *c)
       return false;
    }
 
+   c->bicubic.fs_rgba = create_frag_shader_bicubic_rgba(c);
+   if (!c->bicubic.fs_rgba) {
+      debug_printf("Unable to create bicubic fragment shader.\n");
+      return false;
+   }
+
+   offsets = c->bicubic.offsets;
+   offsets[0].x = -1.0f; offsets[0].y = -1.0f;
+   offsets[1].x = 0.0f; offsets[1].y = -1.0f;
+   offsets[2].x = 1.0f; offsets[2].y = -1.0f;
+   offsets[3].x = 2.0f; offsets[3].y = -1.0f;
+
+   offsets[4].x = -1.0f; offsets[4].y = 0.0f;
+   offsets[5].x = 0.0f; offsets[5].y = 0.0f;
+   offsets[6].x = 1.0f; offsets[6].y = 0.0f;
+   offsets[7].x = 2.0f; offsets[7].y = 0.0f;
+
+   offsets[8].x = -1.0f; offsets[8].y = 1.0f;
+   offsets[9].x = 0.0f; offsets[9].y = 1.0f;
+   offsets[10].x = 1.0f; offsets[10].y = 1.0f;
+   offsets[11].x = 2.0f; offsets[11].y = 1.0f;
+
+   offsets[12].x = -1.0f; offsets[12].y = 2.0f;
+   offsets[13].x = 0.0f; offsets[13].y = 2.0f;
+   offsets[14].x = 1.0f; offsets[14].y = 2.0f;
+   offsets[15].x = 2.0f; offsets[15].y = 2.0f;
+
    return true;
 }
 
@@ -487,6 +724,7 @@ static void cleanup_shaders(struct vl_compositor *c)
    c->pipe->delete_fs_state(c->pipe, c->fs_palette.yuv);
    c->pipe->delete_fs_state(c->pipe, c->fs_palette.rgb);
    c->pipe->delete_fs_state(c->pipe, c->fs_rgba);
+   c->pipe->delete_fs_state(c->pipe, c->bicubic.fs_rgba);
 }
 
 static bool
@@ -847,6 +1085,66 @@ gen_vertex_data(struct vl_compositor *c, struct vl_compositor_state *s, struct u
    u_upload_unmap(c->pipe->stream_uploader);
 }
 
+/**
+ * \brief Update and bind the bicubic constant buffer before rendering a layer.
+ *
+ * \param[in] c  The compositor.
+ * \param[in,out] s  The compositor state bucket holding the shadow constants.
+ * \param[in,out] layer  Layer to render; set to c->fs_rgba if upload fails.
+ */
+static void
+bind_constants(struct vl_compositor *c, struct vl_compositor_state *s,
+               struct vl_compositor_layer *layer)
+{
+   if (layer->fs == c->bicubic.fs_rgba) {
+      int i;
+      float constants[VL_BICUBIC_FLOATS];
+      float width = layer->sampler_views[0]->texture->width0;
+      float height = layer->sampler_views[0]->texture->height0;
+
+      memset(constants, 0, sizeof(constants));
+      /* [0..1]: 0.5 / viewport scale, [2..3]: texture size, rest: offsets. */
+      constants[0] = 0.5f / layer->viewport.scale[0];
+      constants[1] = 0.5f / layer->viewport.scale[1];
+      constants[2] = width;
+      constants[3] = height;
+      for (i = 0; i < VL_BICUBIC_OFFSETS; ++i) {
+         constants[2 * (VL_BICUBIC_OTHER + i) + 0] =
+            c->bicubic.offsets[i].x / width;
+         constants[2 * (VL_BICUBIC_OTHER + i) + 1] =
+            c->bicubic.offsets[i].y / height;
+      }
+
+      /* Upload only when some constant differs from the shadow copy. */
+      if (memcmp(constants, s->bicubic.constants, sizeof(constants)) != 0) {
+         struct pipe_context *pipe = s->pipe;
+         struct pipe_box box;
+         struct pipe_transfer *transfer;
+         float *map;
+
+         u_box_1d(0, sizeof(constants), &box);
+         map = pipe->transfer_map(pipe, s->bicubic.constbuf, 0,
+                                  PIPE_TRANSFER_WRITE |
+                                  PIPE_TRANSFER_DISCARD_WHOLE_RESOURCE,
+                                  &box, &transfer);
+         if (map) {
+            memcpy(map, constants, sizeof(constants));
+            memcpy(s->bicubic.constants, constants, sizeof(constants));
+            pipe_transfer_unmap(pipe, transfer);
+         } else {
+            /*
+             * Upload fail will corrupt the output.
+             * Revert back to linear scaling
+             */
+            layer->fs = c->fs_rgba;
+            layer->samplers[0] = c->sampler_linear;
+         }
+      }
+      pipe_set_constant_buffer(c->pipe, PIPE_SHADER_FRAGMENT, VL_BICUBIC_CB,
+                               s->bicubic.constbuf);
+   }
+}
+
 static void
 draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rect *dirty)
 {
@@ -860,7 +1158,7 @@ draw_layers(struct vl_compositor *c, struct vl_compositor_state *s, struct u_rec
          struct pipe_sampler_view **samplers = &layer->sampler_views[0];
          unsigned num_sampler_views = !samplers[1] ? 1 : !samplers[2] ? 2 : 3;
          void *blend = layer->blend ? layer->blend : i ? c->blend_add : c->blend_clear;
-
+         bind_constants(c, s, layer);
          c->pipe->bind_blend_state(c->pipe, blend);
          c->pipe->set_viewport_states(c->pipe, 0, 1, &layer->viewport);
          c->pipe->bind_fs_state(c->pipe, layer->fs);
@@ -1131,6 +1429,42 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *s,
          s->layers[layer].colors[i] = colors[i];
 }
 
+/**
+ * \brief Route an RGBA texture through the bicubic scaler on one layer.
+ *
+ * \param[in,out] s  The compositor state bucket that owns the layer.
+ * \param[in] c  The compositor supplying the shader and sampler state.
+ * \param[in] layer  The layer number, below VL_COMPOSITOR_MAX_LAYERS.
+ * \param[in] rgba  The sampler view of the RGBA texture to be sampled.
+ * \param[in] src_rect  The source rectangle, in source coordinates, of the
+ * area to be composited, or NULL to use the layer's default_rect().
+ * \param[in] dst_rect  The destination rectangle, or NULL to use the
+ * layer's default_rect().
+ */
+void
+vl_compositor_set_bicubic_rgba_layer(struct vl_compositor_state *s,
+                                     struct vl_compositor *c, unsigned layer,
+                                     struct pipe_sampler_view *rgba,
+                                     struct u_rect *src_rect,
+                                     struct u_rect *dst_rect)
+{
+   struct vl_compositor_layer *entry;
+   unsigned i;
+
+   assert(s && c && rgba);
+   assert(layer < VL_COMPOSITOR_MAX_LAYERS);
+   entry = &s->layers[layer];
+   s->used_layers |= 1 << layer;
+   entry->fs = c->bicubic.fs_rgba;
+   for (i = 0; i < 3; ++i) { /* only slot 0 is populated, 1 and 2 are cleared */
+      entry->samplers[i] = i ? NULL : c->sampler_nearest;
+      pipe_sampler_view_reference(&entry->sampler_views[i], i ? NULL : rgba);
+   }
+   calc_src_and_dst(entry, rgba->texture->width0, rgba->texture->height0,
+                    src_rect ? *src_rect : default_rect(entry),
+                    dst_rect ? *dst_rect : default_rect(entry));
+}
+
 void
 vl_compositor_set_layer_rotation(struct vl_compositor_state *s,
                                  unsigned layer,
@@ -1267,6 +1601,8 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
       PIPE_USAGE_DEFAULT,
       sizeof(csc_matrix) + 2*sizeof(float)
    );
+   if (!s->csc_matrix)
+      return false;
 
    if (!s->csc_matrix)
       return false;
@@ -1277,7 +1613,23 @@ vl_compositor_init_state(struct vl_compositor_state *s, struct pipe_context *pip
    if (!vl_compositor_set_csc_matrix(s, (const vl_csc_matrix *)&csc_matrix, 1.0f, 0.0f))
       return false;
 
+   s->bicubic.constbuf = pipe_buffer_create(pipe->screen,
+                                            PIPE_BIND_CONSTANT_BUFFER,
+                                            PIPE_USAGE_DEFAULT,
+                                            VL_BICUBIC_FLOATS * sizeof(float));
+   if (!s->bicubic.constbuf)
+      goto out_bicubic;
+
+   memset(s->bicubic.constants, 0, sizeof(s->bicubic.constants));
+   pipe_buffer_write(s->pipe, s->bicubic.constbuf, 0,
+                     sizeof(s->bicubic.constants), s->bicubic.constants);
+
    return true;
+
+out_bicubic:
+   pipe_resource_reference(&s->csc_matrix, NULL);
+
+   return false;
 }
 
 void
@@ -1287,4 +1639,5 @@ vl_compositor_cleanup_state(struct vl_compositor_state *s)
 
    vl_compositor_clear_layers(s);
    pipe_resource_reference(&s->csc_matrix, NULL);
+   pipe_resource_reference(&s->bicubic.constbuf, NULL);
 }
diff --git a/src/gallium/auxiliary/vl/vl_compositor.h b/src/gallium/auxiliary/vl/vl_compositor.h
index 535abb7..65acad1 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.h
+++ b/src/gallium/auxiliary/vl/vl_compositor.h
@@ -44,6 +44,9 @@ struct pipe_context;
  */
 
 #define VL_COMPOSITOR_MAX_LAYERS 16
+#define VL_BICUBIC_OFFSETS 16 /* number of (x, y) sample offsets */
+#define VL_BICUBIC_OTHER 2 /* (x, y) pairs stored ahead of the offsets */
+#define VL_BICUBIC_FLOATS ((VL_BICUBIC_OTHER + VL_BICUBIC_OFFSETS) * 2)
 
 /* deinterlace allgorithem */
 enum vl_compositor_deinterlace
@@ -94,6 +97,12 @@ struct vl_compositor_state
 
    unsigned used_layers:VL_COMPOSITOR_MAX_LAYERS;
    struct vl_compositor_layer layers[VL_COMPOSITOR_MAX_LAYERS];
+   struct {
+      /** \brief Shadow copy of bicubic scaler fragment shader constants */
+      float constants[VL_BICUBIC_FLOATS];
+      /** \brief The bicubic scaler constant buffer */
+      struct pipe_resource *constbuf;
+   } bicubic;
 };
 
 struct vl_compositor
@@ -114,6 +123,12 @@ struct vl_compositor
    void *fs_video_buffer;
    void *fs_weave_rgb;
    void *fs_rgba;
+   struct {
+      /** \brief Pixel offsets for bicubic scaler */
+      struct vertex2f offsets[VL_BICUBIC_OFFSETS];
+      /** \brief Bicubic scaler fragment shader */
+      void *fs_rgba;
+   } bicubic;
 
    struct {
       void *y;
@@ -232,6 +247,15 @@ vl_compositor_set_rgba_layer(struct vl_compositor_state *state,
                              struct u_rect *dst_rect,
                              struct vertex4f *colors);
 
+/** set an RGBA layer which is scaled using the bicubic filter */
+void
+vl_compositor_set_bicubic_rgba_layer(struct vl_compositor_state *s,
+                                     struct vl_compositor *c,
+                                     unsigned layer,
+                                     struct pipe_sampler_view *rgba,
+                                     struct u_rect *src_rect,
+                                     struct u_rect *dst_rect);
+
 /**
  * set the layer rotation
  */
-- 
2.4.11



More information about the mesa-dev mailing list