[Mesa-dev] [PATCH 3/3] omx: add a compute path in enc_LoadImage_common

Wed Feb 27 22:19:55 UTC 2019

From: Marek Olšák <marek.olsak at amd.com>

---
 .../state_trackers/omx/bellagio/vid_enc.c     |   3 +
 .../state_trackers/omx/tizonia/h264eprc.c     |   3 +
 .../state_trackers/omx/vid_enc_common.c       | 219 +++++++++++++++---
 .../state_trackers/omx/vid_enc_common.h       |   8 +-
 4 files changed, 196 insertions(+), 37 deletions(-)

diff --git a/src/gallium/state_trackers/omx/bellagio/vid_enc.c b/src/gallium/state_trackers/omx/bellagio/vid_enc.c
index 1126c4f583d..525d2f331d8 100644
--- a/src/gallium/state_trackers/omx/bellagio/vid_enc.c
+++ b/src/gallium/state_trackers/omx/bellagio/vid_enc.c
@@ -154,20 +154,22 @@ static OMX_ERRORTYPE vid_enc_Constructor(OMX_COMPONENTTYPE *comp, OMX_STRING nam
 
    screen = priv->screen->pscreen;
    if (!screen->get_video_param(screen, PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
                                 PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
       return OMX_ErrorBadParameter;
 
    priv->s_pipe = screen->context_create(screen, NULL, 0);
    if (!priv->s_pipe)
       return OMX_ErrorInsufficientResources;
 
+   enc_InitCompute_common(priv);
+
    if (!vl_compositor_init(&priv->compositor, priv->s_pipe)) {
       priv->s_pipe->destroy(priv->s_pipe);
       priv->s_pipe = NULL;
       return OMX_ErrorInsufficientResources;
    }
 
    if (!vl_compositor_init_state(&priv->cstate, priv->s_pipe)) {
       vl_compositor_cleanup(&priv->compositor);
       priv->s_pipe->destroy(priv->s_pipe);
       priv->s_pipe = NULL;
@@ -260,20 +262,21 @@ static OMX_ERRORTYPE vid_enc_Destructor(OMX_COMPONENTTYPE *comp)
       priv->ports=NULL;
    }
 
    for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i)
       if (priv->scale_buffer[i])
          priv->scale_buffer[i]->destroy(priv->scale_buffer[i]);
 
    if (priv->s_pipe) {
       vl_compositor_cleanup_state(&priv->cstate);
       vl_compositor_cleanup(&priv->compositor);
+      enc_ReleaseCompute_common(priv);
       priv->s_pipe->destroy(priv->s_pipe);
    }
 
    if (priv->t_pipe)
       priv->t_pipe->destroy(priv->t_pipe);
 
    if (priv->screen)
       omx_put_screen();
 
    return omx_workaround_Destructor(comp);
diff --git a/src/gallium/state_trackers/omx/tizonia/h264eprc.c b/src/gallium/state_trackers/omx/tizonia/h264eprc.c
index 586763456f6..effff378e8f 100644
--- a/src/gallium/state_trackers/omx/tizonia/h264eprc.c
+++ b/src/gallium/state_trackers/omx/tizonia/h264eprc.c
@@ -400,20 +400,22 @@ static OMX_ERRORTYPE h264e_prc_create_encoder(void *ap_obj)
 
    screen = priv->screen->pscreen;
    if (!screen->get_video_param(screen, PIPE_VIDEO_PROFILE_MPEG4_AVC_HIGH,
                                 PIPE_VIDEO_ENTRYPOINT_ENCODE, PIPE_VIDEO_CAP_SUPPORTED))
       return OMX_ErrorBadParameter;
 
    priv->s_pipe = screen->context_create(screen, NULL, 0);
    if (!priv->s_pipe)
       return OMX_ErrorInsufficientResources;
 
+   enc_InitCompute_common(priv);
+
    if (!vl_compositor_init(&priv->compositor, priv->s_pipe)) {
       priv->s_pipe->destroy(priv->s_pipe);
       priv->s_pipe = NULL;
       return OMX_ErrorInsufficientResources;
    }
 
    if (!vl_compositor_init_state(&priv->cstate, priv->s_pipe)) {
       vl_compositor_cleanup(&priv->compositor);
       priv->s_pipe->destroy(priv->s_pipe);
       priv->s_pipe = NULL;
@@ -444,20 +446,21 @@ static void h264e_prc_destroy_encoder(void *ap_obj)
    enc_ReleaseTasks(&priv->b_frames);
    enc_ReleaseTasks(&priv->stacked_tasks);
 
    for (i = 0; i < OMX_VID_ENC_NUM_SCALING_BUFFERS; ++i)
       if (priv->scale_buffer[i])
       priv->scale_buffer[i]->destroy(priv->scale_buffer[i]);
 
    if (priv->s_pipe) {
       vl_compositor_cleanup_state(&priv->cstate);
       vl_compositor_cleanup(&priv->compositor);
+      enc_ReleaseCompute_common(priv);
       priv->s_pipe->destroy(priv->s_pipe);
    }
 
    if (priv->t_pipe)
       priv->t_pipe->destroy(priv->t_pipe);
 
    if (priv->screen)
       omx_put_screen();
 }
 
diff --git a/src/gallium/state_trackers/omx/vid_enc_common.c b/src/gallium/state_trackers/omx/vid_enc_common.c
index 4d3f48671f9..3e8ca3202a6 100644
--- a/src/gallium/state_trackers/omx/vid_enc_common.c
+++ b/src/gallium/state_trackers/omx/vid_enc_common.c
@@ -21,20 +21,21 @@
  * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  *
  **************************************************************************/
 
 #include "vid_enc_common.h"
 
 #include "vl/vl_video_buffer.h"
+#include "tgsi/tgsi_text.h"
 
 void enc_ReleaseTasks(struct list_head *head)
 {
    struct encode_task *i, *next;
 
    if (!head || !head->next)
       return;
 
    LIST_FOR_EACH_ENTRY_SAFE(i, next, head, list) {
       pipe_resource_reference(&i->bitstream, NULL);
@@ -299,20 +300,106 @@ void enc_ControlPicture_common(vid_enc_PrivateType * priv, struct pipe_h264_enc_
    picture->quant_p_frames = priv->quant.nQpP;
    picture->quant_b_frames = priv->quant.nQpB;
 
    picture->frame_num = priv->frame_num;
    picture->ref_idx_l0 = priv->ref_idx_l0;
    picture->ref_idx_l1 = priv->ref_idx_l1;
    picture->enable_vui = (picture->rate_ctrl.frame_rate_num != 0);
    enc_GetPictureParamPreset(picture);
 }
 
+static void *create_compute_state(struct pipe_context *pipe,
+                                  const char *source) /* source: TGSI shader text */
+{ /* Returns whatever pipe->create_compute_state returns, or NULL if the text cannot be translated. */
+   struct tgsi_token tokens[1024]; /* fixed-size scratch buffer for the translated tokens */
+   struct pipe_compute_state state = {0};
+   /* Translate the text into TGSI tokens; on failure assert (when asserts are enabled) and return NULL. */
+   if (!tgsi_text_translate(source, tokens, ARRAY_SIZE(tokens))) {
+           assert(false);
+           return NULL;
+   }
+   /* Describe the program as TGSI IR that points at the local token buffer. */
+   state.ir_type = PIPE_SHADER_IR_TGSI;
+   state.prog = tokens;
+   /* NOTE(review): tokens lives on the stack; presumably the driver consumes it during this call - confirm. */
+   return pipe->create_compute_state(pipe, &state);
+}
+
+void enc_InitCompute_common(vid_enc_PrivateType *priv)
+{ /* Create the two compute shaders (copy_y_shader, copy_uv_shader) stored in priv. */
+   struct pipe_context *pipe = priv->s_pipe;
+   struct pipe_screen *screen = pipe->screen;
+   /* priv->s_pipe must already exist: it is dereferenced unconditionally above. */
+   /* We need the partial last block support; without it the shader pointers are left untouched. */
+   if (!screen->get_param(screen, PIPE_CAP_COMPUTE_GRID_INFO_LAST_BLOCK))
+      return;
+   /* copy_y: each thread copies one R8 texel from IMAGE[0] to IMAGE[1] at the same (x, y). */
+   static const char *copy_y =
+         "COMP\n"
+         "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
+         "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
+         "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+         "DCL SV[0], THREAD_ID\n"
+         "DCL SV[1], BLOCK_ID\n"
+         "DCL IMAGE[0], 2D, PIPE_FORMAT_R8_UINT\n"
+         "DCL IMAGE[1], 2D, PIPE_FORMAT_R8_UINT, WR\n"
+         "DCL TEMP[0..1]\n"
+         "IMM[0] UINT32 {64, 0, 0, 0}\n"
+         /* x = BLOCK_ID.x * 64 + THREAD_ID.x, y = BLOCK_ID.y */
+         "UMAD TEMP[0].x, SV[1], IMM[0], SV[0]\n"
+         "MOV TEMP[0].y, SV[1]\n"
+         "LOAD TEMP[1].x, IMAGE[0], TEMP[0], 2D, PIPE_FORMAT_R8_UINT\n"
+         "STORE IMAGE[1].x, TEMP[0], TEMP[1], 2D, PIPE_FORMAT_R8_UINT\n"
+         "END\n";
+   /* copy_uv: each thread reads two horizontally adjacent R8 texels of IMAGE[0] and writes one R8G8 texel to IMAGE[2]. */
+   static const char *copy_uv =
+         "COMP\n"
+         "PROPERTY CS_FIXED_BLOCK_WIDTH 64\n"
+         "PROPERTY CS_FIXED_BLOCK_HEIGHT 1\n"
+         "PROPERTY CS_FIXED_BLOCK_DEPTH 1\n"
+         "DCL SV[0], THREAD_ID\n"
+         "DCL SV[1], BLOCK_ID\n"
+         "DCL IMAGE[0], 2D, PIPE_FORMAT_R8_UINT\n"
+         "DCL IMAGE[2], 2D, PIPE_FORMAT_R8G8_UINT, WR\n"
+         "DCL CONST[0][0]\n" /* .x = offset of the UV portion in the y direction */
+         "DCL TEMP[0..4]\n"
+         "IMM[0] UINT32 {64, 0, 2, 1}\n"
+         /* Destination R8G8 coordinates */
+         "UMAD TEMP[0].x, SV[1], IMM[0], SV[0]\n"
+         "MOV TEMP[0].y, SV[1]\n"
+         /* Source R8 coordinates of U: (2 * dst.x, dst.y + CONST[0].x) */
+         "UMUL TEMP[1].x, TEMP[0], IMM[0].zzzz\n"
+         "UADD TEMP[1].y, TEMP[0], CONST[0].xxxx\n"
+         /* Source R8 coordinates of V: one texel to the right of U, same row */
+         "UADD TEMP[2].x, TEMP[1], IMM[0].wwww\n"
+         "MOV TEMP[2].y, TEMP[1]\n"
+         /* Fetch both bytes and pack them into TEMP[3].xy before the R8G8 store. */
+         "LOAD TEMP[3].x, IMAGE[0], TEMP[1], 2D, PIPE_FORMAT_R8_UINT\n"
+         "LOAD TEMP[4].x, IMAGE[0], TEMP[2], 2D, PIPE_FORMAT_R8_UINT\n"
+         "MOV TEMP[3].y, TEMP[4].xxxx\n"
+         "STORE IMAGE[2], TEMP[0], TEMP[3], 2D, PIPE_FORMAT_R8G8_UINT\n"
+         "END\n";
+   /* Either result may be NULL (create_compute_state returns NULL when translation fails). */
+   priv->copy_y_shader = create_compute_state(pipe, copy_y);
+   priv->copy_uv_shader = create_compute_state(pipe, copy_uv);
+}
+
+void enc_ReleaseCompute_common(vid_enc_PrivateType *priv)
+{ /* Delete whichever of the two copy compute shaders are set in priv. */
+   struct pipe_context *pipe = priv->s_pipe;
+   /* Needs a valid priv->s_pipe whenever a shader is set; the pointers are not reset to NULL afterwards. */
+   if (priv->copy_y_shader)
+      pipe->delete_compute_state(pipe, priv->copy_y_shader);
+   if (priv->copy_uv_shader)
+      pipe->delete_compute_state(pipe, priv->copy_uv_shader);
+}
+
 OMX_ERRORTYPE enc_LoadImage_common(vid_enc_PrivateType * priv, OMX_VIDEO_PORTDEFINITIONTYPE *def,
                                    OMX_BUFFERHEADERTYPE *buf,
                                    struct pipe_video_buffer *vbuf)
 {
    struct pipe_context *pipe = priv->s_pipe;
    struct pipe_box box = {};
    struct input_buf_private *inp = buf->pInputPortPrivate;
 
    if (!inp->resource) {
       struct pipe_sampler_view **views;
@@ -330,77 +417,137 @@ OMX_ERRORTYPE enc_LoadImage_common(vid_enc_PrivateType * priv, OMX_VIDEO_PORTDEF
                             PIPE_TRANSFER_WRITE, &box,
                             ptr, def->nStride, 0);
       ptr = ((uint8_t*)buf->pBuffer) + (def->nStride * box.height);
       box.width = def->nFrameWidth / 2;
       box.height = def->nFrameHeight / 2;
       box.depth = 1;
       pipe->texture_subdata(pipe, views[1]->texture, 0,
                             PIPE_TRANSFER_WRITE, &box,
                             ptr, def->nStride, 0);
    } else {
-      struct pipe_blit_info blit;
       struct vl_video_buffer *dst_buf = (struct vl_video_buffer *)vbuf;
 
       pipe_transfer_unmap(pipe, inp->transfer);
 
       /* inp->resource uses PIPE_FORMAT_I8 and the layout looks like this:
        *
        * def->nFrameWidth = 4, def->nFrameHeight = 4:
        * |----|
        * |YYYY|
        * |YYYY|
        * |YYYY|
        * |YYYY|
        * |UVUV|
        * |UVUV|
        * |----|
        *
        * The copy has 2 steps:
        * - Copy Y to dst_buf->resources[0] as R8.
        * - Copy UV to dst_buf->resources[1] as R8G8.
        */
+      if (priv->copy_y_shader && priv->copy_uv_shader) {
+         /* Compute path */
+         /* Set shader images for both copies: 0 = packed source, 1 = Y destination, 2 = UV destination. */
+         struct pipe_image_view image[3] = {0};
+         image[0].resource = inp->resource;
+         image[0].shader_access = image[0].access = PIPE_IMAGE_ACCESS_READ;
+         image[0].format = PIPE_FORMAT_R8_UINT;
+         /* Y plane destination, written as R8. */
+         image[1].resource = dst_buf->resources[0];
+         image[1].shader_access = image[1].access = PIPE_IMAGE_ACCESS_WRITE;
+         image[1].format = PIPE_FORMAT_R8_UINT;
+         /* UV plane destination, written as R8G8; image[2].access must be set too, not left zero-initialized. */
+         image[2].resource = dst_buf->resources[1];
+         image[2].shader_access = image[2].access = PIPE_IMAGE_ACCESS_WRITE;
+         image[2].format = PIPE_FORMAT_R8G8_UINT;
+
+         pipe->set_shader_images(pipe, PIPE_SHADER_COMPUTE, 0, 3, image);
+
+         /* Set the constant buffer. */
+         uint32_t constants[4] = {def->nFrameHeight};
+         struct pipe_constant_buffer cb = {};
+
+         cb.buffer_size = sizeof(constants);
+         cb.user_buffer = constants;
+         pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, &cb);
+
+         /* Use the optimal block size for the linear image layout. */
+         struct pipe_grid_info info = {};
+         info.block[0] = 64;
+         info.block[1] = 1;
+         info.block[2] = 1;
+         info.grid[2] = 1;
+
+         /* Copy Y */
+         pipe->bind_compute_state(pipe, priv->copy_y_shader);
+
+         info.grid[0] = DIV_ROUND_UP(def->nFrameWidth, 64);
+         info.grid[1] = def->nFrameHeight;
+         info.last_block[0] = def->nFrameWidth % 64;
+         pipe->launch_grid(pipe, &info);
+
+         /* Copy UV */
+         pipe->bind_compute_state(pipe, priv->copy_uv_shader);
+
+         info.grid[0] = DIV_ROUND_UP(def->nFrameWidth / 2, 64);
+         info.grid[1] = def->nFrameHeight / 2;
+         info.last_block[0] = (def->nFrameWidth / 2) % 64;
+         pipe->launch_grid(pipe, &info);
+
+         /* Make the result visible to all clients. */
+         pipe->memory_barrier(pipe, PIPE_BARRIER_ALL);
+
+         /* Unbind. */
+         pipe->set_shader_images(pipe, PIPE_SHADER_COMPUTE, 0, 3, NULL);
+         pipe->set_constant_buffer(pipe, PIPE_SHADER_COMPUTE, 0, NULL);
+         pipe->bind_compute_state(pipe, NULL);
+      } else {
+         /* Graphics path */
+         struct pipe_blit_info blit;
+
+         box.width = def->nFrameWidth;
+         box.height = def->nFrameHeight;
+         box.depth = 1;
+
+         /* Copy Y */
+         pipe->resource_copy_region(pipe,
+                                    dst_buf->resources[0],
+                                    0, 0, 0, 0, inp->resource, 0, &box);
+
+         /* Copy U */
+         memset(&blit, 0, sizeof(blit));
+         blit.src.resource = inp->resource;
+         blit.src.format = inp->resource->format;
+
+         blit.src.box.x = -1;
+         blit.src.box.y = def->nFrameHeight;
+         blit.src.box.width = def->nFrameWidth;
+         blit.src.box.height = def->nFrameHeight / 2 ;
+         blit.src.box.depth = 1;
+
+         blit.dst.resource = dst_buf->resources[1];
+         blit.dst.format = blit.dst.resource->format;
+
+         blit.dst.box.width = def->nFrameWidth / 2;
+         blit.dst.box.height = def->nFrameHeight / 2;
+         blit.dst.box.depth = 1;
+         blit.filter = PIPE_TEX_FILTER_NEAREST;
+
+         blit.mask = PIPE_MASK_R;
+         pipe->blit(pipe, &blit);
+
+         /* Copy V */
+         blit.src.box.x = 0;
+         blit.mask = PIPE_MASK_G;
+         pipe->blit(pipe, &blit);
+      }
 
-      box.width = def->nFrameWidth;
-      box.height = def->nFrameHeight;
-      box.depth = 1;
-
-      /* Copy Y */
-      pipe->resource_copy_region(pipe,
-                                 dst_buf->resources[0],
-                                 0, 0, 0, 0, inp->resource, 0, &box);
-
-      /* Copy U */
-      memset(&blit, 0, sizeof(blit));
-      blit.src.resource = inp->resource;
-      blit.src.format = inp->resource->format;
-
-      blit.src.box.x = -1;
-      blit.src.box.y = def->nFrameHeight;
-      blit.src.box.width = def->nFrameWidth;
-      blit.src.box.height = def->nFrameHeight / 2 ;
-      blit.src.box.depth = 1;
-
-      blit.dst.resource = dst_buf->resources[1];
-      blit.dst.format = blit.dst.resource->format;
-
-      blit.dst.box.width = def->nFrameWidth / 2;
-      blit.dst.box.height = def->nFrameHeight / 2;
-      blit.dst.box.depth = 1;
-      blit.filter = PIPE_TEX_FILTER_NEAREST;
-
-      blit.mask = PIPE_MASK_R;
-      pipe->blit(pipe, &blit);
-
-      /* Copy V */
-      blit.src.box.x = 0;
-      blit.mask = PIPE_MASK_G;
-      pipe->blit(pipe, &blit);
       pipe->flush(pipe, NULL, 0);
 
       box.width = inp->resource->width0;
       box.height = inp->resource->height0;
       box.depth = inp->resource->depth0;
       buf->pBuffer = pipe->transfer_map(pipe, inp->resource, 0,
                                         PIPE_TRANSFER_WRITE, &box,
                                         &inp->transfer);
    }
 
diff --git a/src/gallium/state_trackers/omx/vid_enc_common.h b/src/gallium/state_trackers/omx/vid_enc_common.h
index bdf519db26b..8c8830dfcc6 100644
--- a/src/gallium/state_trackers/omx/vid_enc_common.h
+++ b/src/gallium/state_trackers/omx/vid_enc_common.h
@@ -93,21 +93,23 @@ DERIVEDCLASS(vid_enc_PrivateType, omx_base_filter_PrivateType)
 	OMX_BOOL restricted_b_frames; \
 	OMX_VIDEO_PARAM_BITRATETYPE bitrate; \
 	OMX_VIDEO_PARAM_QUANTIZATIONTYPE quant; \
 	OMX_VIDEO_PARAM_PROFILELEVELTYPE profile_level; \
 	OMX_CONFIG_INTRAREFRESHVOPTYPE force_pic_type; \
 	struct vl_compositor compositor; \
 	struct vl_compositor_state cstate; \
 	struct pipe_video_buffer *scale_buffer[OMX_VID_ENC_NUM_SCALING_BUFFERS]; \
 	OMX_CONFIG_SCALEFACTORTYPE scale; \
 	OMX_U32 current_scale_buffer; \
-	OMX_U32 stacked_frames_num;
+	OMX_U32 stacked_frames_num; \
+	void *copy_y_shader; \
+	void *copy_uv_shader;
 ENDCLASS(vid_enc_PrivateType)
 
 #else
 
 #include <tizprc_decls.h>
 #include <tizport_decls.h>
 
 typedef struct h264e_prc_class h264e_prc_class_t;
 struct h264e_prc_class
 {
@@ -144,30 +146,34 @@ struct h264e_prc
    OMX_CONFIG_INTRAREFRESHVOPTYPE force_pic_type;
    struct vl_compositor compositor;
    struct vl_compositor_state cstate;
    struct pipe_video_buffer *scale_buffer[OMX_VID_ENC_NUM_SCALING_BUFFERS];
    OMX_CONFIG_SCALEFACTORTYPE scale;
    OMX_U32 current_scale_buffer;
    OMX_U32 stacked_frames_num;
    bool eos_;
    bool in_port_disabled_;
    bool out_port_disabled_;
+   void *copy_y_shader;
+   void *copy_uv_shader;
 };
 #endif
 
 void enc_ReleaseTasks(struct list_head *head);
 void enc_MoveTasks(struct list_head *from, struct list_head *to);
 enum pipe_video_profile enc_TranslateOMXProfileToPipe(unsigned omx_profile);
 unsigned enc_TranslateOMXLevelToPipe(unsigned omx_level);
 void vid_enc_BufferEncoded_common(vid_enc_PrivateType * priv, OMX_BUFFERHEADERTYPE* input,
                                   OMX_BUFFERHEADERTYPE* output);
 struct encode_task *enc_NeedTask_common(vid_enc_PrivateType * priv,
                                         OMX_VIDEO_PORTDEFINITIONTYPE *def);
 void enc_ScaleInput_common(vid_enc_PrivateType * priv,
                                          OMX_VIDEO_PORTDEFINITIONTYPE *def,
                                          struct pipe_video_buffer **vbuf, unsigned *size);
 void enc_ControlPicture_common(vid_enc_PrivateType * priv,
                                struct pipe_h264_enc_picture_desc *picture);
+void enc_InitCompute_common(vid_enc_PrivateType *priv);
+void enc_ReleaseCompute_common(vid_enc_PrivateType *priv);
 OMX_ERRORTYPE enc_LoadImage_common(vid_enc_PrivateType * priv, OMX_VIDEO_PORTDEFINITIONTYPE *def,
                                    OMX_BUFFERHEADERTYPE *buf,
                                    struct pipe_video_buffer *vbuf);
 #endif
-- 
2.17.1