Mesa (main): radeon/vcn/enc: H.264 SVC encode

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Jul 26 20:27:34 UTC 2021


Module: Mesa
Branch: main
Commit: 51935d594ed97fac8f798e7686b8b9c8809b1380
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=51935d594ed97fac8f798e7686b8b9c8809b1380

Author: Thong Thai <thong.thai at amd.com>
Date:   Tue Jul 13 11:51:39 2021 -0400

radeon/vcn/enc: H.264 SVC encode

Implement H.264 temporal Scalable Video Coding (SVC) for VCN devices by
sending the required parameters to the firmware, and by creating the H.264
prefix NALU and SEI scalability_info headers.

Signed-off-by: Thong Thai <thong.thai at amd.com>
Reviewed-by: Boyuan Zhang <Boyuan.Zhang at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11850>

---

 src/gallium/drivers/radeon/radeon_temporal.h    | 232 ++++++++++++++++++++++++
 src/gallium/drivers/radeon/radeon_vcn_enc.c     |   5 +
 src/gallium/drivers/radeon/radeon_vcn_enc.h     |   7 +
 src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 175 +++++++++++++++++-
 4 files changed, 416 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_temporal.h b/src/gallium/drivers/radeon/radeon_temporal.h
new file mode 100644
index 00000000000..5a294d61066
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_temporal.h
@@ -0,0 +1,232 @@
+/**************************************************************************
+ *
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_TEMPORAL_H
+#define _RADEON_TEMPORAL_H
+
+#include "radeon_video.h"
+
+#define RENCODE_MAX_NUM_TEMPORAL_LAYERS                                             4
+#define RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE                                     9
+
+typedef struct rvcn_temporal_layer_pattern_entry_s /* one position in a repeating temporal-layer frame pattern */
+{
+   unsigned    temporal_id;              /* written as the 3-bit temporal_id field of the prefix NALU and of the SEI layer entries */
+   unsigned    reference_index_in_table; /* NOTE(review): presumably the pattern-table index of the entry this frame references - confirm */
+   bool        reference_modification;   /* NOTE(review): presumably requests reference list modification for this frame - confirm */
+   unsigned    frame_num_offset;         /* NOTE(review): presumably frame_num relative to the start of the pattern - confirm */
+   unsigned    poc_offset;               /* NOTE(review): presumably picture order count relative to the start of the pattern - confirm */
+   bool        mark_as_reference;        /* NOTE(review): presumably whether later frames may reference this frame - confirm */
+} rvcn_temporal_layer_pattern_entry_t;
+
+typedef struct rvcn_temporal_layer_pattern_table_s /* frame pattern for one temporal-layer configuration */
+{
+   unsigned    pattern_size; /* number of leading pattern_table entries that are populated */
+   rvcn_temporal_layer_pattern_entry_t  pattern_table[RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE]; /* entries in pattern order; entries past pattern_size are unused */
+} rvcn_temporal_layer_pattern_table_t;
+
+static const rvcn_temporal_layer_pattern_table_t  rvcn_temporal_layer_pattern_tables[RENCODE_MAX_NUM_TEMPORAL_LAYERS] =
+{ /* element [n - 1] is the pattern for n temporal layers; entry fields are positional, in struct declaration order */
+   /* 1 temporal layer */
+   {
+      2,      /* temporal layer pattern size */
+      {
+         {
+            0,      /* temporal_id */
+            0,      /* reference_index_in_table */
+            false,  /* reference_modification */
+            0,      /* frame_num_offset */
+            0,      /* poc_offset */
+            true,   /* mark_as_reference */
+         },
+         {
+            0,
+            0,
+            false,
+            1,
+            2,
+            true,
+         }
+      }
+   },
+   /* 2 temporal layers */
+   {
+      3,      /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            1,
+            0,
+            false,
+            1,
+            2,
+            false,
+         },
+         {
+            0,
+            0,
+            false,
+            1,
+            4,
+            true,
+         }
+      }
+   },
+   /* 3 temporal layers */
+   {
+      5,      /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            2,
+            0,
+            false,
+            1,
+            2,
+            false,
+         },
+         {
+            1,
+            0,
+            false,
+            1,
+            4,
+            true,
+         },
+         {
+            2,
+            2,
+            false,
+            2,
+            6,
+            false,
+         },
+         {
+            0,
+            0,
+            true,
+            2,
+            8,
+            true,
+         }
+      }
+   },
+   /* 4 temporal layers */
+   {
+      9,      /* temporal layer pattern size */
+      {
+         {
+            0,
+            0,
+            false,
+            0,
+            0,
+            true,
+         },
+         {
+            3,
+            0,
+            false,
+            1,
+            2,
+            false,
+         },
+         {
+            2,
+            0,
+            false,
+            1,
+            4,
+            true,
+         },
+         {
+            3,
+            2,
+            false,
+            2,
+            6,
+            false,
+         },
+         {
+            1,
+            0,
+            true,
+            2,
+            8,
+            true,
+         },
+         {
+            3,
+            4,
+            false,
+            3,
+            10,
+            false,
+         },
+         {
+            2,
+            4,
+            false,
+            3,
+            12,
+            true,
+         },
+         {
+            3,
+            6,
+            false,
+            4,
+            14,
+            false,
+         },
+         {
+            0,
+            0,
+            true,
+            4,
+            16,
+            true,
+         }
+      }
+   }
+};
+
+#endif // _RADEON_TEMPORAL_H
\ No newline at end of file
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index dad9a1b8a43..1ab69ab1998 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -61,6 +61,8 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
          enc->enc_pic.crop_top = 0;
          enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2;
       }
+      enc->enc_pic.num_temporal_layers = pic->num_temporal_layers;
+      enc->enc_pic.temporal_id = 0;
       enc->enc_pic.rc_layer_init.target_bit_rate = pic->rate_ctrl.target_bitrate;
       enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rate_ctrl.peak_bitrate;
       enc->enc_pic.rc_layer_init.frame_rate_num = pic->rate_ctrl.frame_rate_num;
@@ -95,6 +97,7 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
       default:
          enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE;
       }
+      enc->enc_pic.num_temporal_layers = pic->num_temporal_layers;
    } else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
       struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture;
       enc->enc_pic.picture_type = pic->picture_type;
@@ -520,6 +523,7 @@ void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value,
                                 unsigned int num_bits)
 {
    unsigned int bits_to_pack = 0;
+   enc->bits_size += num_bits;
 
    while (num_bits > 0) {
       unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits));
@@ -552,6 +556,7 @@ void radeon_enc_reset(struct radeon_encoder *enc)
    enc->bits_output = 0;
    enc->num_zeros = 0;
    enc->byte_index = 0;
+   enc->bits_size = 0;
 }
 
 void radeon_enc_byte_align(struct radeon_encoder *enc)
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h
index a11682fb3bd..b568cfd7afc 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -71,6 +71,7 @@
 #define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS                                         0x00000003
 #define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX                                      0x00000004
 #define RENCODE_DIRECT_OUTPUT_NALU_TYPE_END_OF_SEQUENCE                             0x00000005
+#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI                                         0x00000006
 
 #define RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS                   16
 #define RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS                          16
@@ -438,6 +439,9 @@ struct radeon_enc_pic {
    unsigned bit_depth_chroma_minus8;
    unsigned nal_unit_type;
    unsigned max_num_merge_cand;
+   unsigned temporal_id;
+   unsigned num_temporal_layers;
+   unsigned temporal_layer_pattern_index;
 
    bool not_referenced;
    bool is_idr;
@@ -490,6 +494,8 @@ struct radeon_encoder {
    void (*nalu_pps)(struct radeon_encoder *enc);
    void (*nalu_vps)(struct radeon_encoder *enc);
    void (*nalu_aud)(struct radeon_encoder *enc);
+   void (*nalu_sei)(struct radeon_encoder *enc);
+   void (*nalu_prefix)(struct radeon_encoder *enc);
    void (*slice_header)(struct radeon_encoder *enc);
    void (*ctx)(struct radeon_encoder *enc);
    void (*bitstream)(struct radeon_encoder *enc);
@@ -537,6 +543,7 @@ struct radeon_encoder {
    unsigned num_zeros;
    unsigned byte_index;
    unsigned bits_output;
+   unsigned bits_size;
    uint32_t total_task_size;
    uint32_t *p_task_size;
 
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 6704b3075b2..5db22c1c977 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -28,6 +28,7 @@
 #include "pipe/p_video_codec.h"
 #include "radeon_vcn_enc.h"
 #include "radeon_video.h"
+#include "radeon_temporal.h"
 #include "si_pipe.h"
 #include "util/u_video.h"
 
@@ -135,8 +136,8 @@ static void radeon_enc_session_init_hevc(struct radeon_encoder *enc)
 
 static void radeon_enc_layer_control(struct radeon_encoder *enc)
 {
-   enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
-   enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+   enc->enc_pic.layer_ctrl.max_num_temporal_layers = enc->enc_pic.num_temporal_layers;
+   enc->enc_pic.layer_ctrl.num_temporal_layers = enc->enc_pic.num_temporal_layers;
 
    RADEON_ENC_BEGIN(enc->cmd.layer_control);
    RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
@@ -146,7 +147,7 @@ static void radeon_enc_layer_control(struct radeon_encoder *enc)
 
 static void radeon_enc_layer_select(struct radeon_encoder *enc)
 {
-   enc->enc_pic.layer_sel.temporal_layer_index = 0;
+   enc->enc_pic.layer_sel.temporal_layer_index = enc->enc_pic.temporal_id;
 
    RADEON_ENC_BEGIN(enc->cmd.layer_select);
    RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
@@ -458,6 +459,168 @@ static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc)
    RADEON_ENC_END();
 }
 
+static void radeon_enc_nalu_prefix(struct radeon_encoder *enc)
+{ /* Emits a direct-output H.264 prefix NALU (nal_unit_type 14) carrying the temporal_id of the current pattern entry. */
+   uint nalRefIdc = enc->enc_pic.is_idr ? 3 : 0;
+
+   rvcn_temporal_layer_pattern_table_t table_info;
+   table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1]; /* tables are 0-based: [n - 1] is the n-layer pattern */
+
+   if (enc->enc_pic.pic_order_cnt == 0)
+      enc->enc_pic.temporal_layer_pattern_index = 0;
+   else if(enc->enc_pic.temporal_layer_pattern_index == (table_info.pattern_size - 1))
+      enc->enc_pic.temporal_layer_pattern_index = 1; /* wrap to entry 1; entry 0 is only selected when pic_order_cnt is 0 */
+   else
+      enc->enc_pic.temporal_layer_pattern_index++;
+
+   rvcn_temporal_layer_pattern_entry_t pattern =
+      table_info.pattern_table[enc->enc_pic.temporal_layer_pattern_index];
+
+   RADEON_ENC_BEGIN(enc->cmd.nalu);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX);
+   uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; /* reserve a dword; patched with the NALU byte size below */
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x00000001, 32); /* start code */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);         /* forbidden_zero_bit */
+   radeon_enc_code_fixed_bits(enc, nalRefIdc, 2);   /* nal_ref_idc */
+   radeon_enc_code_fixed_bits(enc, 14, 5);          /* nal_unit_type 14: prefix NAL unit */
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulation_prevention(enc, true);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);         /* svc_extension_flag */
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.is_idr ? 0x1 : 0x0, 1); /* idr_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 6);         /* priority_id */
+   radeon_enc_code_fixed_bits(enc, 0x1, 1);         /* no_inter_layer_pred_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 3);         /* dependency_id */
+   radeon_enc_code_fixed_bits(enc, 0x0, 4);         /* quality_id */
+   radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3); /* temporal_id */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);         /* use_ref_base_pic_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);         /* discardable_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);         /* output_flag */
+   radeon_enc_code_fixed_bits(enc, 0x3, 2);         /* reserved_three_2bits */
+
+   if(nalRefIdc != 0) /* a prefix NALU body is only written for reference pictures */
+   {
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);      /* store_ref_base_pic_flag */
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);      /* additional_prefix_nal_unit_extension_flag */
+      radeon_enc_code_fixed_bits(enc, 0x1, 1);      /* RBSP stop bit */
+      radeon_enc_byte_align(enc);
+   }
+
+   radeon_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8; /* bits written, rounded up to whole bytes */
+   RADEON_ENC_END();
+}
+
+static void radeon_enc_nalu_sei(struct radeon_encoder *enc)
+{ /* Emits a direct-output H.264 SEI NALU with a scalability_info payload (payload type 24) listing one layer per pattern entry. */
+   unsigned number_of_layers;
+
+   rvcn_temporal_layer_pattern_table_t table_info;
+   table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1]; /* tables are 0-based: [n - 1] is the n-layer pattern */
+   number_of_layers = table_info.pattern_size; /* note: counts pattern entries, not distinct temporal layers */
+
+   RADEON_ENC_BEGIN(enc->cmd.nalu);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI);
+   unsigned *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; /* reserve a dword; patched with the NALU byte size at the end */
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+
+   radeon_enc_code_fixed_bits(enc, 0x00000001, 32); /* start code */
+   radeon_enc_code_fixed_bits(enc, 0x6, 8); /* NAL header byte: nal_ref_idc 0, nal_unit_type 6 (SEI) */
+   radeon_enc_byte_align(enc);
+
+   radeon_enc_set_emulation_prevention(enc, true);
+
+   /* save the current position for later */
+   unsigned position = enc->cs.current.cdw;
+   unsigned shifter = enc->shifter;
+   unsigned bits_in_shifter = enc->bits_in_shifter;
+   unsigned num_zeros = enc->num_zeros;
+   unsigned byte_index = enc->byte_index;
+   unsigned bits_output = enc->bits_output;
+   bool emulation_prevention = enc->emulation_prevention;
+
+   /* temporarily fill out the payload type and size */
+   radeon_enc_code_fixed_bits(enc, 24, 8); /* payload type 24 */
+   radeon_enc_code_fixed_bits(enc, 0, 8); /* payload size placeholder; the real value is written after the payload is measured */
+
+   unsigned svc_start_offset = enc->bits_size; /* bit count at payload start, used to measure the payload length */
+
+   radeon_enc_code_fixed_bits(enc, 0x0, 1); /* three leading one-bit payload flags, all zero */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_fixed_bits(enc, 0x0, 1);
+   radeon_enc_code_ue(enc, number_of_layers - 1); /* layer count minus one */
+
+   for(int i = 0; i < number_of_layers; i++ ) /* one layer record per pattern entry */
+   {
+      rvcn_temporal_layer_pattern_entry_t pattern = table_info.pattern_table[i];
+      radeon_enc_code_ue(enc, i); /* layer id is the pattern position */
+      radeon_enc_code_fixed_bits(enc, 0x0, 6);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 3);
+      radeon_enc_code_fixed_bits(enc, 0x0, 4);
+      radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3); /* temporal_id of this pattern entry */
+      radeon_enc_code_fixed_bits(enc, 0x0, 1); /* 13 one-bit flags follow, all zero - presumably the per-layer *_present/info flags; confirm against the spec */
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_fixed_bits(enc, 0x0, 1);
+      radeon_enc_code_ue(enc, 0);
+      radeon_enc_code_ue(enc, 0);
+   }
+   unsigned svc_size = ((enc->bits_size - svc_start_offset) + 7) / 8; /* payload length in bytes, rounded up */
+   unsigned aligned = (32 - enc->bits_in_shifter) % 8; /* nonzero when (32 - bits_in_shifter) is not a multiple of 8 */
+   if (aligned > 0)
+      radeon_enc_code_fixed_bits(enc, 0x1, 1); /* presumably the payload's bit_equal_to_one padding marker - confirm */
+   radeon_enc_byte_align(enc);
+
+   radeon_enc_code_fixed_bits(enc, 0x1, 1); /* presumably the RBSP stop bit - confirm */
+   radeon_enc_byte_align(enc);
+
+   /* store our current state, and go to the beginning to write the size */
+   unsigned position2 = enc->cs.current.cdw;
+   unsigned shifter2 = enc->shifter;
+   unsigned bits_in_shifter2 = enc->bits_in_shifter;
+   unsigned num_zeros2 = enc->num_zeros;
+   unsigned byte_index2 = enc->byte_index;
+   unsigned bits_output2 = enc->bits_output;
+   bool emulation_prevention2 = enc->emulation_prevention;
+
+   enc->cs.current.cdw = position; /* rewind the writer to the state saved before the placeholder bytes */
+   enc->shifter = shifter;
+   enc->bits_in_shifter = bits_in_shifter;
+   enc->num_zeros = num_zeros;
+   enc->byte_index = byte_index;
+   enc->bits_output = bits_output;
+   enc->emulation_prevention = emulation_prevention;
+
+   radeon_enc_output_one_byte(enc, 24); /* rewrite the payload type */
+   radeon_enc_output_one_byte(enc, svc_size); /* replace the placeholder with the measured size; emitted as a single byte */
+
+   /* restore our state */
+   enc->cs.current.cdw = position2;
+   enc->shifter = shifter2;
+   enc->bits_in_shifter = bits_in_shifter2;
+   enc->num_zeros = num_zeros2;
+   enc->byte_index = byte_index2;
+   enc->bits_output = bits_output2;
+   enc->emulation_prevention = emulation_prevention2;
+
+   radeon_enc_flush_headers(enc);
+
+   *size_in_bytes = (enc->bits_output + 7) / 8; /* bits written, rounded up to whole bytes */
+   RADEON_ENC_END();
+}
+
 static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
 {
    RADEON_ENC_BEGIN(enc->cmd.nalu);
@@ -1140,7 +1303,11 @@ static void begin(struct radeon_encoder *enc)
 
 static void radeon_enc_headers_h264(struct radeon_encoder *enc)
 {
+   if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
+      enc->nalu_prefix(enc);
    if (enc->enc_pic.is_idr) {
+      if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
+         enc->nalu_sei(enc);
       enc->nalu_sps(enc);
       enc->nalu_pps(enc);
    }
@@ -1223,6 +1390,8 @@ void radeon_enc_1_2_init(struct radeon_encoder *enc)
       enc->encode_params = radeon_enc_encode_params;
       enc->encode_params_codec_spec = radeon_enc_encode_params_h264;
       enc->encode_headers = radeon_enc_headers_h264;
+      enc->nalu_prefix = radeon_enc_nalu_prefix;
+      enc->nalu_sei = radeon_enc_nalu_sei;
    } else if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
       enc->session_init = radeon_enc_session_init_hevc;
       enc->slice_control = radeon_enc_slice_control_hevc;



More information about the mesa-commit mailing list