Mesa (main): radeon/vcn/enc: H.264 SVC encode
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Mon Jul 26 20:27:34 UTC 2021
Module: Mesa
Branch: main
Commit: 51935d594ed97fac8f798e7686b8b9c8809b1380
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=51935d594ed97fac8f798e7686b8b9c8809b1380
Author: Thong Thai <thong.thai at amd.com>
Date: Tue Jul 13 11:51:39 2021 -0400
radeon/vcn/enc: H.264 SVC encode
Implement H.264 temporal, Scalable Video Coding (SVC) for VCN devices by
sending the required parameters to the firmware, and creating H.264 NALU
prefix and SEI scalability_info headers.
Signed-off-by: Thong Thai <thong.thai at amd.com>
Reviewed-by: Boyuan Zhang <Boyuan.Zhang at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11850>
---
src/gallium/drivers/radeon/radeon_temporal.h | 232 ++++++++++++++++++++++++
src/gallium/drivers/radeon/radeon_vcn_enc.c | 5 +
src/gallium/drivers/radeon/radeon_vcn_enc.h | 7 +
src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 175 +++++++++++++++++-
4 files changed, 416 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/radeon/radeon_temporal.h b/src/gallium/drivers/radeon/radeon_temporal.h
new file mode 100644
index 00000000000..5a294d61066
--- /dev/null
+++ b/src/gallium/drivers/radeon/radeon_temporal.h
@@ -0,0 +1,232 @@
+/**************************************************************************
+ *
+ * Copyright 2021 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _RADEON_TEMPORAL_H
+#define _RADEON_TEMPORAL_H
+
+#include "radeon_video.h"
+
+#define RENCODE_MAX_NUM_TEMPORAL_LAYERS 4
+#define RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE 9
+
+typedef struct rvcn_temporal_layer_pattern_entry_s
+{ /* One step of a temporal layer pattern (one element of pattern_table). */
+ unsigned temporal_id; /* temporal layer of this step; emitted as a 3-bit field in the prefix NALU and SEI */
+ unsigned reference_index_in_table; /* presumably the pattern_table index of the step used as reference -- TODO confirm */
+ bool reference_modification; /* not read by the code in this change; meaning to be confirmed */
+ unsigned frame_num_offset; /* presumably frame_num relative to the start of the pattern -- TODO confirm */
+ unsigned poc_offset; /* presumably picture order count relative to the start of the pattern -- TODO confirm */
+ bool mark_as_reference; /* presumably whether this step is kept as a reference picture -- TODO confirm */
+} rvcn_temporal_layer_pattern_entry_t;
+
+typedef struct rvcn_temporal_layer_pattern_table_s
+{ /* A temporal layer pattern: the first pattern_size elements of pattern_table are valid. */
+ unsigned pattern_size; /* number of initialized steps in pattern_table */
+ rvcn_temporal_layer_pattern_entry_t pattern_table[RENCODE_MAX_TEMPORAL_LAYER_PATTERN_SIZE];
+} rvcn_temporal_layer_pattern_table_t;
+
+static const rvcn_temporal_layer_pattern_table_t
+   rvcn_temporal_layer_pattern_tables[RENCODE_MAX_NUM_TEMPORAL_LAYERS] = {
+   /* table index 0: pattern for 1 temporal layer */
+   [0] = {
+      .pattern_size = 2,
+      .pattern_table = {
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 0,
+            .poc_offset = 0,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 2,
+            .mark_as_reference = true,
+         },
+      },
+   },
+   /* table index 1: pattern for 2 temporal layers */
+   [1] = {
+      .pattern_size = 3,
+      .pattern_table = {
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 0,
+            .poc_offset = 0,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 1,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 2,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 4,
+            .mark_as_reference = true,
+         },
+      },
+   },
+   /* table index 2: pattern for 3 temporal layers */
+   [2] = {
+      .pattern_size = 5,
+      .pattern_table = {
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 0,
+            .poc_offset = 0,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 2,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 2,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 1,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 4,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 2,
+            .reference_index_in_table = 2,
+            .reference_modification = false,
+            .frame_num_offset = 2,
+            .poc_offset = 6,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = true,
+            .frame_num_offset = 2,
+            .poc_offset = 8,
+            .mark_as_reference = true,
+         },
+      },
+   },
+   /* table index 3: pattern for 4 temporal layers */
+   [3] = {
+      .pattern_size = 9,
+      .pattern_table = {
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 0,
+            .poc_offset = 0,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 3,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 2,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 2,
+            .reference_index_in_table = 0,
+            .reference_modification = false,
+            .frame_num_offset = 1,
+            .poc_offset = 4,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 3,
+            .reference_index_in_table = 2,
+            .reference_modification = false,
+            .frame_num_offset = 2,
+            .poc_offset = 6,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 1,
+            .reference_index_in_table = 0,
+            .reference_modification = true,
+            .frame_num_offset = 2,
+            .poc_offset = 8,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 3,
+            .reference_index_in_table = 4,
+            .reference_modification = false,
+            .frame_num_offset = 3,
+            .poc_offset = 10,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 2,
+            .reference_index_in_table = 4,
+            .reference_modification = false,
+            .frame_num_offset = 3,
+            .poc_offset = 12,
+            .mark_as_reference = true,
+         },
+         {
+            .temporal_id = 3,
+            .reference_index_in_table = 6,
+            .reference_modification = false,
+            .frame_num_offset = 4,
+            .poc_offset = 14,
+            .mark_as_reference = false,
+         },
+         {
+            .temporal_id = 0,
+            .reference_index_in_table = 0,
+            .reference_modification = true,
+            .frame_num_offset = 4,
+            .poc_offset = 16,
+            .mark_as_reference = true,
+         },
+      },
+   },
+};
+
+#endif // _RADEON_TEMPORAL_H
\ No newline at end of file
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.c b/src/gallium/drivers/radeon/radeon_vcn_enc.c
index dad9a1b8a43..1ab69ab1998 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.c
@@ -61,6 +61,8 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
enc->enc_pic.crop_top = 0;
enc->enc_pic.crop_bottom = (align(enc->base.height, 16) - enc->base.height) / 2;
}
+ enc->enc_pic.num_temporal_layers = pic->num_temporal_layers;
+ enc->enc_pic.temporal_id = 0;
enc->enc_pic.rc_layer_init.target_bit_rate = pic->rate_ctrl.target_bitrate;
enc->enc_pic.rc_layer_init.peak_bit_rate = pic->rate_ctrl.peak_bitrate;
enc->enc_pic.rc_layer_init.frame_rate_num = pic->rate_ctrl.frame_rate_num;
@@ -95,6 +97,7 @@ static void radeon_vcn_enc_get_param(struct radeon_encoder *enc, struct pipe_pic
default:
enc->enc_pic.rc_session_init.rate_control_method = RENCODE_RATE_CONTROL_METHOD_NONE;
}
+ enc->enc_pic.num_temporal_layers = pic->num_temporal_layers;
} else if (u_reduce_video_profile(picture->profile) == PIPE_VIDEO_FORMAT_HEVC) {
struct pipe_h265_enc_picture_desc *pic = (struct pipe_h265_enc_picture_desc *)picture;
enc->enc_pic.picture_type = pic->picture_type;
@@ -520,6 +523,7 @@ void radeon_enc_code_fixed_bits(struct radeon_encoder *enc, unsigned int value,
unsigned int num_bits)
{
unsigned int bits_to_pack = 0;
+ enc->bits_size += num_bits;
while (num_bits > 0) {
unsigned int value_to_pack = value & (0xffffffff >> (32 - num_bits));
@@ -552,6 +556,7 @@ void radeon_enc_reset(struct radeon_encoder *enc)
enc->bits_output = 0;
enc->num_zeros = 0;
enc->byte_index = 0;
+ enc->bits_size = 0;
}
void radeon_enc_byte_align(struct radeon_encoder *enc)
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc.h b/src/gallium/drivers/radeon/radeon_vcn_enc.h
index a11682fb3bd..b568cfd7afc 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc.h
@@ -71,6 +71,7 @@
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS 0x00000003
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX 0x00000004
#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_END_OF_SEQUENCE 0x00000005
+#define RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI 0x00000006
#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS 16
#define RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS 16
@@ -438,6 +439,9 @@ struct radeon_enc_pic {
unsigned bit_depth_chroma_minus8;
unsigned nal_unit_type;
unsigned max_num_merge_cand;
+ unsigned temporal_id;
+ unsigned num_temporal_layers;
+ unsigned temporal_layer_pattern_index;
bool not_referenced;
bool is_idr;
@@ -490,6 +494,8 @@ struct radeon_encoder {
void (*nalu_pps)(struct radeon_encoder *enc);
void (*nalu_vps)(struct radeon_encoder *enc);
void (*nalu_aud)(struct radeon_encoder *enc);
+ void (*nalu_sei)(struct radeon_encoder *enc);
+ void (*nalu_prefix)(struct radeon_encoder *enc);
void (*slice_header)(struct radeon_encoder *enc);
void (*ctx)(struct radeon_encoder *enc);
void (*bitstream)(struct radeon_encoder *enc);
@@ -537,6 +543,7 @@ struct radeon_encoder {
unsigned num_zeros;
unsigned byte_index;
unsigned bits_output;
+ unsigned bits_size;
uint32_t total_task_size;
uint32_t *p_task_size;
diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
index 6704b3075b2..5db22c1c977 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
@@ -28,6 +28,7 @@
#include "pipe/p_video_codec.h"
#include "radeon_vcn_enc.h"
#include "radeon_video.h"
+#include "radeon_temporal.h"
#include "si_pipe.h"
#include "util/u_video.h"
@@ -135,8 +136,8 @@ static void radeon_enc_session_init_hevc(struct radeon_encoder *enc)
static void radeon_enc_layer_control(struct radeon_encoder *enc)
{
- enc->enc_pic.layer_ctrl.max_num_temporal_layers = 1;
- enc->enc_pic.layer_ctrl.num_temporal_layers = 1;
+ enc->enc_pic.layer_ctrl.max_num_temporal_layers = enc->enc_pic.num_temporal_layers;
+ enc->enc_pic.layer_ctrl.num_temporal_layers = enc->enc_pic.num_temporal_layers;
RADEON_ENC_BEGIN(enc->cmd.layer_control);
RADEON_ENC_CS(enc->enc_pic.layer_ctrl.max_num_temporal_layers);
@@ -146,7 +147,7 @@ static void radeon_enc_layer_control(struct radeon_encoder *enc)
static void radeon_enc_layer_select(struct radeon_encoder *enc)
{
- enc->enc_pic.layer_sel.temporal_layer_index = 0;
+ enc->enc_pic.layer_sel.temporal_layer_index = enc->enc_pic.temporal_id;
RADEON_ENC_BEGIN(enc->cmd.layer_select);
RADEON_ENC_CS(enc->enc_pic.layer_sel.temporal_layer_index);
@@ -458,6 +459,168 @@ static void radeon_enc_nalu_sps_hevc(struct radeon_encoder *enc)
RADEON_ENC_END();
}
+static void radeon_enc_nalu_prefix(struct radeon_encoder *enc)
+{
+   /* Emit the H.264 prefix NAL unit (nal_unit_type 14) carrying this frame's temporal_id. */
+   uint nalRefIdc = enc->enc_pic.is_idr ? 3 : 0;
+   rvcn_temporal_layer_pattern_table_t table_info; /* entry N - 1 describes N temporal layers */
+   table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1];
+   /* Step 0 is only used when pic_order_cnt is 0; afterwards the index cycles 1..size-1. */
+   if (enc->enc_pic.pic_order_cnt == 0)
+      enc->enc_pic.temporal_layer_pattern_index = 0;
+   else if (enc->enc_pic.temporal_layer_pattern_index == (table_info.pattern_size - 1))
+      enc->enc_pic.temporal_layer_pattern_index = 1;
+   else
+      enc->enc_pic.temporal_layer_pattern_index++;
+
+   rvcn_temporal_layer_pattern_entry_t pattern =
+      table_info.pattern_table[enc->enc_pic.temporal_layer_pattern_index];
+
+   RADEON_ENC_BEGIN(enc->cmd.nalu);
+   RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PREFIX);
+   uint32_t *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++];
+   radeon_enc_reset(enc);
+   radeon_enc_set_emulation_prevention(enc, false);
+   radeon_enc_code_fixed_bits(enc, 0x00000001, 32); /* start code */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1); /* forbidden_zero_bit */
+   radeon_enc_code_fixed_bits(enc, nalRefIdc, 2); /* nal_ref_idc */
+   radeon_enc_code_fixed_bits(enc, 14, 5); /* nal_unit_type: prefix NAL unit */
+   radeon_enc_byte_align(enc);
+   radeon_enc_set_emulation_prevention(enc, true);
+   radeon_enc_code_fixed_bits(enc, 0x1, 1); /* svc_extension_flag */
+   radeon_enc_code_fixed_bits(enc, enc->enc_pic.is_idr ? 0x1 : 0x0, 1); /* idr_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 6); /* priority_id */
+   radeon_enc_code_fixed_bits(enc, 0x1, 1); /* no_inter_layer_pred_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 3); /* dependency_id */
+   radeon_enc_code_fixed_bits(enc, 0x0, 4); /* quality_id */
+   radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3); /* temporal_id */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1); /* use_ref_base_pic_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1); /* discardable_flag */
+   radeon_enc_code_fixed_bits(enc, 0x0, 1); /* output_flag */
+   radeon_enc_code_fixed_bits(enc, 0x3, 2); /* reserved_three_2bits */
+
+   if (nalRefIdc != 0) {
+      radeon_enc_code_fixed_bits(enc, 0x0, 1); /* store_ref_base_pic_flag */
+      radeon_enc_code_fixed_bits(enc, 0x0, 1); /* additional_prefix_nal_unit_extension_flag */
+      radeon_enc_code_fixed_bits(enc, 0x1, 1); /* rbsp_stop_one_bit */
+      radeon_enc_byte_align(enc);
+   }
+
+   /* The command dword reserved above receives the NALU size in bytes. */
+   radeon_enc_flush_headers(enc);
+   *size_in_bytes = (enc->bits_output + 7) / 8;
+   RADEON_ENC_END();
+}
+
+static void radeon_enc_nalu_sei(struct radeon_encoder *enc)
+{
+ unsigned number_of_layers;
+ /* Emits an SEI NAL unit (payload type 24) with one layer entry per temporal pattern step. */
+ rvcn_temporal_layer_pattern_table_t table_info; /* entry N - 1 describes N temporal layers */
+ table_info = rvcn_temporal_layer_pattern_tables[enc->enc_pic.layer_ctrl.num_temporal_layers - 1];
+ number_of_layers = table_info.pattern_size; /* entry count comes from the pattern length */
+
+ RADEON_ENC_BEGIN(enc->cmd.nalu);
+ RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SEI);
+ unsigned *size_in_bytes = &enc->cs.current.buf[enc->cs.current.cdw++]; /* filled with the NALU byte size at the end */
+ radeon_enc_reset(enc);
+ radeon_enc_set_emulation_prevention(enc, false);
+ /* Start code and the one-byte NAL header (0x6), written with emulation prevention off. */
+ radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
+ radeon_enc_code_fixed_bits(enc, 0x6, 8);
+ radeon_enc_byte_align(enc);
+
+ radeon_enc_set_emulation_prevention(enc, true);
+
+ /* save the current position for later */
+ unsigned position = enc->cs.current.cdw;
+ unsigned shifter = enc->shifter;
+ unsigned bits_in_shifter = enc->bits_in_shifter;
+ unsigned num_zeros = enc->num_zeros;
+ unsigned byte_index = enc->byte_index;
+ unsigned bits_output = enc->bits_output;
+ bool emulation_prevention = enc->emulation_prevention;
+
+ /* temporarily fill out the payload type and size (the size byte is rewritten below) */
+ radeon_enc_code_fixed_bits(enc, 24, 8);
+ radeon_enc_code_fixed_bits(enc, 0, 8);
+
+ unsigned svc_start_offset = enc->bits_size; /* bit count before the payload body, used to measure it */
+ /* Payload body: three 1-bit flags (all zero), then the entry count minus one as ue. */
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_ue(enc, number_of_layers - 1);
+ /* One entry per pattern step; only the entry index and temporal_id differ between entries. */
+ for(int i = 0; i < number_of_layers; i++ )
+ {
+ rvcn_temporal_layer_pattern_entry_t pattern = table_info.pattern_table[i];
+ radeon_enc_code_ue(enc, i); /* entry index */
+ radeon_enc_code_fixed_bits(enc, 0x0, 6);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 3);
+ radeon_enc_code_fixed_bits(enc, 0x0, 4);
+ radeon_enc_code_fixed_bits(enc, pattern.temporal_id, 3); /* temporal layer of this pattern step */
+ radeon_enc_code_fixed_bits(enc, 0x0, 1); /* first of 13 single-bit fields, all written as zero */
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_fixed_bits(enc, 0x0, 1);
+ radeon_enc_code_ue(enc, 0);
+ radeon_enc_code_ue(enc, 0);
+ }
+ unsigned svc_size = ((enc->bits_size - svc_start_offset) + 7) / 8; /* payload body length in bytes, rounded up */
+ unsigned aligned = (32 - enc->bits_in_shifter) % 8; /* presumably nonzero when not byte-aligned -- TODO confirm */
+ if (aligned > 0)
+ radeon_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_enc_byte_align(enc);
+ /* A closing 1 bit followed by byte alignment; presumably the RBSP trailing bits -- TODO confirm. */
+ radeon_enc_code_fixed_bits(enc, 0x1, 1);
+ radeon_enc_byte_align(enc);
+
+ /* store our current state, and go to the beginning to write the size */
+ unsigned position2 = enc->cs.current.cdw;
+ unsigned shifter2 = enc->shifter;
+ unsigned bits_in_shifter2 = enc->bits_in_shifter;
+ unsigned num_zeros2 = enc->num_zeros;
+ unsigned byte_index2 = enc->byte_index;
+ unsigned bits_output2 = enc->bits_output;
+ bool emulation_prevention2 = enc->emulation_prevention;
+
+ enc->cs.current.cdw = position;
+ enc->shifter = shifter;
+ enc->bits_in_shifter = bits_in_shifter;
+ enc->num_zeros = num_zeros;
+ enc->byte_index = byte_index;
+ enc->bits_output = bits_output;
+ enc->emulation_prevention = emulation_prevention;
+ /* Replace the two placeholder bytes with the payload type (24) and the measured size. */
+ radeon_enc_output_one_byte(enc, 24);
+ radeon_enc_output_one_byte(enc, svc_size);
+
+ /* restore our state */
+ enc->cs.current.cdw = position2;
+ enc->shifter = shifter2;
+ enc->bits_in_shifter = bits_in_shifter2;
+ enc->num_zeros = num_zeros2;
+ enc->byte_index = byte_index2;
+ enc->bits_output = bits_output2;
+ enc->emulation_prevention = emulation_prevention2;
+
+ radeon_enc_flush_headers(enc);
+
+ *size_in_bytes = (enc->bits_output + 7) / 8;
+ RADEON_ENC_END();
+}
+
static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
{
RADEON_ENC_BEGIN(enc->cmd.nalu);
@@ -1140,7 +1303,11 @@ static void begin(struct radeon_encoder *enc)
static void radeon_enc_headers_h264(struct radeon_encoder *enc)
{
+ if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
+ enc->nalu_prefix(enc);
if (enc->enc_pic.is_idr) {
+ if (enc->enc_pic.layer_ctrl.num_temporal_layers > 1)
+ enc->nalu_sei(enc);
enc->nalu_sps(enc);
enc->nalu_pps(enc);
}
@@ -1223,6 +1390,8 @@ void radeon_enc_1_2_init(struct radeon_encoder *enc)
enc->encode_params = radeon_enc_encode_params;
enc->encode_params_codec_spec = radeon_enc_encode_params_h264;
enc->encode_headers = radeon_enc_headers_h264;
+ enc->nalu_prefix = radeon_enc_nalu_prefix;
+ enc->nalu_sei = radeon_enc_nalu_sei;
} else if (u_reduce_video_profile(enc->base.profile) == PIPE_VIDEO_FORMAT_HEVC) {
enc->session_init = radeon_enc_session_init_hevc;
enc->slice_control = radeon_enc_slice_control_hevc;
More information about the mesa-commit
mailing list