[Mesa-dev] [PATCH v2 4/8] radeon/uvd:add uvd hevc enc hw ib implementation

Boyuan Zhang boyzhang at amd.com
Fri Feb 9 20:11:21 UTC 2018



On 2018-02-08 05:13 PM, Mark Thompson wrote:
> On 06/02/18 20:05, James Zhu wrote:
>> Implement required IBs for UVD HEVC encode.
>>
>> Signed-off-by: James Zhu <James.Zhu at amd.com>
>> ---
>>   src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c | 1115 +++++++++++++++++++++++
>>   1 file changed, 1115 insertions(+)
>>   create mode 100644 src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
>>
>> diff --git a/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
>> new file mode 100644
>> index 0000000..17a39c2
>> --- /dev/null
>> +++ b/src/gallium/drivers/radeon/radeon_uvd_enc_1_1.c
>> @@ -0,0 +1,1115 @@
>> +/**************************************************************************
>> + *
>> + * Copyright 2018 Advanced Micro Devices, Inc.
>> + * All Rights Reserved.
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
>> + * IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR
>> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + *
>> + **************************************************************************/
>> +
>> +#include <stdio.h>
>> +
>> +#include "pipe/p_video_codec.h"
>> +
>> +#include "util/u_video.h"
>> +#include "util/u_memory.h"
>> +
>> +#include "vl/vl_video_buffer.h"
>> +#include "radeonsi/si_pipe.h"
>> +#include "radeon_video.h"
>> +#include "radeon_uvd_enc.h"
>> +
>> +#define RADEON_ENC_CS(value) (enc->cs->current.buf[enc->cs->current.cdw++] = (value))
>> +#define RADEON_ENC_BEGIN(cmd) { \
>> +	uint32_t *begin = &enc->cs->current.buf[enc->cs->current.cdw++]; \
>> +RADEON_ENC_CS(cmd)
>> +#define RADEON_ENC_READ(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READ, (domain), (off))
>> +#define RADEON_ENC_WRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_WRITE, (domain), (off))
>> +#define RADEON_ENC_READWRITE(buf, domain, off) radeon_uvd_enc_add_buffer(enc, (buf), RADEON_USAGE_READWRITE, (domain), (off))
>> +#define RADEON_ENC_END() *begin = (&enc->cs->current.buf[enc->cs->current.cdw] - begin) * 4; \
>> +	enc->total_task_size += *begin;}
>> +
>> +static const unsigned profiles[7] = { 66, 77, 88, 100, 110, 122, 244 };
> This looks very suspicious in an H.265 file, because those are H.264 profile values...

Seems that this line is copied from VCN h.264 encode. Not being used 
anywhere, and should be removed.
@James, can you remove this line please?

>
>> +static const unsigned index_to_shifts[4] = { 24, 16, 8, 0 };
>> +
>> ...
>> +
>> +static void
>> +radeon_uvd_enc_session_init_hevc(struct radeon_uvd_encoder *enc)
>> +{
>> +   enc->enc_pic.session_init.aligned_picture_width =
>> +      align(enc->base.width, 64);
> Do you really need to pad width to 64 rather than the MinCbSizeY?

Yes, this is based on the spec as well as hardware requirement.

>
>> +   enc->enc_pic.session_init.aligned_picture_height =
>> +      align(enc->base.height, 16);
>> +   enc->enc_pic.session_init.padding_width =
>> +      enc->enc_pic.session_init.aligned_picture_width - enc->base.width;
>> +   enc->enc_pic.session_init.padding_height =
>> +      enc->enc_pic.session_init.aligned_picture_height - enc->base.height;
>> +   enc->enc_pic.session_init.pre_encode_mode = RENC_UVD_PREENCODE_MODE_NONE;
>> +   enc->enc_pic.session_init.pre_encode_chroma_enabled = false;
>> +
>> +   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SESSION_INIT);
>> +   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_width);
>> +   RADEON_ENC_CS(enc->enc_pic.session_init.aligned_picture_height);
>> +   RADEON_ENC_CS(enc->enc_pic.session_init.padding_width);
>> +   RADEON_ENC_CS(enc->enc_pic.session_init.padding_height);
>> +   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_mode);
>> +   RADEON_ENC_CS(enc->enc_pic.session_init.pre_encode_chroma_enabled);
>> +   RADEON_ENC_END();
>> +}
>> +
>> ...
>> +
>> +static void
>> +radeon_uvd_enc_nalu_sps_hevc(struct radeon_uvd_encoder *enc)
>> +{
>> +   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
>> +   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_SPS);
>> +   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +   int i;
>> +
>> +   radeon_uvd_enc_reset(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, false);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x4201, 16);
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, true);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.layer_ctrl.
>> +                                  max_num_temporal_layers - 1, 3);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
>> +
>> +   for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +
>> +   if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
>> +      for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
>> +         radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +   }
>> +
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_ue(enc, enc->enc_pic.chroma_format_idc);
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.session_init.aligned_picture_width);
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.session_init.aligned_picture_height);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
> Given that you've padded, conformance_window_flag should be set and the real size of the picture filled here.

  Agree, should fix it.
@James, can you use crop_left/right/top/bottom to determine 
conformance_window_flag, and add logics for those values please? Please 
refer to VCN H.264 logic.

>
>> +   radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_luma_minus8);
>> +   radeon_uvd_enc_code_ue(enc, enc->enc_pic.bit_depth_chroma_minus8);
>> +   radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_max_poc - 4);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_ue(enc, 1);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.hevc_spec_misc.
>> +                          log2_min_luma_coding_block_size_minus3);
>> +   //Only support CTBSize 64
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          6 -
>> +                          (enc->enc_pic.hevc_spec_misc.
>> +                           log2_min_luma_coding_block_size_minus3 + 3));
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.log2_min_transform_block_size_minus2);
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.
>> +                          log2_diff_max_min_transform_block_size);
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.max_transform_hierarchy_depth_inter);
>> +   radeon_uvd_enc_code_ue(enc,
>> +                          enc->enc_pic.max_transform_hierarchy_depth_intra);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  !enc->enc_pic.hevc_spec_misc.amp_disabled,
>> +                                  1);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.
>> +                                  sample_adaptive_offset_enabled_flag, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.pcm_enabled_flag, 1);
> Missing more syntax elements which should be present if pcm_enabled_flag is set?

Right. We should hardcode pcm_enabled_flag to 0, since temporarily we do 
not support pcm enable.
@James, can you replace  enc->enc_pic.pcm_enabled_flag with 0 please? 
Since we don't support it now.

>
>> +
>> +   radeon_uvd_enc_code_ue(enc, 1);
>> +   radeon_uvd_enc_code_ue(enc, 1);
>> +   radeon_uvd_enc_code_ue(enc, 0);
>> +   radeon_uvd_enc_code_ue(enc, 0);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.hevc_spec_misc.
>> +                                  strong_intra_smoothing_enabled, 1);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
> The missing timing information here is rather unfortunate.  You should know at least the framerate?

Yes, we are planning to add this feature in future patches.

>
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   *size_in_bytes = (enc->bits_output + 7) / 8;
>> +   RADEON_ENC_END();
>> +}
>> +
>> +static void
>> +radeon_uvd_enc_nalu_pps_hevc(struct radeon_uvd_encoder *enc)
>> +{
>> +   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
>> +   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_PPS);
>> +   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +   radeon_uvd_enc_reset(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, false);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x4401, 16);
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, true);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
> Concatenating elements is confusing.

Right, it seems a bit confusing here. What we really mean is:
output_flag_resent_flag: 0  u(1)
num_extra_slice_header_bits: 0  u(3)

>
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_se(enc, 0x0);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.hevc_spec_misc.
>> +                                  constrained_intra_pred_flag, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
> QP does not vary within slices?

We are working on CBR and VBR right now. So far only CQP is supported.
So cu_qp_delta_enabled case will be added in future patch.

>
>> +   radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cb_qp_offset);
>> +   radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.cr_qp_offset);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.hevc_deblock.
>> +                                  loop_filter_across_slices_enabled, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.hevc_deblock.
>> +                                  deblocking_filter_disabled, 1);
>> +
>> +   if (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled) {
>> +      radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.beta_offset_div2);
>> +      radeon_uvd_enc_code_se(enc, enc->enc_pic.hevc_deblock.tc_offset_div2);
>> +   }
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_ue(enc, enc->enc_pic.log2_parallel_merge_level_minus2);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   *size_in_bytes = (enc->bits_output + 7) / 8;
>> +   RADEON_ENC_END();
>> +}
>> +
>> +static void
>> +radeon_uvd_enc_nalu_vps_hevc(struct radeon_uvd_encoder *enc)
>> +{
>> +   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
>> +   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_VPS);
>> +   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +   int i;
>> +
>> +   radeon_uvd_enc_reset(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, false);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x4001, 16);
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, true);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 4);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x3, 2);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
>> +   radeon_uvd_enc_code_fixed_bits(enc,
>> +                                  enc->enc_pic.layer_ctrl.
>> +                                  max_num_temporal_layers - 1, 3);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0xffff, 16);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_tier_flag, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_profile_idc, 5);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x60000000, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0xb0000000, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 16);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.general_level_idc, 8);
>> +
>> +   for (i = 0; i < (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i++)
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +
>> +   if ((enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1) > 0) {
>> +      for (i = (enc->enc_pic.layer_ctrl.max_num_temporal_layers - 1); i < 8; i++)
>> +         radeon_uvd_enc_code_fixed_bits(enc, 0x0, 2);
>> +   }
> The PTL section is the same as the SPS, maybe move it to a separate function to avoid the duplication?

Agree, code optimization/re-organizing work is actually ongoing. But it 
will be a separate patch since it will affect VCN encode as well.

>
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_ue(enc, 0x1);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   *size_in_bytes = (enc->bits_output + 7) / 8;
>> +   RADEON_ENC_END();
>> +}
>> +
>> +static void
>> +radeon_uvd_enc_nalu_aud_hevc(struct radeon_uvd_encoder *enc)
>> +{
>> +   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_INSERT_NALU_BUFFER);
>> +   RADEON_ENC_CS(RENC_UVD_NALU_TYPE_AUD);
>> +   uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +   radeon_uvd_enc_reset(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, false);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 35, 6);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, true);
>> +   switch (enc->enc_pic.picture_type) {
>> +   case PIPE_H265_ENC_PICTURE_TYPE_I:
>> +   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x00, 3);
>> +      break;
>> +   case PIPE_H265_ENC_PICTURE_TYPE_P:
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x01, 3);
>> +      break;
>> +   case PIPE_H265_ENC_PICTURE_TYPE_B:
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
>> +      break;
>> +   default:
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x02, 3);
>> +   }
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +   radeon_uvd_enc_byte_align(enc);
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   *size_in_bytes = (enc->bits_output + 7) / 8;
>> +   RADEON_ENC_END();
>> +}
>> +
>> +static void
>> +radeon_uvd_enc_slice_header_hevc(struct radeon_uvd_encoder *enc)
>> +{
>> +   uint32_t instruction[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
>> +   uint32_t num_bits[RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = { 0 };
>> +   unsigned int inst_index = 0;
>> +   unsigned int bit_index = 0;
>> +   unsigned int bits_copied = 0;
>> +   RADEON_ENC_BEGIN(RENC_UVD_IB_PARAM_SLICE_HEADER);
>> +   radeon_uvd_enc_reset(enc);
>> +   radeon_uvd_enc_set_emulation_prevention(enc, false);
>> +
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +   radeon_uvd_enc_code_fixed_bits(enc, enc->enc_pic.nal_unit_type, 6);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x0, 6);
>> +   radeon_uvd_enc_code_fixed_bits(enc, 0x1, 3);
>> +
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   bit_index++;
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
>> +   num_bits[inst_index] = enc->bits_output - bits_copied;
>> +   bits_copied = enc->bits_output;
>> +   inst_index++;
>> +
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_FIRST_SLICE;
>> +   inst_index++;
>> +
>> +   if ((enc->enc_pic.nal_unit_type >= 16)
>> +       && (enc->enc_pic.nal_unit_type <= 23))
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +   radeon_uvd_enc_code_ue(enc, 0x0);
>> +
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   bit_index++;
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
>> +   num_bits[inst_index] = enc->bits_output - bits_copied;
>> +   bits_copied = enc->bits_output;
>> +   inst_index++;
>> +
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_SEGMENT;
>> +   inst_index++;
>> +
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_DEPENDENT_SLICE_END;
> Does this instruction somehow remove everything after this point in a dependent slice?  (You seem to still be writing the rest anyway.)

Yes, in dependent slice only.

>
>> +   inst_index++;
>> +
>> +   switch (enc->enc_pic.picture_type) {
>> +   case PIPE_H265_ENC_PICTURE_TYPE_I:
>> +   case PIPE_H265_ENC_PICTURE_TYPE_IDR:
>> +      radeon_uvd_enc_code_ue(enc, 0x2);
>> +      break;
>> +   case PIPE_H265_ENC_PICTURE_TYPE_P:
>> +   case PIPE_H265_ENC_PICTURE_TYPE_SKIP:
>> +      radeon_uvd_enc_code_ue(enc, 0x1);
>> +      break;
>> +   case PIPE_H265_ENC_PICTURE_TYPE_B:
>> +      radeon_uvd_enc_code_ue(enc, 0x0);
>> +      break;
>> +   default:
>> +      radeon_uvd_enc_code_ue(enc, 0x1);
> Does anything hit this default case?  If so then it should probably be explicitly present, if not then not including it at all or assert()ing might be clearer.

Seems only if data got corrupted, and yes, better to have assertion here.
@James, can you change it accordingly please?

>
>> +   }
>> +
>> +   if ((enc->enc_pic.nal_unit_type != 19)
>> +       && (enc->enc_pic.nal_unit_type != 20)) {
>> +      radeon_uvd_enc_code_fixed_bits(enc,
>> +                                     enc->enc_pic.frame_num %
>> +                                     enc->enc_pic.max_poc,
>> +                                     enc->enc_pic.log2_max_poc);
>> +      if (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P)
>> +         radeon_uvd_enc_code_fixed_bits(enc, 0x1, 1);
>> +      else {
>> +         radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +         radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
>> +         radeon_uvd_enc_code_ue(enc, 0x0);
>> +         radeon_uvd_enc_code_ue(enc, 0x0);
> No short-term references in the non-P case - this only supports I and P frames?

Yes, we only support I and P frames for now.

>
>> +      }
>> +   }
>> +
> Missing SAO flags (it was allowed in the SPS).

Right, should hardcode SAO in SPS to 0 since we don't support it right now.
@James, can you hardcode SAO flag in SPS to 0 and add a comment there 
please?

>
>> +   if ((enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_P) ||
>> +       (enc->enc_pic.picture_type == PIPE_H265_ENC_PICTURE_TYPE_B)) {
>> +      radeon_uvd_enc_code_fixed_bits(enc, 0x0, 1);
> Missing mvd_l1_zero_flag.

Unfortunately we don't have B frame support now.

>
>> +      radeon_uvd_enc_code_fixed_bits(enc,
>> +                                     enc->enc_pic.hevc_spec_misc.
>> +                                     cabac_init_flag, 1);
>> +      radeon_uvd_enc_code_ue(enc, 5 - enc->enc_pic.max_num_merge_cand);
>> +   }
>> +
>> +   radeon_uvd_enc_flush_headers(enc);
>> +   bit_index++;
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
>> +   num_bits[inst_index] = enc->bits_output - bits_copied;
>> +   bits_copied = enc->bits_output;
>> +   inst_index++;
>> +
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_SLICE_QP_DELTA;
>> +   inst_index++;
>> +
>> +   if ((enc->enc_pic.hevc_deblock.loop_filter_across_slices_enabled) &&
>> +       (!enc->enc_pic.hevc_deblock.deblocking_filter_disabled)) {
>> +      radeon_uvd_enc_code_fixed_bits(enc,
>> +                                     enc->enc_pic.hevc_deblock.
>> +                                     loop_filter_across_slices_enabled, 1);
>> +
>> +      radeon_uvd_enc_flush_headers(enc);
>> +      bit_index++;
>> +      instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_COPY;
>> +      num_bits[inst_index] = enc->bits_output - bits_copied;
>> +      bits_copied = enc->bits_output;
>> +      inst_index++;
>> +   }
>> +
>> +   instruction[inst_index] = RENC_UVD_HEADER_INSTRUCTION_END;
>> +
>> +   for (int i = bit_index;
>> +        i < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
>> +      RADEON_ENC_CS(0x00000000);
>> +
>> +   for (int j = 0; j < RENC_UVD_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS;
>> +        j++) {
>> +      RADEON_ENC_CS(instruction[j]);
>> +      RADEON_ENC_CS(num_bits[j]);
>> +   }
>> +
>> +   RADEON_ENC_END();
>> +}
>> +
>> ...
>> +
>> +static void
>> +begin(struct radeon_uvd_encoder *enc, struct pipe_picture_desc *pic)
>> +{
>> +   radeon_uvd_enc_session_info(enc);
>> +   enc->total_task_size = 0;
>> +   radeon_uvd_enc_task_info(enc, enc->need_feedback);
>> +   radeon_uvd_enc_op_init(enc);
>> +
>> +   radeon_uvd_enc_session_init_hevc(enc);
>> +   radeon_uvd_enc_slice_control_hevc(enc);
>> +   radeon_uvd_enc_spec_misc_hevc(enc, pic);
>> +   radeon_uvd_enc_deblocking_filter_hevc(enc, pic);
>> +
>> +   radeon_uvd_enc_layer_control(enc);
>> +   radeon_uvd_enc_rc_session_init(enc, pic);
>> +   radeon_uvd_enc_quality_params(enc);
>> +   radeon_uvd_enc_layer_select(enc);
>> +   radeon_uvd_enc_rc_layer_init(enc, pic);
>> +   radeon_uvd_enc_layer_select(enc);
>> +   radeon_uvd_enc_rc_per_pic(enc, pic);
>> +   radeon_uvd_enc_op_init_rc(enc);
>> +   radeon_uvd_enc_op_init_rc_vbv(enc);
>> +   *enc->p_task_size = (enc->total_task_size);
>> +}
>> +
>> +static void
>> +encode(struct radeon_uvd_encoder *enc)
>> +{
>> +   radeon_uvd_enc_session_info(enc);
>> +   enc->total_task_size = 0;
>> +   radeon_uvd_enc_task_info(enc, enc->need_feedback);
>> +
>> +   radeon_uvd_enc_nalu_aud_hevc(enc);
> Is it really appropriate to always generate AUDs?

Seems fine based on spec. Actually it should be requested by the 
applications based on their purposes. do you have any suggestion here?

>
>> +   if (enc->enc_pic.is_idr) {
>> +      radeon_uvd_enc_nalu_vps_hevc(enc);
>> +      radeon_uvd_enc_nalu_pps_hevc(enc);
>> +      radeon_uvd_enc_nalu_sps_hevc(enc);
>> +   }
>> +   radeon_uvd_enc_slice_header_hevc(enc);
>> +   radeon_uvd_enc_encode_params_hevc(enc);
>> +
>> +   radeon_uvd_enc_ctx(enc);
>> +   radeon_uvd_enc_bitstream(enc);
>> +   radeon_uvd_enc_feedback(enc);
>> +   radeon_uvd_enc_intra_refresh(enc);
>> +
>> +   radeon_uvd_enc_op_speed(enc);
>> +   radeon_uvd_enc_op_enc(enc);
>> +   *enc->p_task_size = (enc->total_task_size);
>> +}
>> +
>> +static void
>> +destroy(struct radeon_uvd_encoder *enc)
>> +{
>> +   radeon_uvd_enc_session_info(enc);
>> +   enc->total_task_size = 0;
>> +   radeon_uvd_enc_task_info(enc, enc->need_feedback);
>> +   radeon_uvd_enc_op_close(enc);
>> +   *enc->p_task_size = (enc->total_task_size);
>> +}
>> +
>> +void
>> +radeon_uvd_enc_1_1_init(struct radeon_uvd_encoder *enc)
>> +{
>> +   enc->begin = begin;
>> +   enc->encode = encode;
>> +   enc->destroy = destroy;
>> +}
>>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev



More information about the mesa-dev mailing list