[Mesa-dev] [PATCH 10/18] radeon/vcn: add encode header implementations

Christian König ckoenig.leichtzumerken at gmail.com
Thu Nov 9 17:19:12 UTC 2017


Am 09.11.2017 um 17:57 schrieb Mark Thompson:
> On 08/11/17 18:08, boyuan.zhang at amd.com wrote:
>> From: Boyuan Zhang <boyuan.zhang at amd.com>
>>
>> Implement encoding of sps, pps, and silce headers using the newly added h.264
>> header coding descriptors functions based on h.264 specs.
>>
>> Signed-off-by: Boyuan Zhang <boyuan.zhang at amd.com>
>> ---
>>   src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c | 234 ++++++++++++++++++++++++
>>   1 file changed, 234 insertions(+)
> So, does this mean we could actually implement VAAPI encode properly with packed headers now rather than hard-coding all of this in the driver?

At least that's the intention here.

BTW: Boyuan we already have a bitstream write in picture_mpeg4.c in the 
VA state tracker.

Please unify all that stuff in a header in src/gallium/auxiliary/vl/.

Regards,
Christian.

>
>> diff --git a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>> index 5170c67..c6dc420 100644
>> --- a/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>> +++ b/src/gallium/drivers/radeon/radeon_vcn_enc_1_2.c
>> @@ -362,6 +362,233 @@ static void radeon_enc_quality_params(struct radeon_encoder *enc)
>>   	RADEON_ENC_END();
>>   }
>>   
>> +static void radeon_enc_nalu_sps(struct radeon_encoder *enc)
>> +{
>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_SPS);
>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +	radeon_enc_reset(enc);
>> +	radeon_enc_set_emulation_prevention(enc, false);
>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +	radeon_enc_code_fixed_bits(enc, 0x67, 8);
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_set_emulation_prevention(enc, true);
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.profile_idc, 8);
>> +	radeon_enc_code_fixed_bits(enc, 0x04, 8);
> Please always set constraint_set1_flag when profile_idc is 66.  There are enough actually-constrained-baseline-but-not-marked-as-such streams in the world already to catch out decoders without full baseline support (that is, all of them).
>
> Also, "constraint_set5_flag shall be equal to 0 when profile_idc is not equal to 77, 88, 100, or 118".
>
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.spec_misc.level_idc, 8);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +
>> +	if(enc->enc_pic.spec_misc.profile_idc == 100 || enc->enc_pic.spec_misc.profile_idc == 110 || enc->enc_pic.spec_misc.profile_idc == 122 ||
>> +			enc->enc_pic.spec_misc.profile_idc == 244 || enc->enc_pic.spec_misc.profile_idc == 44 || enc->enc_pic.spec_misc.profile_idc == 83 ||
>> +			enc->enc_pic.spec_misc.profile_idc == 86 || enc->enc_pic.spec_misc.profile_idc == 118 || enc->enc_pic.spec_misc.profile_idc == 128 ||
>> +			enc->enc_pic.spec_misc.profile_idc == 138) {
>> +		radeon_enc_code_ue(enc, 0x1);
>> +		radeon_enc_code_ue(enc, 0x0);
>> +		radeon_enc_code_ue(enc, 0x0);
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 2);
>> +	}
>> +
>> +	radeon_enc_code_ue(enc, 1);
>> +	radeon_enc_code_ue(enc, enc->enc_pic.pic_order_cnt_type);
>> +
>> +	if (enc->enc_pic.pic_order_cnt_type == 0)
>> +		radeon_enc_code_ue(enc, 1);
> POC type can be 1 as well, which has more associated syntax.
>
>> +
>> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.layer_ctrl.max_num_temporal_layers > 1 ? 0x1 : 0x0, 1);
>> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_width / 16 - 1));
>> +	radeon_enc_code_ue(enc, (enc->enc_pic.session_init.aligned_picture_height / 16 - 1));
>> +	bool progressive_only = true;
>> +	radeon_enc_code_fixed_bits(enc, progressive_only ? 0x1 : 0x0, 1);
>> +
>> +	if (!progressive_only)
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +
>> +	if ((enc->enc_pic.crop_left != 0) || (enc->enc_pic.crop_right != 0) ||
>> +			(enc->enc_pic.crop_top != 0) || (enc->enc_pic.crop_bottom != 0)) {
>> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_left);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_right);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_top);
>> +		radeon_enc_code_ue(enc, enc->enc_pic.crop_bottom);
>> +	} else
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> Including the tick rate would be nice even if none of the other metadata is available; I think you do know it here.
>
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_ue(enc, 0x0);
> Note that level limits imply the default value (2) of max_bytes_per_pic_denom.
>
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 16);
>> +	radeon_enc_code_ue(enc, 16);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, (enc->base.max_references + 1));
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
> Maybe separate the rbsp_trailing_bits to make it clearer?
>
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_flush_headers(enc);
>> +	*size_in_bytes = (enc->bits_output + 7) / 8;
>> +	RADEON_ENC_END();
>> +}
>> +
>> +static void radeon_enc_nalu_pps(struct radeon_encoder *enc)
>> +{
>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_DIRECT_OUTPUT_NALU);
>> +	RADEON_ENC_CS(RENCODE_DIRECT_OUTPUT_NALU_TYPE_PPS);
>> +	uint32_t *size_in_bytes = &enc->cs->current.buf[enc->cs->current.cdw++];
>> +	radeon_enc_reset(enc);
>> +	radeon_enc_set_emulation_prevention(enc, false);
>> +	radeon_enc_code_fixed_bits(enc, 0x00000001, 32);
>> +	radeon_enc_code_fixed_bits(enc, 0x68, 8);
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_set_emulation_prevention(enc, true);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, (enc->enc_pic.spec_misc.cabac_enable ? 0x1 : 0x0), 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 2);
>> +	radeon_enc_code_se(enc, 0x0);
>> +	radeon_enc_code_se(enc, 0x0);
>> +	radeon_enc_code_se(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	radeon_enc_code_fixed_bits(enc, 0x0, 1);
> No 8x8 transform?
>
>> +	radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +	radeon_enc_byte_align(enc);
>> +	radeon_enc_flush_headers(enc);
>> +	*size_in_bytes = (enc->bits_output + 7) / 8;
>> +	RADEON_ENC_END();
>> +}
>> +
>> +static void radeon_enc_slice_header(struct radeon_encoder *enc)
>> +{
>> +	uint32_t instruction[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
>> +	uint32_t num_bits[RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS] = {0};
>> +	unsigned int inst_index = 0;
>> +	unsigned int bit_index = 0;
>> +	unsigned int bits_copied = 0;
>> +	RADEON_ENC_BEGIN(RENCODE_IB_PARAM_SLICE_HEADER);
>> +	radeon_enc_reset(enc);
>> +	radeon_enc_set_emulation_prevention(enc, false);
>> +
>> +	if (enc->enc_pic.is_idr)
>> +		radeon_enc_code_fixed_bits(enc, 0x65, 8);
>> +	else if (enc->enc_pic.not_referenced)
>> +		radeon_enc_code_fixed_bits(enc, 0x01, 8);
>> +	else
>> +		radeon_enc_code_fixed_bits(enc, 0x41, 8);
>> +
>> +	radeon_enc_flush_headers(enc);
>> +	bit_index ++;
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
>> +	num_bits[inst_index] = enc->bits_output - bits_copied;
>> +	bits_copied = enc->bits_output;
>> +	inst_index++;
>> +
>> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_FIRST_MB;
>> +	inst_index++;
> Who fills first_mb_in_slice?  Is this dynamic because there might be multiple slices with boundaries unknown at this point?
>
>> +
>> +	switch(enc->enc_pic.picture_type) {
>> +		case PIPE_H264_ENC_PICTURE_TYPE_I:
>> +		case PIPE_H264_ENC_PICTURE_TYPE_IDR:
>> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);
>> +			break;
>> +		case PIPE_H264_ENC_PICTURE_TYPE_P:
>> +		case PIPE_H264_ENC_PICTURE_TYPE_SKIP:
>> +			radeon_enc_code_fixed_bits(enc, 0x06, 5);
>> +			break;
>> +		case PIPE_H264_ENC_PICTURE_TYPE_B:
>> +			radeon_enc_code_fixed_bits(enc, 0x07, 5);
>> +			break;
>> +		default:
>> +			radeon_enc_code_fixed_bits(enc, 0x08, 7);
>> +	}
> Can you explain what this is doing?  It's slice_type, which is normally unsigned exp-golomb, in some strange form?
>
>> +
>> +	radeon_enc_code_ue(enc, 0x0);
>> +	radeon_enc_code_fixed_bits(enc, enc->enc_pic.frame_num % 32, 5);
> If the user had log2_max_frame_num_minus4 == 0 then the truncation doesn't work.  Might be better to use the user value of log2_max_frame_num_minus4 in the SPS?
>
>> +
>> +	if (enc->enc_pic.h264_enc_params.input_picture_structure != RENCODE_H264_PICTURE_STRUCTURE_FRAME) {
>> +		radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.h264_enc_params.input_picture_structure == RENCODE_H264_PICTURE_STRUCTURE_BOTTOM_FIELD ? 1 : 0, 1);
>> +	}
>> +
>> +	if (enc->enc_pic.is_idr)
>> +		radeon_enc_code_ue(enc, 0x0);
> Please make idr_pic_id change between successive IDR pictures.  This is required if they are adjacent, and is generally a good idea even if they aren't.
>
> (Or, better yet, use the value from the user.)
>
>> +
>> +	if (enc->enc_pic.pic_order_cnt_type == 0)
>> +		radeon_enc_code_fixed_bits(enc, enc->enc_pic.pic_order_cnt % 32, 5);
> Same problem as frame_num with log2_max_pic_order_cnt_lsb_minus4.
>
>> +
> B-frames aren't actually supported yet, right?
>
> (direct_spatial_mv_pred_flag is missing here.)
>
>> +	if (enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) {
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +		if (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 > 1) {
>> +			radeon_enc_code_fixed_bits(enc, 0x1, 1);
>> +			radeon_enc_code_ue(enc, 0x0);
>> +			radeon_enc_code_ue(enc, (enc->enc_pic.frame_num - enc->enc_pic.ref_idx_l0 - 1));
> Does the rest of the code support using this to pick an earlier reference frame?
>
>> +			radeon_enc_code_ue(enc, 0x3);
>> +		} else
>> +			radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	}
>> +
>> +	if (enc->enc_pic.is_idr) {
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +	} else
>> +		radeon_enc_code_fixed_bits(enc, 0x0, 1);
>> +
>> +	if ((enc->enc_pic.picture_type != PIPE_H264_ENC_PICTURE_TYPE_IDR) && (enc->enc_pic.spec_misc.cabac_enable))
>> +		radeon_enc_code_ue(enc, enc->enc_pic.spec_misc.cabac_init_idc);
>> +
>> +	radeon_enc_flush_headers(enc);
>> +	bit_index ++;
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
>> +	num_bits[inst_index] = enc->bits_output - bits_copied;
>> +	bits_copied = enc->bits_output;
>> +	inst_index++;
>> +
>> +	instruction[inst_index] = RENCODE_H264_HEADER_INSTRUCTION_SLICE_QP_DELTA;
>> +	inst_index++;
> Who is going to decide the slice_qp_delta value to go here?
>
>> +
>> +	radeon_enc_code_ue(enc, enc->enc_pic.h264_deblock.disable_deblocking_filter_idc ? 1: 0);
> Can also be 2.
>
>> +
>> +	if (!enc->enc_pic.h264_deblock.disable_deblocking_filter_idc) {
>> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.alpha_c0_offset_div2);
>> +		radeon_enc_code_se(enc, enc->enc_pic.h264_deblock.beta_offset_div2);
>> +	}
>> +
>> +	radeon_enc_flush_headers(enc);
>> +	bit_index ++;
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_COPY;
>> +	num_bits[inst_index] = enc->bits_output - bits_copied;
>> +	bits_copied = enc->bits_output;
>> +	inst_index++;
>> +
>> +	instruction[inst_index] = RENCODE_HEADER_INSTRUCTION_END;
>> +
>> +	for (int i = bit_index; i < RENCODE_SLICE_HEADER_TEMPLATE_MAX_TEMPLATE_SIZE_IN_DWORDS; i++)
>> +		RADEON_ENC_CS(0x00000000);
>> +
>> +	for (int j = 0; j < RENCODE_SLICE_HEADER_TEMPLATE_MAX_NUM_INSTRUCTIONS; j++) {
>> +		RADEON_ENC_CS(instruction[j]);
>> +		RADEON_ENC_CS(num_bits[j]);
>> +	}
>> +
>> +	RADEON_ENC_END();
>> +}
>> +
>>   static void radeon_enc_ctx(struct radeon_encoder *enc)
>>   {
>>   	enc->enc_pic.ctx_buf.swizzle_mode = 0;
>> @@ -574,6 +801,13 @@ static void encode(struct radeon_encoder *enc)
>>   	radeon_enc_session_info(enc);
>>   	enc->total_task_size = 0;
>>   	radeon_enc_task_info(enc, enc->need_feedback);
>> +
>> +	if (enc->enc_pic.is_idr) {
>> +		radeon_enc_nalu_sps(enc);
>> +		radeon_enc_nalu_pps(enc);
>> +	}
>> +
>> +	radeon_enc_slice_header(enc);
>>   	radeon_enc_ctx(enc);
>>   	radeon_enc_bitstream(enc);
>>   	radeon_enc_feedback(enc);
>>
> Thanks,
>
> - Mark
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev




More information about the mesa-dev mailing list