[Libva] [Libva-intel-driver][PATCH] Support AVC VDEnc on KBL

Xiang, Haihao haihao.xiang at intel.com
Fri Dec 23 16:58:04 UTC 2016


I verified AVC VDEnc on KBL with the HuC loading patch from
https://patchwork.freedesktop.org/api/1.0/series/16584/revisions/1/mbox/

Signed-off-by: Xiang, Haihao <haihao.xiang at intel.com>
---
 src/gen9_vdenc.c       | 231 +++++++++++++++++++++++++++++++++++++++++++++----
 src/gen9_vdenc.h       |   7 +-
 src/i965_defines.h     |   1 +
 src/i965_device_info.c |   3 +
 4 files changed, 224 insertions(+), 18 deletions(-)

diff --git a/src/gen9_vdenc.c b/src/gen9_vdenc.c
index 6402d41..1913a67 100644
--- a/src/gen9_vdenc.c
+++ b/src/gen9_vdenc.c
@@ -41,6 +41,9 @@
 #include "intel_media.h"
 #include "gen9_vdenc.h"
 
+extern int
+intel_avc_enc_slice_type_fixup(int slice_type);
+
 static const uint8_t buf_rate_adj_tab_i_lowdelay[72] = {
     0,   0, -8, -12, -16, -20, -28, -36,
     0,   0, -4,  -8, -12, -16, -24, -32,
@@ -2050,6 +2053,7 @@ gen9_vdenc_init_vdenc_img_state(VADriverContextP ctx,
     }
 
     pstate->dw1.transform_8x8_flag = vdenc_context->transform_8x8_mode_enable;
+    pstate->dw1.extended_pak_obj_cmd_enable = !!vdenc_context->use_extended_pak_obj_cmd;
 
     pstate->dw3.picture_width = vdenc_context->frame_width_in_mbs;
 
@@ -2752,6 +2756,86 @@ gen9_vdenc_vdenc_walker_state(VADriverContextP ctx,
 }
 
 static void
+gen95_vdenc_vdecn_weihgtsoffsets_state(VADriverContextP ctx,
+                                       struct encode_state *encode_state,
+                                       struct intel_encoder_context *encoder_context,
+                                       VAEncSliceParameterBufferH264 *slice_param)
+{
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    /* Emit the 3-dword VDENC_WEIGHTSOFFSETS_STATE command carrying the first three L0 luma weight/offset pairs. */
+    BEGIN_BCS_BATCH(batch, 3);
+
+    OUT_BCS_BATCH(batch, VDENC_WEIGHTSOFFSETS_STATE | (3 - 2));
+    /* Each value occupies one byte slot; mask it so a negative weight/offset cannot spill into its neighbours. */
+    if (pic_param->pic_fields.bits.weighted_pred_flag == 1) {
+        OUT_BCS_BATCH(batch, ((uint32_t)(slice_param->luma_offset_l0[1] & 0xff) << 24 |
+                              (uint32_t)(slice_param->luma_weight_l0[1] & 0xff) << 16 |
+                              (uint32_t)(slice_param->luma_offset_l0[0] & 0xff) << 8 |
+                              (uint32_t)(slice_param->luma_weight_l0[0] & 0xff) << 0));
+        OUT_BCS_BATCH(batch, ((uint32_t)(slice_param->luma_offset_l0[2] & 0xff) << 8 |
+                              (uint32_t)(slice_param->luma_weight_l0[2] & 0xff) << 0));
+    } else {
+        /* weighted_pred_flag not set: program weight 1 / offset 0 in every slot. */
+        OUT_BCS_BATCH(batch, (0 << 24 |
+                              1 << 16 |
+                              0 << 8 |
+                              1 << 0));
+        OUT_BCS_BATCH(batch, (0 << 8 |
+                              1 << 0));
+    }
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
+gen95_vdenc_vdenc_walker_state(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context,
+                               VAEncSliceParameterBufferH264 *slice_param,
+                               VAEncSliceParameterBufferH264 *next_slice_param)
+{
+    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
+    int luma_log2_weight_denom, weighted_pred_idc;
+
+    /* Split the MB address into (column, row); the row is address / width-in-MBs, not / height-in-MBs. */
+    slice_hor_pos = slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
+    slice_ver_pos = slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
+    /* Without a following slice, the "next" position is column 0 of the row just past the frame. */
+    if (next_slice_param) {
+        next_slice_hor_pos = next_slice_param->macroblock_address % vdenc_context->frame_width_in_mbs;
+        next_slice_ver_pos = next_slice_param->macroblock_address / vdenc_context->frame_width_in_mbs;
+    } else {
+        next_slice_hor_pos = 0;
+        next_slice_ver_pos = vdenc_context->frame_height_in_mbs;
+    }
+    /* Only P slices honour weighted_pred_flag here; every other slice type gets a zero denominator. */
+    if (slice_type == SLICE_TYPE_P)
+        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
+    else
+        weighted_pred_idc = 0;
+
+    if (weighted_pred_idc == 1)
+        luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
+    else
+        luma_log2_weight_denom = 0;
+    /* Emit the 4-dword VDENC_WALKER_STATE: header, this slice's start, next slice's start, weight denom. */
+    BEGIN_BCS_BATCH(batch, 4);
+    OUT_BCS_BATCH(batch, VDENC_WALKER_STATE | (4 - 2));
+    OUT_BCS_BATCH(batch, (slice_hor_pos << 16 |
+                          slice_ver_pos));
+    OUT_BCS_BATCH(batch, (next_slice_hor_pos << 16 |
+                          next_slice_ver_pos));
+    OUT_BCS_BATCH(batch, luma_log2_weight_denom);
+
+    ADVANCE_BCS_BATCH(batch);
+}
+
+static void
 gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
@@ -2766,9 +2850,6 @@ gen9_vdenc_vdenc_img_state(VADriverContextP ctx,
     ADVANCE_BCS_BATCH(batch);
 }
 
-extern int
-intel_avc_enc_slice_type_fixup(int slice_type);
-
 static void
 gen9_vdenc_mfx_avc_insert_object(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context,
@@ -2802,7 +2883,8 @@ static void
 gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                             struct encode_state *encode_state,
                                             struct intel_encoder_context *encoder_context,
-                                            int slice_index)
+                                            int slice_index,
+                                            unsigned int insert_one_zero_byte)
 {
     VAEncPackedHeaderParameterBuffer *param = NULL;
     unsigned int length_in_bits;
@@ -2846,13 +2928,28 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                          0,
                                          !param->has_emulation_bytes,
                                          0);
+
+        insert_one_zero_byte = 0;
+    }
+
+    /* Insert one zero byte before the slice header if no other NAL unit has been inserted; required on KBL */
+    if (insert_one_zero_byte) {
+        unsigned int insert_data[] = { 0, };
+
+        gen9_vdenc_mfx_avc_insert_object(ctx,
+                                         encoder_context,
+                                         insert_data,
+                                         1,
+                                         8,
+                                         1,
+                                         0, 0, 0, 0);
     }
 
     if (slice_header_index == -1) {
         VAEncSequenceParameterBufferH264 *seq_param = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
         VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
         VAEncSliceParameterBufferH264 *slice_params = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
-        unsigned char *slice_header = NULL;
+        unsigned char *slice_header = NULL, *slice_header1 = NULL;
         int slice_header_length_in_bits = 0;
 
         /* No slice header data is passed. And the driver needs to generate it */
@@ -2861,9 +2958,17 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
                                                              pic_param,
                                                              slice_params,
                                                              &slice_header);
+
+        slice_header1 = slice_header;
+
+        if (insert_one_zero_byte) {
+            slice_header1 += 1;
+            slice_header_length_in_bits -= 8;
+        }
+
         gen9_vdenc_mfx_avc_insert_object(ctx,
                                          encoder_context,
-                                         (unsigned int *)slice_header,
+                                         (unsigned int *)slice_header1,
                                          ALIGN(slice_header_length_in_bits, 32) >> 5,
                                          slice_header_length_in_bits & 0x1f,
                                          5,  /* first 5 bytes are start code + nal unit type */
@@ -2873,20 +2978,31 @@ gen9_vdenc_mfx_avc_insert_slice_packed_data(VADriverContextP ctx,
         free(slice_header);
     } else {
         unsigned int skip_emul_byte_cnt;
+        unsigned char *slice_header1 = NULL;
 
         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
 
         param = (VAEncPackedHeaderParameterBuffer *)(encode_state->packed_header_params_ext[slice_header_index]->buffer);
         length_in_bits = param->bit_length;
 
+        slice_header1 = (unsigned char *)header_data;
+
+        if (insert_one_zero_byte) {
+            slice_header1 += 1;
+            length_in_bits -= 8;
+        }
+
         /* as the slice header is the last header data for one slice,
          * the last header flag is set to one.
          */
         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
 
+        if (insert_one_zero_byte)
+            skip_emul_byte_cnt -= 1;
+
         gen9_vdenc_mfx_avc_insert_object(ctx,
                                          encoder_context,
-                                         header_data,
+                                         (unsigned int *)slice_header1,
                                          ALIGN(length_in_bits, 32) >> 5,
                                          length_in_bits & 0x1f,
                                          skip_emul_byte_cnt,
@@ -2910,8 +3026,11 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
     unsigned int internal_rate_mode = vdenc_context->internal_rate_mode;
     unsigned int skip_emul_byte_cnt;
+    unsigned int insert_one_zero_byte = 0;
 
     if (slice_index == 0) {
+        insert_one_zero_byte = 1;
+
         if (encode_state->packed_header_data[idx]) {
             VAEncPackedHeaderParameterBuffer *param = NULL;
             unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
@@ -2932,6 +3051,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                              0,
                                              !param->has_emulation_bytes,
                                              0);
+
+            insert_one_zero_byte = 0;
         }
 
         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
@@ -2957,6 +3078,8 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                              0,
                                              !param->has_emulation_bytes,
                                              0);
+
+            insert_one_zero_byte = 0;
         }
 
         idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
@@ -2981,15 +3104,21 @@ gen9_vdenc_mfx_avc_inset_headers(VADriverContextP ctx,
                                              0,
                                              !param->has_emulation_bytes,
                                              0);
+
+            insert_one_zero_byte = 0;
         } else if (internal_rate_mode == I965_BRC_CBR) {
             /* TODO: insert others */
         }
     }
 
+    if (vdenc_context->is_frame_level_vdenc)
+        insert_one_zero_byte = 0;
+
     gen9_vdenc_mfx_avc_insert_slice_packed_data(ctx,
                                                 encode_state,
                                                 encoder_context,
-                                                slice_index);
+                                                slice_index,
+                                                insert_one_zero_byte);
 }
 
 static void
@@ -3233,6 +3362,7 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                 VAEncSliceParameterBufferH264 *next_slice_param,
                                 int slice_index)
 {
+    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
 
     gen9_vdenc_mfx_avc_ref_idx_state(ctx, encode_state, encoder_context, slice_param);
@@ -3252,6 +3382,18 @@ gen9_vdenc_mfx_avc_single_slice(VADriverContextP ctx,
                                      encoder_context,
                                      slice_param,
                                      slice_index);
+
+    if (!vdenc_context->is_frame_level_vdenc) {
+        gen95_vdenc_vdecn_weihgtsoffsets_state(ctx,
+                                               encode_state,
+                                               encoder_context,
+                                               slice_param);
+        gen95_vdenc_vdenc_walker_state(ctx,
+                                       encode_state,
+                                       encoder_context,
+                                       slice_param,
+                                       next_slice_param);
+    }
 }
 
 static void
@@ -3259,12 +3401,12 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
 {
+    struct gen9_vdenc_context *vdenc_context = encoder_context->mfc_context;
     struct intel_batchbuffer *batch = encoder_context->base.batch;
     struct gpe_mi_flush_dw_parameter mi_flush_dw_params;
     VAEncSliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
     int i, j;
     int slice_index = 0;
-    int is_frame_level_vdenc = 1;       /* TODO: check it for SKL */
     int has_tail = 0;                   /* TODO: check it later */
 
     for (j = 0; j < encode_state->num_slice_params_ext; j++) {
@@ -3287,22 +3429,47 @@ gen9_vdenc_mfx_vdenc_avc_slices(VADriverContextP ctx,
                                             slice_param,
                                             next_slice_param,
                                             slice_index);
-            slice_param++;
-            slice_index++;
 
-            if (is_frame_level_vdenc)
+            if (vdenc_context->is_frame_level_vdenc)
                 break;
             else {
-                /* TODO: remove assert(0) and add other commands here */
-                assert(0);
+                struct vd_pipeline_flush_parameter pipeline_flush_params;
+                int insert_mi_flush;
+
+                memset(&pipeline_flush_params, 0, sizeof(pipeline_flush_params));
+
+                if (next_slice_group_param) {
+                    pipeline_flush_params.mfx_pipeline_done = 0;
+                    insert_mi_flush = 1;
+                } else if (i < encode_state->slice_params_ext[j]->num_elements - 1) {
+                    pipeline_flush_params.mfx_pipeline_done = 0;
+                    insert_mi_flush = 1;
+                } else {
+                    pipeline_flush_params.mfx_pipeline_done = !has_tail;
+                    insert_mi_flush = 0;
+                }
+
+                pipeline_flush_params.vdenc_pipeline_done = 1;
+                pipeline_flush_params.vdenc_pipeline_command_flush = 1;
+                pipeline_flush_params.vd_command_message_parser_done = 1;
+                gen9_vdenc_vd_pipeline_flush(ctx, encoder_context, &pipeline_flush_params);
+
+                if (insert_mi_flush) {
+                    memset(&mi_flush_dw_params, 0, sizeof(mi_flush_dw_params));
+                    mi_flush_dw_params.video_pipeline_cache_invalidate = 1;
+                    gen8_gpe_mi_flush_dw(ctx, batch, &mi_flush_dw_params);
+                }
             }
+
+            slice_param++;
+            slice_index++;
         }
 
-        if (is_frame_level_vdenc)
+        if (vdenc_context->is_frame_level_vdenc)
             break;
     }
 
-    if (is_frame_level_vdenc) {
+    if (vdenc_context->is_frame_level_vdenc) {
         struct vd_pipeline_flush_parameter pipeline_flush_params;
 
         gen9_vdenc_vdenc_walker_state(ctx, encode_state, encoder_context);
@@ -3653,6 +3820,36 @@ gen9_vdenc_allocate_resources(VADriverContextP ctx,
                                 "HuC Status buffer");
 }
 
+static void
+gen9_vdenc_hw_interfaces_init(VADriverContextP ctx,
+                              struct intel_encoder_context *encoder_context,
+                              struct gen9_vdenc_context *vdenc_context)
+{
+    vdenc_context->is_frame_level_vdenc = 1; /* non-KBL setup: take the frame-level branch of the slice loop */
+}
+
+static void
+gen95_vdenc_hw_interfaces_init(VADriverContextP ctx,
+                               struct intel_encoder_context *encoder_context,
+                               struct gen9_vdenc_context *vdenc_context)
+{
+    vdenc_context->use_extended_pak_obj_cmd = 1; /* KBL setup; is_frame_level_vdenc is not written here */
+}
+
+static void
+vdenc_hw_interfaces_init(VADriverContextP ctx,
+                         struct intel_encoder_context *encoder_context,
+                         struct gen9_vdenc_context *vdenc_context)
+{
+    struct i965_driver_data *drv = i965_driver_data(ctx);
+    /* Non-KBL devices get the gen9 setup and return; KBL falls through to the gen9.5 setup. */
+    if (!IS_KBL(drv->intel.device_info)) {
+        gen9_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
+        return;
+    }
+    gen95_vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
+}
+
 static VAStatus
 gen9_vdenc_context_get_status(VADriverContextP ctx,
                               struct intel_encoder_context *encoder_context,
@@ -3680,7 +3877,9 @@ gen9_vdenc_context_init(VADriverContextP ctx, struct intel_encoder_context *enco
     vdenc_context->num_passes = 1;
     vdenc_context->vdenc_streamin_enable = 0;
     vdenc_context->vdenc_pak_threshold_check_enable = 0;
+    vdenc_context->is_frame_level_vdenc = 0;
 
+    vdenc_hw_interfaces_init(ctx, encoder_context, vdenc_context);
     gen9_vdenc_allocate_resources(ctx, encoder_context, vdenc_context);
 
     encoder_context->mfc_context = vdenc_context;
diff --git a/src/gen9_vdenc.h b/src/gen9_vdenc.h
index 41e4362..ad0f2ae 100644
--- a/src/gen9_vdenc.h
+++ b/src/gen9_vdenc.h
@@ -304,7 +304,8 @@ struct gen9_vdenc_img_state
         uint32_t bidirectional_mix_disable:1;
         uint32_t pad1:1;
         uint32_t time_budget_overflow_check:1;
-        uint32_t pad2:2;
+        uint32_t pad2:1;
+        uint32_t extended_pak_obj_cmd_enable:1;
         uint32_t transform_8x8_flag:1;
         uint32_t vdenc_l1_cache_priority:2;
         uint32_t pad3:22;
@@ -776,7 +777,9 @@ struct gen9_vdenc_context
     uint32_t    frame_type:2;
 
     uint32_t    mb_brc_enabled:1;
-    uint32_t    pad0:31;
+    uint32_t    is_frame_level_vdenc:1;
+    uint32_t    use_extended_pak_obj_cmd:1;
+    uint32_t    pad0:29;
 
     struct i965_gpe_resource brc_init_reset_dmem_res;
     struct i965_gpe_resource brc_history_buffer_res;
diff --git a/src/i965_defines.h b/src/i965_defines.h
index f86ac8e..941ad4e 100755
--- a/src/i965_defines.h
+++ b/src/i965_defines.h
@@ -959,6 +959,7 @@
 #define VDENC_IMG_STATE                 VDENC(1, 0, 5)
 #define VDENC_CONST_QPT_STATE           VDENC(1, 0, 6)
 #define VDENC_WALKER_STATE              VDENC(1, 0, 7)
+#define VDENC_WEIGHTSOFFSETS_STATE      VDENC(1, 0, 8)
 
 #define VDENC_CODEC_AVC                 2
 
diff --git a/src/i965_device_info.c b/src/i965_device_info.c
index 73602ae..0fc8930 100644
--- a/src/i965_device_info.c
+++ b/src/i965_device_info.c
@@ -481,6 +481,9 @@ static struct hw_codec_info kbl_hw_codec_info = {
     .has_vp9_decoding = 1,
     .has_vpp_p010 = 1,
     .has_vp9_encoding = 1,
+    .has_lp_h264_encoding = 1,
+
+    .lp_h264_brc_mode = VA_RC_CQP,
 
     .num_filters = 5,
     .filters = {
-- 
1.9.1



More information about the Libva mailing list