[Libva] [PATCH V4: 01/11] Encoding: mbmv cost table related changes for ROI

Pengfei Qu Pengfei.Qu at intel.com
Fri Aug 12 08:41:33 UTC 2016


From: Zhao Yakui <yakui.zhao at intel.com>

v3: Remove the warning according to haihao's comments.

v2: merge three mbmv cost table related patches together.
Encoding: Abstract the calculation of the mbmv cost for a given QP into one function.
Encoding: Add one function that initializes the mbmv cost table for the supported QP range.
Encoding: Set up one cost_table surface state for the VME shader.
According to haihao's comments, free the pointer directly.

v1:
Align the format/style accordingly to avoid the warning.
Currently the length of the VME MEDIA_OBJECT command on Ivy can't exceed 8 dwords. If more parameters need to be passed, the buffer length should be enlarged.
Pass the Qp parameter into VME shader

Signed-off-by: Zhao Yakui <yakui.zhao at intel.com>
Signed-off-by: pjl <cecilia.peng at intel.com>
Signed-off-by: Pengfei Qu <Pengfei.Qu at intel.com>
---
 src/gen6_mfc_common.c | 160 ++++++++++++++++++++++++++++++++++++++++++--------
 src/gen6_vme.h        |  19 ++++++
 src/gen75_vme.c       |  35 +++++++++--
 src/gen7_vme.c        |  24 ++++++--
 src/gen8_vme.c        |  21 +++++--
 5 files changed, 219 insertions(+), 40 deletions(-)

diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index 59f7785..30cf7e5 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -764,30 +764,14 @@ static float intel_lambda_qp(int qp)
     return lambdaf;
 }
 
-
-void intel_vme_update_mbmv_cost(VADriverContextP ctx,
-                                struct encode_state *encode_state,
-                                struct intel_encoder_context *encoder_context)
+static
+void intel_h264_calc_mbmvcost_qp(int qp,
+                                 int slice_type,
+                                 uint8_t *vme_state_message)
 {
-    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
-    struct gen6_vme_context *vme_context = encoder_context->vme_context;
-    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
-    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
-    int qp, m_cost, j, mv_count;
-    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    int m_cost, j, mv_count;
     float   lambda, m_costf;
 
-    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
-
-    
-    if (encoder_context->rate_control_mode == VA_RC_CQP)
-        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
-    else
-        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
-
-    if (vme_state_message == NULL)
-        return;
-
     assert(qp <= QP_MAX); 
     lambda = intel_lambda_qp(qp);
 
@@ -880,6 +864,31 @@ void intel_vme_update_mbmv_cost(VADriverContextP ctx,
             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
         }
     }
+    return;
+}
+
+void intel_vme_update_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp;
+    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    else
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
+
+    if (vme_state_message == NULL)
+        return;
+
+    intel_h264_calc_mbmvcost_qp(qp, slice_type, vme_state_message);
 }
 
 void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
@@ -1023,6 +1032,16 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
     int mb_row;
     int s;
     unsigned int *command_ptr;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    else
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
 
 #define		USE_SCOREBOARD		(1 << 21)
  
@@ -1062,7 +1081,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                     }
                 }
 
-                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                 *command_ptr++ = kernel;
                 *command_ptr++ = USE_SCOREBOARD;
                 /* Indirect data */
@@ -1073,6 +1092,8 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                 /*inline data */
                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                /* QP occupies one byte */
+                *command_ptr++ = qp;
                 x_inner -= 2;
                 y_inner += 1;
             }
@@ -1106,7 +1127,7 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                     }
                 }
 
-                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
                 *command_ptr++ = kernel;
                 *command_ptr++ = USE_SCOREBOARD;
                 /* Indirect data */
@@ -1117,6 +1138,8 @@ gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                 /*inline data */
                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                /* qp occupies one byte */
+                *command_ptr++ = qp;
 
                 x_inner -= 2;
                 y_inner += 1;
@@ -1649,6 +1672,97 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
     return;
 }
 
+void
+intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp;
+    dri_bo *bo;
+    uint8_t *cost_table;
+
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+
+    if (slice_type == SLICE_TYPE_I) {
+        if (vme_context->i_qp_cost_table)
+            return;
+    } else if (slice_type == SLICE_TYPE_P) {
+        if (vme_context->p_qp_cost_table)
+            return;
+    } else {
+        if (vme_context->b_qp_cost_table)
+            return;
+    }
+
+    /* It is enough to allocate 32 bytes for each qp. */
+    bo = dri_bo_alloc(i965->intel.bufmgr,
+                      "cost_table ",
+                      QP_MAX * 32,
+                      64);
+
+    dri_bo_map(bo, 1);
+
+    cost_table = (uint8_t *)(bo->virtual);
+    for (qp = 0; qp < QP_MAX; qp++) {
+        intel_h264_calc_mbmvcost_qp(qp, slice_type, cost_table);
+        cost_table += 32;
+    }
+
+    dri_bo_unmap(bo);
+
+    if (slice_type == SLICE_TYPE_I) {
+        vme_context->i_qp_cost_table = bo;
+    } else if (slice_type == SLICE_TYPE_P) {
+        vme_context->p_qp_cost_table = bo;
+    } else {
+        vme_context->b_qp_cost_table = bo;
+    }
+
+    vme_context->cost_table_size = QP_MAX * 32;
+    return;
+}
+
+extern void
+intel_h264_setup_cost_surface(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context,
+                              unsigned long binding_table_offset,
+                              unsigned long surface_state_offset)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    dri_bo *bo;
+
+
+    struct i965_buffer_surface cost_table;
+
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+
+    if (slice_type == SLICE_TYPE_I) {
+        bo = vme_context->i_qp_cost_table;
+    } else if (slice_type == SLICE_TYPE_P) {
+        bo = vme_context->p_qp_cost_table;
+    } else {
+        bo = vme_context->b_qp_cost_table;
+    }
+
+    cost_table.bo = bo;
+    cost_table.num_blocks = QP_MAX;
+    cost_table.pitch = 16;
+    cost_table.size_block = 32;
+
+    vme_context->vme_buffer_suface_setup(ctx,
+                                         &vme_context->gpe_context,
+                                         &cost_table,
+                                         binding_table_offset,
+                                         surface_state_offset);
+}
+
 /* HEVC */
 static int
 hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
diff --git a/src/gen6_vme.h b/src/gen6_vme.h
index dc568ac..5031339 100644
--- a/src/gen6_vme.h
+++ b/src/gen6_vme.h
@@ -46,6 +46,8 @@
 
 #define GEN6_VME_KERNEL_NUMBER          3
 
+#define INTEL_COST_TABLE_OFFSET         8
+
 struct encode_state;
 struct intel_encoder_context;
 
@@ -91,6 +93,11 @@ struct gen6_vme_context
     struct object_surface *used_reference_objects[2];
     void *used_references[2];
     unsigned int ref_index_in_mb[2];
+
+    dri_bo *i_qp_cost_table;
+    dri_bo *p_qp_cost_table;
+    dri_bo *b_qp_cost_table;
+    int cost_table_size;
 };
 
 #define MPEG2_PIC_WIDTH_HEIGHT	30
@@ -200,4 +207,16 @@ void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
 extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
 
 extern Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
+
+extern void
+intel_h264_initialize_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context);
+
+extern void
+intel_h264_setup_cost_surface(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context,
+                              unsigned long binding_table_offset,
+                              unsigned long surface_state_offset);
 #endif /* _GEN6_VME_H_ */
diff --git a/src/gen75_vme.c b/src/gen75_vme.c
index a85d6b3..dcf170e 100644
--- a/src/gen75_vme.c
+++ b/src/gen75_vme.c
@@ -280,6 +280,9 @@ gen75_vme_surface_setup(VADriverContextP ctx,
     /* VME output */
     gen75_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
     gen75_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
+    intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
+                                 BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
+                                 SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
 
     return VA_STATUS_SUCCESS;
 }
@@ -488,6 +491,16 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
     int mb_x = 0, mb_y = 0;
     int i, s;
     unsigned int *command_ptr;
+    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
+    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
+    int qp;
+    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
+
+    if (encoder_context->rate_control_mode == VA_RC_CQP)
+        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+    else
+        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
 
     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
@@ -525,7 +538,7 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
 	    if ((i == mb_width) && slice_mb_x) {
 		mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
 	    }
-            *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+            *command_ptr++ = (CMD_MEDIA_OBJECT | (9 - 2));
             *command_ptr++ = kernel;
             *command_ptr++ = 0;
             *command_ptr++ = 0;
@@ -535,6 +548,8 @@ gen75_vme_fill_vme_batchbuffer(VADriverContextP ctx,
             /*inline data */
             *command_ptr++ = (mb_width << 16 | mb_y << 8 | mb_x);
             *command_ptr++ = ((encoder_context->quality_level << 24) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+            /* qp occupies one byte */
+            *command_ptr++ = qp;
 
             i += 1;
         } 
@@ -647,7 +662,8 @@ static VAStatus gen75_vme_prepare(VADriverContextP ctx,
     }	
 
     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
-    	
+    intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
+
     /*Setup all the memory object*/
     gen75_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
     gen75_vme_interface_setup(ctx, encode_state, encoder_context);
@@ -1002,10 +1018,17 @@ gen75_vme_context_destroy(void *context)
     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
     vme_context->vme_batchbuffer.bo = NULL;
 
-    if (vme_context->vme_state_message) {
-	free(vme_context->vme_state_message);
-	vme_context->vme_state_message = NULL;
-    }
+    free(vme_context->vme_state_message);
+    vme_context->vme_state_message = NULL;
+
+    dri_bo_unreference(vme_context->i_qp_cost_table);
+    vme_context->i_qp_cost_table = NULL;
+
+    dri_bo_unreference(vme_context->p_qp_cost_table);
+    vme_context->p_qp_cost_table = NULL;
+
+    dri_bo_unreference(vme_context->b_qp_cost_table);
+    vme_context->b_qp_cost_table = NULL;
 
     free(vme_context);
 }
diff --git a/src/gen7_vme.c b/src/gen7_vme.c
index 9da44d1..fb6358f 100644
--- a/src/gen7_vme.c
+++ b/src/gen7_vme.c
@@ -227,7 +227,7 @@ gen7_vme_output_vme_batchbuffer_setup(VADriverContextP ctx,
     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
 
     vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
-    vme_context->vme_batchbuffer.size_block = 32; /* 2 OWORDs */
+    vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
     vme_context->vme_batchbuffer.pitch = 16;
     vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr, 
                                                    "VME batchbuffer",
@@ -270,6 +270,9 @@ gen7_vme_surface_setup(VADriverContextP ctx,
     /* VME output */
     gen7_vme_output_buffer_setup(ctx, encode_state, 3, encoder_context);
     gen7_vme_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
+    intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
+                                  BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
+                                  SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
 
     return VA_STATUS_SUCCESS;
 }
@@ -669,8 +672,10 @@ static VAStatus gen7_vme_prepare(VADriverContextP ctx,
         (vme_context->h264_level != pSequenceParameter->level_idc)) {
 	vme_context->h264_level = pSequenceParameter->level_idc;	
     }
-	
+
     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
+    intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
+
     /*Setup all the memory object*/
     gen7_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
     gen7_vme_interface_setup(ctx, encode_state, encoder_context);
@@ -1018,10 +1023,17 @@ gen7_vme_context_destroy(void *context)
     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
     vme_context->vme_batchbuffer.bo = NULL;
 
-    if (vme_context->vme_state_message) {
-	free(vme_context->vme_state_message);
-	vme_context->vme_state_message = NULL;
-    }
+    free(vme_context->vme_state_message);
+    vme_context->vme_state_message = NULL;
+
+    dri_bo_unreference(vme_context->i_qp_cost_table);
+    vme_context->i_qp_cost_table = NULL;
+
+    dri_bo_unreference(vme_context->p_qp_cost_table);
+    vme_context->p_qp_cost_table = NULL;
+
+    dri_bo_unreference(vme_context->b_qp_cost_table);
+    vme_context->b_qp_cost_table = NULL;
 
     free(vme_context);
 }
diff --git a/src/gen8_vme.c b/src/gen8_vme.c
index edf6060..998f7d6 100644
--- a/src/gen8_vme.c
+++ b/src/gen8_vme.c
@@ -314,6 +314,9 @@ gen8_vme_surface_setup(VADriverContextP ctx,
     /* VME output */
     gen8_vme_avc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
     gen8_vme_avc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
+    intel_h264_setup_cost_surface(ctx, encode_state, encoder_context,
+                                  BINDING_TABLE_OFFSET(INTEL_COST_TABLE_OFFSET),
+                                  SURFACE_STATE_OFFSET(INTEL_COST_TABLE_OFFSET));
 
     return VA_STATUS_SUCCESS;
 }
@@ -721,7 +724,8 @@ static VAStatus gen8_vme_prepare(VADriverContextP ctx,
     }	
 
     intel_vme_update_mbmv_cost(ctx, encode_state, encoder_context);
-    	
+    intel_h264_initialize_mbmv_cost(ctx, encode_state, encoder_context);
+
     /*Setup all the memory object*/
     gen8_vme_surface_setup(ctx, encode_state, is_intra, encoder_context);
     gen8_vme_interface_setup(ctx, encode_state, encoder_context);
@@ -1287,10 +1291,17 @@ gen8_vme_context_destroy(void *context)
     dri_bo_unreference(vme_context->vme_batchbuffer.bo);
     vme_context->vme_batchbuffer.bo = NULL;
 
-    if (vme_context->vme_state_message) {
-	free(vme_context->vme_state_message);
-	vme_context->vme_state_message = NULL;
-    }
+    free(vme_context->vme_state_message);
+    vme_context->vme_state_message = NULL;
+
+    dri_bo_unreference(vme_context->i_qp_cost_table);
+    vme_context->i_qp_cost_table = NULL;
+
+    dri_bo_unreference(vme_context->p_qp_cost_table);
+    vme_context->p_qp_cost_table = NULL;
+
+    dri_bo_unreference(vme_context->b_qp_cost_table);
+    vme_context->b_qp_cost_table = NULL;
 
     free(vme_context);
 }
-- 
2.7.4



More information about the Libva mailing list