[Libva] [PATCH V2 5/6] HEVC ENC:Added VME pipeline

Qu,Pengfei Pengfei.Qu at intel.com
Tue Jan 6 00:57:23 PST 2015


Signed-off-by: Qu,Pengfei <Pengfei.Qu at intel.com>
---
 src/gen6_mfc_common.c | 222 +++++++++++++++++++++++
 src/gen6_vme.h        |  19 ++
 src/gen9_vme.c        | 484 ++++++++++++++++++++++++++++++++++++++++++++++++++
 src/i965_encoder.c    |  74 +++++++-
 4 files changed, 798 insertions(+), 1 deletion(-)

diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c
index fe41dac..6a5f720 100644
--- a/src/gen6_mfc_common.c
+++ b/src/gen6_mfc_common.c
@@ -1652,3 +1652,225 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
     return;
 }
 
+/* HEVC: return the index of the ref_list entry temporally closest to curr_pic
+ * on one side (dir == 0: smaller POC, dir != 0: larger POC), or -1 if none.
+ * The scan stops at the first invalid entry. */
+static int
+hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
+                           VAPictureHEVC *ref_list,
+                           int num_pictures,
+                           int dir)
+{
+    int best_idx = -1;
+    int best_dist = 0x7FFFFFFF;
+    int idx = 0;
+
+    while (idx < num_pictures) {
+        const VAPictureHEVC *ref = &ref_list[idx];
+        int dist;
+
+        if ((ref->flags & VA_PICTURE_HEVC_INVALID) || ref->picture_id == VA_INVALID_SURFACE)
+            break;
+        dist = curr_pic->pic_order_cnt - ref->pic_order_cnt;
+        dist = dir ? -dist : dist;
+        if (dist > 0 && dist < best_dist) {
+            best_dist = dist;
+            best_idx = idx;
+        }
+        idx++;
+    }
+    return best_idx;
+}
+/*
+ * Choose the reference picture for list_index (0 = list0, otherwise list1),
+ * bind it at surface_index through vme_source_surface_state and record it in
+ * vme_context (used_references, used_reference_objects, ref_index_in_mb).
+ * With one active reference, entry 0 is used, falling back to
+ * reference_objects[list_index]; otherwise the temporally closest entry is
+ * used.  When no surface with a bo is found the three fields are cleared.
+ */
+void
+intel_hevc_vme_reference_state(VADriverContextP ctx,
+                               struct encode_state *encode_state,
+                               struct intel_encoder_context *encoder_context,
+                               int list_index,
+                               int surface_index,
+                               void (* vme_source_surface_state)(
+                                   VADriverContextP ctx,
+                                   int index,
+                                   struct object_surface *obj_surface,
+                                   struct intel_encoder_context *encoder_context))
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct object_surface *obj_surface = NULL;
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    VASurfaceID ref_surface_id;
+    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    int max_num_references;
+    VAPictureHEVC *ref_list;
+    int ref_idx;
+
+    if (list_index == 0) {
+        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
+        ref_list = slice_param->ref_pic_list0;
+    } else {
+        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
+        ref_list = slice_param->ref_pic_list1;
+    }
+
+    if (max_num_references == 1) {
+        ref_idx = 0;
+        ref_surface_id = ref_list[0].picture_id;
+        vme_context->used_references[list_index] = &ref_list[0];
+
+        if (ref_surface_id != VA_INVALID_SURFACE)
+            obj_surface = SURFACE(ref_surface_id);
+
+        if (!obj_surface || !obj_surface->bo) {
+            /* fall back to the reference object tracked in encode_state */
+            obj_surface = encode_state->reference_objects[list_index];
+            vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
+        }
+    } else {
+        /* select the reference frame in temporal space */
+        ref_idx = hevc_temporal_find_surface(&pic_param->decoded_curr_pic, ref_list,
+                                             max_num_references, list_index == 1);
+
+        /* -1 means no usable entry: ref_list must not be indexed with it */
+        if (ref_idx >= 0) {
+            ref_surface_id = ref_list[ref_idx].picture_id;
+            if (ref_surface_id != VA_INVALID_SURFACE)
+                obj_surface = SURFACE(ref_surface_id);
+            vme_context->used_references[list_index] = &ref_list[ref_idx];
+        }
+    }
+
+    if (obj_surface && obj_surface->bo) {
+        assert(ref_idx >= 0);
+        vme_context->used_reference_objects[list_index] = obj_surface;
+        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
+        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
+                ref_idx << 16 |
+                ref_idx <<  8 |
+                ref_idx);
+    } else {
+        vme_context->used_reference_objects[list_index] = NULL;
+        vme_context->used_references[list_index] = NULL;
+        vme_context->ref_index_in_mb[list_index] = 0;
+    }
+}
+
+/*
+ * Fill the mode and MV cost bytes of vme_state_message from the first slice's
+ * type and QP (pic_init_qp + slice_qp_delta); a rate-control QP is not used
+ * yet.  Does nothing when vme_context has no state message.
+ */
+void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    int qp, m_cost, j, mv_count;
+    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
+    float   lambda, m_costf;
+
+    /* here no SI SP slice for HEVC, do not need slice fixup */
+    int slice_type = slice_param->slice_type;
+
+    /* to do for CBR: take the QP from the bit rate control context instead */
+    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
+
+    if (vme_state_message == NULL)
+        return;
+
+    assert(qp <= QP_MAX);
+    lambda = intel_lambda_qp(qp);
+    if (slice_type == SLICE_TYPE_I) {
+        vme_state_message[MODE_INTRA_16X16] = 0;
+        m_cost = lambda * 4;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 16;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 3;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+    } else {
+        m_cost = 0;
+        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
+        for (j = 1; j < 3; j++) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+        mv_count = 3;
+        for (j = 4; j <= 64; j *= 2) {
+            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
+            m_cost = (int)m_costf;
+            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
+            mv_count++;
+        }
+
+        if (qp <= 25) {
+            vme_state_message[MODE_INTRA_16X16] = 0x4a;
+            vme_state_message[MODE_INTRA_8X8] = 0x4a;
+            vme_state_message[MODE_INTRA_4X4] = 0x4a;
+            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
+            vme_state_message[MODE_INTER_16X16] = 0x4a;
+            vme_state_message[MODE_INTER_16X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X8] = 0x4a;
+            vme_state_message[MODE_INTER_8X4] = 0x4a;
+            vme_state_message[MODE_INTER_4X4] = 0x4a;
+            vme_state_message[MODE_INTER_BWD] = 0x2a;
+            return;
+        }
+        m_cost = lambda * 10;
+        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 14;
+        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
+        m_cost = lambda * 24;
+        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
+        m_costf = lambda * 3.5;
+        m_cost = m_costf;
+        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
+        if (slice_type == SLICE_TYPE_P) {
+            m_costf = lambda * 2.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 4;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 1.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 3;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+            /* BWD is not used in P-frame */
+            vme_state_message[MODE_INTER_BWD] = 0;
+        } else {
+            m_costf = lambda * 2.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 5.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
+            m_costf = lambda * 3.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 5.0;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 6.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
+            m_costf = lambda * 1.5;
+            m_cost = m_costf;
+            vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
+        }
+    }
+}
diff --git a/src/gen6_vme.h b/src/gen6_vme.h
index bc62c14..c9d6b48 100644
--- a/src/gen6_vme.h
+++ b/src/gen6_vme.h
@@ -83,6 +83,7 @@ struct gen6_vme_context
                                            unsigned long surface_state_offset);
     void *vme_state_message;
     unsigned int h264_level;
+    unsigned int hevc_level;
     unsigned int video_coding_type;
     unsigned int vme_kernel_sum;
     unsigned int mpeg2_level;
@@ -174,6 +175,24 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
                                   struct object_surface *obj_surface,
                                   struct intel_encoder_context *encoder_context));
 
+/* HEVC: select and bind the reference surface for list_index (0 = list0, otherwise list1) */
+void
+intel_hevc_vme_reference_state(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              struct intel_encoder_context *encoder_context,
+                              int list_index,
+                              int surface_index,
+                              void (* vme_source_surface_state)(
+                                  VADriverContextP ctx,
+                                  int index,
+                                  struct object_surface *obj_surface,
+                                  struct intel_encoder_context *encoder_context));
+
+/* HEVC: refresh the VME mode/MV cost table from the first slice's type and QP */
+void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
+                                struct encode_state *encode_state,
+                                struct intel_encoder_context *encoder_context);
+
 extern Bool gen8_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
 
 extern Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
diff --git a/src/gen9_vme.c b/src/gen9_vme.c
index b4310f2..0e94581 100644
--- a/src/gen9_vme.c
+++ b/src/gen9_vme.c
@@ -120,6 +120,43 @@ static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
     },
 };
 
+/* HEVC: VME kernels built from the gen9 intra / inter / B-inter shader binaries */
+
+static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
+#include "shaders/vme/intra_frame_gen9.g9b"
+};
+
+static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
+#include "shaders/vme/inter_frame_gen9.g9b"
+};
+
+static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
+#include "shaders/vme/inter_bframe_gen9.g9b"
+};
+
+static struct i965_kernel gen9_vme_hevc_kernels[] = {
+    {
+        "VME Intra Frame",
+        VME_INTRA_SHADER, /*index*/
+        gen9_vme_hevc_intra_frame,
+        sizeof(gen9_vme_hevc_intra_frame),
+        NULL
+    },
+    {
+        "VME inter Frame",
+        VME_INTER_SHADER, /* index */
+        gen9_vme_hevc_inter_frame,
+        sizeof(gen9_vme_hevc_inter_frame),
+        NULL
+    },
+    {
+        "VME inter BFrame",
+        VME_BINTER_SHADER, /* index */
+        gen9_vme_hevc_inter_bframe,
+        sizeof(gen9_vme_hevc_inter_bframe),
+        NULL
+    }
+};
 /* only used for VME source surface state */
 static void
 gen9_vme_source_surface_state(VADriverContextP ctx,
@@ -330,6 +367,13 @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
         }
     } else if (encoder_context->codec == CODEC_MPEG2) {
         mv_num = 2;
+    }else if (encoder_context->codec == CODEC_HEVC) {
+        if (vme_context->hevc_level >= 30*3) {
+            mv_num = 16;
+
+            if (vme_context->hevc_level >= 31*3)
+                mv_num = 8;
+        }/* use the avc level setting */
     }
 
     vme_state_message[31] = mv_num;
@@ -1130,6 +1174,440 @@ gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
     return VA_STATUS_SUCCESS;
 }
 
+/* HEVC */
+
+/*
+ * Allocate the VME output buffer, one record per 16x16 block of the picture,
+ * and register it with the binding-table / surface-state offsets of `index`.
+ */
+static void
+gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
+                             struct encode_state *encode_state,
+                             int index,
+                             struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    int blocks_wide = (seq_param->pic_width_in_luma_samples + 15) / 16;
+    int blocks_high = (seq_param->pic_height_in_luma_samples + 15) / 16;
+
+    /*
+     * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
+     * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
+     * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
+     */
+    int units_per_block = (slice_param->slice_type == SLICE_TYPE_I) ? 2 : 24;
+
+    vme_context->vme_output.num_blocks = blocks_wide * blocks_high;
+    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
+    vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * units_per_block;
+
+    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                              "VME output buffer",
+                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
+                                              0x1000);
+    assert(vme_context->vme_output.bo);
+
+    vme_context->vme_buffer_suface_setup(ctx,
+                                         &vme_context->gpe_context,
+                                         &vme_context->vme_output,
+                                         BINDING_TABLE_OFFSET(index),
+                                         SURFACE_STATE_OFFSET(index));
+}
+
+/* Allocate the buffer object that holds the per-block VME commands; index is unused here. */
+static void
+gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
+                                      struct encode_state *encode_state,
+                                      int index,
+                                      struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
+    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
+    /* one extra block, presumably for the terminating MI_BATCH_BUFFER_END - TODO confirm */
+    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
+    vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
+    vme_context->vme_batchbuffer.pitch = 16;
+    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
+                                                   "VME batchbuffer",
+                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
+                                                   0x1000);
+    assert(vme_context->vme_batchbuffer.bo); /* the bo is mapped and written when the batch is filled */
+}
+/*
+ * Bind the VME surfaces: source picture (indices 0, 4, 6), list0/list1
+ * references for inter slices (1, 2), VME output (3) and batch buffer (5).
+ */
+static VAStatus
+gen9_vme_hevc_surface_setup(VADriverContextP ctx,
+                       struct encode_state *encode_state,
+                       int is_intra,
+                       struct intel_encoder_context *encoder_context)
+{
+    struct object_surface *input_surface = encode_state->input_yuv_object;
+
+    /* current picture for encoding */
+    gen9_vme_source_surface_state(ctx, 0, input_surface, encoder_context);
+    gen9_vme_media_source_surface_state(ctx, 4, input_surface, encoder_context);
+    gen9_vme_media_chroma_source_surface_state(ctx, 6, input_surface, encoder_context);
+
+    if (!is_intra) {
+        VAEncSliceParameterBufferHEVC *first_slice = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+        int type = first_slice->slice_type;
+
+        assert(type != SLICE_TYPE_I && type != SLICE_TYPE_SI);
+
+        /* to do HEVC */
+        intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
+        if (type == SLICE_TYPE_B)
+            intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
+    }
+
+    /* VME output */
+    gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
+    gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+/* Fill the VME batch buffer with one scoreboarded MEDIA_OBJECT per 16x16 block,
+ * visiting the blocks of each slice along diagonals (x -= 2, y += 1). */
+static void
+gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
+                                     struct encode_state *encode_state,
+                                     int mb_width, int mb_height,
+                                     int kernel,
+                                     int transform_8x8_mode_flag,
+                                     struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    int mb_row;
+    int s;
+    unsigned int *command_ptr;
+    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
+    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
+    int ctb_size = 1 << log2_ctb_size;
+    int num_mb_in_ctb = (ctb_size + 15)/16;
+    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; /* 16x16 blocks per CTB */
+
+#define		USE_SCOREBOARD		(1 << 21)
+
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
+    /* NOTE(review): loop_in_bounds() is defined elsewhere; it serves as the loop-exit test below - confirm its contract */
+    /* slice_segment_address must be aligned to the picture width in CTBs */
+    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
+        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
+        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb;
+        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
+        unsigned int mb_intra_ub, score_dep;
+        int x_outer, y_outer, x_inner, y_inner;
+        int xtemp_outer = 0;
+
+        x_outer = first_mb % mb_width;
+        y_outer = first_mb / mb_width;
+        mb_row = y_outer; /* first block row of this slice */
+
+        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+            x_inner = x_outer;
+            y_inner = y_outer;
+            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+                mb_intra_ub = 0;
+                score_dep = 0;
+                if (x_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                    score_dep |= MB_SCOREBOARD_A;
+                }
+                if (y_inner != mb_row) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                    score_dep |= MB_SCOREBOARD_B;
+                    if (x_inner != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+                    if (x_inner != (mb_width -1)) {
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                        score_dep |= MB_SCOREBOARD_C;
+                    }
+                }
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = kernel;
+                *command_ptr++ = USE_SCOREBOARD;
+                /* Indirect data */
+                *command_ptr++ = 0;
+                /* the (X, Y) term of scoreboard */
+                *command_ptr++ = ((y_inner << 16) | x_inner);
+                *command_ptr++ = score_dep;
+                /*inline data */
+                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+                *command_ptr++ = 0;
+
+                x_inner -= 2;
+                y_inner += 1;
+            }
+            x_outer += 1;
+        }
+
+        xtemp_outer = mb_width - 2; /* second pass: diagonals starting in the last two columns */
+        if (xtemp_outer < 0)
+            xtemp_outer = 0;
+        x_outer = xtemp_outer;
+        y_outer = first_mb / mb_width;
+        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
+            y_inner = y_outer;
+            x_inner = x_outer;
+            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
+                mb_intra_ub = 0;
+                score_dep = 0;
+                if (x_inner != 0) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+                    score_dep |= MB_SCOREBOARD_A;
+                }
+                if (y_inner != mb_row) {
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                    score_dep |= MB_SCOREBOARD_B;
+                    if (x_inner != 0)
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+                    if (x_inner != (mb_width -1)) {
+                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+                        score_dep |= MB_SCOREBOARD_C;
+                    }
+                }
+
+                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
+                *command_ptr++ = kernel;
+                *command_ptr++ = USE_SCOREBOARD;
+                /* Indirect data */
+                *command_ptr++ = 0;
+                /* the (X, Y) term of scoreboard */
+                *command_ptr++ = ((y_inner << 16) | x_inner);
+                *command_ptr++ = score_dep;
+                /*inline data */
+                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
+                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
+                *command_ptr++ = 0;
+                x_inner -= 2;
+                y_inner += 1;
+            }
+            x_outer++;
+            if (x_outer >= mb_width) {
+                y_outer += 1;
+                x_outer = xtemp_outer;
+            }
+        }
+    }
+
+    *command_ptr++ = MI_BATCH_BUFFER_END;
+    *command_ptr++ = 0;
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+/*
+ * Fill the VME batch buffer in raster order with one MEDIA_OBJECT per 16x16
+ * block of every slice, without scoreboard dependencies.  The intra
+ * neighbour-availability flags are trimmed at the start of each slice.
+ */
+static void
+gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
+                              struct encode_state *encode_state,
+                              int mb_width, int mb_height,
+                              int kernel,
+                              int transform_8x8_mode_flag,
+                              struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
+    int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
+    int ctb_size = 1 << log2_ctb_size;
+    int mbs_per_ctb_side = (ctb_size + 15) / 16;
+    int num_mb_in_ctb = mbs_per_ctb_side * mbs_per_ctb_side; /* 16x16 blocks per CTB */
+    unsigned int *cmd;
+    int slice_idx;
+
+    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
+    cmd = vme_context->vme_batchbuffer.bo->virtual;
+
+    for (slice_idx = 0; slice_idx < encode_state->num_slice_params_ext; slice_idx++) {
+        VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_idx]->buffer;
+        int slice_mb_begin = slice_param->slice_segment_address * num_mb_in_ctb;
+        int slice_mb_number = slice_param->num_ctu_in_slice * num_mb_in_ctb;
+        int slice_mb_x = slice_mb_begin % mb_width;
+        int n;
+
+        for (n = 0; n < slice_mb_number; n++) {
+            int mb_x = (n + slice_mb_begin) % mb_width;
+            int mb_y = (n + slice_mb_begin) / mb_width;
+            unsigned int mb_intra_ub = 0;
+
+            if (mb_x != 0)
+                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
+            if (mb_y != 0) {
+                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
+                if (mb_x != 0)
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
+                if (mb_x != (mb_width - 1))
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+            }
+
+            /* the first mb_width blocks of a slice drop B/C/D (C is restored for block mb_width - 1 of a mid-row slice) */
+            if (n < mb_width) {
+                if (n == 0)
+                    mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_AE);
+                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
+                if ((n == (mb_width - 1)) && slice_mb_x)
+                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
+            }
+            /* block number mb_width of a slice that starts mid-row drops the D flag */
+            if ((n == mb_width) && slice_mb_x)
+                mb_intra_ub &= ~(INTRA_PRED_AVAIL_FLAG_D);
+
+            *cmd++ = (CMD_MEDIA_OBJECT | (8 - 2));
+            *cmd++ = kernel;
+            *cmd++ = 0;
+            *cmd++ = 0;
+            *cmd++ = 0;
+            *cmd++ = 0;
+
+            /*inline data */
+            *cmd++ = (mb_width << 16 | mb_y << 8 | mb_x);
+            *cmd++ = ( (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
+
+            *cmd++ = CMD_MEDIA_STATE_FLUSH;
+            *cmd++ = 0;
+        }
+    }
+
+    *cmd++ = MI_BATCH_BUFFER_END;
+    *cmd++ = 0;
+
+    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
+}
+
+/*
+ * Build the second-level VME batch buffer and chain it from the main batch.
+ * The scoreboard walker is used only when every slice starts on a block-row
+ * boundary; otherwise the blocks are emitted in raster order.
+ */
+static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
+                                         struct encode_state *encode_state,
+                                         struct intel_encoder_context *encoder_context)
+{
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    struct intel_batchbuffer *batch = encoder_context->base.batch;
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    int width_in_mbs = (seq_param->pic_width_in_luma_samples + 15) / 16;
+    int height_in_mbs = (seq_param->pic_height_in_luma_samples + 15) / 16;
+    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
+    int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
+    int ctb_size = 1 << log2_ctb_size;
+    int mbs_per_ctb_side = (ctb_size + 15) / 16;
+    int num_mb_in_ctb = mbs_per_ctb_side * mbs_per_ctb_side;
+    int transform_8x8_mode_flag = 1;
+    int kernel_shader = VME_BINTER_SHADER;
+    bool allow_hwscore = true;
+    int s;
+
+    for (s = 0; allow_hwscore && s < encode_state->num_slice_params_ext; s++) {
+        int slice_mb_begin;
+        slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
+        slice_mb_begin = slice_param->slice_segment_address * num_mb_in_ctb;
+        if (slice_mb_begin % width_in_mbs)
+            allow_hwscore = false;
+    }
+
+    /* slice_param is the last slice examined by the loop above */
+    if (slice_param->slice_type == SLICE_TYPE_I)
+        kernel_shader = VME_INTRA_SHADER;
+    else if (slice_param->slice_type == SLICE_TYPE_P || !allow_hwscore)
+        kernel_shader = VME_INTER_SHADER;
+
+    if (allow_hwscore) {
+        gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
+                                               encode_state,
+                                               width_in_mbs, height_in_mbs,
+                                               kernel_shader,
+                                               transform_8x8_mode_flag,
+                                               encoder_context);
+    } else {
+        gen9_vme_hevc_fill_vme_batchbuffer(ctx,
+                                      encode_state,
+                                      width_in_mbs, height_in_mbs,
+                                      kernel_shader,
+                                      transform_8x8_mode_flag,
+                                      encoder_context);
+    }
+
+    intel_batchbuffer_start_atomic(batch, 0x1000);
+    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
+    BEGIN_BATCH(batch, 3);
+    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
+    OUT_RELOC(batch,
+              vme_context->vme_batchbuffer.bo,
+              I915_GEM_DOMAIN_COMMAND, 0,
+              0);
+    OUT_BATCH(batch, 0);
+    ADVANCE_BATCH(batch);
+
+    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
+
+    intel_batchbuffer_end_atomic(batch);
+}
+
+/*
+ * Per-frame VME preparation: record the sequence level, refresh the cost table,
+ * set up surfaces/interface/constants and emit the media pipeline commands.
+ */
+static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
+                                 struct encode_state *encode_state,
+                                 struct intel_encoder_context *encoder_context)
+{
+    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
+    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
+    struct gen6_vme_context *vme_context = encoder_context->vme_context;
+    int is_intra = (slice_param->slice_type == SLICE_TYPE_I);
+
+    /* here use the avc level for hevc vme */
+    if (vme_context->hevc_level != seq_param->general_level_idc)
+        vme_context->hevc_level = seq_param->general_level_idc;
+
+    intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
+
+    /* Setup all the memory object */
+    gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
+    gen9_vme_interface_setup(ctx, encode_state, encoder_context);
+    gen9_vme_constant_setup(ctx, encode_state, encoder_context);
+
+    /* Programing media pipeline */
+    gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
+
+    return VA_STATUS_SUCCESS;
+}
+
+
+/* HEVC VME pipeline hook: media init, prepare, run, stop; always returns VA_STATUS_SUCCESS. */
+static VAStatus
+gen9_vme_hevc_pipeline(VADriverContextP ctx,
+                  VAProfile profile,
+                  struct encode_state *encode_state,
+                  struct intel_encoder_context *encoder_context)
+{
+    gen9_vme_media_init(ctx, encoder_context);
+    gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
+    gen9_vme_run(ctx, encode_state, encoder_context);
+    gen9_vme_stop(ctx, encode_state, encoder_context);
+    return VA_STATUS_SUCCESS;
+}
+
+
 static void
 gen9_vme_context_destroy(void *context)
 {
@@ -1172,6 +1650,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
         vme_kernel_list = gen9_vme_mpeg2_kernels;
         encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
         i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
+        break;
+
+   case CODEC_HEVC:
+        vme_kernel_list = gen9_vme_hevc_kernels;
+        encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
+        i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
 
         break;
 
diff --git a/src/i965_encoder.c b/src/i965_encoder.c
index d924f5a..c9ff2ec 100644
--- a/src/i965_encoder.c
+++ b/src/i965_encoder.c
@@ -39,10 +39,12 @@
 #include "i965_encoder.h"
 #include "gen6_vme.h"
 #include "gen6_mfc.h"
+#include "gen9_mfc.h"
 
 extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
 extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
 extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
+extern Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
 
 static VAStatus
 intel_encoder_check_yuv_surface(VADriverContextP ctx,
@@ -422,6 +424,63 @@ error:
 }
 
 static VAStatus
+intel_encoder_check_hevc_parameter(VADriverContextP ctx,
+                                  struct encode_state *encode_state,
+                                  struct intel_encoder_context *encoder_context)
+{
+    struct i965_driver_data *i965 = i965_driver_data(ctx);
+    struct object_surface *obj_surface;	
+    struct object_buffer *obj_buffer;
+    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
+    int i;
+
+    assert(!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID));
+
+    if (pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID)
+        goto error;
+
+    obj_surface = SURFACE(pic_param->decoded_curr_pic.picture_id);
+    assert(obj_surface); /* It is possible the store buffer isn't allocated yet */
+    
+    if (!obj_surface)
+        goto error;
+
+    encode_state->reconstructed_object = obj_surface;
+    obj_buffer = BUFFER(pic_param->coded_buf);
+    assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo);
+
+    if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo)
+        goto error;
+
+    encode_state->coded_buf_object = obj_buffer;
+
+    for (i = 0; i < 15; i++) {
+        if (pic_param->reference_frames[i].flags & VA_PICTURE_HEVC_INVALID ||
+            pic_param->reference_frames[i].picture_id == VA_INVALID_SURFACE)
+            break;
+        else {
+            obj_surface = SURFACE(pic_param->reference_frames[i].picture_id);
+            assert(obj_surface);
+
+            if (!obj_surface)
+                goto error;
+
+            if (obj_surface->bo)
+                encode_state->reference_objects[i] = obj_surface;
+            else
+                encode_state->reference_objects[i] = NULL; /* FIXME: Warning or Error ??? */
+        }
+    }
+
+    for ( ; i < 15; i++)
+        encode_state->reference_objects[i] = NULL;
+    
+    return VA_STATUS_SUCCESS;
+
+error:
+    return VA_STATUS_ERROR_INVALID_PARAMETER;
+}
+static VAStatus
 intel_encoder_sanity_check_input(VADriverContextP ctx,
                                  VAProfile profile,
                                  struct encode_state *encode_state,
@@ -459,6 +518,13 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
         break;
     }
 
+    case VAProfileHEVCMain:  {
+        vaStatus = intel_encoder_check_hevc_parameter(ctx, encode_state, encoder_context);
+        if (vaStatus != VA_STATUS_SUCCESS)
+            goto out;
+        vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
+        break;
+    }
     default:
         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
         break;
@@ -554,6 +620,10 @@ intel_enc_hw_context_init(VADriverContextP ctx,
         encoder_context->codec = CODEC_JPEG;
         break;
 
+    case VAProfileHEVCMain:
+        encoder_context->codec = CODEC_HEVC;
+        break;
+
     default:
         /* Never get here */
         assert(0);
@@ -617,7 +687,9 @@ gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
 struct hw_context *
 gen9_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
 {
-    if (obj_config->profile == VAProfileJPEGBaseline)
+    if (obj_config->profile == VAProfileHEVCMain) {
+        return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_hcpe_context_init);
+    } else if (obj_config->profile == VAProfileJPEGBaseline)
         return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
     else
         return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_mfc_context_init);
-- 
1.9.1



More information about the Libva mailing list