[Libva] [PATCH V2 5/6] HEVC ENC:Added VME pipeline

Qu, Pengfei pengfei.qu at intel.com
Tue Jan 6 17:54:34 PST 2015


Sure, I will put src/i965_encoder.c into an independent patch.

-----Original Message-----
From: Zhao, Yakui 
Sent: Wednesday, January 7, 2015 9:16 AM
To: Qu, Pengfei
Cc: libva at lists.freedesktop.org
Subject: Re: [Libva] [PATCH V2 5/6] HEVC ENC:Added VME pipeline

On Tue, 2015-01-06 at 01:57 -0700, Qu,Pengfei wrote:

It seems that this patch mixes two things together: setting up the VME pipeline and the initialization changes in intel_enc_hw_context_init.

Can it be split into two patches? 

> Signed-off-by: Qu,Pengfei <Pengfei.Qu at intel.com>
> ---
>  src/gen6_mfc_common.c | 222 +++++++++++++++++++++++
>  src/gen6_vme.h        |  19 ++
>  src/gen9_vme.c        | 484 ++++++++++++++++++++++++++++++++++++++++++++++++++
>  src/i965_encoder.c    |  74 +++++++-
>  4 files changed, 798 insertions(+), 1 deletion(-)
> 
> diff --git a/src/gen6_mfc_common.c b/src/gen6_mfc_common.c index 
> fe41dac..6a5f720 100644
> --- a/src/gen6_mfc_common.c
> +++ b/src/gen6_mfc_common.c
> @@ -1652,3 +1652,225 @@ void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
>      return;
>  }
>  
> +/* HEVC */
> +/*
> + * Return the index of the entry in ref_list whose picture order count is
> + * closest to curr_pic on one side: before it when dir is 0, after it when
> + * dir is non-zero. The scan stops at the first invalid entry. Returns -1
> + * when no entry qualifies.
> + */
> +static int
> +hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
> +                           VAPictureHEVC *ref_list,
> +                           int num_pictures,
> +                           int dir)
> +{
> +    int best_idx = -1;
> +    int best_dist = 0x7FFFFFFF;
> +    int idx;
> +
> +    for (idx = 0; idx < num_pictures; idx++) {
> +        VAPictureHEVC *cand = &ref_list[idx];
> +        int dist;
> +
> +        /* the first unusable entry ends the list */
> +        if ((cand->flags & VA_PICTURE_HEVC_INVALID) ||
> +            (cand->picture_id == VA_INVALID_SURFACE))
> +            break;
> +
> +        dist = curr_pic->pic_order_cnt - cand->pic_order_cnt;
> +        if (dir)
> +            dist = -dist;
> +
> +        /* wrong side of the current picture, or not closer than the best so far */
> +        if (dist <= 0 || dist >= best_dist)
> +            continue;
> +
> +        best_dist = dist;
> +        best_idx = idx;
> +    }
> +
> +    return best_idx;
> +}
> +/*
> + * Choose the reference surface for one reference list and bind it for VME.
> + *
> + * list_index:    0 selects ref_pic_list0 and the L0 default count, any
> + *                other value ref_pic_list1 and the L1 default count.
> + * surface_index: index passed through to vme_source_surface_state.
> + * vme_source_surface_state: callback that sets up the surface state for the
> + *                chosen reference surface.
> + *
> + * When a surface with a buffer object is found, used_reference_objects[],
> + * used_references[] and ref_index_in_mb[] of the VME context are filled for
> + * list_index; otherwise they are reset to NULL, NULL and 0.
> + */
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> +                               struct encode_state *encode_state,
> +                               struct intel_encoder_context *encoder_context,
> +                               int list_index,
> +                               int surface_index,
> +                               void (* vme_source_surface_state)(
> +                                   VADriverContextP ctx,
> +                                   int index,
> +                                   struct object_surface *obj_surface,
> +                                   struct intel_encoder_context *encoder_context))
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    struct object_surface *obj_surface = NULL;
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    VASurfaceID ref_surface_id;
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int max_num_references;
> +    VAPictureHEVC *curr_pic;
> +    VAPictureHEVC *ref_list;
> +    int ref_idx;
> +
> +    if (list_index == 0) {
> +        max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
> +        ref_list = slice_param->ref_pic_list0;
> +    } else {
> +        max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
> +        ref_list = slice_param->ref_pic_list1;
> +    }
> +
> +    if (max_num_references == 1) {
> +        if (list_index == 0) {
> +            ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
> +            vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
> +        } else {
> +            ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
> +            vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
> +        }
> +
> +        if (ref_surface_id != VA_INVALID_SURFACE)
> +            obj_surface = SURFACE(ref_surface_id);
> +
> +        /* fall back to the picture-level reference when the slice entry is unusable */
> +        if (!obj_surface ||
> +            !obj_surface->bo) {
> +            obj_surface = encode_state->reference_objects[list_index];
> +            vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
> +        }
> +
> +        ref_idx = 0;
> +    } else {
> +        curr_pic = &pic_param->decoded_curr_pic;
> +
> +        /* select the reference frame in temporal space */
> +        ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
> +
> +        /*
> +         * The search returns -1 when no valid picture lies on the requested
> +         * side of the current picture. Only index ref_list with a valid
> +         * result; otherwise obj_surface stays NULL and the reset branch
> +         * below is taken.
> +         */
> +        if (ref_idx >= 0) {
> +            ref_surface_id = ref_list[ref_idx].picture_id;
> +
> +            if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
> +                obj_surface = SURFACE(ref_surface_id);
> +
> +            vme_context->used_references[list_index] = &ref_list[ref_idx];
> +        }
> +    }
> +
> +    if (obj_surface &&
> +        obj_surface->bo) {
> +        assert(ref_idx >= 0);
> +        vme_context->used_reference_objects[list_index] = obj_surface;
> +        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
> +        /* the same reference index is replicated into all four bytes */
> +        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
> +                ref_idx << 16 |
> +                ref_idx <<  8 |
> +                ref_idx);
> +    } else {
> +        vme_context->used_reference_objects[list_index] = NULL;
> +        vme_context->used_references[list_index] = NULL;
> +        vme_context->ref_index_in_mb[list_index] = 0;
> +    }
> +}
> +
> +/*
> + * Fill the mode and motion-vector cost entries of the VME state message
> + * from the slice QP.
> + *
> + * The QP is pic_init_qp + slice_qp_delta, read from the picture parameter
> + * buffer and the first slice parameter buffer. Nothing is written when the
> + * VME context has no state message buffer.
> + */
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> +                                     struct encode_state *encode_state,
> +                                     struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int qp, m_cost, j, mv_count;
> +    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
> +    float   lambda, m_costf;
> +
> +    /* here no SI SP slice for HEVC, do not need slice fixup */
> +    int slice_type = slice_param->slice_type;
> +
> +    /*
> +     * TODO for CBR: only the CQP formula is used for now. With rate control
> +     * the QP should come from
> +     * mfc_context->bit_rate_control_context[slice_type].QpPrimeY.
> +     */
> +    qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
> +
> +    if (vme_state_message == NULL)
> +        return;
> +
> +    assert(qp <= QP_MAX);
> +    lambda = intel_lambda_qp(qp);
> +    if (slice_type == SLICE_TYPE_I) {
> +        vme_state_message[MODE_INTRA_16X16] = 0;
> +        m_cost = lambda * 4;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 16;
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 3;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +    } else {
> +        /* motion-vector costs: entry 0 is free, the rest grow with log2 of the MV size */
> +        m_cost = 0;
> +        vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
> +        for (j = 1; j < 3; j++) {
> +            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> +            m_cost = (int)m_costf;
> +            vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
> +        }
> +        mv_count = 3;
> +        for (j = 4; j <= 64; j *= 2) {
> +            m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
> +            m_cost = (int)m_costf;
> +            vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
> +            mv_count++;
> +        }
> +
> +        /* at low QP fixed mode costs are used instead of lambda-scaled ones */
> +        if (qp <= 25) {
> +            vme_state_message[MODE_INTRA_16X16] = 0x4a;
> +            vme_state_message[MODE_INTRA_8X8] = 0x4a;
> +            vme_state_message[MODE_INTRA_4X4] = 0x4a;
> +            vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
> +            vme_state_message[MODE_INTER_16X16] = 0x4a;
> +            vme_state_message[MODE_INTER_16X8] = 0x4a;
> +            vme_state_message[MODE_INTER_8X8] = 0x4a;
> +            vme_state_message[MODE_INTER_8X4] = 0x4a;
> +            vme_state_message[MODE_INTER_4X4] = 0x4a;
> +            vme_state_message[MODE_INTER_BWD] = 0x2a;
> +            return;
> +        }
> +
> +        /*
> +         * The intra 16x16 cost must be derived from lambda; m_cost still
> +         * holds the last MV cost from the loop above at this point.
> +         */
> +        m_cost = lambda * 10;
> +        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 14;
> +        vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_cost = lambda * 24;
> +        vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
> +        m_costf = lambda * 3.5;
> +        m_cost = m_costf;
> +        vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
> +        if (slice_type == SLICE_TYPE_P) {
> +            m_costf = lambda * 2.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 4;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 1.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 3;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            /* BWD is not used in P-frame */
> +            vme_state_message[MODE_INTER_BWD] = 0;
> +        } else {
> +            m_costf = lambda * 2.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 5.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
> +            m_costf = lambda * 3.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 5.0;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 6.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
> +            m_costf = lambda * 1.5;
> +            m_cost = m_costf;
> +            vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
> +        }
> +    }
> +}
> diff --git a/src/gen6_vme.h b/src/gen6_vme.h index bc62c14..c9d6b48 
> 100644
> --- a/src/gen6_vme.h
> +++ b/src/gen6_vme.h
> @@ -83,6 +83,7 @@ struct gen6_vme_context
>                                             unsigned long surface_state_offset);
>      void *vme_state_message;
>      unsigned int h264_level;
> +    unsigned int hevc_level;
>      unsigned int video_coding_type;
>      unsigned int vme_kernel_sum;
>      unsigned int mpeg2_level;
> @@ -174,6 +175,24 @@ intel_avc_vme_reference_state(VADriverContextP ctx,
>                                    struct object_surface *obj_surface,
>                                    struct intel_encoder_context 
> *encoder_context));
>  
> +/* HEVC: VME reference-surface selection and mode/MV cost table update; both are defined in src/gen6_mfc_common.c */
> +void
> +intel_hevc_vme_reference_state(VADriverContextP ctx,
> +                              struct encode_state *encode_state,
> +                              struct intel_encoder_context *encoder_context,
> +                              int list_index,
> +                              int surface_index,
> +                              void (* vme_source_surface_state)(
> +                                  VADriverContextP ctx,
> +                                  int index,
> +                                  struct object_surface *obj_surface,
> +                                  struct intel_encoder_context 
> +*encoder_context));
> +
> +void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
> +                                struct encode_state *encode_state,
> +                                struct intel_encoder_context 
> +*encoder_context);
> +
> +
>  extern Bool gen8_vme_context_init(VADriverContextP ctx, struct 
> intel_encoder_context *encoder_context);
>  
>  extern Bool gen9_vme_context_init(VADriverContextP ctx, struct 
> intel_encoder_context *encoder_context); diff --git a/src/gen9_vme.c 
> b/src/gen9_vme.c index b4310f2..0e94581 100644
> --- a/src/gen9_vme.c
> +++ b/src/gen9_vme.c
> @@ -120,6 +120,43 @@ static struct i965_kernel gen9_vme_mpeg2_kernels[] = {
>      },
>  };
>  
> +/* HEVC */
> +
> +/*
> + * Kernel binaries for the HEVC VME stage, pulled in from the gen9 VME
> + * shader files. Each #include has to start on its own line, otherwise the
> + * preprocessor does not treat it as a directive.
> + */
> +static const uint32_t gen9_vme_hevc_intra_frame[][4] = {
> +#include "shaders/vme/intra_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_frame[][4] = {
> +#include "shaders/vme/inter_frame_gen9.g9b"
> +};
> +
> +static const uint32_t gen9_vme_hevc_inter_bframe[][4] = {
> +#include "shaders/vme/inter_bframe_gen9.g9b"
> +};
> +
> +/*
> + * Kernel table for the HEVC VME stage: one entry each for the intra,
> + * inter and B-inter kernels, giving a name, the VME_*_SHADER index, the
> + * binary and its size in bytes.
> + * NOTE(review): the trailing NULL is presumably the kernel's buffer
> + * object, filled in when the kernels are loaded - confirm against
> + * struct i965_kernel.
> + */
> +static struct i965_kernel gen9_vme_hevc_kernels[] = {
> +    {
> +        "VME Intra Frame",
> +        VME_INTRA_SHADER, /*index*/
> +        gen9_vme_hevc_intra_frame,
> +        sizeof(gen9_vme_hevc_intra_frame),
> +        NULL
> +    },
> +    {
> +        "VME inter Frame",
> +        VME_INTER_SHADER,
> +        gen9_vme_hevc_inter_frame,
> +        sizeof(gen9_vme_hevc_inter_frame),
> +        NULL
> +    },
> +    {
> +        "VME inter BFrame",
> +        VME_BINTER_SHADER,
> +        gen9_vme_hevc_inter_bframe,
> +        sizeof(gen9_vme_hevc_inter_bframe),
> +        NULL
> +    }
> +};
>  /* only used for VME source surface state */  static void  
> gen9_vme_source_surface_state(VADriverContextP ctx, @@ -330,6 +367,13 
> @@ static VAStatus gen9_vme_constant_setup(VADriverContextP ctx,
>          }
>      } else if (encoder_context->codec == CODEC_MPEG2) {
>          mv_num = 2;
> +    }else if (encoder_context->codec == CODEC_HEVC) {
> +        if (vme_context->hevc_level >= 30*3) {
> +            mv_num = 16;
> +
> +            if (vme_context->hevc_level >= 31*3)
> +                mv_num = 8;
> +        }/* use the avc level setting */
>      }
>  
>      vme_state_message[31] = mv_num;
> @@ -1130,6 +1174,440 @@ gen9_vme_mpeg2_pipeline(VADriverContextP ctx,
>      return VA_STATUS_SUCCESS;
>  }
>  
> +/* HEVC */
> +
> +/*
> + * Size and allocate the VME output buffer for the current picture, then
> + * bind it at the binding-table / surface-state slot given by index.
> + * The per-macroblock record size depends on whether the first slice is
> + * an I slice.
> + */
> +static void
> +gen9_vme_hevc_output_buffer_setup(VADriverContextP ctx,
> +                             struct encode_state *encode_state,
> +                             int index,
> +                             struct intel_encoder_context *encoder_context)
> +{
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    VAEncSliceParameterBufferHEVC *first_slice = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int mb_cols = (seq_param->pic_width_in_luma_samples + 15) / 16;
> +    int mb_rows = (seq_param->pic_height_in_luma_samples + 15) / 16;
> +
> +    vme_context->vme_output.num_blocks = mb_cols * mb_rows;
> +    vme_context->vme_output.pitch = 16; /* in bytes, always 16 */
> +
> +    if (first_slice->slice_type == SLICE_TYPE_I) {
> +        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
> +    } else {
> +        /*
> +         * Inter MV . 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
> +         * + 16 FBR Info + 128 FBR MV + 32 FBR Ref.
> +         * 16 * (2 + 2 * (1 + 8 + 2))= 16 * 24.
> +         */
> +        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
> +    }
> +
> +    vme_context->vme_output.bo = dri_bo_alloc(i965->intel.bufmgr,
> +                                              "VME output buffer",
> +                                              vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
> +                                              0x1000);
> +    assert(vme_context->vme_output.bo);
> +
> +    vme_context->vme_buffer_suface_setup(ctx,
> +                                         &vme_context->gpe_context,
> +                                         &vme_context->vme_output,
> +                                         BINDING_TABLE_OFFSET(index),
> +                                         SURFACE_STATE_OFFSET(index));
> +}
> +
> +/*
> + * Allocate the buffer that holds the per-macroblock VME commands: one
> + * 64-byte block per macroblock plus one extra block. The index argument
> + * is not used here.
> + */
> +static void
> +gen9_vme_hevc_output_vme_batchbuffer_setup(VADriverContextP ctx,
> +                                      struct encode_state *encode_state,
> +                                      int index,
> +                                      struct intel_encoder_context *encoder_context)
> +{
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15)/16;
> +    int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + 15)/16;
> +
> +    vme_context->vme_batchbuffer.num_blocks = width_in_mbs * height_in_mbs + 1;
> +    vme_context->vme_batchbuffer.size_block = 64; /* 4 OWORDs */
> +    vme_context->vme_batchbuffer.pitch = 16;
> +    vme_context->vme_batchbuffer.bo = dri_bo_alloc(i965->intel.bufmgr,
> +                                                   "VME batchbuffer",
> +                                                   vme_context->vme_batchbuffer.num_blocks * vme_context->vme_batchbuffer.size_block,
> +                                                   0x1000);
> +    /* check the allocation here, as the VME output buffer setup does */
> +    assert(vme_context->vme_batchbuffer.bo);
> +}
> +
> +/*
> + * Set up every surface the HEVC VME kernels use: the input picture
> + * (slots 0, 4 and 6), the reference pictures for non-intra slices
> + * (slot 1 for list 0, slot 2 for list 1 of B slices), the VME output
> + * buffer (slot 3) and the command buffer (slot 5).
> + * Always returns VA_STATUS_SUCCESS.
> + */
> +static VAStatus
> +gen9_vme_hevc_surface_setup(VADriverContextP ctx,
> +                       struct encode_state *encode_state,
> +                       int is_intra,
> +                       struct intel_encoder_context *encoder_context)
> +{
> +    struct object_surface *obj_surface;
> +
> +    /*Setup surfaces state*/
> +    /* current picture for encoding */
> +    obj_surface = encode_state->input_yuv_object;
> +    gen9_vme_source_surface_state(ctx, 0, obj_surface, encoder_context);
> +    gen9_vme_media_source_surface_state(ctx, 4, obj_surface, encoder_context);
> +    gen9_vme_media_chroma_source_surface_state(ctx, 6, obj_surface, encoder_context);
> +
> +    if (!is_intra) {
> +        VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +        int slice_type;
> +
> +        slice_type = slice_param->slice_type;
> +        assert(slice_type != SLICE_TYPE_I && slice_type != SLICE_TYPE_SI);
> +
> +        /* to do HEVC */
> +        intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 0, 1, gen9_vme_source_surface_state);
> +
> +        if (slice_type == SLICE_TYPE_B)
> +            intel_hevc_vme_reference_state(ctx, encode_state, encoder_context, 1, 2, gen9_vme_source_surface_state);
> +    }
> +
> +    /* VME output */
> +    gen9_vme_hevc_output_buffer_setup(ctx, encode_state, 3, encoder_context);
> +    gen9_vme_hevc_output_vme_batchbuffer_setup(ctx, encode_state, 5, encoder_context);
> +
> +    return VA_STATUS_SUCCESS;
> +}
> +static void /*
> + * Fill the VME batchbuffer for the scoreboard path: for every slice, one
> + * CMD_MEDIA_OBJECT (8 dwords) followed by CMD_MEDIA_STATE_FLUSH (2 dwords)
> + * per macroblock, emitted along diagonals (two columns left, one row down)
> + * with scoreboard dependencies on the left/top/top-right neighbours.
> + * The buffer is terminated with MI_BATCH_BUFFER_END.
> + */
> +gen9wa_vme_hevc_walker_fill_vme_batchbuffer(VADriverContextP ctx,
> +                                     struct encode_state *encode_state,
> +                                     int mb_width, int mb_height,
> +                                     int kernel,
> +                                     int transform_8x8_mode_flag,
> +                                     struct intel_encoder_context 
> +*encoder_context) {
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    int mb_row; /* first macroblock row of the slice being emitted */
> +    int s;
> +    unsigned int *command_ptr; /* write cursor into the mapped batchbuffer */
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
> +    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +    int ctb_size = 1 << log2_ctb_size;
> +    int num_mb_in_ctb = (ctb_size + 15)/16; /* 16x16 macroblocks along one CTB side */
> +    num_mb_in_ctb = num_mb_in_ctb * num_mb_in_ctb; /* ... squared: macroblocks per CTB */
> +
> +#define		USE_SCOREBOARD		(1 << 21)
> +
> +    dri_bo_map(vme_context->vme_batchbuffer.bo, 1); /* NOTE(review): the 1 presumably requests a writable mapping - confirm */
> +    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
> +
> +    /* slice_segment_address is expected to be aligned to the picture width in CTBs; the caller only takes this path when every slice starts at macroblock column 0 */
> +    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
> +        VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> +        int first_mb = pSliceParameter->slice_segment_address * num_mb_in_ctb; /* CTB units converted to macroblock units */
> +        int num_mb = pSliceParameter->num_ctu_in_slice * num_mb_in_ctb;
> +        unsigned int mb_intra_ub, score_dep;
> +        int x_outer, y_outer, x_inner, y_inner;
> +        int xtemp_outer = 0;
> +
> +        x_outer = first_mb % mb_width;
> +        y_outer = first_mb / mb_width;
> +        mb_row = y_outer;
> +
> +        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            x_inner = x_outer; /* first pass: diagonals that start on the slice's first row, left of the last two columns */
> +            y_inner = y_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0; /* NOTE(review): loop_in_bounds() is defined outside this view; the loops run while it returns 0 */
> +                score_dep = 0;
> +                if (x_inner != 0) { /* a left neighbour exists */
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
> +                }
> +                if (y_inner != mb_row) { /* not on the first row of the slice */
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data (none) */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard: Y in the upper 16 bits, X in the lower */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /* inline data: two dwords carrying the picture width in macroblocks, the macroblock position and the flags */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
> +                *command_ptr++ = CMD_MEDIA_STATE_FLUSH; /* every MEDIA_OBJECT is followed by a 2-dword state flush */
> +                *command_ptr++ = 0;
> +
> +                x_inner -= 2; /* next macroblock on the diagonal: two columns left, one row down */
> +                y_inner += 1;
> +            }
> +            x_outer += 1;
> +        }
> +
> +        xtemp_outer = mb_width - 2; /* second pass: diagonals that start in the last two columns */
> +        if (xtemp_outer < 0)
> +            xtemp_outer = 0;
> +        x_outer = xtemp_outer;
> +        y_outer = first_mb / mb_width;
> +        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
> +            y_inner = y_outer;
> +            x_inner = x_outer;
> +            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
> +                mb_intra_ub = 0;
> +                score_dep = 0;
> +                if (x_inner != 0) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
> +                    score_dep |= MB_SCOREBOARD_A;
> +                }
> +                if (y_inner != mb_row) {
> +                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
> +                    score_dep |= MB_SCOREBOARD_B;
> +                    if (x_inner != 0)
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
> +
> +                    if (x_inner != (mb_width -1)) {
> +                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
> +                        score_dep |= MB_SCOREBOARD_C;
> +                    }
> +                }
> +
> +                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
> +                *command_ptr++ = kernel;
> +                *command_ptr++ = USE_SCOREBOARD;
> +                /* Indirect data (none) */
> +                *command_ptr++ = 0;
> +                /* the (X, Y) term of scoreboard: Y in the upper 16 bits, X in the lower */
> +                *command_ptr++ = ((y_inner << 16) | x_inner);
> +                *command_ptr++ = score_dep;
> +                /* inline data: same two-dword layout as in the first pass */
> +                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
> +                *command_ptr++ = ((1 << 18) | (1 << 16) | 
> + transform_8x8_mode_flag | (mb_intra_ub << 8));
> +
> +                *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
> +                *command_ptr++ = 0;
> +                x_inner -= 2;
> +                y_inner += 1;
> +            }
> +            x_outer++;
> +            if (x_outer >= mb_width) { /* past the last column: continue from the next row */
> +                y_outer += 1;
> +                x_outer = xtemp_outer;
> +            }
> +        }
> +    }
> +
> +    *command_ptr++ = MI_BATCH_BUFFER_END; /* terminate the command buffer */
> +    *command_ptr++ = 0;
> +
> +    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +
> +/*
> + * Fill the VME batchbuffer without scoreboard dependencies: for every
> + * slice, macroblocks are emitted in raster order, each as an 8-dword
> + * CMD_MEDIA_OBJECT followed by a 2-dword CMD_MEDIA_STATE_FLUSH. The
> + * intra-prediction availability flags are trimmed so that a macroblock
> + * never refers to neighbours outside its slice. The buffer is terminated
> + * with MI_BATCH_BUFFER_END.
> + */
> +static void
> +gen9_vme_hevc_fill_vme_batchbuffer(VADriverContextP ctx,
> +                              struct encode_state *encode_state,
> +                              int mb_width, int mb_height,
> +                              int kernel,
> +                              int transform_8x8_mode_flag,
> +                              struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
> +    int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +    int ctb_size = 1 << log2_ctb_size;
> +    int mbs_per_ctb_side = (ctb_size + 15)/16;
> +    int num_mb_in_ctb = mbs_per_ctb_side * mbs_per_ctb_side;
> +    unsigned int *cmd;
> +    int slice_idx;
> +
> +    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
> +    cmd = vme_context->vme_batchbuffer.bo->virtual;
> +
> +    for (slice_idx = 0; slice_idx < encode_state->num_slice_params_ext; slice_idx++) {
> +        VAEncSliceParameterBufferHEVC *slice = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_idx]->buffer;
> +        int slice_mb_begin = slice->slice_segment_address * num_mb_in_ctb;
> +        int slice_mb_number = slice->num_ctu_in_slice * num_mb_in_ctb;
> +        int slice_start_col = slice_mb_begin % mb_width;
> +        int n;
> +
> +        for (n = 0; n < slice_mb_number; n++) {
> +            int mb_addr = n + slice_mb_begin;
> +            int mb_x = mb_addr % mb_width;
> +            int mb_y = mb_addr / mb_width;
> +            unsigned int avail = 0;
> +
> +            /* availability from the position inside the picture */
> +            if (mb_x != 0)
> +                avail |= INTRA_PRED_AVAIL_FLAG_AE;
> +
> +            if (mb_y != 0) {
> +                avail |= INTRA_PRED_AVAIL_FLAG_B;
> +                if (mb_x != 0)
> +                    avail |= INTRA_PRED_AVAIL_FLAG_D;
> +                if (mb_x != (mb_width -1))
> +                    avail |= INTRA_PRED_AVAIL_FLAG_C;
> +            }
> +
> +            /* the first mb_width macroblocks of a slice have no row above them inside the slice */
> +            if (n < mb_width) {
> +                if (n == 0)
> +                    avail &= ~(INTRA_PRED_AVAIL_FLAG_AE);
> +                avail &= ~(INTRA_PRED_AVAIL_FLAG_BCD_MASK);
> +                if ((n == (mb_width - 1)) && slice_start_col)
> +                    avail |= INTRA_PRED_AVAIL_FLAG_C;
> +            }
> +
> +            if ((n == mb_width) && slice_start_col)
> +                avail &= ~(INTRA_PRED_AVAIL_FLAG_D);
> +
> +            cmd[0] = (CMD_MEDIA_OBJECT | (8 - 2));
> +            cmd[1] = kernel;
> +            cmd[2] = 0;
> +            cmd[3] = 0;
> +            cmd[4] = 0;
> +            cmd[5] = 0;
> +
> +            /*inline data */
> +            cmd[6] = (mb_width << 16 | mb_y << 8 | mb_x);
> +            cmd[7] = ( (1 << 16) | transform_8x8_mode_flag | (avail << 8));
> +
> +            cmd[8] = CMD_MEDIA_STATE_FLUSH;
> +            cmd[9] = 0;
> +            cmd += 10;
> +        }
> +    }
> +
> +    cmd[0] = MI_BATCH_BUFFER_END;
> +    cmd[1] = 0;
> +
> +    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
> +}
> +
> +/*
> + * Build the per-macroblock VME command buffer and emit the batch that
> + * runs it. The scoreboard (walker) fill is used only when every slice
> + * starts at macroblock column 0; otherwise the plain raster fill is used
> + * and B slices run the inter kernel instead of the B-inter one.
> + */
> +static void gen9_vme_hevc_pipeline_programing(VADriverContextP ctx,
> +                                         struct encode_state *encode_state,
> +                                         struct intel_encoder_context *encoder_context)
> +{
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +    struct intel_batchbuffer *batch = encoder_context->base.batch;
> +    VAEncSliceParameterBufferHEVC *slice = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    VAEncSequenceParameterBufferHEVC *seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    int width_in_mbs = (seq_param->pic_width_in_luma_samples + 15)/16;
> +    int height_in_mbs = (seq_param->pic_height_in_luma_samples + 15)/16;
> +    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
> +    int log2_ctb_size = seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
> +    int ctb_size = 1 << log2_ctb_size;
> +    int mbs_per_ctb_side = (ctb_size + 15)/16;
> +    int num_mb_in_ctb = mbs_per_ctb_side * mbs_per_ctb_side;
> +    int transform_8x8_mode_flag = 1;
> +    bool allow_hwscore = true;
> +    int kernel_shader;
> +    int s;
> +
> +    /*
> +     * Stop at the first slice that does not begin at column 0; `slice`
> +     * is left pointing at that slice (or at the last slice examined).
> +     */
> +    for (s = 0; allow_hwscore && s < encode_state->num_slice_params_ext; s++) {
> +        int slice_mb_begin;
> +
> +        slice = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[s]->buffer;
> +        slice_mb_begin = slice->slice_segment_address * num_mb_in_ctb;
> +        if ((slice_mb_begin % width_in_mbs))
> +            allow_hwscore = false;
> +    }
> +
> +    if (slice->slice_type == SLICE_TYPE_I)
> +        kernel_shader = VME_INTRA_SHADER;
> +    else if (slice->slice_type == SLICE_TYPE_P || !allow_hwscore)
> +        kernel_shader = VME_INTER_SHADER;
> +    else
> +        kernel_shader = VME_BINTER_SHADER;
> +
> +    if (allow_hwscore) {
> +        gen9wa_vme_hevc_walker_fill_vme_batchbuffer(ctx,
> +                                               encode_state,
> +                                               width_in_mbs, height_in_mbs,
> +                                               kernel_shader,
> +                                               transform_8x8_mode_flag,
> +                                               encoder_context);
> +    } else {
> +        gen9_vme_hevc_fill_vme_batchbuffer(ctx,
> +                                      encode_state,
> +                                      width_in_mbs, height_in_mbs,
> +                                      kernel_shader,
> +                                      transform_8x8_mode_flag,
> +                                      encoder_context);
> +    }
> +
> +    intel_batchbuffer_start_atomic(batch, 0x1000);
> +    gen9_gpe_pipeline_setup(ctx, &vme_context->gpe_context, batch);
> +
> +    /* chain to the command buffer filled above */
> +    BEGIN_BATCH(batch, 3);
> +    OUT_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
> +    OUT_RELOC(batch,
> +              vme_context->vme_batchbuffer.bo,
> +              I915_GEM_DOMAIN_COMMAND, 0,
> +              0);
> +    OUT_BATCH(batch, 0);
> +    ADVANCE_BATCH(batch);
> +
> +    gen9_gpe_pipeline_end(ctx, &vme_context->gpe_context, batch);
> +
> +    intel_batchbuffer_end_atomic(batch);
> +}
> +
> +static VAStatus gen9_vme_hevc_prepare(VADriverContextP ctx,
> +                                 struct encode_state *encode_state,
> +                                 struct intel_encoder_context *encoder_context)
> +{
> +    VAStatus vaStatus = VA_STATUS_SUCCESS;
> +    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
> +    int is_intra = pSliceParameter->slice_type == SLICE_TYPE_I;
> +    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
> +    struct gen6_vme_context *vme_context = encoder_context->vme_context;
> +
> +    /* here use the avc level for hevc vme */
> +    if (!vme_context->hevc_level ||
> +        (vme_context->hevc_level != pSequenceParameter->general_level_idc)) {
> +        vme_context->hevc_level = pSequenceParameter->general_level_idc;
> +    }
> +
> +    intel_vme_hevc_update_mbmv_cost(ctx, encode_state, encoder_context);
> +
> +    /*Setup all the memory object*/
> +    gen9_vme_hevc_surface_setup(ctx, encode_state, is_intra, encoder_context);
> +    gen9_vme_interface_setup(ctx, encode_state, encoder_context);
> +    //gen9_vme_vme_state_setup(ctx, encode_state, is_intra, encoder_context);
> +    gen9_vme_constant_setup(ctx, encode_state, encoder_context);
> +
> +    /*Programing media pipeline*/
> +    gen9_vme_hevc_pipeline_programing(ctx, encode_state, encoder_context);
> +
> +    return vaStatus;
> +}
> +
> +
> +static VAStatus
> +gen9_vme_hevc_pipeline(VADriverContextP ctx,
> +                  VAProfile profile,
> +                  struct encode_state *encode_state,
> +                  struct intel_encoder_context *encoder_context) {
> +    gen9_vme_media_init(ctx, encoder_context);
> +    gen9_vme_hevc_prepare(ctx, encode_state, encoder_context);
> +    gen9_vme_run(ctx, encode_state, encoder_context);
> +    gen9_vme_stop(ctx, encode_state, encoder_context);
> +
> +    return VA_STATUS_SUCCESS;
> +}
> +
> +
>  static void
>  gen9_vme_context_destroy(void *context)
>  {
> @@ -1172,6 +1650,12 @@ Bool gen9_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *e
>          vme_kernel_list = gen9_vme_mpeg2_kernels;
>          encoder_context->vme_pipeline = gen9_vme_mpeg2_pipeline;
>          i965_kernel_num = sizeof(gen9_vme_mpeg2_kernels) / sizeof(struct i965_kernel);
> +        break;
> +
> +   case CODEC_HEVC:
> +        vme_kernel_list = gen9_vme_hevc_kernels;
> +        encoder_context->vme_pipeline = gen9_vme_hevc_pipeline;
> +        i965_kernel_num = sizeof(gen9_vme_hevc_kernels) / sizeof(struct i965_kernel);
>  
>          break;
>  
> diff --git a/src/i965_encoder.c b/src/i965_encoder.c
> index d924f5a..c9ff2ec 100644
> --- a/src/i965_encoder.c
> +++ b/src/i965_encoder.c
> @@ -39,10 +39,12 @@
>  #include "i965_encoder.h"
>  #include "gen6_vme.h"
>  #include "gen6_mfc.h"
> +#include "gen9_mfc.h"
>  
>  extern Bool gen6_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
>  extern Bool gen6_vme_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
>  extern Bool gen7_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
> +extern Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context);
>  
>  static VAStatus
>  intel_encoder_check_yuv_surface(VADriverContextP ctx,
> @@ -422,6 +424,63 @@ error:
>  }
>  
>  static VAStatus
> +intel_encoder_check_hevc_parameter(VADriverContextP ctx,
> +                                  struct encode_state *encode_state,
> +                                  struct intel_encoder_context *encoder_context)
> +{
> +    struct i965_driver_data *i965 = i965_driver_data(ctx);
> +    struct object_surface *obj_surface;	
> +    struct object_buffer *obj_buffer;
> +    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
> +    int i;
> +
> +    assert(!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID));
> +
> +    if (pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_INVALID)
> +        goto error;
> +
> +    obj_surface = SURFACE(pic_param->decoded_curr_pic.picture_id);
> +    assert(obj_surface); /* It is possible the store buffer isn't allocated yet */
> +    
> +    if (!obj_surface)
> +        goto error;
> +
> +    encode_state->reconstructed_object = obj_surface;
> +    obj_buffer = BUFFER(pic_param->coded_buf);
> +    assert(obj_buffer && obj_buffer->buffer_store && obj_buffer->buffer_store->bo);
> +
> +    if (!obj_buffer || !obj_buffer->buffer_store || !obj_buffer->buffer_store->bo)
> +        goto error;
> +
> +    encode_state->coded_buf_object = obj_buffer;
> +
> +    for (i = 0; i < 15; i++) {
> +        if (pic_param->reference_frames[i].flags & VA_PICTURE_HEVC_INVALID ||
> +            pic_param->reference_frames[i].picture_id == VA_INVALID_SURFACE)
> +            break;
> +        else {
> +            obj_surface = SURFACE(pic_param->reference_frames[i].picture_id);
> +            assert(obj_surface);
> +
> +            if (!obj_surface)
> +                goto error;
> +
> +            if (obj_surface->bo)
> +                encode_state->reference_objects[i] = obj_surface;
> +            else
> +                encode_state->reference_objects[i] = NULL; /* FIXME: Warning or Error ??? */
> +        }
> +    }
> +
> +    for ( ; i < 15; i++)
> +        encode_state->reference_objects[i] = NULL;
> +    
> +    return VA_STATUS_SUCCESS;
> +
> +error:
> +    return VA_STATUS_ERROR_INVALID_PARAMETER;
> +}
> +static VAStatus
>  intel_encoder_sanity_check_input(VADriverContextP ctx,
>                                   VAProfile profile,
>                                   struct encode_state *encode_state,
> @@ -459,6 +518,13 @@ intel_encoder_sanity_check_input(VADriverContextP ctx,
>          break;
>      }
>  
> +    case VAProfileHEVCMain:  {
> +        vaStatus = intel_encoder_check_hevc_parameter(ctx, encode_state, encoder_context);
> +        if (vaStatus != VA_STATUS_SUCCESS)
> +            goto out;
> +        vaStatus = intel_encoder_check_yuv_surface(ctx, profile, encode_state, encoder_context);
> +        break;
> +    }
>      default:
>          vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
>          break;
> @@ -554,6 +620,10 @@ intel_enc_hw_context_init(VADriverContextP ctx,
>          encoder_context->codec = CODEC_JPEG;
>          break;
>  
> +    case VAProfileHEVCMain:
> +        encoder_context->codec = CODEC_HEVC;
> +        break;
> +
>      default:
>          /* Never get here */
>          assert(0);
> @@ -617,7 +687,9 @@ gen8_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
>  struct hw_context *
>  gen9_enc_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
>  {
> -    if (obj_config->profile == VAProfileJPEGBaseline)
> +    if (obj_config->profile == VAProfileHEVCMain) {
> +        return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_hcpe_context_init);
> +    } else if (obj_config->profile == VAProfileJPEGBaseline)
>          return intel_enc_hw_context_init(ctx, obj_config, gen8_vme_context_init, gen8_mfc_context_init);
>      else
>          return intel_enc_hw_context_init(ctx, obj_config, gen9_vme_context_init, gen9_mfc_context_init);
> --
> 1.9.1
> 
> _______________________________________________
> Libva mailing list
> Libva at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/libva




More information about the Libva mailing list