[Libva] [PATCH v3 intel-driver 6/9] decoder: h264: enable Picture ID Remapping on Haswell and newer.

Zhao, Yakui yakui.zhao at intel.com
Tue Jun 3 23:15:44 PDT 2014


On Tue, 2014-06-03 at 23:20 -0600, Gwenole Beauchesne wrote:
> 2014-06-04 6:45 GMT+02:00 Gwenole Beauchesne <gb.devel at gmail.com>:
> > 2014-06-04 3:47 GMT+02:00 Zhao, Yakui <yakui.zhao at intel.com>:
> >> On Tue, 2014-06-03 at 10:43 -0600, Gwenole Beauchesne wrote:
> >>> Fill and submit MFX_AVC_PICID_STATE commands to Gen7.5+ hardware.
> >>> This optimizes the management of the DPB as the binding array can
> >>> now contain entries in any order. This also makes it possible to
> >>> support H.264 MultiView High profiles, with any particular number
> >>> of views.
> >>>
> >>> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> >>> ---
> >>>  src/gen75_mfd.c          |   23 +++--------
> >>>  src/gen8_mfd.c           |   27 ++++---------
> >>>  src/i965_decoder_utils.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++
> >>>  src/i965_decoder_utils.h |   23 +++++++++++
> >>>  4 files changed, 135 insertions(+), 36 deletions(-)
> >>>
> >>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> >>> index 01db56c..b080fb8 100644
> >>> --- a/src/gen75_mfd.c
> >>> +++ b/src/gen75_mfd.c
> >>> @@ -632,25 +632,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx,
> >>>      }
> >>>  }
> >>>
> >>> -static void
> >>> +static inline void
> >>>  gen75_mfd_avc_picid_state(VADriverContextP ctx,
> >>>                        struct decode_state *decode_state,
> >>>                        struct gen7_mfd_context *gen7_mfd_context)
> >>>  {
> >>> -    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
> >>> -
> >>> -    BEGIN_BCS_BATCH(batch, 10);
> >>> -    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
> >>> -    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    ADVANCE_BCS_BATCH(batch);
> >>> +    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
> >>> +        gen7_mfd_context->reference_surface);
> >>>  }
> >>>
> >>>  static void
> >>> @@ -1054,7 +1042,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
> >>>
> >>>      assert(decode_state->pic_param && decode_state->pic_param->buffer);
> >>>      pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
> >>> -    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
> >>> +    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
> >>> +        gen7_mfd_context->reference_surface);
> >>>      width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
> >>>      height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
> >>>      assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
> >>> @@ -1139,8 +1128,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx,
> >>>      gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>>      gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>>      gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
> >>> -    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>>      gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
> >>> +    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>>
> >>>      for (j = 0; j < decode_state->num_slice_params; j++) {
> >>>          assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
> >>> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
> >>> index 622e0f3..4e0a95a 100644
> >>> --- a/src/gen8_mfd.c
> >>> +++ b/src/gen8_mfd.c
> >>> @@ -499,25 +499,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx,
> >>>      }
> >>>  }
> >>>
> >>> -static void
> >>> +static inline void
> >>>  gen8_mfd_avc_picid_state(VADriverContextP ctx,
> >>> -                      struct decode_state *decode_state,
> >>> -                      struct gen7_mfd_context *gen7_mfd_context)
> >>> +    struct decode_state *decode_state,
> >>> +    struct gen7_mfd_context *gen7_mfd_context)
> >>>  {
> >>> -    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
> >>> -
> >>> -    BEGIN_BCS_BATCH(batch, 10);
> >>> -    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
> >>> -    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    OUT_BCS_BATCH(batch, 0);
> >>> -    ADVANCE_BCS_BATCH(batch);
> >>> +    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
> >>> +        gen7_mfd_context->reference_surface);
> >>>  }
> >>>
> >>>  static void
> >>> @@ -827,7 +815,8 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
> >>>
> >>>      assert(decode_state->pic_param && decode_state->pic_param->buffer);
> >>>      pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
> >>> -    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
> >>> +    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
> >>> +        gen7_mfd_context->reference_surface);
> >>>      width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
> >>>      height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
> >>>      assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
> >>> @@ -912,8 +901,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx,
> >>>      gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>>      gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>>      gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
> >>> -    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>>      gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
> >>> +    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>>
> >>>      for (j = 0; j < decode_state->num_slice_params; j++) {
> >>>          assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
> >>> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
> >>> index 13b717d..22d5b6f 100644
> >>> --- a/src/i965_decoder_utils.c
> >>> +++ b/src/i965_decoder_utils.c
> >>> @@ -252,6 +252,23 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
> >>>      memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
> >>>  }
> >>>
> >>> +/* Returns a unique picture ID that represents the supplied VA surface object */
> >>> +int
> >>> +avc_get_picture_id(struct object_surface *obj_surface)
> >>> +{
> >>> +    int pic_id;
> >>> +
> >>> +    /* This highly depends on how the internal organization of VA objects.
> >>> +
> >>> +       The VA objects are maintained in heaps so that any released VA
> >>> +       surface will become free again for future allocation. This means
> >>> +       that holes in there are filled in for subsequent allocations.
> >>> +       So, this ultimately means that we could just use the Heap ID of
> >>> +       the VA surface as the resulting picture ID (16 bits) */
> >>> +    pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK);
> >>
> >> The OBJECT_HEAP_ID_MASK is 0xFFFFFF. Maybe the pic_id will be greater
> >> than 0xFFFF. So please directly use the 0xFFFF mask.
> >
> > That's what the check under that is for. In your proposal, you will
> > just wrap around, which is inappropriate as you wouldn't catch the
> > error. Anyway, that's a theoritical limit, and you clearly can't reach
> > that in most cases due to memory limitations anyway. So, no change
> > needed here.
> 
> An alternative was to track and generate unique ids ourselves, and
> that was the other intention with the GenFrameStoreContext too. Still,
> this approach is considerably faster, at the expense to remember about
> *not* changing the semantics of the VA object heap implementation,
> i.e. an array of elements with holes filled in for future allocations.
> 
> Beyond the memory limits (e.g. 65535x 1080p is 190 GB), the only other
> possible way to error out is in presence of memory leaks. But this
> remains fine to error out, since memory leaks are to be fixed if there
> are any. I will keep this as it is.

OK. This is no problem to me.

> 
> >>> +    return (pic_id <= 0xffff) ? pic_id : -1;
> >>> +}
> >>> +
> >>>  /* Finds the VA/H264 picture associated with the specified VA surface id */
> >>>  VAPictureH264 *
> >>>  avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count)
> >>> @@ -508,6 +525,87 @@ intel_update_avc_frame_store_index(
> >>>  }
> >>>
> >>>  void
> >>> +gen75_update_avc_frame_store_index(
> >>> +    VADriverContextP              ctx,
> >>> +    struct decode_state          *decode_state,
> >>> +    VAPictureParameterBufferH264 *pic_param,
> >>> +    GenFrameStore                 frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +)
> >>> +{
> >>> +    int i, n;
> >>> +
> >>> +    /* Construct the Frame Store array */
> >>> +    for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
> >>> +        struct object_surface * const obj_surface =
> >>> +            decode_state->reference_objects[i];
> >>> +        if (!obj_surface)
> >>> +            continue;
> >>> +
> >>> +        GenFrameStore * const fs = &frame_store[n];
> >>> +        fs->surface_id = obj_surface->base.id;
> >>> +        fs->obj_surface = obj_surface;
> >>> +        fs->frame_store_id = n++;
> >>> +    }
> >>> +
> >>> +    /* Any remaining entry is invalid */
> >>> +    for (; n < MAX_GEN_REFERENCE_FRAMES; n++) {
> >>> +        GenFrameStore * const fs = &frame_store[n];
> >>> +        fs->surface_id = VA_INVALID_ID;
> >>> +        fs->obj_surface = NULL;
> >>> +        fs->frame_store_id = -1;
> >>> +    }
> >>> +}
> >>> +
> >>> +bool
> >>> +gen75_fill_avc_picid_list(
> >>> +    uint16_t                            pic_ids[16],
> >>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +)
> >>> +{
> >>> +    int i, pic_id;
> >>> +
> >>> +    /* Fill in with known picture IDs. The Frame Store array is in
> >>> +       compact form, i.e. empty entries are only to be found at the
> >>> +       end of the array: there are no holes in the set of active
> >>> +       reference frames */
> >>> +    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
> >>> +        GenFrameStore * const fs = &frame_store[i];
> >>> +        if (!fs->obj_surface)
> >>> +            break;
> >>> +        pic_id = avc_get_picture_id(fs->obj_surface);
> >>> +        if (pic_id < 0)
> >>> +            return false;
> >>
> >> the below assert of "fs->frame_store_id == i " be removed.
> >
> > The assert() is only useful to detect programming errors, it won't be
> > reached in practice. The only way to hit it is to forget to use the
> > Gen7.5+ frame store update function for future generations. But that's
> > fine to remove it. Just remember about that.
> >
> >>
> >>> +        assert(fs->frame_store_id == i);
> >>> +        pic_ids[i] = pic_id;
> >>> +    }
> >>> +
> >>> +    /* When an element of the list is not relevant the value of the
> >>> +       picture ID shall be set to 0 */
> >>> +    for (; i < MAX_GEN_REFERENCE_FRAMES; i++)
> >>> +        pic_ids[i] = 0;
> >>> +    return true;
> >>> +}
> >>> +
> >>> +bool
> >>> +gen75_send_avc_picid_state(
> >>> +    struct intel_batchbuffer           *batch,
> >>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +)
> >>> +{
> >>> +    uint16_t pic_ids[16];
> >>> +
> >>> +    if (!gen75_fill_avc_picid_list(pic_ids, frame_store))
> >>> +        return false;
> >>> +
> >>> +    BEGIN_BCS_BATCH(batch, 10);
> >>> +    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
> >>> +    OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping
> >>> +    intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids));
> >>> +    ADVANCE_BCS_BATCH(batch);
> >>> +    return true;
> >>> +}
> >>> +
> >>> +void
> >>>  intel_update_vc1_frame_store_index(VADriverContextP ctx,
> >>>                                     struct decode_state *decode_state,
> >>>                                     VAPictureParameterBufferVC1 *pic_param,
> >>> diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
> >>> index a4c9415..0ffbd7f 100644
> >>> --- a/src/i965_decoder_utils.h
> >>> +++ b/src/i965_decoder_utils.h
> >>> @@ -54,6 +54,9 @@ avc_ensure_surface_bo(
> >>>  void
> >>>  avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
> >>>
> >>> +int
> >>> +avc_get_picture_id(struct object_surface *obj_surface);
> >>> +
> >>>  VAPictureH264 *
> >>>  avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count);
> >>>
> >>> @@ -98,6 +101,26 @@ intel_update_avc_frame_store_index(VADriverContextP ctx,
> >>>                                     GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);
> >>>
> >>>  void
> >>> +gen75_update_avc_frame_store_index(
> >>> +    VADriverContextP                    ctx,
> >>> +    struct decode_state                *decode_state,
> >>> +    VAPictureParameterBufferH264       *pic_param,
> >>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +);
> >>> +
> >>> +bool
> >>> +gen75_fill_avc_picid_list(
> >>> +    uint16_t                            pic_ids[16],
> >>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +);
> >>> +
> >>> +bool
> >>> +gen75_send_avc_picid_state(
> >>> +    struct intel_batchbuffer           *batch,
> >>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +);
> >>> +
> >>> +void
> >>>  intel_update_vc1_frame_store_index(VADriverContextP ctx,
> >>>                                     struct decode_state *decode_state,
> >>>                                     VAPictureParameterBufferVC1 *pic_param,
> >>
> >>




More information about the Libva mailing list