[Libva] [PATCH v3 intel-driver 6/9] decoder: h264: enable Picture ID Remapping on Haswell and newer.
Zhao, Yakui
yakui.zhao at intel.com
Tue Jun 3 23:15:44 PDT 2014
On Tue, 2014-06-03 at 23:20 -0600, Gwenole Beauchesne wrote:
> 2014-06-04 6:45 GMT+02:00 Gwenole Beauchesne <gb.devel at gmail.com>:
> > 2014-06-04 3:47 GMT+02:00 Zhao, Yakui <yakui.zhao at intel.com>:
> >> On Tue, 2014-06-03 at 10:43 -0600, Gwenole Beauchesne wrote:
> >>> Fill and submit MFX_AVC_PICID_STATE commands to Gen7.5+ hardware.
> >>> This optimizes the management of the DPB as the binding array can
> >>> now contain entries in any order. This also makes it possible to
> >>> support H.264 MultiView High profiles, with any particular number
> >>> of views.
> >>>
> >>> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
> >>> ---
> >>> src/gen75_mfd.c | 23 +++--------
> >>> src/gen8_mfd.c | 27 ++++---------
> >>> src/i965_decoder_utils.c | 98 ++++++++++++++++++++++++++++++++++++++++++++++
> >>> src/i965_decoder_utils.h | 23 +++++++++++
> >>> 4 files changed, 135 insertions(+), 36 deletions(-)
> >>>
> >>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
> >>> index 01db56c..b080fb8 100644
> >>> --- a/src/gen75_mfd.c
> >>> +++ b/src/gen75_mfd.c
> >>> @@ -632,25 +632,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx,
> >>> }
> >>> }
> >>>
> >>> -static void
> >>> +static inline void
> >>> gen75_mfd_avc_picid_state(VADriverContextP ctx,
> >>> struct decode_state *decode_state,
> >>> struct gen7_mfd_context *gen7_mfd_context)
> >>> {
> >>> - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
> >>> -
> >>> - BEGIN_BCS_BATCH(batch, 10);
> >>> - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
> >>> - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - ADVANCE_BCS_BATCH(batch);
> >>> + gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
> >>> + gen7_mfd_context->reference_surface);
> >>> }
> >>>
> >>> static void
> >>> @@ -1054,7 +1042,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
> >>>
> >>> assert(decode_state->pic_param && decode_state->pic_param->buffer);
> >>> pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
> >>> - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
> >>> + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
> >>> + gen7_mfd_context->reference_surface);
> >>> width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
> >>> height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
> >>> assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
> >>> @@ -1139,8 +1128,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx,
> >>> gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>> gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>> gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
> >>> - gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>> gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
> >>> + gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>>
> >>> for (j = 0; j < decode_state->num_slice_params; j++) {
> >>> assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
> >>> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
> >>> index 622e0f3..4e0a95a 100644
> >>> --- a/src/gen8_mfd.c
> >>> +++ b/src/gen8_mfd.c
> >>> @@ -499,25 +499,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx,
> >>> }
> >>> }
> >>>
> >>> -static void
> >>> +static inline void
> >>> gen8_mfd_avc_picid_state(VADriverContextP ctx,
> >>> - struct decode_state *decode_state,
> >>> - struct gen7_mfd_context *gen7_mfd_context)
> >>> + struct decode_state *decode_state,
> >>> + struct gen7_mfd_context *gen7_mfd_context)
> >>> {
> >>> - struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
> >>> -
> >>> - BEGIN_BCS_BATCH(batch, 10);
> >>> - OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
> >>> - OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - OUT_BCS_BATCH(batch, 0);
> >>> - ADVANCE_BCS_BATCH(batch);
> >>> + gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
> >>> + gen7_mfd_context->reference_surface);
> >>> }
> >>>
> >>> static void
> >>> @@ -827,7 +815,8 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
> >>>
> >>> assert(decode_state->pic_param && decode_state->pic_param->buffer);
> >>> pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
> >>> - intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
> >>> + gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
> >>> + gen7_mfd_context->reference_surface);
> >>> width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
> >>> height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
> >>> assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
> >>> @@ -912,8 +901,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx,
> >>> gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>> gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
> >>> gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
> >>> - gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>> gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
> >>> + gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
> >>>
> >>> for (j = 0; j < decode_state->num_slice_params; j++) {
> >>> assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
> >>> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
> >>> index 13b717d..22d5b6f 100644
> >>> --- a/src/i965_decoder_utils.c
> >>> +++ b/src/i965_decoder_utils.c
> >>> @@ -252,6 +252,23 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
> >>> memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
> >>> }
> >>>
> >>> +/* Returns a unique picture ID that represents the supplied VA surface object */
> >>> +int
> >>> +avc_get_picture_id(struct object_surface *obj_surface)
> >>> +{
> >>> + int pic_id;
> >>> +
> >>> + /* This highly depends on how the internal organization of VA objects.
> >>> +
> >>> + The VA objects are maintained in heaps so that any released VA
> >>> + surface will become free again for future allocation. This means
> >>> + that holes in there are filled in for subsequent allocations.
> >>> + So, this ultimately means that we could just use the Heap ID of
> >>> + the VA surface as the resulting picture ID (16 bits) */
> >>> + pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK);
> >>
> >> The OBJECT_HEAP_ID_MASK is 0xFFFFFF. Maybe the pic_id will be greater
> >> than 0xFFFF. So please directly use the 0xFFFF mask.
> >
> > That's what the check under that is for. In your proposal, you will
> > just wrap around, which is inappropriate as you wouldn't catch the
> > error. Anyway, that's a theoritical limit, and you clearly can't reach
> > that in most cases due to memory limitations anyway. So, no change
> > needed here.
>
> An alternative was to track and generate unique ids ourselves, and
> that was the other intention with the GenFrameStoreContext too. Still,
> this approach is considerably faster, at the expense to remember about
> *not* changing the semantics of the VA object heap implementation,
> i.e. an array of elements with holes filled in for future allocations.
>
> Beyond the memory limits (e.g. 65535x 1080p is 190 GB), the only other
> possible way to error out is in presence of memory leaks. But this
> remains fine to error out, since memory leaks are to be fixed if there
> are any. I will keep this as it is.
OK. This is no problem to me.
>
> >>> + return (pic_id <= 0xffff) ? pic_id : -1;
> >>> +}
> >>> +
> >>> /* Finds the VA/H264 picture associated with the specified VA surface id */
> >>> VAPictureH264 *
> >>> avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count)
> >>> @@ -508,6 +525,87 @@ intel_update_avc_frame_store_index(
> >>> }
> >>>
> >>> void
> >>> +gen75_update_avc_frame_store_index(
> >>> + VADriverContextP ctx,
> >>> + struct decode_state *decode_state,
> >>> + VAPictureParameterBufferH264 *pic_param,
> >>> + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +)
> >>> +{
> >>> + int i, n;
> >>> +
> >>> + /* Construct the Frame Store array */
> >>> + for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
> >>> + struct object_surface * const obj_surface =
> >>> + decode_state->reference_objects[i];
> >>> + if (!obj_surface)
> >>> + continue;
> >>> +
> >>> + GenFrameStore * const fs = &frame_store[n];
> >>> + fs->surface_id = obj_surface->base.id;
> >>> + fs->obj_surface = obj_surface;
> >>> + fs->frame_store_id = n++;
> >>> + }
> >>> +
> >>> + /* Any remaining entry is invalid */
> >>> + for (; n < MAX_GEN_REFERENCE_FRAMES; n++) {
> >>> + GenFrameStore * const fs = &frame_store[n];
> >>> + fs->surface_id = VA_INVALID_ID;
> >>> + fs->obj_surface = NULL;
> >>> + fs->frame_store_id = -1;
> >>> + }
> >>> +}
> >>> +
> >>> +bool
> >>> +gen75_fill_avc_picid_list(
> >>> + uint16_t pic_ids[16],
> >>> + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +)
> >>> +{
> >>> + int i, pic_id;
> >>> +
> >>> + /* Fill in with known picture IDs. The Frame Store array is in
> >>> + compact form, i.e. empty entries are only to be found at the
> >>> + end of the array: there are no holes in the set of active
> >>> + reference frames */
> >>> + for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
> >>> + GenFrameStore * const fs = &frame_store[i];
> >>> + if (!fs->obj_surface)
> >>> + break;
> >>> + pic_id = avc_get_picture_id(fs->obj_surface);
> >>> + if (pic_id < 0)
> >>> + return false;
> >>
> >> the below assert of "fs->frame_store_id == i " be removed.
> >
> > The assert() is only useful to detect programming errors, it won't be
> > reached in practice. The only way to hit it is to forget to use the
> > Gen7.5+ frame store update function for future generations. But that's
> > fine to remove it. Just remember about that.
> >
> >>
> >>> + assert(fs->frame_store_id == i);
> >>> + pic_ids[i] = pic_id;
> >>> + }
> >>> +
> >>> + /* When an element of the list is not relevant the value of the
> >>> + picture ID shall be set to 0 */
> >>> + for (; i < MAX_GEN_REFERENCE_FRAMES; i++)
> >>> + pic_ids[i] = 0;
> >>> + return true;
> >>> +}
> >>> +
> >>> +bool
> >>> +gen75_send_avc_picid_state(
> >>> + struct intel_batchbuffer *batch,
> >>> + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +)
> >>> +{
> >>> + uint16_t pic_ids[16];
> >>> +
> >>> + if (!gen75_fill_avc_picid_list(pic_ids, frame_store))
> >>> + return false;
> >>> +
> >>> + BEGIN_BCS_BATCH(batch, 10);
> >>> + OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
> >>> + OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping
> >>> + intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids));
> >>> + ADVANCE_BCS_BATCH(batch);
> >>> + return true;
> >>> +}
> >>> +
> >>> +void
> >>> intel_update_vc1_frame_store_index(VADriverContextP ctx,
> >>> struct decode_state *decode_state,
> >>> VAPictureParameterBufferVC1 *pic_param,
> >>> diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
> >>> index a4c9415..0ffbd7f 100644
> >>> --- a/src/i965_decoder_utils.h
> >>> +++ b/src/i965_decoder_utils.h
> >>> @@ -54,6 +54,9 @@ avc_ensure_surface_bo(
> >>> void
> >>> avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
> >>>
> >>> +int
> >>> +avc_get_picture_id(struct object_surface *obj_surface);
> >>> +
> >>> VAPictureH264 *
> >>> avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count);
> >>>
> >>> @@ -98,6 +101,26 @@ intel_update_avc_frame_store_index(VADriverContextP ctx,
> >>> GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);
> >>>
> >>> void
> >>> +gen75_update_avc_frame_store_index(
> >>> + VADriverContextP ctx,
> >>> + struct decode_state *decode_state,
> >>> + VAPictureParameterBufferH264 *pic_param,
> >>> + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +);
> >>> +
> >>> +bool
> >>> +gen75_fill_avc_picid_list(
> >>> + uint16_t pic_ids[16],
> >>> + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +);
> >>> +
> >>> +bool
> >>> +gen75_send_avc_picid_state(
> >>> + struct intel_batchbuffer *batch,
> >>> + GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]
> >>> +);
> >>> +
> >>> +void
> >>> intel_update_vc1_frame_store_index(VADriverContextP ctx,
> >>> struct decode_state *decode_state,
> >>> VAPictureParameterBufferVC1 *pic_param,
> >>
> >>
More information about the Libva
mailing list