[Libva] [PATCH v3 intel-driver 6/9] decoder: h264: enable Picture ID Remapping on Haswell and newer.

Tue Jun 3 22:20:33 PDT 2014

2014-06-04 6:45 GMT+02:00 Gwenole Beauchesne <gb.devel at gmail.com>:
> 2014-06-04 3:47 GMT+02:00 Zhao, Yakui <yakui.zhao at intel.com>:
>> On Tue, 2014-06-03 at 10:43 -0600, Gwenole Beauchesne wrote:
>>> Fill and submit MFX_AVC_PICID_STATE commands to Gen7.5+ hardware.
>>> This optimizes the management of the DPB as the binding array can
>>> now contain entries in any order. This also makes it possible to
>>> support H.264 MultiView High profiles, with any particular number
>>> of views.
>>>
>>> Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
>>> ---
>>>  src/gen75_mfd.c          |   23 +++--------
>>>  src/gen8_mfd.c           |   27 ++++---------
>>>  src/i965_decoder_utils.c |   98 ++++++++++++++++++++++++++++++++++++++++++++++
>>>  src/i965_decoder_utils.h |   23 +++++++++++
>>>  4 files changed, 135 insertions(+), 36 deletions(-)
>>>
>>> diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
>>> index 01db56c..b080fb8 100644
>>> --- a/src/gen75_mfd.c
>>> +++ b/src/gen75_mfd.c
>>> @@ -632,25 +632,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx,
>>>      }
>>>  }
>>>
>>> -static void
>>> +static inline void
>>>  gen75_mfd_avc_picid_state(VADriverContextP ctx,
>>>                        struct decode_state *decode_state,
>>>                        struct gen7_mfd_context *gen7_mfd_context)
>>>  {
>>> -    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
>>> -
>>> -    BEGIN_BCS_BATCH(batch, 10);
>>> -    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
>>> -    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    ADVANCE_BCS_BATCH(batch);
>>> +    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
>>> +        gen7_mfd_context->reference_surface);
>>>  }
>>>
>>>  static void
>>> @@ -1054,7 +1042,8 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
>>>
>>>      assert(decode_state->pic_param && decode_state->pic_param->buffer);
>>>      pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
>>> -    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
>>> +    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
>>> +        gen7_mfd_context->reference_surface);
>>>      width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
>>>      height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
>>>      assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
>>> @@ -1139,8 +1128,8 @@ gen75_mfd_avc_decode_picture(VADriverContextP ctx,
>>>      gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
>>>      gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
>>>      gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
>>> -    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
>>>      gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
>>> +    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
>>>
>>>      for (j = 0; j < decode_state->num_slice_params; j++) {
>>>          assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
>>> diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
>>> index 622e0f3..4e0a95a 100644
>>> --- a/src/gen8_mfd.c
>>> +++ b/src/gen8_mfd.c
>>> @@ -499,25 +499,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx,
>>>      }
>>>  }
>>>
>>> -static void
>>> +static inline void
>>>  gen8_mfd_avc_picid_state(VADriverContextP ctx,
>>> -                      struct decode_state *decode_state,
>>> -                      struct gen7_mfd_context *gen7_mfd_context)
>>> +    struct decode_state *decode_state,
>>> +    struct gen7_mfd_context *gen7_mfd_context)
>>>  {
>>> -    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
>>> -
>>> -    BEGIN_BCS_BATCH(batch, 10);
>>> -    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
>>> -    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    OUT_BCS_BATCH(batch, 0);
>>> -    ADVANCE_BCS_BATCH(batch);
>>> +    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
>>> +        gen7_mfd_context->reference_surface);
>>>  }
>>>
>>>  static void
>>> @@ -827,7 +815,8 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
>>>
>>>      assert(decode_state->pic_param && decode_state->pic_param->buffer);
>>>      pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
>>> -    intel_update_avc_frame_store_index(ctx, decode_state, pic_param, gen7_mfd_context->reference_surface);
>>> +    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
>>> +        gen7_mfd_context->reference_surface);
>>>      width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
>>>      height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
>>>      assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
>>> @@ -912,8 +901,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx,
>>>      gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
>>>      gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
>>>      gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
>>> -    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
>>>      gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
>>> +    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
>>>
>>>      for (j = 0; j < decode_state->num_slice_params; j++) {
>>>          assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
>>> diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
>>> index 13b717d..22d5b6f 100644
>>> --- a/src/i965_decoder_utils.c
>>> +++ b/src/i965_decoder_utils.c
>>> @@ -252,6 +252,23 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
>>>      memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
>>>  }
>>>
>>> +/* Returns a unique picture ID that represents the supplied VA surface object */
>>> +int
>>> +avc_get_picture_id(struct object_surface *obj_surface)
>>> +{
>>> +    int pic_id;
>>> +
>>> +    /* This highly depends on how the internal organization of VA objects.
>>> +
>>> +       The VA objects are maintained in heaps so that any released VA
>>> +       surface will become free again for future allocation. This means
>>> +       that holes in there are filled in for subsequent allocations.
>>> +       So, this ultimately means that we could just use the Heap ID of
>>> +       the VA surface as the resulting picture ID (16 bits) */
>>> +    pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK);
>>
>> The OBJECT_HEAP_ID_MASK is 0xFFFFFF. Maybe the pic_id will be greater
>> than 0xFFFF. So please directly use the 0xFFFF mask.
>
> That's what the check under that is for. In your proposal, you will
> just wrap around, which is inappropriate as you wouldn't catch the
> error. Anyway, that's a theoritical limit, and you clearly can't reach
> that in most cases due to memory limitations anyway. So, no change
> needed here.

An alternative was to track and generate unique ids ourselves, and
that was the other intention with the GenFrameStoreContext too. Still,
this approach is considerably faster, at the expense to remember about
*not* changing the semantics of the VA object heap implementation,
i.e. an array of elements with holes filled in for future allocations.

Beyond the memory limits (e.g. 65535x 1080p is 190 GB), the only other
possible way to error out is in presence of memory leaks. But this
remains fine to error out, since memory leaks are to be fixed if there
are any. I will keep this as it is.

>>> +    return (pic_id <= 0xffff) ? pic_id : -1;
>>> +}
>>> +
>>>  /* Finds the VA/H264 picture associated with the specified VA surface id */
>>>  VAPictureH264 *
>>>  avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count)
>>> @@ -508,6 +525,87 @@ intel_update_avc_frame_store_index(
>>>  }
>>>
>>>  void
>>> +gen75_update_avc_frame_store_index(
>>> +    VADriverContextP              ctx,
>>> +    struct decode_state          *decode_state,
>>> +    VAPictureParameterBufferH264 *pic_param,
>>> +    GenFrameStore                 frame_store[MAX_GEN_REFERENCE_FRAMES]
>>> +)
>>> +{
>>> +    int i, n;
>>> +
>>> +    /* Construct the Frame Store array */
>>> +    for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
>>> +        struct object_surface * const obj_surface =
>>> +            decode_state->reference_objects[i];
>>> +        if (!obj_surface)
>>> +            continue;
>>> +
>>> +        GenFrameStore * const fs = &frame_store[n];
>>> +        fs->surface_id = obj_surface->base.id;
>>> +        fs->obj_surface = obj_surface;
>>> +        fs->frame_store_id = n++;
>>> +    }
>>> +
>>> +    /* Any remaining entry is invalid */
>>> +    for (; n < MAX_GEN_REFERENCE_FRAMES; n++) {
>>> +        GenFrameStore * const fs = &frame_store[n];
>>> +        fs->surface_id = VA_INVALID_ID;
>>> +        fs->obj_surface = NULL;
>>> +        fs->frame_store_id = -1;
>>> +    }
>>> +}
>>> +
>>> +bool
>>> +gen75_fill_avc_picid_list(
>>> +    uint16_t                            pic_ids[16],
>>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
>>> +)
>>> +{
>>> +    int i, pic_id;
>>> +
>>> +    /* Fill in with known picture IDs. The Frame Store array is in
>>> +       compact form, i.e. empty entries are only to be found at the
>>> +       end of the array: there are no holes in the set of active
>>> +       reference frames */
>>> +    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
>>> +        GenFrameStore * const fs = &frame_store[i];
>>> +        if (!fs->obj_surface)
>>> +            break;
>>> +        pic_id = avc_get_picture_id(fs->obj_surface);
>>> +        if (pic_id < 0)
>>> +            return false;
>>
>> the below assert of "fs->frame_store_id == i " be removed.
>
> The assert() is only useful to detect programming errors, it won't be
> reached in practice. The only way to hit it is to forget to use the
> Gen7.5+ frame store update function for future generations. But that's
> fine to remove it. Just remember about that.
>
>>
>>> +        assert(fs->frame_store_id == i);
>>> +        pic_ids[i] = pic_id;
>>> +    }
>>> +
>>> +    /* When an element of the list is not relevant the value of the
>>> +       picture ID shall be set to 0 */
>>> +    for (; i < MAX_GEN_REFERENCE_FRAMES; i++)
>>> +        pic_ids[i] = 0;
>>> +    return true;
>>> +}
>>> +
>>> +bool
>>> +gen75_send_avc_picid_state(
>>> +    struct intel_batchbuffer           *batch,
>>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
>>> +)
>>> +{
>>> +    uint16_t pic_ids[16];
>>> +
>>> +    if (!gen75_fill_avc_picid_list(pic_ids, frame_store))
>>> +        return false;
>>> +
>>> +    BEGIN_BCS_BATCH(batch, 10);
>>> +    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
>>> +    OUT_BCS_BATCH(batch, 0); // enable Picture ID Remapping
>>> +    intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids));
>>> +    ADVANCE_BCS_BATCH(batch);
>>> +    return true;
>>> +}
>>> +
>>> +void
>>>  intel_update_vc1_frame_store_index(VADriverContextP ctx,
>>>                                     struct decode_state *decode_state,
>>>                                     VAPictureParameterBufferVC1 *pic_param,
>>> diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
>>> index a4c9415..0ffbd7f 100644
>>> --- a/src/i965_decoder_utils.h
>>> +++ b/src/i965_decoder_utils.h
>>> @@ -54,6 +54,9 @@ avc_ensure_surface_bo(
>>>  void
>>>  avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
>>>
>>> +int
>>> +avc_get_picture_id(struct object_surface *obj_surface);
>>> +
>>>  VAPictureH264 *
>>>  avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count);
>>>
>>> @@ -98,6 +101,26 @@ intel_update_avc_frame_store_index(VADriverContextP ctx,
>>>                                     GenFrameStore frame_store[MAX_GEN_REFERENCE_FRAMES]);
>>>
>>>  void
>>> +gen75_update_avc_frame_store_index(
>>> +    VADriverContextP                    ctx,
>>> +    struct decode_state                *decode_state,
>>> +    VAPictureParameterBufferH264       *pic_param,
>>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
>>> +);
>>> +
>>> +bool
>>> +gen75_fill_avc_picid_list(
>>> +    uint16_t                            pic_ids[16],
>>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
>>> +);
>>> +
>>> +bool
>>> +gen75_send_avc_picid_state(
>>> +    struct intel_batchbuffer           *batch,
>>> +    GenFrameStore                       frame_store[MAX_GEN_REFERENCE_FRAMES]
>>> +);
>>> +
>>> +void
>>>  intel_update_vc1_frame_store_index(VADriverContextP ctx,
>>>                                     struct decode_state *decode_state,
>>>                                     VAPictureParameterBufferVC1 *pic_param,
>>
>>