[Libva] [PATCH 4/4] decoder: h264: enable Picture ID Remapping on Haswell and newer.

Mon Jun 2 10:58:42 PDT 2014

Fill and submit MFD_AVC_PICID_STATE commands to Gen7.5+ hardware.
This optimizes the management of the DPB as the binding array can
now contain entries in any order. This also makes it possible to
support H.264 MultiView High profiles, with an arbitrary number
of views.

Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
---
 src/gen75_mfd.c          |   20 ++--------
 src/gen8_mfd.c           |   26 ++++--------
 src/i965_decoder_utils.c |   99 ++++++++++++++++++++++++++++++++++++++++++++++
 src/i965_decoder_utils.h |   23 +++++++++++
 4 files changed, 133 insertions(+), 35 deletions(-)

diff --git a/src/gen75_mfd.c b/src/gen75_mfd.c
index 673ca14..25806b8 100644
--- a/src/gen75_mfd.c
+++ b/src/gen75_mfd.c
@@ -626,25 +626,13 @@ gen75_mfd_avc_qm_state(VADriverContextP ctx,
     }
 }
 
-static void
+static inline void
 gen75_mfd_avc_picid_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
 {
-    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-
-    BEGIN_BCS_BATCH(batch, 10);
-    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
-    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    ADVANCE_BCS_BATCH(batch);
+    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
+        &gen7_mfd_context->fsc);
 }
 
 static void
@@ -1045,7 +1033,7 @@ gen75_mfd_avc_decode_init(VADriverContextP ctx,
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
-    intel_update_avc_frame_store_index(ctx, &gen7_mfd_context->fsc,
+    gen75_update_avc_frame_store_index(ctx, &gen7_mfd_context->fsc,
         decode_state, pic_param);
     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
diff --git a/src/gen8_mfd.c b/src/gen8_mfd.c
index 13514df..2e0bc85 100644
--- a/src/gen8_mfd.c
+++ b/src/gen8_mfd.c
@@ -496,25 +496,13 @@ gen8_mfd_avc_qm_state(VADriverContextP ctx,
     }
 }
 
-static void
+static inline void
 gen8_mfd_avc_picid_state(VADriverContextP ctx,
-                      struct decode_state *decode_state,
-                      struct gen7_mfd_context *gen7_mfd_context)
+    struct decode_state *decode_state,
+    struct gen7_mfd_context *gen7_mfd_context)
 {
-    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-
-    BEGIN_BCS_BATCH(batch, 10);
-    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
-    OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    OUT_BCS_BATCH(batch, 0);
-    ADVANCE_BCS_BATCH(batch);
+    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
+        &gen7_mfd_context->fsc);
 }
 
 static void
@@ -822,7 +810,7 @@ gen8_mfd_avc_decode_init(VADriverContextP ctx,
 
     assert(decode_state->pic_param && decode_state->pic_param->buffer);
     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
-    intel_update_avc_frame_store_index(ctx, &gen7_mfd_context->fsc,
+    gen75_update_avc_frame_store_index(ctx, &gen7_mfd_context->fsc,
         decode_state, pic_param);
     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
@@ -910,8 +898,8 @@ gen8_mfd_avc_decode_picture(VADriverContextP ctx,
     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
-    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
+    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
 
     for (j = 0; j < decode_state->num_slice_params; j++) {
         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index 8f23bfb..ec4f50a 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -252,6 +252,23 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
     memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
 }
 
+/* Returns a unique picture ID that represents the supplied VA surface object, or -1 if that ID does not fit in 16 bits */
+int
+avc_get_picture_id(struct object_surface *obj_surface)
+{
+    int pic_id;
+
+    /* This highly depends on the internal organization of VA objects.
+
+       The VA objects are maintained in heaps so that any released VA
+       surface becomes free again for future allocation. This means
+       that holes in the heap are filled in by subsequent allocations.
+       So, this ultimately means that we can just use the Heap ID of
+       the VA surface, plus one, as the resulting picture ID (16 bits) */
+    pic_id = 1 + (obj_surface->base.id & OBJECT_HEAP_ID_MASK);
+    return (pic_id <= 0xffff) ? pic_id : -1;
+}
+
 /* Finds the VA/H264 picture associated with the specified VA surface id */
 VAPictureH264 *
 avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count)
@@ -522,6 +539,88 @@ intel_update_avc_frame_store_index(
 }
 
 void
+gen75_update_avc_frame_store_index(
+    VADriverContextP                    ctx,
+    GenFrameStoreContext               *fs_ctx,
+    struct decode_state                *decode_state,
+    VAPictureParameterBufferH264       *pic_param
+)
+{
+    int i, n;
+
+    /* Construct the Frame Store array in compact form: non-NULL reference surfaces are packed first, with frame_store_id equal to the array index */
+    for (i = 0, n = 0; i < ARRAY_ELEMS(decode_state->reference_objects); i++) {
+        struct object_surface * const obj_surface =
+            decode_state->reference_objects[i];
+        if (!obj_surface)
+            continue;
+
+        GenFrameStore * const fs = &fs_ctx->used_frames[n];
+        fs->surface_id = obj_surface->base.id;
+        fs->obj_surface = obj_surface;
+        fs->frame_store_id = n++;
+    }
+
+    /* Any remaining entry is marked invalid (no surface, frame_store_id set to -1) */
+    for (; n < MAX_GEN_REFERENCE_FRAMES; n++) {
+        GenFrameStore * const fs = &fs_ctx->used_frames[n];
+        fs->surface_id = VA_INVALID_ID;
+        fs->obj_surface = NULL;
+        fs->frame_store_id = -1;
+    }
+}
+
+bool
+gen75_fill_avc_picid_list(
+    uint16_t                            pic_ids[16],
+    GenFrameStoreContext               *fs_ctx
+)
+{
+    int i, pic_id;
+
+    /* Fill in with known picture IDs, returning false as soon as one
+       of them is invalid (negative). The Frame Store array is in
+       compact form, i.e. empty entries are only to be found at the end
+       of the array: there are no holes in the set of active reference frames */
+    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
+        GenFrameStore * const fs = &fs_ctx->used_frames[i];
+        if (!fs->obj_surface)
+            break;
+        pic_id = avc_get_picture_id(fs->obj_surface);
+        if (pic_id < 0)
+            return false;
+        assert(fs->frame_store_id == i);
+        pic_ids[i] = pic_id;
+    }
+
+    /* When an element of the list is not relevant, the value of the
+       picture ID shall be set to 0 */
+    for (; i < MAX_GEN_REFERENCE_FRAMES; i++)
+        pic_ids[i] = 0;
+    return true;
+}
+
+
+bool
+gen75_send_avc_picid_state(
+    struct intel_batchbuffer           *batch,
+    GenFrameStoreContext               *fs_ctx
+)
+{
+    uint16_t pic_ids[16];
+
+    if (!gen75_fill_avc_picid_list(pic_ids, fs_ctx))
+        return false;
+
+    BEGIN_BCS_BATCH(batch, 10);
+    OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
+    OUT_BCS_BATCH(batch, 0); // 0: enable Picture ID Remapping (the code this replaces wrote 1 to disable it)
+    intel_batchbuffer_data(batch, pic_ids, sizeof(pic_ids)); // 16 x 16-bit picture IDs fill the remaining 8 dwords
+    ADVANCE_BCS_BATCH(batch);
+    return true;
+}
+
+void
 intel_update_vc1_frame_store_index(VADriverContextP ctx,
                                    struct decode_state *decode_state,
                                    VAPictureParameterBufferVC1 *pic_param,
diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index 77e9d2e..9296a7c 100644
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h
@@ -54,6 +54,9 @@ avc_ensure_surface_bo(
 void
 avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
 
+int
+avc_get_picture_id(struct object_surface *obj_surface);
+
 VAPictureH264 *
 avc_find_picture(VASurfaceID id, VAPictureH264 *pic_list, int pic_list_count);
 
@@ -100,6 +103,26 @@ intel_update_avc_frame_store_index(
 );
 
 void
+gen75_update_avc_frame_store_index(
+    VADriverContextP                    ctx,
+    GenFrameStoreContext               *fs_ctx,
+    struct decode_state                *decode_state,
+    VAPictureParameterBufferH264       *pic_param
+);
+
+bool
+gen75_fill_avc_picid_list(
+    uint16_t                            pic_ids[16],
+    GenFrameStoreContext               *fs_ctx
+);
+
+bool
+gen75_send_avc_picid_state(
+    struct intel_batchbuffer           *batch,
+    GenFrameStoreContext               *fs_ctx
+);
+
+void
 intel_update_vc1_frame_store_index(VADriverContextP ctx,
                                    struct decode_state *decode_state,
                                    VAPictureParameterBufferVC1 *pic_param,
-- 
1.7.9.5