[Libva] [PATCH] h264: fix first macroblock bit offset calculation (ILK, SNB, IVB).

Gwenole Beauchesne gb.devel at gmail.com
Thu Mar 1 22:41:32 PST 2012


This simplifies and fixes the scan for emulation_prevention_bytes,
thus avoiding a read beyond the end of the slice data buffer. In addition,
this uses dri_bo_get_subdata() to read the slice data buffer back.

HW specific changes:
- SNB: make the HW skip the emulation prevention bytes itself.
- IVB: fix MFD_AVC_BSD_OBJECT to report the actual slice data buffer size.

Note: this assumes VASliceParameterBufferH264.slice_data_bit_offset
represents the offset relative to the raw bitstream, i.e. including
emulation prevention bytes (EPB). Should this offset instead exclude the
emulation prevention bytes, then avc_get_first_mb_bit_offset() is to be
swapped with the _with_epb() variant, adding ('+') the EPB count instead
of subtracting ('-') it.

It might be possible to make SNB & IVB work without EPB scan, by using the
other bit modes?

Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
---
 src/gen6_mfd.c           |   43 ++++++++--------------------------
 src/gen7_mfd.c           |   37 +++++-----------------------
 src/i965_avc_bsd.c       |   58 ++++++++++-----------------------------------
 src/i965_decoder_utils.c |   53 ++++++++++++++++++++++++++++++++++++++++++
 src/i965_decoder_utils.h |   14 +++++++++++
 5 files changed, 97 insertions(+), 108 deletions(-)

diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 384b70e..85d9a8c 100644
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -847,27 +847,6 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 }
 
-static int
-gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
@@ -876,21 +855,19 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                         struct gen6_mfd_context *gen6_mfd_context)
 {
     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
-    int slice_data_bit_offset;
-    uint8_t *slice_data = NULL;
+    unsigned int slice_data_bit_offset;
 
-    dri_bo_map(slice_data_bo, 0);
-    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
-                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                              slice_param->slice_data_bit_offset);
-    dri_bo_unmap(slice_data_bo);
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
 
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
     OUT_BCS_BATCH(batch, 
-                  ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
-    OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
+    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   (0 << 31) |
                   (0 << 14) |
@@ -898,8 +875,8 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                   (0 << 10) |
                   (0 << 8));
     OUT_BCS_BATCH(batch,
-                  (0 << 16) |
-                  (0 << 6)  |
+                  ((slice_data_bit_offset >> 3) << 16) |
+                  (1 << 6)  |
                   ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
     OUT_BCS_BATCH(batch, 0);
     ADVANCE_BCS_BATCH(batch);
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 7b89e1c..f9bf09b 100644
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -817,27 +817,6 @@ gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
     }
 }
 
-static int
-gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
@@ -847,21 +826,19 @@ gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                         struct gen7_mfd_context *gen7_mfd_context)
 {
     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
-    int slice_data_bit_offset;
-    uint8_t *slice_data = NULL;
+    unsigned int slice_data_bit_offset;
 
-    dri_bo_map(slice_data_bo, 0);
-    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
-    slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
-                                                              pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                              slice_param->slice_data_bit_offset);
-    dri_bo_unmap(slice_data_bo);
+    slice_data_bit_offset = avc_get_first_mb_bit_offset(
+        slice_data_bo,
+        slice_param,
+        pic_param->pic_fields.bits.entropy_coding_mode_flag
+    );
 
     /* the input bitsteam format on GEN7 differs from GEN6 */
     BEGIN_BCS_BATCH(batch, 6);
     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
     OUT_BCS_BATCH(batch, 
-                  (slice_param->slice_data_size));
+                  (slice_param->slice_data_size - slice_param->slice_data_offset));
     OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
     OUT_BCS_BATCH(batch,
                   (0 << 31) |
diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index 612d0ee..b2b6c92 100644
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c
@@ -534,35 +534,6 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
     ADVANCE_BCS_BATCH(batch);
 }
 
-/*
- * Return the bit offset to the first bit of the slice data
- *
- * VASliceParameterBufferH264.slice_data_bit_offset will point into the part
- * of slice header if there are some escaped bytes in the slice header. The offset 
- * to slice data is needed for BSD unit so that BSD unit can fetch right slice data
- * for processing. This fixes conformance case BASQP1_Sony_C.jsv
- */
-static int
-i965_avc_bsd_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
-    int out_slice_data_bit_offset;
-    int slice_header_size = in_slice_data_bit_offset / 8;
-    int i, j;
-
-    for (i = 0, j = 0; i < slice_header_size; i++, j++) {
-        if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
-            i++, j += 2;
-        }
-    }
-
-    out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
-    if (mode_flag == ENTROPY_CABAC)
-        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
-    return out_slice_data_bit_offset;
-}
-
 static void
 g4x_avc_bsd_object(VADriverContextP ctx, 
                    struct decode_state *decode_state,
@@ -581,11 +552,10 @@ g4x_avc_bsd_object(VADriverContextP ctx,
         int num_ref_idx_l0, num_ref_idx_l1;
         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
-        int slice_data_bit_offset;
+        unsigned int slice_data_bit_offset;
         int weighted_pred_idc = 0;
         int first_mb_in_slice = 0;
         int slice_type;
-        uint8_t *slice_data = NULL;
 
         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
 
@@ -595,12 +565,12 @@ g4x_avc_bsd_object(VADriverContextP ctx,
         } else 
             cmd_len = 8;
 
-        dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
-        slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
-        slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
-                                                                  pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                                  slice_param->slice_data_bit_offset);
-        dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+
+        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+            decode_state->slice_datas[slice_index]->bo,
+            slice_param,
+            pic_param->pic_fields.bits.entropy_coding_mode_flag
+        );
 
         if (slice_param->slice_type == SLICE_TYPE_I ||
             slice_param->slice_type == SLICE_TYPE_SI)
@@ -710,11 +680,10 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
         int num_ref_idx_l0, num_ref_idx_l1;
         int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                              pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
-        int slice_data_bit_offset;
+        unsigned int slice_data_bit_offset;
         int weighted_pred_idc = 0;
         int first_mb_in_slice;
         int slice_type;
-        uint8_t *slice_data = NULL;
 
         encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
 
@@ -723,12 +692,11 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
         } else 
             counter_value = 0;
 
-        dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
-        slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
-        slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
-                                                                  pic_param->pic_fields.bits.entropy_coding_mode_flag,
-                                                                  slice_param->slice_data_bit_offset);
-        dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+        slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+            decode_state->slice_datas[slice_index]->bo,
+            slice_param,
+            pic_param->pic_fields.bits.entropy_coding_mode_flag
+        );
 
         if (slice_param->slice_type == SLICE_TYPE_I ||
             slice_param->slice_type == SLICE_TYPE_SI)
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index d4400c5..6fff67f 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -24,6 +24,7 @@
 #include <assert.h>
 #include <stddef.h>
 #include <string.h>
+#include <alloca.h>
 #include "intel_batchbuffer.h"
 #include "i965_decoder_utils.h"
 #include "i965_defines.h"
@@ -39,6 +40,58 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
     memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
 }
 
+/* Get first macroblock bit offset for BSD (AVC) */
+unsigned int
+avc_get_first_mb_bit_offset(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+)
+{
+    unsigned int in_slice_data_bit_offset = slice_param->slice_data_bit_offset;
+    unsigned int out_slice_data_bit_offset;
+    unsigned int i, n, buf_size, data_size;
+    uint8_t *buf;
+    int ret;
+
+    buf_size  = slice_param->slice_data_bit_offset / 8;
+    data_size = slice_param->slice_data_size - slice_param->slice_data_offset;
+    if (buf_size > data_size)
+        buf_size = data_size;
+
+    buf = alloca(buf_size);
+    ret = dri_bo_get_subdata(
+        slice_data_bo, slice_param->slice_data_offset,
+        buf_size, buf
+    );
+    assert(ret == 0);
+
+    for (i = 2, n = 0; i < buf_size; i++) {
+        if (!buf[i - 2] && !buf[i - 1] && buf[i] == 3)
+            i += 2, n++;
+    }
+    out_slice_data_bit_offset = in_slice_data_bit_offset - n * 8;
+
+    if (mode_flag == ENTROPY_CABAC)
+        out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
+    return out_slice_data_bit_offset;
+}
+
+/* Get first macroblock bit offset for BSD, with emulation prevention bytes (AVC) */
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+)
+{
+    unsigned int slice_data_bit_offset = slice_param->slice_data_bit_offset;
+
+    if (mode_flag == ENTROPY_CABAC)
+        slice_data_bit_offset = ALIGN(slice_data_bit_offset, 0x8);
+    return slice_data_bit_offset;
+}
+
 static inline uint8_t
 get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id)
 {
diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index bf9be84..37402b4 100644
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h
@@ -30,6 +30,20 @@
 void
 avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
 
+unsigned int
+avc_get_first_mb_bit_offset(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+);
+
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+    dri_bo                     *slice_data_bo,
+    VASliceParameterBufferH264 *slice_param,
+    unsigned int                mode_flag
+);
+
 void
 gen5_fill_avc_ref_idx_state(
     uint8_t             state[32],
-- 
1.7.0.4



More information about the Libva mailing list