[Libva] [PATCH] h264: fix first macroblock bit offset calculation (ILK, SNB, IVB).
Gwenole Beauchesne
gb.devel at gmail.com
Thu Mar 1 22:41:32 PST 2012
This simplifies and fixes the scan for emulation prevention bytes (EPB),
thus avoiding a read beyond the end of the slice data buffer. It also
uses dri_bo_get_subdata() to read the slice data buffer back.
HW specific changes:
- SNB: make the HW skip the emulation prevention bytes itself.
- IVB: fix MFD_AVC_BSD_OBJECT to report the actual slice data buffer size.
Note: this assumes VASliceParameterBufferH264.slice_data_bit_offset
represents the offset relative to the raw bitstream, i.e. including the
emulation prevention bytes. Should this offset instead be counted without
the emulation prevention bytes, then avc_get_first_mb_bit_offset() is to be
swapped with the avc_get_first_mb_bit_offset_with_epb() variant, and the
EPB count added ('+') instead of subtracted ('-').
Open question: might it be possible to make SNB and IVB work without the
EPB scan by using the other bit modes?
Signed-off-by: Gwenole Beauchesne <gwenole.beauchesne at intel.com>
---
src/gen6_mfd.c | 43 ++++++++--------------------------
src/gen7_mfd.c | 37 +++++-----------------------
src/i965_avc_bsd.c | 58 ++++++++++-----------------------------------
src/i965_decoder_utils.c | 53 ++++++++++++++++++++++++++++++++++++++++++
src/i965_decoder_utils.h | 14 +++++++++++
5 files changed, 97 insertions(+), 108 deletions(-)
diff --git a/src/gen6_mfd.c b/src/gen6_mfd.c
index 384b70e..85d9a8c 100644
--- a/src/gen6_mfd.c
+++ b/src/gen6_mfd.c
@@ -847,27 +847,6 @@ gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
}
}
-static int
-gen6_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
- int out_slice_data_bit_offset;
- int slice_header_size = in_slice_data_bit_offset / 8;
- int i, j;
-
- for (i = 0, j = 0; i < slice_header_size; i++, j++) {
- if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
- i++, j += 2;
- }
- }
-
- out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
- if (mode_flag == ENTROPY_CABAC)
- out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
- return out_slice_data_bit_offset;
-}
-
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
@@ -876,21 +855,19 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
struct gen6_mfd_context *gen6_mfd_context)
{
struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
- int slice_data_bit_offset;
- uint8_t *slice_data = NULL;
+ unsigned int slice_data_bit_offset;
- dri_bo_map(slice_data_bo, 0);
- slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
- slice_data_bit_offset = gen6_mfd_avc_get_slice_bit_offset(slice_data,
- pic_param->pic_fields.bits.entropy_coding_mode_flag,
- slice_param->slice_data_bit_offset);
- dri_bo_unmap(slice_data_bo);
+ slice_data_bit_offset = avc_get_first_mb_bit_offset(
+ slice_data_bo,
+ slice_param,
+ pic_param->pic_fields.bits.entropy_coding_mode_flag
+ );
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
OUT_BCS_BATCH(batch,
- ((slice_param->slice_data_size - (slice_data_bit_offset >> 3)) << 0));
- OUT_BCS_BATCH(batch, slice_param->slice_data_offset + (slice_data_bit_offset >> 3));
+ (slice_param->slice_data_size - slice_param->slice_data_offset));
+ OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
OUT_BCS_BATCH(batch,
(0 << 31) |
(0 << 14) |
@@ -898,8 +875,8 @@ gen6_mfd_avc_bsd_object(VADriverContextP ctx,
(0 << 10) |
(0 << 8));
OUT_BCS_BATCH(batch,
- (0 << 16) |
- (0 << 6) |
+ ((slice_data_bit_offset >> 3) << 16) |
+ (1 << 6) |
((0x7 - (slice_data_bit_offset & 0x7)) << 0));
OUT_BCS_BATCH(batch, 0);
ADVANCE_BCS_BATCH(batch);
diff --git a/src/gen7_mfd.c b/src/gen7_mfd.c
index 7b89e1c..f9bf09b 100644
--- a/src/gen7_mfd.c
+++ b/src/gen7_mfd.c
@@ -817,27 +817,6 @@ gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
}
}
-static int
-gen7_mfd_avc_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
- int out_slice_data_bit_offset;
- int slice_header_size = in_slice_data_bit_offset / 8;
- int i, j;
-
- for (i = 0, j = 0; i < slice_header_size; i++, j++) {
- if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
- i++, j += 2;
- }
- }
-
- out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
- if (mode_flag == ENTROPY_CABAC)
- out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
- return out_slice_data_bit_offset;
-}
-
static void
gen7_mfd_avc_bsd_object(VADriverContextP ctx,
VAPictureParameterBufferH264 *pic_param,
@@ -847,21 +826,19 @@ gen7_mfd_avc_bsd_object(VADriverContextP ctx,
struct gen7_mfd_context *gen7_mfd_context)
{
struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
- int slice_data_bit_offset;
- uint8_t *slice_data = NULL;
+ unsigned int slice_data_bit_offset;
- dri_bo_map(slice_data_bo, 0);
- slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
- slice_data_bit_offset = gen7_mfd_avc_get_slice_bit_offset(slice_data,
- pic_param->pic_fields.bits.entropy_coding_mode_flag,
- slice_param->slice_data_bit_offset);
- dri_bo_unmap(slice_data_bo);
+ slice_data_bit_offset = avc_get_first_mb_bit_offset(
+ slice_data_bo,
+ slice_param,
+ pic_param->pic_fields.bits.entropy_coding_mode_flag
+ );
/* the input bitsteam format on GEN7 differs from GEN6 */
BEGIN_BCS_BATCH(batch, 6);
OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
OUT_BCS_BATCH(batch,
- (slice_param->slice_data_size));
+ (slice_param->slice_data_size - slice_param->slice_data_offset));
OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
OUT_BCS_BATCH(batch,
(0 << 31) |
diff --git a/src/i965_avc_bsd.c b/src/i965_avc_bsd.c
index 612d0ee..b2b6c92 100644
--- a/src/i965_avc_bsd.c
+++ b/src/i965_avc_bsd.c
@@ -534,35 +534,6 @@ i965_avc_bsd_buf_base_state(VADriverContextP ctx,
ADVANCE_BCS_BATCH(batch);
}
-/*
- * Return the bit offset to the first bit of the slice data
- *
- * VASliceParameterBufferH264.slice_data_bit_offset will point into the part
- * of slice header if there are some escaped bytes in the slice header. The offset
- * to slice data is needed for BSD unit so that BSD unit can fetch right slice data
- * for processing. This fixes conformance case BASQP1_Sony_C.jsv
- */
-static int
-i965_avc_bsd_get_slice_bit_offset(uint8_t *buf, int mode_flag, int in_slice_data_bit_offset)
-{
- int out_slice_data_bit_offset;
- int slice_header_size = in_slice_data_bit_offset / 8;
- int i, j;
-
- for (i = 0, j = 0; i < slice_header_size; i++, j++) {
- if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3) {
- i++, j += 2;
- }
- }
-
- out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
-
- if (mode_flag == ENTROPY_CABAC)
- out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
-
- return out_slice_data_bit_offset;
-}
-
static void
g4x_avc_bsd_object(VADriverContextP ctx,
struct decode_state *decode_state,
@@ -581,11 +552,10 @@ g4x_avc_bsd_object(VADriverContextP ctx,
int num_ref_idx_l0, num_ref_idx_l1;
int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
- int slice_data_bit_offset;
+ unsigned int slice_data_bit_offset;
int weighted_pred_idc = 0;
int first_mb_in_slice = 0;
int slice_type;
- uint8_t *slice_data = NULL;
encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
@@ -595,12 +565,12 @@ g4x_avc_bsd_object(VADriverContextP ctx,
} else
cmd_len = 8;
- dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
- slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
- slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
- pic_param->pic_fields.bits.entropy_coding_mode_flag,
- slice_param->slice_data_bit_offset);
- dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+
+ slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+ decode_state->slice_datas[slice_index]->bo,
+ slice_param,
+ pic_param->pic_fields.bits.entropy_coding_mode_flag
+ );
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
@@ -710,11 +680,10 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
int num_ref_idx_l0, num_ref_idx_l1;
int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
- int slice_data_bit_offset;
+ unsigned int slice_data_bit_offset;
int weighted_pred_idc = 0;
int first_mb_in_slice;
int slice_type;
- uint8_t *slice_data = NULL;
encrypted = 0; /* FIXME: which flag in VAAPI is used for encryption? */
@@ -723,12 +692,11 @@ ironlake_avc_bsd_object(VADriverContextP ctx,
} else
counter_value = 0;
- dri_bo_map(decode_state->slice_datas[slice_index]->bo, 0);
- slice_data = (uint8_t *)(decode_state->slice_datas[slice_index]->bo->virtual + slice_param->slice_data_offset);
- slice_data_bit_offset = i965_avc_bsd_get_slice_bit_offset(slice_data,
- pic_param->pic_fields.bits.entropy_coding_mode_flag,
- slice_param->slice_data_bit_offset);
- dri_bo_unmap(decode_state->slice_datas[slice_index]->bo);
+ slice_data_bit_offset = avc_get_first_mb_bit_offset_with_epb(
+ decode_state->slice_datas[slice_index]->bo,
+ slice_param,
+ pic_param->pic_fields.bits.entropy_coding_mode_flag
+ );
if (slice_param->slice_type == SLICE_TYPE_I ||
slice_param->slice_type == SLICE_TYPE_SI)
diff --git a/src/i965_decoder_utils.c b/src/i965_decoder_utils.c
index d4400c5..6fff67f 100644
--- a/src/i965_decoder_utils.c
+++ b/src/i965_decoder_utils.c
@@ -24,6 +24,7 @@
#include <assert.h>
#include <stddef.h>
#include <string.h>
+#include <alloca.h>
#include "intel_batchbuffer.h"
#include "i965_decoder_utils.h"
#include "i965_defines.h"
@@ -39,6 +40,58 @@ avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix)
memset(&iq_matrix->ScalingList8x8, 16, sizeof(iq_matrix->ScalingList8x8));
}
+/* Get first macroblock bit offset for BSD (AVC) */
+unsigned int
+avc_get_first_mb_bit_offset(
+ dri_bo *slice_data_bo,
+ VASliceParameterBufferH264 *slice_param,
+ unsigned int mode_flag
+)
+{
+ unsigned int in_slice_data_bit_offset = slice_param->slice_data_bit_offset;
+ unsigned int out_slice_data_bit_offset;
+ unsigned int i, n, buf_size, data_size;
+ uint8_t *buf;
+ int ret;
+
+ buf_size = slice_param->slice_data_bit_offset / 8;
+ data_size = slice_param->slice_data_size - slice_param->slice_data_offset;
+ if (buf_size > data_size)
+ buf_size = data_size;
+
+ buf = alloca(buf_size);
+ ret = dri_bo_get_subdata(
+ slice_data_bo, slice_param->slice_data_offset,
+ buf_size, buf
+ );
+ assert(ret == 0);
+
+ for (i = 2, n = 0; i < buf_size; i++) {
+ if (!buf[i - 2] && !buf[i - 1] && buf[i] == 3)
+ i += 2, n++;
+ }
+ out_slice_data_bit_offset = in_slice_data_bit_offset - n * 8;
+
+ if (mode_flag == ENTROPY_CABAC)
+ out_slice_data_bit_offset = ALIGN(out_slice_data_bit_offset, 0x8);
+ return out_slice_data_bit_offset;
+}
+
+/* Get first macroblock bit offset for BSD, with emulation prevention bytes (AVC) */
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+ dri_bo *slice_data_bo,
+ VASliceParameterBufferH264 *slice_param,
+ unsigned int mode_flag
+)
+{
+ unsigned int slice_data_bit_offset = slice_param->slice_data_bit_offset;
+
+ if (mode_flag == ENTROPY_CABAC)
+ slice_data_bit_offset = ALIGN(slice_data_bit_offset, 0x8);
+ return slice_data_bit_offset;
+}
+
static inline uint8_t
get_ref_idx_state_1(const VAPictureH264 *va_pic, unsigned int frame_store_id)
{
diff --git a/src/i965_decoder_utils.h b/src/i965_decoder_utils.h
index bf9be84..37402b4 100644
--- a/src/i965_decoder_utils.h
+++ b/src/i965_decoder_utils.h
@@ -30,6 +30,20 @@
void
avc_gen_default_iq_matrix(VAIQMatrixBufferH264 *iq_matrix);
+unsigned int
+avc_get_first_mb_bit_offset(
+ dri_bo *slice_data_bo,
+ VASliceParameterBufferH264 *slice_param,
+ unsigned int mode_flag
+);
+
+unsigned int
+avc_get_first_mb_bit_offset_with_epb(
+ dri_bo *slice_data_bo,
+ VASliceParameterBufferH264 *slice_param,
+ unsigned int mode_flag
+);
+
void
gen5_fill_avc_ref_idx_state(
uint8_t state[32],
--
1.7.0.4
More information about the Libva
mailing list