[Mesa-dev] [PATCH 1/3] vl: Only initialize vlc once
Christian König
deathsimple at vodafone.de
Thu Dec 22 05:25:30 PST 2011
Hi Maarten,
On 21.12.2011 21:52, Maarten Lankhorst wrote:
> It would be nice if you inlined patches for easier reviewing. :)
Well I can try, but I can't promise that Thunderbird isn't badly fucking
up all whitespaces, newest version of the patch is in-lined below.
> I'm spotting an overflow that could be triggered with 64 single-byte unaligned buffers, maybe this is better:
Should be fixed.
> With all the pointer math, maybe change the type for 'end' and 'data' to
> uint8_t? Then you would only need that single cast in fillbits (which
> I did above) and you can kill all the casts everywhere.
Yeah, we now use it as int8_t more often, that saves at least some casts.
> Another thought, this would prevent the need to read past end of file which could show up erroneously in valgrind, with something like this: if (end-data>= 4) { // Nom nom 4 bytes } else { // Read at most 3 bytes }
Ok, take a look below. I now use if(bytes_left ==0) {...} else if
(bytes_left >= 4) {...} else {...} construct, should this work or do you
see any more corner cases I missed?
I also inverted the valid_bits handling to invalid_bits and moved range
where invalid_bits is now moving in between to -32 and 32, that should
save a few more CPU cycles.
Last but not least I added at least one sentence of documentation to
each function. Any more thoughts or should we commit that now?
Only initialize vlc in MPEG2 decoding once for all slices,
add more sanity checks to vlc decoding functions, support
multiple vlc input buffer, improve documentation of the
vlc functions.
v2: also implement multiple inputs for the vlc functions
v3: some bug fixes for buffer size and alignment corner cases
v4: rework of the patch, add some more improvements and documentation
Signed-off-by: Maarten Lankhorst<m.b.lankhorst at gmail.com>
Signed-off-by: Christian König<deathsimple at vodafone.de>
---
src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c | 46 +++----
src/gallium/auxiliary/vl/vl_vlc.h | 169 +++++++++++++++++++-----
2 files changed, 156 insertions(+), 59 deletions(-)
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
index 936cf2c..7e20d71 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_bitstream.c
@@ -786,7 +786,7 @@ entry:
}
}
-static INLINE bool
+static INLINE void
decode_slice(struct vl_mpg12_bs *bs)
{
struct pipe_mpeg12_macroblock mb;
@@ -800,6 +800,7 @@ decode_slice(struct vl_mpg12_bs *bs)
mb.blocks = dct_blocks;
reset_predictor(bs);
+ vl_vlc_fillbits(&bs->vlc);
dct_scale = quant_scale[bs->desc.q_scale_type][vl_vlc_get_uimsbf(&bs->vlc, 5)];
if (vl_vlc_get_uimsbf(&bs->vlc, 1))
@@ -807,13 +808,15 @@ decode_slice(struct vl_mpg12_bs *bs)
vl_vlc_fillbits(&bs->vlc);
vl_vlc_fillbits(&bs->vlc);
+ assert(vl_vlc_bits_left(&bs->vlc)> 23&& vl_vlc_peekbits(&bs->vlc, 23));
do {
int inc = 0;
- while (vl_vlc_peekbits(&bs->vlc, 11) == 15) {
- vl_vlc_eatbits(&bs->vlc, 11);
- vl_vlc_fillbits(&bs->vlc);
- }
+ if (bs->decoder->profile == PIPE_VIDEO_PROFILE_MPEG1)
+ while (vl_vlc_peekbits(&bs->vlc, 11) == 15) {
+ vl_vlc_eatbits(&bs->vlc, 11);
+ vl_vlc_fillbits(&bs->vlc);
+ }
while (vl_vlc_peekbits(&bs->vlc, 11) == 8) {
vl_vlc_eatbits(&bs->vlc, 11);
@@ -928,7 +931,6 @@ decode_slice(struct vl_mpg12_bs *bs)
mb.num_skipped_macroblocks = 0;
bs->decoder->decode_macroblock(bs->decoder,&mb.base, 1);
- return true;
}
void
@@ -959,32 +961,22 @@ void
vl_mpg12_bs_decode(struct vl_mpg12_bs *bs, unsigned num_bytes, const uint8_t *buffer)
{
assert(bs);
- assert(buffer&& num_bytes);
- while(num_bytes> 2) {
- if (buffer[0] == 0x00&& buffer[1] == 0x00&& buffer[2] == 0x01&&
- buffer[3]>= 0x01&& buffer[3]< 0xAF) {
- unsigned consumed;
+ vl_vlc_init(&bs->vlc, 1, (const void * const *)&buffer,&num_bytes);
+ while (vl_vlc_bits_left(&bs->vlc)> 32) {
+ uint32_t code = vl_vlc_peekbits(&bs->vlc, 32);
- buffer += 3;
- num_bytes -= 3;
+ if (code>= 0x101&& code<= 0x1AF) {
+ vl_vlc_eatbits(&bs->vlc, 24);
+ decode_slice(bs);
- vl_vlc_init(&bs->vlc, buffer, num_bytes);
-
- if (!decode_slice(bs))
- return;
-
- consumed = num_bytes - vl_vlc_bits_left(&bs->vlc) / 8;
-
- /* crap, this is a bug we have consumed more bytes than left in the buffer */
- assert(consumed<= num_bytes);
-
- num_bytes -= consumed;
- buffer += consumed;
+ /* align to a byte again */
+ vl_vlc_eatbits(&bs->vlc, vl_vlc_valid_bits(&bs->vlc)& 7);
} else {
- ++buffer;
- --num_bytes;
+ vl_vlc_eatbits(&bs->vlc, 8);
}
+
+ vl_vlc_fillbits(&bs->vlc);
}
}
diff --git a/src/gallium/auxiliary/vl/vl_vlc.h b/src/gallium/auxiliary/vl/vl_vlc.h
index dc4faed..5e5e64c 100644
--- a/src/gallium/auxiliary/vl/vl_vlc.h
+++ b/src/gallium/auxiliary/vl/vl_vlc.h
@@ -25,22 +25,30 @@
*
**************************************************************************/
+/*
+ * Functions for fast bitwise access to multiple probably unaligned input buffers
+ */
+
#ifndef vl_vlc_h
#define vl_vlc_h
-#include<assert.h>
-
#include "pipe/p_compiler.h"
#include "util/u_math.h"
#include "util/u_pointer.h"
+#include "util/u_debug.h"
struct vl_vlc
{
uint64_t buffer;
- unsigned valid_bits;
- uint32_t *data;
- uint32_t *end;
+ signed invalid_bits;
+ const uint8_t *data;
+ const uint8_t *end;
+
+ unsigned num_inputs;
+ const void *const *inputs;
+ const unsigned *sizes;
+ unsigned bytes_left;
};
struct vl_vlc_entry
@@ -55,11 +63,17 @@ struct vl_vlc_compressed
struct vl_vlc_entry entry;
};
+/*
+ * decompress a lookup table
+ */
static INLINE void
vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_vlc_compressed *src, unsigned src_size)
{
unsigned i, bits = util_logbase2(dst_size);
+ assert(dst&& dst_size);
+ assert(src&& src_size);
+
for (i=0;i<dst_size;++i) {
dst[i].length = 0;
dst[i].value = 0;
@@ -71,77 +85,161 @@ vl_vlc_init_table(struct vl_vlc_entry *dst, unsigned dst_size, const struct vl_v
}
}
+/*
+ * switch over to next input buffer
+ */
+static INLINE void
+vl_vlc_next_input(struct vl_vlc *vlc)
+{
+ const uint8_t* data = vlc->inputs[0];
+ unsigned len = vlc->sizes[0];
+
+ assert(vlc);
+ assert(vlc->num_inputs);
+
+ vlc->bytes_left -= len;
+
+ /* align the data pointer */
+ while (len&& pointer_to_uintptr(data)& 3) {
+ vlc->buffer |= (uint64_t)*data<< (24 + vlc->invalid_bits);
+ ++data;
+ --len;
+ vlc->invalid_bits -= 8;
+ }
+ vlc->data = data;
+ vlc->end = data + len;
+
+ --vlc->num_inputs;
+ ++vlc->inputs;
+ ++vlc->sizes;
+}
+
+/*
+ * fill the bit buffer, so that at least 32 bits are valid
+ */
static INLINE void
vl_vlc_fillbits(struct vl_vlc *vlc)
{
- if (vlc->valid_bits< 32) {
- uint32_t value = *vlc->data;
+ assert(vlc);
- //assert(vlc->data<= vlc->end);
+ /* as long as the buffer needs to be filled */
+ while (vlc->invalid_bits> 0) {
+ unsigned bytes_left = vlc->end - vlc->data;
+
+ /* if this input is depleted */
+ if (bytes_left == 0) {
+
+ if (vlc->num_inputs)
+ /* go on to next input */
+ vl_vlc_next_input(vlc);
+ else
+ /* or give up since we don't have anymore inputs */
+ return;
+
+ } else if (bytes_left>= 4) {
+
+ /* enough bytes in buffer, read in a whole dword */
+ uint32_t value = *(const uint32_t*)vlc->data;
#ifndef PIPE_ARCH_BIG_ENDIAN
- value = util_bswap32(value);
+ value = util_bswap32(value);
#endif
- vlc->buffer |= (uint64_t)value<< (32 - vlc->valid_bits);
- ++vlc->data;
- vlc->valid_bits += 32;
+ vlc->buffer |= (uint64_t)value<< vlc->invalid_bits;
+ vlc->data += 4;
+ vlc->invalid_bits -= 32;
+
+ /* buffer is now definitely filled up avoid the loop test */
+ break;
+
+ } else while (vlc->data< vlc->end) {
+
+ /* not enough bytes left in buffer, read single bytes */
+ vlc->buffer |= (uint64_t)*vlc->data<< (24 + vlc->invalid_bits);
+ ++vlc->data;
+ vlc->invalid_bits -= 8;
+ }
}
}
+/*
+ * initialize vlc structure and start reading from first input buffer
+ */
static INLINE void
-vl_vlc_init(struct vl_vlc *vlc, const uint8_t *data, unsigned len)
+vl_vlc_init(struct vl_vlc *vlc, unsigned num_inputs,
+ const void *const *inputs, const unsigned *sizes)
{
+ unsigned i;
+
assert(vlc);
- assert(data&& len);
+ assert(num_inputs);
vlc->buffer = 0;
- vlc->valid_bits = 0;
+ vlc->invalid_bits = 32;
+ vlc->num_inputs = num_inputs;
+ vlc->inputs = inputs;
+ vlc->sizes = sizes;
+ vlc->bytes_left = 0;
- /* align the data pointer */
- while (pointer_to_uintptr(data)& 3) {
- vlc->buffer |= (uint64_t)*data<< (56 - vlc->valid_bits);
- ++data;
- --len;
- vlc->valid_bits += 8;
- }
- vlc->data = (uint32_t*)data;
- vlc->end = (uint32_t*)(data + len);
+ for (i = 0; i< num_inputs; ++i)
+ vlc->bytes_left += sizes[i];
+ vl_vlc_next_input(vlc);
vl_vlc_fillbits(vlc);
vl_vlc_fillbits(vlc);
}
+/*
+ * number of bits still valid in bit buffer
+ */
+static INLINE unsigned
+vl_vlc_valid_bits(struct vl_vlc *vlc)
+{
+ return 32 - vlc->invalid_bits;
+}
+
+/*
+ * number of bits left over all inbut buffers
+ */
static INLINE unsigned
vl_vlc_bits_left(struct vl_vlc *vlc)
{
- signed bytes_left = ((uint8_t*)vlc->end)-((uint8_t*)vlc->data);
- return bytes_left * 8 + vlc->valid_bits;
+ signed bytes_left = vlc->end - vlc->data;
+ bytes_left += vlc->bytes_left;
+ return bytes_left * 8 + vl_vlc_valid_bits(vlc);
}
+/*
+ * get num_bits from bit buffer without removing them
+ */
static INLINE unsigned
vl_vlc_peekbits(struct vl_vlc *vlc, unsigned num_bits)
{
- //assert(vlc->valid_bits>= num_bits);
-
+ assert(vl_vlc_valid_bits(vlc)<= num_bits || vlc->data>= vlc->end);
return vlc->buffer>> (64 - num_bits);
}
+/*
+ * remove num_bits from bit buffer
+ */
static INLINE void
vl_vlc_eatbits(struct vl_vlc *vlc, unsigned num_bits)
{
- //assert(vlc->valid_bits> num_bits);
+ assert(vl_vlc_valid_bits(vlc)<= num_bits);
vlc->buffer<<= num_bits;
- vlc->valid_bits -= num_bits;
+ vlc->invalid_bits += num_bits;
}
+/*
+ * get num_bits from bit buffer with removing them
+ */
static INLINE unsigned
vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
{
unsigned value;
- //assert(vlc->valid_bits>= num_bits);
+ assert(vl_vlc_valid_bits(vlc)<= num_bits);
value = vlc->buffer>> (64 - num_bits);
vl_vlc_eatbits(vlc, num_bits);
@@ -149,12 +247,15 @@ vl_vlc_get_uimsbf(struct vl_vlc *vlc, unsigned num_bits)
return value;
}
+/*
+ * treat num_bits as signed value and remove them from bit buffer
+ */
static INLINE signed
vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
{
signed value;
- //assert(vlc->valid_bits>= num_bits);
+ assert(vl_vlc_valid_bits(vlc)<= num_bits);
value = ((int64_t)vlc->buffer)>> (64 - num_bits);
vl_vlc_eatbits(vlc, num_bits);
@@ -162,11 +263,15 @@ vl_vlc_get_simsbf(struct vl_vlc *vlc, unsigned num_bits)
return value;
}
+/*
+ * lookup a value and length in a decompressed table
+ */
static INLINE int8_t
vl_vlc_get_vlclbf(struct vl_vlc *vlc, const struct vl_vlc_entry *tbl, unsigned num_bits)
{
tbl += vl_vlc_peekbits(vlc, num_bits);
vl_vlc_eatbits(vlc, tbl->length);
+ assert(tbl->length);
return tbl->value;
}
-- 1.7.5.4
More information about the mesa-dev
mailing list