[Mesa-dev] [PATCH 04/10] radeon/vcn: implement jpeg decode functions
Leo Liu
leo.liu at amd.com
Tue Aug 7 15:41:17 UTC 2018
On 08/02/2018 03:44 PM, boyuan.zhang at amd.com wrote:
> From: Boyuan Zhang <boyuan.zhang at amd.com>
>
> Signed-off-by: Boyuan Zhang <boyuan.zhang at amd.com>
> ---
> src/gallium/drivers/radeon/radeon_vcn_jpeg.c | 286 ++++++++++++++++++++++++++-
> 1 file changed, 281 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/drivers/radeon/radeon_vcn_jpeg.c b/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
> index c078131..72dff57 100644
> --- a/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
> +++ b/src/gallium/drivers/radeon/radeon_vcn_jpeg.c
> @@ -63,12 +63,195 @@ struct radeon_jpeg_decoder {
> unsigned dt_chroma_top_offset;
> };
>
> +static void radeon_jpeg_destroy_associated_data(void *data)
> +{
> + /* NOOP, since we only use an intptr */
> +}
> +
> +static struct pb_buffer *radeon_jpeg_get_decode_param(struct radeon_jpeg_decoder *dec,
> + struct pipe_video_buffer *target,
> + struct pipe_picture_desc *picture)
> +{
> + struct r600_texture *luma = (struct r600_texture *)
> + ((struct vl_video_buffer *)target)->resources[0];
> + struct r600_texture *chroma = (struct r600_texture *)
> + ((struct vl_video_buffer *)target)->resources[1];
> +
> + dec->bsd_size = align(dec->bs_size, 128);
> + dec->dt_luma_top_offset = luma->surface.u.gfx9.surf_offset;
> + if (target->buffer_format == PIPE_FORMAT_NV12) {
> + dec->dt_chroma_top_offset = chroma->surface.u.gfx9.surf_offset;
> + dec->dt_pitch = luma->surface.u.gfx9.surf_pitch * luma->surface.blk_w;
> + }
> + else if (target->buffer_format == PIPE_FORMAT_YUYV)
> + dec->dt_pitch = luma->surface.u.gfx9.surf_pitch;
> + dec->dt_uv_pitch = dec->dt_pitch / 2;
> +
> + return luma->resource.buf;
> +}
> +
> +static void get_mjpeg_slice_header(struct radeon_jpeg_decoder *dec, struct pipe_mjpeg_picture_desc *pic)
It looks like reconstruction of the bitstream is still required for VCN
JPEG. When UVD JPEG decode was implemented, this was thought to be needed
only for UVD FW JPEG decode, which is why it was kept in the driver.
Since both UVD and VCN need this bitstream reconstruction, it would be
better to move it to the state tracker.
Regards,
Leo
> +{
> + int size = 0, saved_size, len_pos, i;
> + uint16_t *bs;
> + uint8_t *buf = dec->bs_ptr;
> +
> + /* SOI */
> + buf[size++] = 0xff;
> + buf[size++] = 0xd8;
> +
> + /* DQT */
> + buf[size++] = 0xff;
> + buf[size++] = 0xdb;
> +
> + len_pos = size++;
> + size++;
> +
> + for (i = 0; i < 4; ++i) {
> + if (pic->quantization_table.load_quantiser_table[i] == 0)
> + continue;
> +
> + buf[size++] = i;
> + memcpy((buf + size), &pic->quantization_table.quantiser_table[i], 64);
> + size += 64;
> + }
> +
> + bs = (uint16_t*)&buf[len_pos];
> + *bs = util_bswap16(size - 4);
> +
> + saved_size = size;
> +
> + /* DHT */
> + buf[size++] = 0xff;
> + buf[size++] = 0xc4;
> +
> + len_pos = size++;
> + size++;
> +
> + for (i = 0; i < 2; ++i) {
> + if (pic->huffman_table.load_huffman_table[i] == 0)
> + continue;
> +
> + buf[size++] = 0x00 | i;
> + memcpy((buf + size), &pic->huffman_table.table[i].num_dc_codes, 16);
> + size += 16;
> + memcpy((buf + size), &pic->huffman_table.table[i].dc_values, 12);
> + size += 12;
> + }
> +
> + for (i = 0; i < 2; ++i) {
> + if (pic->huffman_table.load_huffman_table[i] == 0)
> + continue;
> +
> + buf[size++] = 0x10 | i;
> + memcpy((buf + size), &pic->huffman_table.table[i].num_ac_codes, 16);
> + size += 16;
> + memcpy((buf + size), &pic->huffman_table.table[i].ac_values, 162);
> + size += 162;
> + }
> +
> + bs = (uint16_t*)&buf[len_pos];
> + *bs = util_bswap16(size - saved_size - 2);
> +
> + saved_size = size;
> +
> + /* DRI */
> + if (pic->slice_parameter.restart_interval) {
> + buf[size++] = 0xff;
> + buf[size++] = 0xdd;
> + buf[size++] = 0x00;
> + buf[size++] = 0x04;
> + bs = (uint16_t*)&buf[size++];
> + *bs = util_bswap16(pic->slice_parameter.restart_interval);
> + saved_size = ++size;
> + }
> +
> + /* SOF */
> + buf[size++] = 0xff;
> + buf[size++] = 0xc0;
> +
> + len_pos = size++;
> + size++;
> +
> + buf[size++] = 0x08;
> +
> + bs = (uint16_t*)&buf[size++];
> + *bs = util_bswap16(pic->picture_parameter.picture_height);
> + size++;
> +
> + bs = (uint16_t*)&buf[size++];
> + *bs = util_bswap16(pic->picture_parameter.picture_width);
> + size++;
> +
> + buf[size++] = pic->picture_parameter.num_components;
> +
> + for (i = 0; i < pic->picture_parameter.num_components; ++i) {
> + buf[size++] = pic->picture_parameter.components[i].component_id;
> + buf[size++] = pic->picture_parameter.components[i].h_sampling_factor << 4 |
> + pic->picture_parameter.components[i].v_sampling_factor;
> + buf[size++] = pic->picture_parameter.components[i].quantiser_table_selector;
> + }
> +
> + bs = (uint16_t*)&buf[len_pos];
> + *bs = util_bswap16(size - saved_size - 2);
> +
> + saved_size = size;
> +
> + /* SOS */
> + buf[size++] = 0xff;
> + buf[size++] = 0xda;
> +
> + len_pos = size++;
> + size++;
> +
> + buf[size++] = pic->slice_parameter.num_components;
> +
> + for (i = 0; i < pic->slice_parameter.num_components; ++i) {
> + buf[size++] = pic->slice_parameter.components[i].component_selector;
> + buf[size++] = pic->slice_parameter.components[i].dc_table_selector << 4 |
> + pic->slice_parameter.components[i].ac_table_selector;
> + }
> +
> + buf[size++] = 0x00;
> + buf[size++] = 0x3f;
> + buf[size++] = 0x00;
> +
> + bs = (uint16_t*)&buf[len_pos];
> + *bs = util_bswap16(size - saved_size - 2);
> +
> + dec->bs_ptr += size;
> + dec->bs_size += size;
> +}
> +
> /* flush IB to the hardware */
> static int flush(struct radeon_jpeg_decoder *dec, unsigned flags)
> {
> return dec->ws->cs_flush(dec->cs, flags, NULL);
> }
>
> +/* add a new set register command to the IB */
> +static void set_reg_jpeg(struct radeon_jpeg_decoder *dec, unsigned reg,
> + unsigned cond, unsigned type, uint32_t val)
> +{
> + /* TODO */
> +}
> +
> +/* send a bitstream buffer command */
> +static void send_cmd_bitstream(struct radeon_jpeg_decoder *dec,
> + struct pb_buffer* buf, uint32_t off,
> + enum radeon_bo_usage usage, enum radeon_bo_domain domain)
> +{
> + /* TODO */
> +}
> +
> +/* send a target buffer command */
> +static void send_cmd_target(struct radeon_jpeg_decoder *dec,
> + struct pb_buffer* buf, uint32_t off,
> + enum radeon_bo_usage usage, enum radeon_bo_domain domain)
> +{
> + /* TODO */
> +}
> +
> /* cycle to the next set of buffers */
> static void next_buffer(struct radeon_jpeg_decoder *dec)
> {
> @@ -81,7 +264,20 @@ static void next_buffer(struct radeon_jpeg_decoder *dec)
> */
> static void radeon_jpeg_destroy(struct pipe_video_codec *decoder)
> {
> - /* TODO */
> + struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> + unsigned i;
> +
> + assert(decoder);
> +
> + flush(dec, 0);
> +
> + dec->ws->cs_destroy(dec->cs);
> +
> + for (i = 0; i < NUM_BUFFERS; ++i) {
> + si_vid_destroy_buffer(&dec->bs_buffers[i]);
> + }
> +
> + FREE(dec);
> }
>
> /**
> @@ -91,7 +287,19 @@ static void radeon_jpeg_begin_frame(struct pipe_video_codec *decoder,
> struct pipe_video_buffer *target,
> struct pipe_picture_desc *picture)
> {
> - /* TODO */
> + struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> + uintptr_t frame;
> +
> + assert(decoder);
> +
> + frame = ++dec->frame_number;
> + vl_video_buffer_set_associated_data(target, decoder, (void *)frame,
> + &radeon_jpeg_destroy_associated_data);
> +
> + dec->bs_size = 0;
> + dec->bs_ptr = dec->ws->buffer_map(
> + dec->bs_buffers[dec->cur_buffer].res->buf,
> + dec->cs, PIPE_TRANSFER_WRITE);
> }
>
> /**
> @@ -103,7 +311,8 @@ static void radeon_jpeg_decode_macroblock(struct pipe_video_codec *decoder,
> const struct pipe_macroblock *macroblocks,
> unsigned num_macroblocks)
> {
> - /* TODO */
> + /* not supported (yet) */
> + assert(0);
> }
>
> /**
> @@ -116,7 +325,51 @@ static void radeon_jpeg_decode_bitstream(struct pipe_video_codec *decoder,
> const void * const *buffers,
> const unsigned *sizes)
> {
> - /* TODO */
> + struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> + enum pipe_video_format format = u_reduce_video_profile(picture->profile);
> + unsigned i;
> +
> + assert(decoder);
> +
> + if (!dec->bs_ptr)
> + return;
> +
> + if (format == PIPE_VIDEO_FORMAT_JPEG)
> + get_mjpeg_slice_header(dec, (struct pipe_mjpeg_picture_desc*)picture);
> +
> + for (i = 0; i < num_buffers; ++i) {
> + struct rvid_buffer *buf = &dec->bs_buffers[dec->cur_buffer];
> + unsigned new_size = dec->bs_size + sizes[i];
> +
> + if (format == PIPE_VIDEO_FORMAT_JPEG)
> + new_size += 2; /* save for EOI */
> +
> + if (new_size > buf->res->buf->size) {
> + dec->ws->buffer_unmap(buf->res->buf);
> + if (!si_vid_resize_buffer(dec->screen, dec->cs, buf, new_size)) {
> + RVID_ERR("Can't resize bitstream buffer!");
> + return;
> + }
> +
> + dec->bs_ptr = dec->ws->buffer_map(buf->res->buf, dec->cs,
> + PIPE_TRANSFER_WRITE);
> + if (!dec->bs_ptr)
> + return;
> +
> + dec->bs_ptr += dec->bs_size;
> + }
> +
> + memcpy(dec->bs_ptr, buffers[i], sizes[i]);
> + dec->bs_size += sizes[i];
> + dec->bs_ptr += sizes[i];
> + }
> +
> + if (format == PIPE_VIDEO_FORMAT_JPEG) {
> + ((uint8_t *)dec->bs_ptr)[0] = 0xff; /* EOI */
> + ((uint8_t *)dec->bs_ptr)[1] = 0xd9;
> + dec->bs_size += 2;
> + dec->bs_ptr += 2;
> + }
> }
>
> /**
> @@ -126,7 +379,30 @@ static void radeon_jpeg_end_frame(struct pipe_video_codec *decoder,
> struct pipe_video_buffer *target,
> struct pipe_picture_desc *picture)
> {
> - /* TODO */
> + struct radeon_jpeg_decoder *dec = (struct radeon_jpeg_decoder*)decoder;
> + struct pb_buffer *dt;
> + struct rvid_buffer *bs_buf;
> +
> + assert(decoder);
> +
> + if (!dec->bs_ptr)
> + return;
> +
> + bs_buf = &dec->bs_buffers[dec->cur_buffer];
> +
> + memset(dec->bs_ptr, 0, align(dec->bs_size, 128) - dec->bs_size);
> + dec->ws->buffer_unmap(bs_buf->res->buf);
> +
> + dt = radeon_jpeg_get_decode_param(dec, target, picture);
> +
> + send_cmd_bitstream(dec, bs_buf->res->buf,
> + 0, RADEON_USAGE_READ, RADEON_DOMAIN_GTT);
> + send_cmd_target(dec, dt, 0,
> + RADEON_USAGE_WRITE, RADEON_DOMAIN_VRAM);
> +
> + flush(dec, PIPE_FLUSH_ASYNC);
> +
> + next_buffer(dec);
> }
>
> /**
More information about the mesa-dev
mailing list