[PATCH v7, 15/15] media: mtk-vcodec: support stateless VP9 decoding

Nicolas Dufresne nicolas at ndufresne.ca
Tue Mar 1 22:22:23 UTC 2022


Le mercredi 23 février 2022 à 11:40 +0800, Yunfei Dong a écrit :
> Add support for VP9 decoding using the stateless API,
> as supported by MT8192. The driver uses the LAT and core architecture.

You already have a Reviewed-by tag, but I'm under the impression that there is a
fair amount of duplication with the v4l2-vp9 helper library:

  include/media/v4l2-vp9.h
  drivers/media/v4l2-core/v4l2-vp9.c

Can you at least take a look at it and comment on why you can't use or adapt it
for this driver?

> 
> Signed-off-by: Yunfei Dong <yunfei.dong at mediatek.com>
> Signed-off-by: George Sun <george.sun at mediatek.com>
> Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno at collabora.com>
> ---
>  drivers/media/platform/mtk-vcodec/Makefile    |    1 +
>  .../mtk-vcodec/mtk_vcodec_dec_stateless.c     |   26 +-
>  .../platform/mtk-vcodec/mtk_vcodec_drv.h      |    1 +
>  .../mtk-vcodec/vdec/vdec_vp9_req_lat_if.c     | 1971 +++++++++++++++++
>  .../media/platform/mtk-vcodec/vdec_drv_if.c   |    4 +
>  .../media/platform/mtk-vcodec/vdec_drv_if.h   |    1 +
>  6 files changed, 2001 insertions(+), 3 deletions(-)
>  create mode 100644 drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_req_lat_if.c
> 
> diff --git a/drivers/media/platform/mtk-vcodec/Makefile b/drivers/media/platform/mtk-vcodec/Makefile
> index b457daf2d196..93e7a343b5b0 100644
> --- a/drivers/media/platform/mtk-vcodec/Makefile
> +++ b/drivers/media/platform/mtk-vcodec/Makefile
> @@ -9,6 +9,7 @@ mtk-vcodec-dec-y := vdec/vdec_h264_if.o \
>  		vdec/vdec_vp8_if.o \
>  		vdec/vdec_vp8_req_if.o \
>  		vdec/vdec_vp9_if.o \
> +		vdec/vdec_vp9_req_lat_if.o \
>  		vdec/vdec_h264_req_if.o \
>  		vdec/vdec_h264_req_common.o \
>  		vdec/vdec_h264_req_multi_if.o \
> diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c
> index 2a0164ddc708..3770e8117488 100644
> --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c
> +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_dec_stateless.c
> @@ -91,13 +91,28 @@ static const struct mtk_stateless_control mtk_stateless_controls[] = {
>  			.max = V4L2_MPEG_VIDEO_VP8_PROFILE_3,
>  		},
>  		.codec_type = V4L2_PIX_FMT_VP8_FRAME,
> -	}
> +	},
> +	{
> +		.cfg = {
> +			.id = V4L2_CID_STATELESS_VP9_FRAME,
> +		},
> +		.codec_type = V4L2_PIX_FMT_VP9_FRAME,
> +	},
> +	{
> +		.cfg = {
> +			.id = V4L2_CID_MPEG_VIDEO_VP9_PROFILE,
> +			.min = V4L2_MPEG_VIDEO_VP9_PROFILE_0,
> +			.def = V4L2_MPEG_VIDEO_VP9_PROFILE_0,
> +			.max = V4L2_MPEG_VIDEO_VP9_PROFILE_3,
> +		},
> +		.codec_type = V4L2_PIX_FMT_VP9_FRAME,
> +	},
>  };
>  
>  #define NUM_CTRLS ARRAY_SIZE(mtk_stateless_controls)
>  
> -static struct mtk_video_fmt mtk_video_formats[4];
> -static struct mtk_codec_framesizes mtk_vdec_framesizes[2];
> +static struct mtk_video_fmt mtk_video_formats[5];
> +static struct mtk_codec_framesizes mtk_vdec_framesizes[3];
>  
>  static struct mtk_video_fmt default_out_format;
>  static struct mtk_video_fmt default_cap_format;
> @@ -366,6 +381,7 @@ static void mtk_vcodec_add_formats(unsigned int fourcc,
>  	switch (fourcc) {
>  	case V4L2_PIX_FMT_H264_SLICE:
>  	case V4L2_PIX_FMT_VP8_FRAME:
> +	case V4L2_PIX_FMT_VP9_FRAME:
>  		mtk_video_formats[count_formats].fourcc = fourcc;
>  		mtk_video_formats[count_formats].type = MTK_FMT_DEC;
>  		mtk_video_formats[count_formats].num_planes = 1;
> @@ -413,6 +429,10 @@ static void mtk_vcodec_get_supported_formats(struct mtk_vcodec_ctx *ctx)
>  		mtk_vcodec_add_formats(V4L2_PIX_FMT_VP8_FRAME, ctx);
>  		out_format_count++;
>  	}
> +	if (ctx->dev->dec_capability & MTK_VDEC_FORMAT_VP9_FRAME) {
> +		mtk_vcodec_add_formats(V4L2_PIX_FMT_VP9_FRAME, ctx);
> +		out_format_count++;
> +	}
>  
>  	if (cap_format_count)
>  		default_cap_format = mtk_video_formats[cap_format_count - 1];
> diff --git a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
> index c68297db225e..ea58f11e7659 100644
> --- a/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
> +++ b/drivers/media/platform/mtk-vcodec/mtk_vcodec_drv.h
> @@ -355,6 +355,7 @@ enum mtk_vdec_format_types {
>  	MTK_VDEC_FORMAT_MT21C = 0x40,
>  	MTK_VDEC_FORMAT_H264_SLICE = 0x100,
>  	MTK_VDEC_FORMAT_VP8_FRAME = 0x200,
> +	MTK_VDEC_FORMAT_VP9_FRAME = 0x400,
>  };
>  
>  /**
> diff --git a/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_req_lat_if.c b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_req_lat_if.c
> new file mode 100644
> index 000000000000..c678170c7ca3
> --- /dev/null
> +++ b/drivers/media/platform/mtk-vcodec/vdec/vdec_vp9_req_lat_if.c
> @@ -0,0 +1,1971 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (c) 2021 MediaTek Inc.
> + * Author: George Sun <george.sun at mediatek.com>
> + */
> +
> +#include <linux/module.h>
> +#include <linux/slab.h>
> +#include <media/videobuf2-dma-contig.h>
> +
> +#include "../mtk_vcodec_util.h"
> +#include "../mtk_vcodec_dec.h"
> +#include "../mtk_vcodec_intr.h"
> +#include "../vdec_drv_base.h"
> +#include "../vdec_drv_if.h"
> +#include "../vdec_vpu_if.h"
> +
> +/* reset_frame_context defined in VP9 spec */
> +#define VP9_RESET_FRAME_CONTEXT_NONE0 0
> +#define VP9_RESET_FRAME_CONTEXT_NONE1 1
> +#define VP9_RESET_FRAME_CONTEXT_SPEC 2
> +#define VP9_RESET_FRAME_CONTEXT_ALL 3
> +
> +#define VP9_TILE_BUF_SIZE 4096
> +#define VP9_PROB_BUF_SIZE 2560
> +#define VP9_COUNTS_BUF_SIZE 16384
> +
> +#define HDR_FLAG(x) (!!((hdr)->flags & V4L2_VP9_FRAME_FLAG_##x))
> +#define LF_FLAG(x) (!!((lf)->flags & V4L2_VP9_LOOP_FILTER_FLAG_##x))
> +#define SEG_FLAG(x) (!!((seg)->flags & V4L2_VP9_SEGMENTATION_FLAG_##x))
> +
> +/*
> + * struct vdec_vp9_slice_frame_ctx - vp9 prob tables footprint
> + */
> +struct vdec_vp9_slice_frame_ctx {
> +	struct {
> +		u8 probs[6][3];
> +		u8 padding[2];
> +	} coef_probs[4][2][2][6];
> +
> +	u8 y_mode_prob[4][16];
> +	u8 switch_interp_prob[4][16];
> +	u8 seg[32];  /* ignore */
> +	u8 comp_inter_prob[16];
> +	u8 comp_ref_prob[16];
> +	u8 single_ref_prob[5][2];
> +	u8 single_ref_prob_padding[6];
> +
> +	u8 joint[3];
> +	u8 joint_padding[13];
> +	struct {
> +		u8 sign;
> +		u8 classes[10];
> +		u8 padding[5];
> +	} sign_classes[2];
> +	struct {
> +		u8 class0[1];
> +		u8 bits[10];
> +		u8 padding[5];
> +	} class0_bits[2];
> +	struct {
> +		u8 class0_fp[2][3];
> +		u8 fp[3];
> +		u8 class0_hp;
> +		u8 hp;
> +		u8 padding[5];
> +	} class0_fp_hp[2];
> +
> +	u8 uv_mode_prob[10][16];
> +	u8 uv_mode_prob_padding[2][16];
> +
> +	u8 partition_prob[16][4];
> +
> +	u8 inter_mode_probs[7][4];
> +	u8 skip_probs[4];
> +
> +	u8 tx_p8x8[2][4];
> +	u8 tx_p16x16[2][4];
> +	u8 tx_p32x32[2][4];
> +	u8 intra_inter_prob[8];
> +};
> +
> +/*
> + * struct vdec_vp9_slice_frame_counts - vp9 counts tables footprint
> + */
> +struct vdec_vp9_slice_frame_counts {
> +	union {
> +		struct {
> +			u32 band_0[3];
> +			u32 padding0[1];
> +			u32 band_1_5[5][6];
> +			u32 padding1[2];
> +		} eob_branch[4][2][2];
> +		u32 eob_branch_space[256 * 4];
> +	};
> +
> +	struct {
> +		u32 band_0[3][4];
> +		u32 band_1_5[5][6][4];
> +	} coef_probs[4][2][2];
> +
> +	u32 intra_inter[4][2];
> +	u32 comp_inter[5][2];
> +	u32 comp_inter_padding[2];
> +	u32 comp_ref[5][2];
> +	u32 comp_ref_padding[2];
> +	u32 single_ref[5][2][2];
> +	u32 inter_mode[7][4];
> +	u32 y_mode[4][12];
> +	u32 uv_mode[10][10];
> +	u32 partition[16][4];
> +	u32 switchable_interp[4][4];
> +
> +	u32 tx_p8x8[2][2];
> +	u32 tx_p16x16[2][4];
> +	u32 tx_p32x32[2][4];
> +
> +	u32 skip[3][4];
> +
> +	u32 joint[4];
> +
> +	struct {
> +		u32 sign[2];
> +		u32 class0[2];
> +		u32 classes[12];
> +		u32 bits[10][2];
> +		u32 padding[4];
> +		u32 class0_fp[2][4];
> +		u32 fp[4];
> +		u32 class0_hp[2];
> +		u32 hp[2];
> +	} mvcomp[2];
> +
> +	u32 reserved[126][4];
> +};
> +
> +/*
> + * struct vdec_vp9_slice_uncompressed_header - vp9 uncompressed header syntax
> + *                                             used for decoding
> + */
> +struct vdec_vp9_slice_uncompressed_header {
> +	u8 profile;
> +	u8 last_frame_type;
> +	u8 frame_type;
> +
> +	u8 last_show_frame;
> +	u8 show_frame;
> +	u8 error_resilient_mode;
> +
> +	u8 bit_depth;
> +	u8 padding0[1];
> +	u16 last_frame_width;
> +	u16 last_frame_height;
> +	u16 frame_width;
> +	u16 frame_height;
> +
> +	u8 intra_only;
> +	u8 reset_frame_context;
> +	u8 ref_frame_sign_bias[4];
> +	u8 allow_high_precision_mv;
> +	u8 interpolation_filter;
> +
> +	u8 refresh_frame_context;
> +	u8 frame_parallel_decoding_mode;
> +	u8 frame_context_idx;
> +
> +	/* loop_filter_params */
> +	u8 loop_filter_level;
> +	u8 loop_filter_sharpness;
> +	u8 loop_filter_delta_enabled;
> +	s8 loop_filter_ref_deltas[4];
> +	s8 loop_filter_mode_deltas[2];
> +
> +	/* quantization_params */
> +	u8 base_q_idx;
> +	s8 delta_q_y_dc;
> +	s8 delta_q_uv_dc;
> +	s8 delta_q_uv_ac;
> +
> +	/* segmentation_params */
> +	u8 segmentation_enabled;
> +	u8 segmentation_update_map;
> +	u8 segmentation_tree_probs[7];
> +	u8 padding1[1];
> +	u8 segmentation_temporal_udpate;
> +	u8 segmentation_pred_prob[3];
> +	u8 segmentation_update_data;
> +	u8 segmentation_abs_or_delta_update;
> +	u8 feature_enabled[8];
> +	s16 feature_value[8][4];
> +
> +	/* tile_info */
> +	u8 tile_cols_log2;
> +	u8 tile_rows_log2;
> +	u8 padding2[2];
> +
> +	u16 uncompressed_header_size;
> +	u16 header_size_in_bytes;
> +
> +	/* LAT OUT, CORE IN */
> +	u32 dequant[8][4];
> +};
> +
> +/*
> + * struct vdec_vp9_slice_compressed_header - vp9 compressed header syntax
> + *                                           used for decoding.
> + */
> +struct vdec_vp9_slice_compressed_header {
> +	u8 tx_mode;
> +	u8 ref_mode;
> +	u8 comp_fixed_ref;
> +	u8 comp_var_ref[2];
> +	u8 padding[3];
> +};
> +
> +/*
> + * struct vdec_vp9_slice_tiles - vp9 tile syntax
> + */
> +struct vdec_vp9_slice_tiles {
> +	u32 size[4][64];
> +	u32 mi_rows[4];
> +	u32 mi_cols[64];
> +	u8 actual_rows;
> +	u8 padding[7];
> +};
> +
> +/*
> + * struct vdec_vp9_slice_reference - vp9 reference frame information
> + */
> +struct vdec_vp9_slice_reference {
> +	u16 frame_width;
> +	u16 frame_height;
> +	u8 bit_depth;
> +	u8 subsampling_x;
> +	u8 subsampling_y;
> +	u8 padding;
> +};
> +
> +/*
> + * struct vdec_vp9_slice_frame - vp9 syntax used for decoding
> + */
> +struct vdec_vp9_slice_frame {
> +	struct vdec_vp9_slice_uncompressed_header uh;
> +	struct vdec_vp9_slice_compressed_header ch;
> +	struct vdec_vp9_slice_tiles tiles;
> +	struct vdec_vp9_slice_reference ref[3];
> +};
> +
> +/*
> + * struct vdec_vp9_slice_init_vsi - VSI used to initialize instance
> + */
> +struct vdec_vp9_slice_init_vsi {
> +	unsigned int architecture;
> +	unsigned int reserved;
> +	u64 core_vsi;
> +	/* default frame context's position in MicroP */
> +	u64 default_frame_ctx;
> +};
> +
> +/*
> + * struct vdec_vp9_slice_mem - memory address and size
> + */
> +struct vdec_vp9_slice_mem {
> +	union {
> +		u64 buf;
> +		dma_addr_t dma_addr;
> +	};
> +	union {
> +		size_t size;
> +		dma_addr_t dma_addr_end;
> +		u64 padding;
> +	};
> +};
> +
> +/*
> + * struct vdec_vp9_slice_bs - input buffer for decoding
> + */
> +struct vdec_vp9_slice_bs {
> +	struct vdec_vp9_slice_mem buf;
> +	struct vdec_vp9_slice_mem frame;
> +};
> +
> +/*
> + * struct vdec_vp9_slice_fb - frame buffer for decoding
> + */
> +struct vdec_vp9_slice_fb {
> +	struct vdec_vp9_slice_mem y;
> +	struct vdec_vp9_slice_mem c;
> +};
> +
> +/*
> + * struct vdec_vp9_slice_state - decoding state
> + */
> +struct vdec_vp9_slice_state {
> +	int err;
> +	unsigned int full;
> +	unsigned int timeout;
> +	unsigned int perf;
> +
> +	unsigned int crc[12];
> +};
> +
> +/**
> + * struct vdec_vp9_slice_vsi - exchange decoding information
> + *                             between Main CPU and MicroP
> + * @bs          : input buffer
> + * @fb          : output buffer
> + * @ref         : 3 reference buffers
> + * @mv          : mv working buffer
> + * @seg         : segmentation working buffer
> + * @tile        : tile buffer
> + * @prob        : prob table buffer, used to set/update prob table
> + * @counts      : counts table buffer, used to update prob table
> + * @ube         : general buffer
> + * @trans       : trans buffer position in general buffer
> + * @err_map     : error buffer
> + * @row_info    : row info buffer
> + * @frame       : decoding syntax
> + * @state       : decoding state
> + */
> +struct vdec_vp9_slice_vsi {
> +	/* used in LAT stage */
> +	struct vdec_vp9_slice_bs bs;
> +	/* used in Core stage */
> +	struct vdec_vp9_slice_fb fb;
> +	struct vdec_vp9_slice_fb ref[3];
> +
> +	struct vdec_vp9_slice_mem mv[2];
> +	struct vdec_vp9_slice_mem seg[2];
> +	struct vdec_vp9_slice_mem tile;
> +	struct vdec_vp9_slice_mem prob;
> +	struct vdec_vp9_slice_mem counts;
> +
> +	/* LAT stage's output, Core stage's input */
> +	struct vdec_vp9_slice_mem ube;
> +	struct vdec_vp9_slice_mem trans;
> +	struct vdec_vp9_slice_mem err_map;
> +	struct vdec_vp9_slice_mem row_info;
> +
> +	/* decoding parameters */
> +	struct vdec_vp9_slice_frame frame;
> +
> +	struct vdec_vp9_slice_state state;
> +};
> +
> +/**
> + * struct vdec_vp9_slice_pfc - per-frame context that contains a local vsi.
> + *                             pass it from lat to core
> + * @vsi         : local vsi. copy to/from remote vsi before/after decoding
> + * @ref_idx     : reference buffer index
> + * @seq         : picture sequence
> + * @state       : decoding state
> + */
> +struct vdec_vp9_slice_pfc {
> +	struct vdec_vp9_slice_vsi vsi;
> +
> +	u64 ref_idx[3];
> +
> +	int seq;
> +
> +	/* LAT/Core CRC */
> +	struct vdec_vp9_slice_state state[2];
> +};
> +
> +/*
> + * enum vdec_vp9_slice_resolution_level
> + */
> +enum vdec_vp9_slice_resolution_level {
> +	VP9_RES_NONE,
> +	VP9_RES_FHD,
> +	VP9_RES_4K,
> +	VP9_RES_8K,
> +};
> +
> +/*
> + * struct vdec_vp9_slice_ref - picture's width & height should be kept
> + *                             for later decoding as reference picture
> + */
> +struct vdec_vp9_slice_ref {
> +	unsigned int width;
> +	unsigned int height;
> +};
> +
> +/**
> + * struct vdec_vp9_slice_instance - represent one vp9 instance
> + * @ctx         : pointer to codec's context
> + * @vpu         : VPU instance
> + * @seq         : global picture sequence
> + * @level       : level of current resolution
> + * @width       : width of last picture
> + * @height      : height of last picture
> + * @frame_type  : frame_type of last picture
> + * @irq         : irq to Main CPU or MicroP
> + * @show_frame  : show_frame of last picture
> + * @dpb         : picture information (width/height) for reference
> + * @mv          : mv working buffer
> + * @seg         : segmentation working buffer
> + * @tile        : tile buffer
> + * @prob        : prob table buffer, used to set/update prob table
> + * @counts      : counts table buffer, used to update prob table
> + * @frame_ctx   : 4 frame context according to VP9 Spec
> + * @dirty       : state of each frame context
> + * @init_vsi    : vsi used for initialized VP9 instance
> + * @vsi         : vsi used for decoding/flush ...
> + * @core_vsi    : vsi used for Core stage
> + */
> +struct vdec_vp9_slice_instance {
> +	struct mtk_vcodec_ctx *ctx;
> +	struct vdec_vpu_inst vpu;
> +
> +	int seq;
> +
> +	enum vdec_vp9_slice_resolution_level level;
> +
> +	/* for resolution change and get_pic_info */
> +	unsigned int width;
> +	unsigned int height;
> +
> +	/* for last_frame_type */
> +	unsigned int frame_type;
> +	unsigned int irq;
> +
> +	unsigned int show_frame;
> +
> +	/* maintain vp9 reference frame state */
> +	struct vdec_vp9_slice_ref dpb[VB2_MAX_FRAME];
> +
> +	/*
> +	 * normal working buffers
> +	 * mv[0]/seg[0]/tile/prob/counts is used for LAT
> +	 * mv[1]/seg[1] is used for CORE
> +	 */
> +	struct mtk_vcodec_mem mv[2];
> +	struct mtk_vcodec_mem seg[2];
> +	struct mtk_vcodec_mem tile;
> +	struct mtk_vcodec_mem prob;
> +	struct mtk_vcodec_mem counts;
> +
> +	/* 4 prob tables */
> +	struct vdec_vp9_slice_frame_ctx frame_ctx[4];
> +	unsigned char dirty[4];
> +
> +	/* MicroP vsi */
> +	union {
> +		struct vdec_vp9_slice_init_vsi *init_vsi;
> +		struct vdec_vp9_slice_vsi *vsi;
> +	};
> +	struct vdec_vp9_slice_vsi *core_vsi;
> +};
> +
> +/*
> + * (2, (0, (1, 3)))
> + * max level = 2
> + */
> +static const signed char vdec_vp9_slice_inter_mode_tree[6] = {
> +	-2, 2, 0, 4, -1, -3
> +};
> +
> +/* max level = 6 */
> +static const signed char vdec_vp9_slice_intra_mode_tree[18] = {
> +	0, 2, -9, 4, -1, 6, 8, 12, -2, 10, -4, -5, -3, 14, -8, 16, -6, -7
> +};
> +
> +/* max level = 2 */
> +static const signed char vdec_vp9_slice_partition_tree[6] = {
> +	0, 2, -1, 4, -2, -3
> +};
> +
> +/* max level = 1 */
> +static const signed char vdec_vp9_slice_switchable_interp_tree[4] = {
> +	0, 2, -1, -2
> +};
> +
> +/* max level = 2 */
> +static const signed char vdec_vp9_slice_mv_joint_tree[6] = {
> +	0, 2, -1, 4, -2, -3
> +};
> +
> +/* max level = 6 */
> +static const signed char vdec_vp9_slice_mv_class_tree[20] = {
> +	0, 2, -1, 4, 6, 8, -2, -3, 10, 12,
> +	-4, -5, -6, 14, 16, 18, -7, -8, -9, -10
> +};
> +
> +/* max level = 0 */
> +static const signed char vdec_vp9_slice_mv_class0_tree[2] = {
> +	0, -1
> +};
> +
> +/* max level = 2 */
> +static const signed char vdec_vp9_slice_mv_fp_tree[6] = {
> +	0, 2, -1, 4, -2, -3
> +};
> +
> +/*
> + * all VP9 instances could share this default frame context.
> + */
> +static struct vdec_vp9_slice_frame_ctx *vdec_vp9_slice_default_frame_ctx;
> +static DEFINE_MUTEX(vdec_vp9_slice_frame_ctx_lock);
> +
> +static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf);
> +
> +static int vdec_vp9_slice_init_default_frame_ctx(struct vdec_vp9_slice_instance *instance)
> +{
> +	struct vdec_vp9_slice_frame_ctx *remote_frame_ctx;
> +	struct vdec_vp9_slice_frame_ctx *frame_ctx;
> +	struct mtk_vcodec_ctx *ctx;
> +	struct vdec_vp9_slice_init_vsi *vsi;
> +	int ret = 0;
> +
> +	ctx = instance->ctx;
> +	vsi = instance->vpu.vsi;
> +	if (!ctx || !vsi)
> +		return -EINVAL;
> +
> +	remote_frame_ctx = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
> +						     (u32)vsi->default_frame_ctx);
> +	if (!remote_frame_ctx) {
> +		mtk_vcodec_err(instance, "failed to map default frame ctx\n");
> +		return -EINVAL;
> +	}
> +
> +	mutex_lock(&vdec_vp9_slice_frame_ctx_lock);
> +	if (vdec_vp9_slice_default_frame_ctx)
> +		goto out;
> +
> +	frame_ctx = kmalloc(sizeof(*frame_ctx), GFP_KERNEL);
> +	if (!frame_ctx) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	memcpy_fromio(frame_ctx, remote_frame_ctx, sizeof(*frame_ctx));
> +	vdec_vp9_slice_default_frame_ctx = frame_ctx;
> +
> +out:
> +	mutex_unlock(&vdec_vp9_slice_frame_ctx_lock);
> +
> +	return ret;
> +}
> +
> +static int vdec_vp9_slice_alloc_working_buffer(struct vdec_vp9_slice_instance *instance,
> +					       struct vdec_vp9_slice_vsi *vsi)
> +{
> +	struct mtk_vcodec_ctx *ctx = instance->ctx;
> +	enum vdec_vp9_slice_resolution_level level;
> +	/* super blocks */
> +	unsigned int max_sb_w;
> +	unsigned int max_sb_h;
> +	unsigned int max_w;
> +	unsigned int max_h;
> +	unsigned int w;
> +	unsigned int h;
> +	size_t size;
> +	int ret;
> +	int i;
> +
> +	w = vsi->frame.uh.frame_width;
> +	h = vsi->frame.uh.frame_height;
> +
> +	if (w > VCODEC_DEC_4K_CODED_WIDTH ||
> +	    h > VCODEC_DEC_4K_CODED_HEIGHT) {
> +		/* 8K? */
> +		return -EINVAL;
> +	} else if (w > MTK_VDEC_MAX_W || h > MTK_VDEC_MAX_H) {
> +		/* 4K */
> +		level = VP9_RES_4K;
> +		max_w = VCODEC_DEC_4K_CODED_WIDTH;
> +		max_h = VCODEC_DEC_4K_CODED_HEIGHT;
> +	} else {
> +		/* FHD */
> +		level = VP9_RES_FHD;
> +		max_w = MTK_VDEC_MAX_W;
> +		max_h = MTK_VDEC_MAX_H;
> +	}
> +
> +	if (level == instance->level)
> +		return 0;
> +
> +	mtk_vcodec_debug(instance, "resolution level changed, from %u to %u, %ux%u",
> +			 instance->level, level, w, h);
> +
> +	max_sb_w = DIV_ROUND_UP(max_w, 64);
> +	max_sb_h = DIV_ROUND_UP(max_h, 64);
> +	ret = -ENOMEM;
> +
> +	/*
> +	 * Lat-flush must wait core idle, otherwise core will
> +	 * use released buffers
> +	 */
> +
> +	size = (max_sb_w * max_sb_h + 2) * 576;
> +	for (i = 0; i < 2; i++) {
> +		if (instance->mv[i].va)
> +			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
> +		instance->mv[i].size = size;
> +		if (mtk_vcodec_mem_alloc(ctx, &instance->mv[i]))
> +			goto err;
> +	}
> +
> +	size = (max_sb_w * max_sb_h * 32) + 256;
> +	for (i = 0; i < 2; i++) {
> +		if (instance->seg[i].va)
> +			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
> +		instance->seg[i].size = size;
> +		if (mtk_vcodec_mem_alloc(ctx, &instance->seg[i]))
> +			goto err;
> +	}
> +
> +	if (!instance->tile.va) {
> +		instance->tile.size = VP9_TILE_BUF_SIZE;
> +		if (mtk_vcodec_mem_alloc(ctx, &instance->tile))
> +			goto err;
> +	}
> +
> +	if (!instance->prob.va) {
> +		instance->prob.size = VP9_PROB_BUF_SIZE;
> +		if (mtk_vcodec_mem_alloc(ctx, &instance->prob))
> +			goto err;
> +	}
> +
> +	if (!instance->counts.va) {
> +		instance->counts.size = VP9_COUNTS_BUF_SIZE;
> +		if (mtk_vcodec_mem_alloc(ctx, &instance->counts))
> +			goto err;
> +	}
> +
> +	instance->level = level;
> +	return 0;
> +
> +err:
> +	instance->level = VP9_RES_NONE;
> +	return ret;
> +}
> +
> +static void vdec_vp9_slice_free_working_buffer(struct vdec_vp9_slice_instance *instance)
> +{
> +	struct mtk_vcodec_ctx *ctx = instance->ctx;
> +	int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(instance->mv); i++) {
> +		if (instance->mv[i].va)
> +			mtk_vcodec_mem_free(ctx, &instance->mv[i]);
> +	}
> +	for (i = 0; i < ARRAY_SIZE(instance->seg); i++) {
> +		if (instance->seg[i].va)
> +			mtk_vcodec_mem_free(ctx, &instance->seg[i]);
> +	}
> +	if (instance->tile.va)
> +		mtk_vcodec_mem_free(ctx, &instance->tile);
> +	if (instance->prob.va)
> +		mtk_vcodec_mem_free(ctx, &instance->prob);
> +	if (instance->counts.va)
> +		mtk_vcodec_mem_free(ctx, &instance->counts);
> +
> +	instance->level = VP9_RES_NONE;
> +}
> +
> +static void vdec_vp9_slice_vsi_from_remote(struct vdec_vp9_slice_vsi *vsi,
> +					   struct vdec_vp9_slice_vsi *remote_vsi,
> +					   int skip)
> +{
> +	struct vdec_vp9_slice_frame *rf;
> +	struct vdec_vp9_slice_frame *f;
> +
> +	/*
> +	 * compressed header
> +	 * dequant
> +	 * buffer position
> +	 * decode state
> +	 */
> +	if (!skip) {
> +		rf = &remote_vsi->frame;
> +		f = &vsi->frame;
> +		memcpy_fromio(&f->ch, &rf->ch, sizeof(f->ch));
> +		memcpy_fromio(&f->uh.dequant, &rf->uh.dequant, sizeof(f->uh.dequant));
> +		memcpy_fromio(&vsi->trans, &remote_vsi->trans, sizeof(vsi->trans));
> +	}
> +
> +	memcpy_fromio(&vsi->state, &remote_vsi->state, sizeof(vsi->state));
> +}
> +
> +static void vdec_vp9_slice_vsi_to_remote(struct vdec_vp9_slice_vsi *vsi,
> +					 struct vdec_vp9_slice_vsi *remote_vsi)
> +{
> +	memcpy_toio(remote_vsi, vsi, sizeof(*vsi));
> +}
> +
> +static int vdec_vp9_slice_tile_offset(int idx, int mi_num, int tile_log2)
> +{
> +	int sbs = (mi_num + 7) >> 3;
> +	int offset = ((idx * sbs) >> tile_log2) << 3;
> +
> +	return offset < mi_num ? offset : mi_num;
> +}
> +
> +static int vdec_vp9_slice_setup_lat_from_src_buf(struct vdec_vp9_slice_instance *instance,
> +						 struct vdec_lat_buf *lat_buf)
> +{
> +	struct vb2_v4l2_buffer *src;
> +	struct vb2_v4l2_buffer *dst;
> +
> +	src = v4l2_m2m_next_src_buf(instance->ctx->m2m_ctx);
> +	if (!src)
> +		return -EINVAL;
> +
> +	dst = &lat_buf->ts_info;
> +	v4l2_m2m_buf_copy_metadata(src, dst, true);
> +	return 0;
> +}
> +
> +static void vdec_vp9_slice_setup_hdr(struct vdec_vp9_slice_instance *instance,
> +				     struct vdec_vp9_slice_uncompressed_header *uh,
> +				     struct v4l2_ctrl_vp9_frame *hdr)
> +{
> +	int i;
> +
> +	uh->profile = hdr->profile;
> +	uh->last_frame_type = instance->frame_type;
> +	uh->frame_type = !HDR_FLAG(KEY_FRAME);
> +	uh->last_show_frame = instance->show_frame;
> +	uh->show_frame = HDR_FLAG(SHOW_FRAME);
> +	uh->error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
> +	uh->bit_depth = hdr->bit_depth;
> +	uh->last_frame_width = instance->width;
> +	uh->last_frame_height = instance->height;
> +	uh->frame_width = hdr->frame_width_minus_1 + 1;
> +	uh->frame_height = hdr->frame_height_minus_1 + 1;
> +	uh->intra_only = HDR_FLAG(INTRA_ONLY);
> +	/* map v4l2 enum to values defined in VP9 spec for firmware */
> +	switch (hdr->reset_frame_context) {
> +	case V4L2_VP9_RESET_FRAME_CTX_NONE:
> +		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
> +		break;
> +	case V4L2_VP9_RESET_FRAME_CTX_SPEC:
> +		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_SPEC;
> +		break;
> +	case V4L2_VP9_RESET_FRAME_CTX_ALL:
> +		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_ALL;
> +		break;
> +	default:
> +		uh->reset_frame_context = VP9_RESET_FRAME_CONTEXT_NONE0;
> +		break;
> +	}
> +	/*
> +	 * ref_frame_sign_bias specifies the intended direction
> +	 * of the motion vector in time for each reference frame.
> +	 * - INTRA_FRAME = 0,
> +	 * - LAST_FRAME = 1,
> +	 * - GOLDEN_FRAME = 2,
> +	 * - ALTREF_FRAME = 3,
> +	 * ref_frame_sign_bias[INTRA_FRAME] is always 0
> +	 * and VDA only passes another 3 directions
> +	 */
> +	uh->ref_frame_sign_bias[0] = 0;
> +	for (i = 0; i < 3; i++)
> +		uh->ref_frame_sign_bias[i + 1] =
> +			!!(hdr->ref_frame_sign_bias & (1 << i));
> +	uh->allow_high_precision_mv = HDR_FLAG(ALLOW_HIGH_PREC_MV);
> +	uh->interpolation_filter = hdr->interpolation_filter;
> +	uh->refresh_frame_context = HDR_FLAG(REFRESH_FRAME_CTX);
> +	uh->frame_parallel_decoding_mode = HDR_FLAG(PARALLEL_DEC_MODE);
> +	uh->frame_context_idx = hdr->frame_context_idx;
> +
> +	/* tile info */
> +	uh->tile_cols_log2 = hdr->tile_cols_log2;
> +	uh->tile_rows_log2 = hdr->tile_rows_log2;
> +
> +	uh->uncompressed_header_size = hdr->uncompressed_header_size;
> +	uh->header_size_in_bytes = hdr->compressed_header_size;
> +}
> +
> +static void vdec_vp9_slice_setup_frame_ctx(struct vdec_vp9_slice_instance *instance,
> +					   struct vdec_vp9_slice_uncompressed_header *uh,
> +					   struct v4l2_ctrl_vp9_frame *hdr)
> +{
> +	int error_resilient_mode;
> +	int reset_frame_context;
> +	int key_frame;
> +	int intra_only;
> +	int i;
> +
> +	key_frame = HDR_FLAG(KEY_FRAME);
> +	intra_only = HDR_FLAG(INTRA_ONLY);
> +	error_resilient_mode = HDR_FLAG(ERROR_RESILIENT);
> +	reset_frame_context = uh->reset_frame_context;
> +
> +	/*
> +	 * according to "6.2 Uncompressed header syntax" in
> +	 * "VP9 Bitstream & Decoding Process Specification",
> +	 * reset @frame_context_idx when (FrameIsIntra || error_resilient_mode)
> +	 */
> +	if (key_frame || intra_only || error_resilient_mode) {
> +		/*
> +		 * @reset_frame_context specifies
> +		 * whether the frame context should be
> +		 * reset to default values:
> +		 * 0 or 1 means do not reset any frame context
> +		 * 2 resets just the context specified in the frame header
> +		 * 3 resets all contexts
> +		 */
> +		if (key_frame || error_resilient_mode ||
> +		    reset_frame_context == 3) {
> +			/* use default table */
> +			for (i = 0; i < 4; i++)
> +				instance->dirty[i] = 0;
> +		} else if (reset_frame_context == 2) {
> +			instance->dirty[uh->frame_context_idx] = 0;
> +		}
> +		uh->frame_context_idx = 0;
> +	}
> +}
> +
> +static void vdec_vp9_slice_setup_loop_filter(struct vdec_vp9_slice_uncompressed_header *uh,
> +					     struct v4l2_vp9_loop_filter *lf)
> +{
> +	int i;
> +
> +	uh->loop_filter_level = lf->level;
> +	uh->loop_filter_sharpness = lf->sharpness;
> +	uh->loop_filter_delta_enabled = LF_FLAG(DELTA_ENABLED);
> +	for (i = 0; i < 4; i++)
> +		uh->loop_filter_ref_deltas[i] = lf->ref_deltas[i];
> +	for (i = 0; i < 2; i++)
> +		uh->loop_filter_mode_deltas[i] = lf->mode_deltas[i];
> +}
> +
> +static void vdec_vp9_slice_setup_quantization(struct vdec_vp9_slice_uncompressed_header *uh,
> +					      struct v4l2_vp9_quantization *quant)
> +{
> +	uh->base_q_idx = quant->base_q_idx;
> +	uh->delta_q_y_dc = quant->delta_q_y_dc;
> +	uh->delta_q_uv_dc = quant->delta_q_uv_dc;
> +	uh->delta_q_uv_ac = quant->delta_q_uv_ac;
> +}
> +
> +static void vdec_vp9_slice_setup_segmentation(struct vdec_vp9_slice_uncompressed_header *uh,
> +					      struct v4l2_vp9_segmentation *seg)
> +{
> +	int i;
> +	int j;
> +
> +	uh->segmentation_enabled = SEG_FLAG(ENABLED);
> +	uh->segmentation_update_map = SEG_FLAG(UPDATE_MAP);
> +	for (i = 0; i < 7; i++)
> +		uh->segmentation_tree_probs[i] = seg->tree_probs[i];
> +	uh->segmentation_temporal_udpate = SEG_FLAG(TEMPORAL_UPDATE);
> +	for (i = 0; i < 3; i++)
> +		uh->segmentation_pred_prob[i] = seg->pred_probs[i];
> +	uh->segmentation_update_data = SEG_FLAG(UPDATE_DATA);
> +	uh->segmentation_abs_or_delta_update = SEG_FLAG(ABS_OR_DELTA_UPDATE);
> +	for (i = 0; i < 8; i++) {
> +		uh->feature_enabled[i] = seg->feature_enabled[i];
> +		for (j = 0; j < 4; j++)
> +			uh->feature_value[i][j] = seg->feature_data[i][j];
> +	}
> +}
> +
> +static int vdec_vp9_slice_setup_tile(struct vdec_vp9_slice_vsi *vsi,
> +				     struct v4l2_ctrl_vp9_frame *hdr)
> +{
> +	unsigned int rows_log2;
> +	unsigned int cols_log2;
> +	unsigned int rows;
> +	unsigned int cols;
> +	unsigned int mi_rows;
> +	unsigned int mi_cols;
> +	struct vdec_vp9_slice_tiles *tiles;
> +	int offset;
> +	int start;
> +	int end;
> +	int i;
> +
> +	rows_log2 = hdr->tile_rows_log2;
> +	cols_log2 = hdr->tile_cols_log2;
> +	rows = 1 << rows_log2;
> +	cols = 1 << cols_log2;
> +	tiles = &vsi->frame.tiles;
> +	tiles->actual_rows = 0;
> +
> +	if (rows > 4 || cols > 64)
> +		return -EINVAL;
> +
> +	/* setup mi rows/cols information */
> +	mi_rows = (hdr->frame_height_minus_1 + 1 + 7) >> 3;
> +	mi_cols = (hdr->frame_width_minus_1 + 1 + 7) >> 3;
> +
> +	for (i = 0; i < rows; i++) {
> +		start = vdec_vp9_slice_tile_offset(i, mi_rows, rows_log2);
> +		end = vdec_vp9_slice_tile_offset(i + 1, mi_rows, rows_log2);
> +		offset = end - start;
> +		tiles->mi_rows[i] = (offset + 7) >> 3;
> +		if (tiles->mi_rows[i])
> +			tiles->actual_rows++;
> +	}
> +
> +	for (i = 0; i < cols; i++) {
> +		start = vdec_vp9_slice_tile_offset(i, mi_cols, cols_log2);
> +		end = vdec_vp9_slice_tile_offset(i + 1, mi_cols, cols_log2);
> +		offset = end - start;
> +		tiles->mi_cols[i] = (offset + 7) >> 3;
> +	}
> +
> +	return 0;
> +}
> +
> +static void vdec_vp9_slice_setup_state(struct vdec_vp9_slice_vsi *vsi)
> +{
> +	memset(&vsi->state, 0, sizeof(vsi->state));
> +}
> +
> +static void vdec_vp9_slice_setup_ref_idx(struct vdec_vp9_slice_pfc *pfc,
> +					 struct v4l2_ctrl_vp9_frame *hdr)
> +{
> +	pfc->ref_idx[0] = hdr->last_frame_ts;
> +	pfc->ref_idx[1] = hdr->golden_frame_ts;
> +	pfc->ref_idx[2] = hdr->alt_frame_ts;
> +}
> +
> +static int vdec_vp9_slice_setup_pfc(struct vdec_vp9_slice_instance *instance,
> +				    struct vdec_vp9_slice_pfc *pfc)
> +{
> +	struct v4l2_ctrl_vp9_frame *hdr;
> +	struct vdec_vp9_slice_uncompressed_header *uh;
> +	struct v4l2_ctrl *hdr_ctrl;
> +	struct vdec_vp9_slice_vsi *vsi;
> +	int ret;
> +
> +	/* frame header */
> +	hdr_ctrl = v4l2_ctrl_find(&instance->ctx->ctrl_hdl, V4L2_CID_STATELESS_VP9_FRAME);
> +	if (!hdr_ctrl || !hdr_ctrl->p_cur.p)
> +		return -EINVAL;
> +
> +	hdr = hdr_ctrl->p_cur.p;
> +	vsi = &pfc->vsi;
> +	uh = &vsi->frame.uh;
> +
> +	/* setup vsi information */
> +	vdec_vp9_slice_setup_hdr(instance, uh, hdr);
> +	vdec_vp9_slice_setup_frame_ctx(instance, uh, hdr);
> +	vdec_vp9_slice_setup_loop_filter(uh, &hdr->lf);
> +	vdec_vp9_slice_setup_quantization(uh, &hdr->quant);
> +	vdec_vp9_slice_setup_segmentation(uh, &hdr->seg);
> +	ret = vdec_vp9_slice_setup_tile(vsi, hdr);
> +	if (ret)
> +		return ret;
> +	vdec_vp9_slice_setup_state(vsi);
> +
> +	/* core stage needs buffer index to get ref y/c ... */
> +	vdec_vp9_slice_setup_ref_idx(pfc, hdr);
> +
> +	pfc->seq = instance->seq;
> +	instance->seq++;
> +
> +	return 0;
> +}
> +
> +/*
> + * Publish the buffer addresses and sizes needed by the LAT stage in @vsi.
> + *
> + * The bitstream buffer @bs is used for both bs.buf and bs.frame. The working
> + * buffers (mv, seg, tile, prob, counts) come from @instance; the ube/trans
> + * and error-map addresses come from @lat_buf and its context's message
> + * queue. row_info is cleared.
> + *
> + * Return: always 0.
> + */
> +static int vdec_vp9_slice_setup_lat_buffer(struct vdec_vp9_slice_instance *instance,
> +					   struct vdec_vp9_slice_vsi *vsi,
> +					   struct mtk_vcodec_mem *bs,
> +					   struct vdec_lat_buf *lat_buf)
> +{
> +	int n;
> +
> +	/* bitstream: buffer and frame describe the same memory */
> +	vsi->bs.frame.dma_addr = bs->dma_addr;
> +	vsi->bs.frame.size = bs->size;
> +	vsi->bs.buf.dma_addr = bs->dma_addr;
> +	vsi->bs.buf.size = bs->size;
> +
> +	/* two mv buffers and two seg buffers */
> +	for (n = 0; n < 2; n++) {
> +		vsi->mv[n].dma_addr = instance->mv[n].dma_addr;
> +		vsi->mv[n].size = instance->mv[n].size;
> +		vsi->seg[n].dma_addr = instance->seg[n].dma_addr;
> +		vsi->seg[n].size = instance->seg[n].size;
> +	}
> +
> +	vsi->counts.dma_addr = instance->counts.dma_addr;
> +	vsi->counts.size = instance->counts.size;
> +	vsi->prob.dma_addr = instance->prob.dma_addr;
> +	vsi->prob.size = instance->prob.size;
> +	vsi->tile.dma_addr = instance->tile.dma_addr;
> +	vsi->tile.size = instance->tile.size;
> +
> +	vsi->err_map.dma_addr = lat_buf->wdma_err_addr.dma_addr;
> +	vsi->err_map.size = lat_buf->wdma_err_addr.size;
> +	vsi->ube.dma_addr = lat_buf->ctx->msg_queue.wdma_addr.dma_addr;
> +	vsi->ube.size = lat_buf->ctx->msg_queue.wdma_addr.size;
> +	vsi->trans.dma_addr = lat_buf->ctx->msg_queue.wdma_wptr_addr;
> +	/* used to store trans end */
> +	vsi->trans.dma_addr_end = lat_buf->ctx->msg_queue.wdma_rptr_addr;
> +
> +	vsi->row_info.size = 0;
> +	vsi->row_info.buf = 0;
> +
> +	return 0;
> +}
> +
> +/*
> + * Load the probability buffer (instance->prob.va) for the frame in @vsi.
> + *
> + * The source is the saved frame context selected by frame_context_idx when
> + * that slot is marked dirty, otherwise the default frame context.
> + *
> + * Return: always 0.
> + */
> +static int vdec_vp9_slice_setup_prob_buffer(struct vdec_vp9_slice_instance *instance,
> +					    struct vdec_vp9_slice_vsi *vsi)
> +{
> +	struct vdec_vp9_slice_uncompressed_header *uh = &vsi->frame.uh;
> +	struct vdec_vp9_slice_frame_ctx *src;
> +
> +	mtk_vcodec_debug(instance, "ctx dirty %u idx %d\n",
> +			 instance->dirty[uh->frame_context_idx],
> +			 uh->frame_context_idx);
> +
> +	src = instance->dirty[uh->frame_context_idx] ?
> +	      &instance->frame_ctx[uh->frame_context_idx] :
> +	      vdec_vp9_slice_default_frame_ctx;
> +
> +	memcpy(instance->prob.va, src, sizeof(*src));
> +
> +	return 0;
> +}
> +
> +/*
> + * Clear the segment buffer @buf unless the frame in @vsi may keep it.
> + *
> + * The buffer is kept only when frame_type is non-zero, the frame is neither
> + * intra-only nor error-resilient, and its width/height match the values
> + * stored in @instance.
> + */
> +static void vdec_vp9_slice_setup_seg_buffer(struct vdec_vp9_slice_instance *instance,
> +					    struct vdec_vp9_slice_vsi *vsi,
> +					    struct mtk_vcodec_mem *buf)
> +{
> +	struct vdec_vp9_slice_uncompressed_header *uh = &vsi->frame.uh;
> +	bool keep;
> +
> +	keep = uh->frame_type != 0 &&
> +	       !uh->intra_only &&
> +	       !uh->error_resilient_mode &&
> +	       uh->frame_width == instance->width &&
> +	       uh->frame_height == instance->height;
> +	if (keep)
> +		return;
> +
> +	/* reset segment buffer */
> +	mtk_vcodec_debug(instance, "reset seg\n");
> +	memset(buf->va, 0, buf->size);
> +}
> +
> +/*
> + * Parse tiles according to `6.4 Decode tiles syntax`
> + * in "vp9-bitstream-specification".
> + *
> + * A frame contains an uncompressed header, a compressed header and several
> + * tiles. This function walks the tile data that follows both headers in the
> + * bitstream buffer @bs, records each tile's size in vsi->frame.tiles and
> + * writes four 32-bit words per tile into the tile buffer
> + * (instance->tile.va) for decoding.
> + *
> + * Return: 0 on success, -EINVAL when there are more than 4 tile rows or
> + * 64 tile columns, when the headers already cover the whole bitstream, or
> + * when a tile size field or tile payload does not fit in the remaining data.
> + */
> +static int vdec_vp9_slice_setup_tile_buffer(struct vdec_vp9_slice_instance *instance,
> +					    struct vdec_vp9_slice_vsi *vsi,
> +					    struct mtk_vcodec_mem *bs)
> +{
> +	struct vdec_vp9_slice_uncompressed_header *uh;
> +	unsigned int rows_log2;
> +	unsigned int cols_log2;
> +	unsigned int rows;
> +	unsigned int cols;
> +	unsigned int mi_row;
> +	unsigned int mi_col;
> +	unsigned int offset;
> +	unsigned int pa;
> +	unsigned int size;
> +	struct vdec_vp9_slice_tiles *tiles;
> +	unsigned char *pos;
> +	unsigned char *end;
> +	unsigned char *va;
> +	unsigned int *tb;
> +	int i;
> +	int j;
> +
> +	uh = &vsi->frame.uh;
> +	rows_log2 = uh->tile_rows_log2;
> +	cols_log2 = uh->tile_cols_log2;
> +	rows = 1 << rows_log2;
> +	cols = 1 << cols_log2;
> +
> +	if (rows > 4 || cols > 64) {
> +		mtk_vcodec_err(instance, "tile_rows %u tile_cols %u\n",
> +			       rows, cols);
> +		return -EINVAL;
> +	}
> +
> +	/* tile data starts right after the two headers */
> +	offset = uh->uncompressed_header_size +
> +		uh->header_size_in_bytes;
> +	if (bs->size <= offset) {
> +		mtk_vcodec_err(instance, "bs size %zu tile offset %u\n",
> +			       bs->size, offset);
> +		return -EINVAL;
> +	}
> +
> +	tiles = &vsi->frame.tiles;
> +	/* setup tile buffer */
> +
> +	/*
> +	 * pos (CPU pointer), pa (device address) and offset (byte offset in
> +	 * the bitstream) are advanced together and always describe the same
> +	 * position.
> +	 */
> +	va = (unsigned char *)bs->va;
> +	pos = va + offset;
> +	end = va + bs->size;
> +	/* truncated */
> +	pa = (unsigned int)bs->dma_addr + offset;
> +	tb = instance->tile.va;
> +	for (i = 0; i < rows; i++) {
> +		for (j = 0; j < cols; j++) {
> +			if (i == rows - 1 &&
> +			    j == cols - 1) {
> +				/* the last tile takes all remaining bytes */
> +				size = (unsigned int)(end - pos);
> +			} else {
> +				if (end - pos < 4)
> +					return -EINVAL;
> +
> +				/* 4-byte big-endian tile size precedes the tile */
> +				size = (pos[0] << 24) | (pos[1] << 16) |
> +					(pos[2] << 8) | pos[3];
> +				pos += 4;
> +				pa += 4;
> +				offset += 4;
> +				if (end - pos < size)
> +					return -EINVAL;
> +			}
> +			tiles->size[i][j] = size;
> +			/* rows with a zero mi_rows entry get no tile buffer record */
> +			if (tiles->mi_rows[i]) {
> +				/*
> +				 * NOTE(review): the shifts by 3 look like a
> +				 * bytes-to-bits conversion and the masks like
> +				 * an alignment split expected by the hardware
> +				 * -- confirm against the firmware interface.
> +				 */
> +				*tb++ = (size << 3) + ((offset << 3) & 0x7f);
> +				*tb++ = pa & ~0xf;
> +				*tb++ = (pa << 3) & 0x7f;
> +				mi_row = (tiles->mi_rows[i] - 1) & 0x1ff;
> +				mi_col = (tiles->mi_cols[j] - 1) & 0x3f;
> +				*tb++ = (mi_row << 6) + mi_col;
> +			}
> +			pos += size;
> +			pa += size;
> +			offset += size;
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Prepare everything the LAT stage needs for one frame.
> + *
> + * Runs the individual setup steps in order and stops at the first failure.
> + *
> + * Return: 0 on success, otherwise the error code of the failing step.
> + */
> +static int vdec_vp9_slice_setup_lat(struct vdec_vp9_slice_instance *instance,
> +				    struct mtk_vcodec_mem *bs,
> +				    struct vdec_lat_buf *lat_buf,
> +				    struct vdec_vp9_slice_pfc *pfc)
> +{
> +	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
> +	int err;
> +
> +	err = vdec_vp9_slice_setup_lat_from_src_buf(instance, lat_buf);
> +	if (err)
> +		return err;
> +
> +	err = vdec_vp9_slice_setup_pfc(instance, pfc);
> +	if (err)
> +		return err;
> +
> +	err = vdec_vp9_slice_alloc_working_buffer(instance, vsi);
> +	if (err)
> +		return err;
> +
> +	err = vdec_vp9_slice_setup_lat_buffer(instance, vsi, bs, lat_buf);
> +	if (err)
> +		return err;
> +
> +	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[0]);
> +
> +	/* setup prob/tile buffers for LAT */
> +	err = vdec_vp9_slice_setup_prob_buffer(instance, vsi);
> +	if (err)
> +		return err;
> +
> +	return vdec_vp9_slice_setup_tile_buffer(instance, vsi, bs);
> +}
> +
> +/*
> + * Merge prob process defined in 8.4.1.
> + *
> + * @pre: probability before adaptation
> + * @ct0: number of times the boolean was decoded as 0
> + * @ct1: number of times the boolean was decoded as 1
> + * @cs:  count saturation (countSat)
> + * @uf:  maximum update factor (maxUpdateFactor)
> + *
> + * Return: the adapted probability; @pre unchanged when nothing was counted.
> + */
> +static unsigned char vdec_vp9_slice_merge_prob(unsigned char pre, unsigned int ct0,
> +					       unsigned int ct1, unsigned int cs,
> +					       unsigned int uf)
> +{
> +	/* den: total number of times this boolean has been decoded */
> +	unsigned int total = ct0 + ct1;
> +	unsigned int estimate;
> +	unsigned int weight;
> +
> +	/* no observations => count = 0 => factor = 0 => keep @pre */
> +	if (total == 0)
> +		return pre;
> +
> +	/* prob = Clip3(1, 255, (ct0 * 256 + (den >> 1)) / den) */
> +	estimate = ((ct0 << 8) + (total >> 1)) / total;
> +	if (estimate < 1)
> +		estimate = 1;
> +	else if (estimate > 255)
> +		estimate = 255;
> +
> +	/* factor = maxUpdateFactor * Min(ct0 + ct1, countSat) / countSat */
> +	weight = uf * (total < cs ? total : cs) / cs;
> +
> +	/* outProb = Round2(preProb * (256 - factor) + prob * factor, 8) */
> +	return pre + (((estimate - pre) * weight + 128) >> 8);
> +}
> +
> +/*
> + * Adapt a single probability: merge prob with a count saturation of 20
> + * and a maximum update factor of 128.
> + */
> +static inline unsigned char vdec_vp9_slice_adapt_prob(unsigned char pre, unsigned int ct0,
> +						      unsigned int ct1)
> +{
> +	const unsigned int count_sat = 20;
> +	const unsigned int max_update_factor = 128;
> +
> +	return vdec_vp9_slice_merge_prob(pre, ct0, ct1, count_sat, max_update_factor);
> +}
> +
> +/*
> + * Merge probs process defined in 8.4.2.
> + *
> + * Recursively walks @tree from the node pair at @location. An entry <= 0 is
> + * a leaf whose count is counts[-entry]; a positive entry is the location of
> + * a child node pair. The probability of every visited node is written to
> + * @probs[location >> 1], adapted from the same slot of @pre_probs.
> + *
> + * Return: the total count found below this node.
> + */
> +static unsigned int vdec_vp9_slice_merge_probs(const signed char *tree, int location,
> +					       unsigned char *pre_probs, unsigned int *counts,
> +					       unsigned char *probs, unsigned int cs,
> +					       unsigned int uf)
> +{
> +	const int branch[2] = { tree[location], tree[location + 1] };
> +	unsigned int ct[2];
> +	int side;
> +
> +	/* left branch first, then right */
> +	for (side = 0; side < 2; side++) {
> +		if (branch[side] > 0)
> +			ct[side] = vdec_vp9_slice_merge_probs(tree, branch[side],
> +							      pre_probs, counts,
> +							      probs, cs, uf);
> +		else
> +			ct[side] = counts[-branch[side]];
> +	}
> +
> +	/* merge left and right */
> +	probs[location >> 1] =
> +		vdec_vp9_slice_merge_prob(pre_probs[location >> 1],
> +					  ct[0], ct[1], cs, uf);
> +
> +	return ct[0] + ct[1];
> +}
> +
> +/*
> + * Adapt all probabilities of @tree starting at its root, using a count
> + * saturation of 20 and a maximum update factor of 128.
> + */
> +static inline void vdec_vp9_slice_adapt_probs(const signed char *tree,
> +					      unsigned char *pre_probs,
> +					      unsigned int *counts,
> +					      unsigned char *probs)
> +{
> +	const unsigned int count_sat = 20;
> +	const unsigned int max_update_factor = 128;
> +
> +	/* the total count returned for the root is not needed */
> +	(void)vdec_vp9_slice_merge_probs(tree, 0, pre_probs, counts, probs,
> +					 count_sat, max_update_factor);
> +}
> +
> +/*
> + * 8.4 Probability adaptation process
> + *
> + * @vsi:     frame description; selects which tables are adapted
> + * @ctx:     probabilities used for the current frame, updated in place
> + * @pre_ctx: probabilities the adaptation starts from
> + * @counts:  symbol counts collected while decoding the frame
> + *
> + * The coefficient probabilities are always adapted. The non-coefficient
> + * probabilities are adapted only when frame_type is non-zero and the frame
> + * is not intra-only.
> + */
> +static void vdec_vp9_slice_adapt_table(struct vdec_vp9_slice_vsi *vsi,
> +				       struct vdec_vp9_slice_frame_ctx *ctx,
> +				       struct vdec_vp9_slice_frame_ctx *pre_ctx,
> +				       struct vdec_vp9_slice_frame_counts *counts)
> +{
> +	unsigned char *pp;
> +	unsigned char *p;
> +	unsigned int *c;
> +	unsigned int *e;
> +	unsigned int uf;
> +	int t, i, j, k, l;
> +
> +	/*
> +	 * Coefficient update factor: 128 only when this frame has a non-zero
> +	 * frame_type, is not intra-only and last_frame_type is zero;
> +	 * 112 in every other case.
> +	 */
> +	uf = 128;
> +	if (!vsi->frame.uh.frame_type || vsi->frame.uh.intra_only ||
> +	    vsi->frame.uh.last_frame_type)
> +		uf = 112;
> +
> +	/*
> +	 * The coefficient tables are walked with raw pointers: p/pp step
> +	 * through the probabilities, c through the coefficient counts and
> +	 * e through the eob branch counts.
> +	 */
> +	p = (unsigned char *)&ctx->coef_probs;
> +	pp = (unsigned char *)&pre_ctx->coef_probs;
> +	c = (unsigned int *)&counts->coef_probs;
> +	e = (unsigned int *)&counts->eob_branch;
> +
> +	/* 8.4.3 Coefficient probability adaption process */
> +	for (t = 0; t < 16; t++) {
> +		for (((k) = 0); ((k) < 6); ((k)++)) {
> +			/* the first group (k == 0) has 3 entries, the others have 6 */
> +			for (l = 0; l < (k == 0 ? 3 : 6); l++) {
> +				p[0] = vdec_vp9_slice_merge_prob(pp[0], c[3], e[0]
> +								 - c[3], 24, uf);
> +				p[1] = vdec_vp9_slice_merge_prob(pp[1],	c[0], c[1]
> +								 + c[2], 24, uf);
> +				p[2] = vdec_vp9_slice_merge_prob(pp[2], c[1],
> +								 c[2], 24, uf);
> +				p += 3;
> +				pp += 3;
> +				c += 4;
> +				e++;
> +			}
> +			if (k == 0) {
> +				/* 3 * 3 unused values and 2 bytes padding */
> +				p += 11;
> +				pp += 11;
> +				e++;
> +			} else {
> +				/* extra 2 bytes could make 4 bytes align (3 * 6 + 2) */
> +				p += 2;
> +				pp += 2;
> +				/* 5 * 6=30, extra 2 int */
> +				if (k == 5)
> +					e += 2;
> +			}
> +		}
> +	}
> +
> +	/* the remaining tables are left untouched for these frames */
> +	if (!vsi->frame.uh.frame_type || vsi->frame.uh.intra_only)
> +		return;
> +
> +	/* 8.4.4 Non coefficient probability adaption process */
> +
> +	for (i = 0; i < 4; i++) {
> +		ctx->intra_inter_prob[i] =
> +			vdec_vp9_slice_adapt_prob(pre_ctx->intra_inter_prob[i],
> +						  counts->intra_inter[i][0],
> +						  counts->intra_inter[i][1]);
> +	}
> +
> +	for (i = 0; i < 5; i++) {
> +		ctx->comp_inter_prob[i] =
> +			vdec_vp9_slice_adapt_prob(pre_ctx->comp_inter_prob[i],
> +						  counts->comp_inter[i][0],
> +						  counts->comp_inter[i][1]);
> +	}
> +
> +	for (i = 0; i < 5; i++) {
> +		ctx->comp_ref_prob[i] =
> +			vdec_vp9_slice_adapt_prob(pre_ctx->comp_ref_prob[i],
> +						  counts->comp_ref[i][0],
> +						  counts->comp_ref[i][1]);
> +	}
> +
> +	for (i = 0; i < 5; i++) {
> +		for (j = 0; j < 2; j++) {
> +			ctx->single_ref_prob[i][j] =
> +				vdec_vp9_slice_adapt_prob(pre_ctx->single_ref_prob[i][j],
> +							  counts->single_ref[i][j][0],
> +							  counts->single_ref[i][j][1]);
> +		}
> +	}
> +
> +	for (i = 0; i < 7; i++) {
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_inter_mode_tree,
> +					   &pre_ctx->inter_mode_probs[i][0],
> +					   &counts->inter_mode[i][0],
> +					   &ctx->inter_mode_probs[i][0]);
> +	}
> +
> +	for (i = 0; i < 4; i++) {
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_intra_mode_tree,
> +					   &pre_ctx->y_mode_prob[i][0],
> +					   &counts->y_mode[i][0],
> +					   &ctx->y_mode_prob[i][0]);
> +	}
> +
> +	for (i = 0; i < 10; i++) {
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_intra_mode_tree,
> +					   &pre_ctx->uv_mode_prob[i][0],
> +					   &counts->uv_mode[i][0],
> +					   &ctx->uv_mode_prob[i][0]);
> +	}
> +
> +	for (i = 0; i < 16; i++) {
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_partition_tree,
> +					   &pre_ctx->partition_prob[i][0],
> +					   &counts->partition[i][0],
> +					   &ctx->partition_prob[i][0]);
> +	}
> +
> +	/* NOTE(review): 4 presumably means the switchable filter -- confirm */
> +	if (vsi->frame.uh.interpolation_filter == 4) {
> +		for (i = 0; i < 4; i++) {
> +			vdec_vp9_slice_adapt_probs(vdec_vp9_slice_switchable_interp_tree,
> +						   &pre_ctx->switch_interp_prob[i][0],
> +						   &counts->switchable_interp[i][0],
> +						   &ctx->switch_interp_prob[i][0]);
> +		}
> +	}
> +
> +	/* NOTE(review): 4 presumably means TX_MODE_SELECT -- confirm */
> +	if (vsi->frame.ch.tx_mode == 4) {
> +		for (i = 0; i < 2; i++) {
> +			/*
> +			 * Each probability splits "this size" against the sum
> +			 * of the counts of all larger sizes.
> +			 */
> +			ctx->tx_p8x8[i][0] = vdec_vp9_slice_adapt_prob(pre_ctx->tx_p8x8[i][0],
> +								       counts->tx_p8x8[i][0],
> +								       counts->tx_p8x8[i][1]);
> +			ctx->tx_p16x16[i][0] = vdec_vp9_slice_adapt_prob(pre_ctx->tx_p16x16[i][0],
> +									 counts->tx_p16x16[i][0],
> +									 counts->tx_p16x16[i][1] +
> +									 counts->tx_p16x16[i][2]);
> +			ctx->tx_p16x16[i][1] = vdec_vp9_slice_adapt_prob(pre_ctx->tx_p16x16[i][1],
> +									 counts->tx_p16x16[i][1],
> +									 counts->tx_p16x16[i][2]);
> +			ctx->tx_p32x32[i][0] = vdec_vp9_slice_adapt_prob(pre_ctx->tx_p32x32[i][0],
> +									 counts->tx_p32x32[i][0],
> +									 counts->tx_p32x32[i][1] +
> +									 counts->tx_p32x32[i][2] +
> +									 counts->tx_p32x32[i][3]);
> +			ctx->tx_p32x32[i][1] = vdec_vp9_slice_adapt_prob(pre_ctx->tx_p32x32[i][1],
> +									 counts->tx_p32x32[i][1],
> +									 counts->tx_p32x32[i][2] +
> +									 counts->tx_p32x32[i][3]);
> +			ctx->tx_p32x32[i][2] = vdec_vp9_slice_adapt_prob(pre_ctx->tx_p32x32[i][2],
> +									 counts->tx_p32x32[i][2],
> +									 counts->tx_p32x32[i][3]);
> +		}
> +	}
> +
> +	for (i = 0; i < 3; i++) {
> +		ctx->skip_probs[i] = vdec_vp9_slice_adapt_prob(pre_ctx->skip_probs[i],
> +							       counts->skip[i][0],
> +							       counts->skip[i][1]);
> +	}
> +
> +	vdec_vp9_slice_adapt_probs(vdec_vp9_slice_mv_joint_tree,
> +				   &pre_ctx->joint[0],
> +				   &counts->joint[0],
> +				   &ctx->joint[0]);
> +
> +	/* motion vector probabilities, one set per mvcomp[] entry */
> +	for (i = 0; i < 2; i++) {
> +		ctx->sign_classes[i].sign = vdec_vp9_slice_adapt_prob(pre_ctx->sign_classes[i].sign,
> +								      counts->mvcomp[i].sign[0],
> +								      counts->mvcomp[i].sign[1]);
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_mv_class_tree,
> +					   &pre_ctx->sign_classes[i].classes[0],
> +					   &counts->mvcomp[i].classes[0],
> +					   &ctx->sign_classes[i].classes[0]);
> +
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_mv_class0_tree,
> +					   pre_ctx->class0_bits[i].class0,
> +					   counts->mvcomp[i].class0,
> +					   ctx->class0_bits[i].class0);
> +		for (j = 0; j < 10; j++) {
> +			ctx->class0_bits[i].bits[j] =
> +				vdec_vp9_slice_adapt_prob(pre_ctx->class0_bits[i].bits[j],
> +							  counts->mvcomp[i].bits[j][0],
> +							  counts->mvcomp[i].bits[j][1]);
> +		}
> +
> +		for (j = 0; j < 2; ++j) {
> +			vdec_vp9_slice_adapt_probs(vdec_vp9_slice_mv_fp_tree,
> +						   pre_ctx->class0_fp_hp[i].class0_fp[j],
> +						   counts->mvcomp[i].class0_fp[j],
> +						   ctx->class0_fp_hp[i].class0_fp[j]);
> +		}
> +		vdec_vp9_slice_adapt_probs(vdec_vp9_slice_mv_fp_tree,
> +					   pre_ctx->class0_fp_hp[i].fp,
> +					   counts->mvcomp[i].fp,
> +					   ctx->class0_fp_hp[i].fp);
> +		/* high-precision probabilities only when the frame allows them */
> +		if (vsi->frame.uh.allow_high_precision_mv) {
> +			ctx->class0_fp_hp[i].class0_hp =
> +				vdec_vp9_slice_adapt_prob(pre_ctx->class0_fp_hp[i].class0_hp,
> +							  counts->mvcomp[i].class0_hp[0],
> +							  counts->mvcomp[i].class0_hp[1]);
> +			ctx->class0_fp_hp[i].hp =
> +				vdec_vp9_slice_adapt_prob(pre_ctx->class0_fp_hp[i].hp,
> +							  counts->mvcomp[i].hp[0],
> +							  counts->mvcomp[i].hp[1]);
> +		}
> +	}
> +}
> +
> +/*
> + * Save the probabilities of the decoded frame back into the frame context
> + * slot selected by frame_context_idx.
> + *
> + * Nothing happens unless refresh_frame_context is set. When frame parallel
> + * decoding mode is off, the probabilities in instance->prob.va are first
> + * adapted with the counts in instance->counts.va. The slot is then
> + * overwritten and marked dirty.
> + *
> + * Return: always 0.
> + */
> +static int vdec_vp9_slice_update_prob(struct vdec_vp9_slice_instance *instance,
> +				      struct vdec_vp9_slice_vsi *vsi)
> +{
> +	struct vdec_vp9_slice_uncompressed_header *uh = &vsi->frame.uh;
> +	struct vdec_vp9_slice_frame_ctx *saved_ctx;
> +	struct vdec_vp9_slice_frame_ctx *cur_ctx;
> +	struct vdec_vp9_slice_frame_ctx *base_ctx;
> +
> +	if (!uh->refresh_frame_context)
> +		return 0;
> +
> +	saved_ctx = &instance->frame_ctx[uh->frame_context_idx];
> +	cur_ctx = instance->prob.va;
> +
> +	if (!uh->frame_parallel_decoding_mode) {
> +		/* uh->error_resilient_mode must be 0 */
> +
> +		/* a slot that was never written falls back to the defaults */
> +		if (instance->dirty[uh->frame_context_idx])
> +			base_ctx = saved_ctx;
> +		else
> +			base_ctx = vdec_vp9_slice_default_frame_ctx;
> +
> +		vdec_vp9_slice_adapt_table(vsi, cur_ctx, base_ctx,
> +					   instance->counts.va);
> +	}
> +
> +	memcpy(saved_ctx, cur_ctx, sizeof(*cur_ctx));
> +	instance->dirty[uh->frame_context_idx] = 1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Consume the result of a LAT run.
> + *
> + * The LAT state is saved in pfc->state[0]. If the state reports "full", the
> + * frame has to be decoded again. Otherwise the probabilities are updated
> + * and the frame's width, height, frame_type and show_frame are stored in
> + * @instance.
> + *
> + * Return: 0 on success, -ENOMEM when "full" was reported and the trans
> + * range equals the whole ube size, -EAGAIN for any other "full" report.
> + */
> +static int vdec_vp9_slice_update_lat(struct vdec_vp9_slice_instance *instance,
> +				     struct vdec_lat_buf *lat_buf,
> +				     struct vdec_vp9_slice_pfc *pfc)
> +{
> +	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
> +
> +	memcpy(&pfc->state[0], &vsi->state, sizeof(vsi->state));
> +
> +	mtk_vcodec_debug(instance, "Frame %u LAT CRC 0x%08x\n",
> +			 pfc->seq, vsi->state.crc[0]);
> +
> +	/* buffer full, need to re-decode */
> +	if (vsi->state.full) {
> +		/* buffer not enough */
> +		bool ube_too_small = vsi->trans.dma_addr_end - vsi->trans.dma_addr ==
> +				     vsi->ube.size;
> +
> +		return ube_too_small ? -ENOMEM : -EAGAIN;
> +	}
> +
> +	vdec_vp9_slice_update_prob(instance, vsi);
> +
> +	instance->show_frame = vsi->frame.uh.show_frame;
> +	instance->frame_type = vsi->frame.uh.frame_type;
> +	instance->height = vsi->frame.uh.frame_height;
> +	instance->width = vsi->frame.uh.frame_width;
> +
> +	return 0;
> +}
> +
> +/*
> + * Copy the buffer metadata saved in lat_buf->ts_info (timestamp, timecode,
> + * field, flags, copied_timestamp) to the next destination buffer.
> + *
> + * Return: 0 on success, -EINVAL when no destination buffer is available.
> + */
> +static int vdec_vp9_slice_setup_core_to_dst_buf(struct vdec_vp9_slice_instance *instance,
> +						struct vdec_lat_buf *lat_buf)
> +{
> +	struct vb2_v4l2_buffer *from = &lat_buf->ts_info;
> +	struct vb2_v4l2_buffer *to;
> +
> +	to = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
> +	if (!to)
> +		return -EINVAL;
> +
> +	to->vb2_buf.copied_timestamp = from->vb2_buf.copied_timestamp;
> +	to->vb2_buf.timestamp = from->vb2_buf.timestamp;
> +	to->flags = from->flags;
> +	to->field = from->field;
> +	to->timecode = from->timecode;
> +
> +	return 0;
> +}
> +
> +/*
> + * Publish the frame buffer and reference buffer addresses for the core stage.
> + *
> + * @instance: decoder instance; its dpb[] keeps the width/height per buffer
> + * @pfc:      per-frame context holding the reference timestamps (ref_idx)
> + * @vsi:      shared structure that is filled in
> + * @fb:       frame buffer the current frame is decoded into
> + * @lat_buf:  not used by this function
> + *
> + * For a single-plane destination format the chroma address is derived from
> + * the luma address plus the 64-aligned width * height of the current frame;
> + * otherwise the dedicated chroma plane address is used.
> + *
> + * Return: 0 on success, -EINVAL when the capture queue or the current
> + * destination buffer cannot be obtained.
> + */
> +static int vdec_vp9_slice_setup_core_buffer(struct vdec_vp9_slice_instance *instance,
> +					    struct vdec_vp9_slice_pfc *pfc,
> +					    struct vdec_vp9_slice_vsi *vsi,
> +					    struct vdec_fb *fb,
> +					    struct vdec_lat_buf *lat_buf)
> +{
> +	struct vb2_v4l2_buffer *dst;
> +	struct vb2_buffer *vb;
> +	struct vb2_queue *vq;
> +	struct vdec_vp9_slice_reference *ref;
> +	int plane;
> +	int size;
> +	int idx;
> +	int w;
> +	int h;
> +	int i;
> +
> +	plane = instance->ctx->q_data[MTK_Q_DATA_DST].fmt->num_planes;
> +	w = vsi->frame.uh.frame_width;
> +	h = vsi->frame.uh.frame_height;
> +	size = ALIGN(w, 64) * ALIGN(h, 64);
> +
> +	/* frame buffer */
> +	vsi->fb.y.dma_addr = fb->base_y.dma_addr;
> +	if (plane == 1)
> +		vsi->fb.c.dma_addr = fb->base_y.dma_addr + size;
> +	else
> +		vsi->fb.c.dma_addr = fb->base_c.dma_addr;
> +
> +	/* reference buffers */
> +	vq = v4l2_m2m_get_vq(instance->ctx->m2m_ctx,
> +			     V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE);
> +	if (!vq)
> +		return -EINVAL;
> +
> +	/*
> +	 * get current output buffer
> +	 *
> +	 * Check the returned buffer itself before taking the address of its
> +	 * vb2_buf member, so the NULL test does not depend on that member
> +	 * being located at offset 0.
> +	 */
> +	dst = v4l2_m2m_next_dst_buf(instance->ctx->m2m_ctx);
> +	if (!dst)
> +		return -EINVAL;
> +	vb = &dst->vb2_buf;
> +
> +	/* update internal buffer's width/height */
> +	for (i = 0; i < vq->num_buffers; i++) {
> +		if (vb == vq->bufs[i]) {
> +			instance->dpb[i].width = w;
> +			instance->dpb[i].height = h;
> +			break;
> +		}
> +	}
> +
> +	/*
> +	 * get buffer's width/height from instance
> +	 * get buffer address from vb2buf
> +	 */
> +	for (i = 0; i < 3; i++) {
> +		ref = &vsi->frame.ref[i];
> +		idx = vb2_find_timestamp(vq, pfc->ref_idx[i], 0);
> +		if (idx < 0) {
> +			/* unknown reference: current frame size, zeroed addresses */
> +			ref->frame_width = w;
> +			ref->frame_height = h;
> +			memset(&vsi->ref[i], 0, sizeof(vsi->ref[i]));
> +		} else {
> +			ref->frame_width = instance->dpb[idx].width;
> +			ref->frame_height = instance->dpb[idx].height;
> +			vb = vq->bufs[idx];
> +			vsi->ref[i].y.dma_addr =
> +				vb2_dma_contig_plane_dma_addr(vb, 0);
> +			if (plane == 1)
> +				vsi->ref[i].c.dma_addr =
> +					vsi->ref[i].y.dma_addr + size;
> +			else
> +				vsi->ref[i].c.dma_addr =
> +					vb2_dma_contig_plane_dma_addr(vb, 1);
> +		}
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Prepare everything the core stage needs for one frame: clear the state,
> + * propagate the buffer metadata to the destination buffer, publish the
> + * frame/reference buffer addresses and set up the second segment buffer.
> + *
> + * Return: 0 on success, otherwise the error code of the failing step.
> + */
> +static int vdec_vp9_slice_setup_core(struct vdec_vp9_slice_instance *instance,
> +				     struct vdec_fb *fb,
> +				     struct vdec_lat_buf *lat_buf,
> +				     struct vdec_vp9_slice_pfc *pfc)
> +{
> +	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
> +	int err;
> +
> +	vdec_vp9_slice_setup_state(vsi);
> +
> +	err = vdec_vp9_slice_setup_core_to_dst_buf(instance, lat_buf);
> +	if (err)
> +		return err;
> +
> +	err = vdec_vp9_slice_setup_core_buffer(instance, pfc, vsi, fb, lat_buf);
> +	if (err)
> +		return err;
> +
> +	vdec_vp9_slice_setup_seg_buffer(instance, vsi, &instance->seg[1]);
> +
> +	return 0;
> +}
> +
> +/*
> + * Consume the result of a core run: save the core state in pfc->state[1]
> + * and log the eight CRC words it contains.
> + *
> + * Return: always 0.
> + */
> +static int vdec_vp9_slice_update_core(struct vdec_vp9_slice_instance *instance,
> +				      struct vdec_lat_buf *lat_buf,
> +				      struct vdec_vp9_slice_pfc *pfc)
> +{
> +	struct vdec_vp9_slice_vsi *vsi = &pfc->vsi;
> +
> +	memcpy(&pfc->state[1], &vsi->state, sizeof(vsi->state));
> +
> +	/* crc[0..3] are logged as Y, crc[4..7] as C */
> +	mtk_vcodec_debug(instance, "Frame %u Y_CRC %08x %08x %08x %08x\n",
> +			 pfc->seq,
> +			 vsi->state.crc[0], vsi->state.crc[1],
> +			 vsi->state.crc[2], vsi->state.crc[3]);
> +	mtk_vcodec_debug(instance, "Frame %u C_CRC %08x %08x %08x %08x\n",
> +			 pfc->seq,
> +			 vsi->state.crc[4], vsi->state.crc[5],
> +			 vsi->state.crc[6], vsi->state.crc[7]);
> +
> +	return 0;
> +}
> +
> +/*
> + * Create a VP9 stateless decoder instance for @ctx.
> + *
> + * Allocates the instance, initialises the VPU decoder, resolves the init
> + * and core vsi pointers, sets up the default frame context and finally
> + * stores the instance in ctx->drv_handle.
> + *
> + * Return: 0 on success; -ENOMEM when the instance cannot be allocated,
> + * -EINVAL when a vsi pointer is missing, or the error reported by
> + * vpu_dec_init() / vdec_vp9_slice_init_default_frame_ctx().
> + */
> +static int vdec_vp9_slice_init(struct mtk_vcodec_ctx *ctx)
> +{
> +	struct vdec_vp9_slice_instance *instance;
> +	struct vdec_vp9_slice_init_vsi *init_vsi;
> +	int err;
> +
> +	instance = kzalloc(sizeof(*instance), GFP_KERNEL);
> +	if (!instance)
> +		return -ENOMEM;
> +
> +	instance->ctx = ctx;
> +	instance->vpu.ctx = ctx;
> +	instance->vpu.codec_type = ctx->current_codec;
> +	instance->vpu.id = SCP_IPI_VDEC_LAT;
> +	instance->vpu.core_id = SCP_IPI_VDEC_CORE;
> +
> +	err = vpu_dec_init(&instance->vpu);
> +	if (err) {
> +		mtk_vcodec_err(instance, "failed to init vpu dec, ret %d\n", err);
> +		goto err_free_instance;
> +	}
> +
> +	/* init vsi and global flags */
> +	init_vsi = instance->vpu.vsi;
> +	if (!init_vsi) {
> +		mtk_vcodec_err(instance, "failed to get VP9 vsi\n");
> +		err = -EINVAL;
> +		goto err_deinit_vpu;
> +	}
> +	instance->init_vsi = init_vsi;
> +
> +	instance->core_vsi = mtk_vcodec_fw_map_dm_addr(ctx->dev->fw_handler,
> +						       (u32)init_vsi->core_vsi);
> +	if (!instance->core_vsi) {
> +		mtk_vcodec_err(instance, "failed to get VP9 core vsi\n");
> +		err = -EINVAL;
> +		goto err_deinit_vpu;
> +	}
> +
> +	instance->irq = 1;
> +
> +	err = vdec_vp9_slice_init_default_frame_ctx(instance);
> +	if (err)
> +		goto err_deinit_vpu;
> +
> +	ctx->drv_handle = instance;
> +
> +	return 0;
> +
> +err_deinit_vpu:
> +	vpu_dec_deinit(&instance->vpu);
> +err_free_instance:
> +	kfree(instance);
> +	return err;
> +}
> +
> +/*
> + * Destroy the decoder instance @h_vdec: shut down the VPU decoder, release
> + * the working buffers and the message queue, then free the instance.
> + * A NULL handle is ignored.
> + */
> +static void vdec_vp9_slice_deinit(void *h_vdec)
> +{
> +	struct vdec_vp9_slice_instance *instance = h_vdec;
> +
> +	if (instance) {
> +		vpu_dec_deinit(&instance->vpu);
> +		vdec_vp9_slice_free_working_buffer(instance);
> +		vdec_msg_queue_deinit(&instance->ctx->msg_queue, instance->ctx);
> +		kfree(instance);
> +	}
> +}
> +
> +/*
> + * Flush the decoder: wait on the message queue via
> + * vdec_msg_queue_wait_lat_buf_full(), then reset the VPU decoder.
> + * @bs, @fb and @res_chg are not used.
> + *
> + * Return: the result of vpu_dec_reset().
> + */
> +static int vdec_vp9_slice_flush(void *h_vdec, struct mtk_vcodec_mem *bs,
> +				struct vdec_fb *fb, bool *res_chg)
> +{
> +	struct vdec_vp9_slice_instance *instance = h_vdec;
> +	int ret;
> +
> +	mtk_vcodec_debug(instance, "flush ...\n");
> +
> +	vdec_msg_queue_wait_lat_buf_full(&instance->ctx->msg_queue);
> +	ret = vpu_dec_reset(&instance->vpu);
> +
> +	return ret;
> +}
> +
> +/*
> + * Refresh the picture information of the context.
> + *
> + * Passes the picture width, height and capture fourcc to
> + * vpu_dec_get_param(), then stores the 64-aligned buffer dimensions and the
> + * frame buffer sizes found in instance->vpu in ctx->picinfo.
> + */
> +static void vdec_vp9_slice_get_pic_info(struct vdec_vp9_slice_instance *instance)
> +{
> +	struct mtk_vcodec_ctx *ctx = instance->ctx;
> +	unsigned int data[3];
> +
> +	mtk_vcodec_debug(instance, "w %u h %u\n",
> +			 ctx->picinfo.pic_w, ctx->picinfo.pic_h);
> +
> +	data[2] = ctx->capture_fourcc;
> +	data[1] = ctx->picinfo.pic_h;
> +	data[0] = ctx->picinfo.pic_w;
> +	vpu_dec_get_param(&instance->vpu, data, 3, GET_PARAM_PIC_INFO);
> +
> +	ctx->picinfo.fb_sz[0] = instance->vpu.fb_sz[0];
> +	ctx->picinfo.fb_sz[1] = instance->vpu.fb_sz[1];
> +	ctx->picinfo.buf_w = ALIGN(ctx->picinfo.pic_w, 64);
> +	ctx->picinfo.buf_h = ALIGN(ctx->picinfo.pic_h, 64);
> +}
> +
> +/* Report the DPB size through @dpb_sz: a fixed value of 9. */
> +static void vdec_vp9_slice_get_dpb_size(struct vdec_vp9_slice_instance *instance,
> +					unsigned int *dpb_sz)
> +{
> +	/* refer VP9 specification */
> +	*dpb_sz = 9;
> +}
> +
> +/*
> + * Report the crop rectangle through @cr: it starts at the origin and spans
> + * the picture width and height stored in the context.
> + */
> +static void vdec_vp9_slice_get_crop_info(struct vdec_vp9_slice_instance *instance,
> +					 struct v4l2_rect *cr)
> +{
> +	struct mtk_vcodec_ctx *ctx = instance->ctx;
> +
> +	cr->height = ctx->picinfo.pic_h;
> +	cr->width = ctx->picinfo.pic_w;
> +	cr->top = 0;
> +	cr->left = 0;
> +
> +	mtk_vcodec_debug(instance, "l=%d, t=%d, w=%d, h=%d\n",
> +			 cr->left, cr->top, cr->width, cr->height);
> +}
> +
> +/*
> + * Query a decoder parameter.
> + *
> + * @h_vdec: decoder instance
> + * @type:   parameter to query
> + * @out:    output location; passed on for GET_PARAM_DPB_SIZE and
> + *          GET_PARAM_CROP_INFO, not used for GET_PARAM_PIC_INFO
> + *
> + * Return: 0 on success, -EINVAL for an unsupported @type.
> + */
> +static int vdec_vp9_slice_get_param(void *h_vdec, enum vdec_get_param_type type, void *out)
> +{
> +	struct vdec_vp9_slice_instance *instance = h_vdec;
> +	int ret = 0;
> +
> +	switch (type) {
> +	case GET_PARAM_PIC_INFO:
> +		vdec_vp9_slice_get_pic_info(instance);
> +		break;
> +	case GET_PARAM_DPB_SIZE:
> +		vdec_vp9_slice_get_dpb_size(instance, out);
> +		break;
> +	case GET_PARAM_CROP_INFO:
> +		vdec_vp9_slice_get_crop_info(instance, out);
> +		break;
> +	default:
> +		mtk_vcodec_err(instance, "invalid get parameter type=%d\n",
> +			       type);
> +		ret = -EINVAL;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +/*
> + * Run the LAT stage for one bitstream buffer and hand the result over to
> + * the core stage.
> + *
> + * @h_vdec:  decoder instance
> + * @bs:      bitstream buffer; NULL requests a flush
> + * @fb:      only forwarded to the flush handler
> + * @res_chg: only forwarded to the flush handler
> + *
> + * A LAT buffer is taken from the LAT queue, filled, decoded by the firmware
> + * and, on success, queued to the core context. On every failure after the
> + * buffer was taken it is put back on the LAT queue, so that a failed frame
> + * does not permanently remove a buffer from circulation.
> + *
> + * Return: 0 on success; -EINVAL for a missing instance/context or missing
> + * private data, -ENOMEM when the message queue cannot be initialised,
> + * -EBUSY when no LAT buffer is available, otherwise the error of the
> + * failing setup/start/update step.
> + */
> +static int vdec_vp9_slice_lat_decode(void *h_vdec, struct mtk_vcodec_mem *bs,
> +				     struct vdec_fb *fb, bool *res_chg)
> +{
> +	struct vdec_vp9_slice_instance *instance = h_vdec;
> +	struct vdec_lat_buf *lat_buf;
> +	struct vdec_vp9_slice_pfc *pfc;
> +	struct vdec_vp9_slice_vsi *vsi;
> +	struct mtk_vcodec_ctx *ctx;
> +	int ret;
> +
> +	if (!instance || !instance->ctx)
> +		return -EINVAL;
> +	ctx = instance->ctx;
> +
> +	/* init msgQ for the first time */
> +	if (vdec_msg_queue_init(&ctx->msg_queue, ctx,
> +				vdec_vp9_slice_core_decode,
> +				sizeof(*pfc)))
> +		return -ENOMEM;
> +
> +	/* bs NULL means flush decoder */
> +	if (!bs)
> +		return vdec_vp9_slice_flush(h_vdec, bs, fb, res_chg);
> +
> +	lat_buf = vdec_msg_queue_dqbuf(&instance->ctx->msg_queue.lat_ctx);
> +	if (!lat_buf) {
> +		mtk_vcodec_err(instance, "Failed to get VP9 lat buf\n");
> +		return -EBUSY;
> +	}
> +	pfc = (struct vdec_vp9_slice_pfc *)lat_buf->private_data;
> +	if (!pfc) {
> +		ret = -EINVAL;
> +		goto err_requeue_lat_buf;
> +	}
> +	vsi = &pfc->vsi;
> +
> +	ret = vdec_vp9_slice_setup_lat(instance, bs, lat_buf, pfc);
> +	if (ret) {
> +		mtk_vcodec_err(instance, "Failed to setup VP9 lat ret %d\n", ret);
> +		goto err_requeue_lat_buf;
> +	}
> +	vdec_vp9_slice_vsi_to_remote(vsi, instance->vsi);
> +
> +	ret = vpu_dec_start(&instance->vpu, 0, 0);
> +	if (ret) {
> +		mtk_vcodec_err(instance, "Failed to dec VP9 ret %d\n", ret);
> +		goto err_requeue_lat_buf;
> +	}
> +
> +	if (instance->irq) {
> +		ret = mtk_vcodec_wait_for_done_ctx(ctx,	MTK_INST_IRQ_RECEIVED,
> +						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_LAT0);
> +		/* update remote vsi if decode timeout */
> +		if (ret) {
> +			mtk_vcodec_err(instance, "VP9 decode timeout %d\n", ret);
> +			writel(1, &instance->vsi->state.timeout);
> +		}
> +		vpu_dec_end(&instance->vpu);
> +	}
> +
> +	vdec_vp9_slice_vsi_from_remote(vsi, instance->vsi, 0);
> +	ret = vdec_vp9_slice_update_lat(instance, lat_buf, pfc);
> +
> +	/* LAT trans full, no more UBE or decode timeout */
> +	if (ret) {
> +		mtk_vcodec_err(instance, "VP9 decode error: %d\n", ret);
> +		goto err_requeue_lat_buf;
> +	}
> +
> +	mtk_vcodec_debug(instance, "lat dma 1 0x%llx 0x%llx\n",
> +			 pfc->vsi.trans.dma_addr, pfc->vsi.trans.dma_addr_end);
> +
> +	vdec_msg_queue_update_ube_wptr(&ctx->msg_queue,
> +				       vsi->trans.dma_addr_end +
> +				       ctx->msg_queue.wdma_addr.dma_addr);
> +	vdec_msg_queue_qbuf(&ctx->dev->msg_queue_core_ctx, lat_buf);
> +
> +	return 0;
> +
> +err_requeue_lat_buf:
> +	/* give the unused LAT buffer back so later frames can take it */
> +	vdec_msg_queue_qbuf(&ctx->msg_queue.lat_ctx, lat_buf);
> +	return ret;
> +}
> +
> +/*
> + * Run the core stage for the frame described by @lat_buf.
> + *
> + * Takes a capture buffer, sets up the core vsi, lets the firmware decode
> + * the frame, then updates the UBE read pointer and hands the capture buffer
> + * on via cap_to_disp().
> + *
> + * On failure the read pointer is still updated and a capture buffer that
> + * was already taken is handed on flagged as an error, provided the context
> + * and the per-frame data are both known.
> + *
> + * Return: 0 on success; -EINVAL for missing buffer/context/instance data,
> + * -EBUSY when no capture buffer is available, otherwise the error of the
> + * failing setup/decode/update step.
> + */
> +static int vdec_vp9_slice_core_decode(struct vdec_lat_buf *lat_buf)
> +{
> +	struct vdec_vp9_slice_instance *instance;
> +	struct vdec_vp9_slice_pfc *pfc = NULL;
> +	struct mtk_vcodec_ctx *ctx = NULL;
> +	struct vdec_fb *fb = NULL;
> +	int ret = -EINVAL;
> +
> +	if (!lat_buf)
> +		goto err;
> +
> +	pfc = lat_buf->private_data;
> +	ctx = lat_buf->ctx;
> +	if (!pfc || !ctx)
> +		goto err;
> +
> +	instance = ctx->drv_handle;
> +	if (!instance)
> +		goto err;
> +
> +	fb = ctx->dev->vdec_pdata->get_cap_buffer(ctx);
> +	if (!fb) {
> +		ret = -EBUSY;
> +		goto err;
> +	}
> +
> +	ret = vdec_vp9_slice_setup_core(instance, fb, lat_buf, pfc);
> +	if (ret) {
> +		mtk_vcodec_err(instance, "vdec_vp9_slice_setup_core\n");
> +		goto err;
> +	}
> +	vdec_vp9_slice_vsi_to_remote(&pfc->vsi, instance->core_vsi);
> +
> +	ret = vpu_dec_core(&instance->vpu);
> +	if (ret) {
> +		mtk_vcodec_err(instance, "vpu_dec_core\n");
> +		goto err;
> +	}
> +
> +	if (instance->irq) {
> +		ret = mtk_vcodec_wait_for_done_ctx(ctx, MTK_INST_IRQ_RECEIVED,
> +						   WAIT_INTR_TIMEOUT_MS, MTK_VDEC_CORE);
> +		/* update remote vsi if decode timeout */
> +		if (ret) {
> +			mtk_vcodec_err(instance, "VP9 core timeout\n");
> +			writel(1, &instance->core_vsi->state.timeout);
> +		}
> +		vpu_dec_core_end(&instance->vpu);
> +	}
> +
> +	vdec_vp9_slice_vsi_from_remote(&pfc->vsi, instance->core_vsi, 1);
> +	ret = vdec_vp9_slice_update_core(instance, lat_buf, pfc);
> +	if (ret) {
> +		mtk_vcodec_err(instance, "vdec_vp9_slice_update_core\n");
> +		goto err;
> +	}
> +
> +	pfc->vsi.trans.dma_addr_end += ctx->msg_queue.wdma_addr.dma_addr;
> +	mtk_vcodec_debug(instance, "core dma_addr_end 0x%llx\n", pfc->vsi.trans.dma_addr_end);
> +	vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
> +	ctx->dev->vdec_pdata->cap_to_disp(ctx, fb, 0);
> +
> +	return 0;
> +
> +err:
> +	/*
> +	 * pfc can be NULL here while ctx is valid (missing private data),
> +	 * so both must be checked before pfc is dereferenced.
> +	 */
> +	if (ctx && pfc) {
> +		/* always update read pointer */
> +		vdec_msg_queue_update_ube_rptr(&ctx->msg_queue, pfc->vsi.trans.dma_addr_end);
> +
> +		if (fb)
> +			ctx->dev->vdec_pdata->cap_to_disp(ctx, fb, 1);
> +	}
> +	return ret;
> +}
> +
> +/* Decoder interface of the VP9 stateless decoder (LAT + core). */
> +const struct vdec_common_if vdec_vp9_slice_lat_if = {
> +	.init = vdec_vp9_slice_init,
> +	.deinit = vdec_vp9_slice_deinit,
> +	.decode = vdec_vp9_slice_lat_decode,
> +	.get_param = vdec_vp9_slice_get_param,
> +};
> diff --git a/drivers/media/platform/mtk-vcodec/vdec_drv_if.c b/drivers/media/platform/mtk-vcodec/vdec_drv_if.c
> index 9db9a57da2c1..2d3a45781359 100644
> --- a/drivers/media/platform/mtk-vcodec/vdec_drv_if.c
> +++ b/drivers/media/platform/mtk-vcodec/vdec_drv_if.c
> @@ -44,6 +44,10 @@ int vdec_if_init(struct mtk_vcodec_ctx *ctx, unsigned int fourcc)
>  		ctx->dec_if = &vdec_vp9_if;
>  		ctx->hw_id = MTK_VDEC_CORE;
>  		break;
> +	case V4L2_PIX_FMT_VP9_FRAME:
> +		ctx->dec_if = &vdec_vp9_slice_lat_if;
> +		ctx->hw_id = MTK_VDEC_LAT0;
> +		break;
>  	default:
>  		return -EINVAL;
>  	}
> diff --git a/drivers/media/platform/mtk-vcodec/vdec_drv_if.h b/drivers/media/platform/mtk-vcodec/vdec_drv_if.h
> index e3adf8f36342..e383a04db7b8 100644
> --- a/drivers/media/platform/mtk-vcodec/vdec_drv_if.h
> +++ b/drivers/media/platform/mtk-vcodec/vdec_drv_if.h
> @@ -60,6 +60,7 @@ extern const struct vdec_common_if vdec_h264_slice_lat_if;
>  extern const struct vdec_common_if vdec_vp8_if;
>  extern const struct vdec_common_if vdec_vp8_slice_if;
>  extern const struct vdec_common_if vdec_vp9_if;
> +extern const struct vdec_common_if vdec_vp9_slice_lat_if;
>  
>  /**
>   * vdec_if_init() - initialize decode driver



More information about the dri-devel mailing list