[Mesa-dev] [PATCH v5 2/4] nvc0: add support for st/va
Ilia Mirkin
imirkin at alum.mit.edu
Wed Dec 16 20:13:42 PST 2015
On Wed, Dec 16, 2015 at 9:39 AM, Julien Isorce <j.isorce at samsung.com> wrote:
> - split nvc0_decoder_bsp in begin/next/end
> - preserve content buffer when calling nvc0_decoder_bsp_next
> - implement pipe_video_codec::begin_frame/end_frame
>
> https://bugs.freedesktop.org/show_bug.cgi?id=89969
>
> Signed-off-by: Julien Isorce <j.isorce at samsung.com>
> ---
> src/gallium/drivers/nouveau/nouveau_vp3_video.h | 3 +
> .../drivers/nouveau/nouveau_vp3_video_bsp.c | 2 +
> src/gallium/drivers/nouveau/nvc0/nvc0_video.c | 44 ++++++-
> src/gallium/drivers/nouveau/nvc0/nvc0_video.h | 18 ++-
> src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c | 138 ++++++++++++++-------
> 5 files changed, 150 insertions(+), 55 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video.h b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
> index 9db8c63..1e10693 100644
> --- a/src/gallium/drivers/nouveau/nouveau_vp3_video.h
> +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video.h
> @@ -118,6 +118,9 @@ struct nouveau_vp3_decoder {
> /* End of the bsp bo where new data should be appended between one begin/end
> * frame. */
> char *bsp_ptr;
> +
> + /* Total data appended so far after last begin frame. */
> + unsigned bsp_size;
> };
>
> struct comm {
> diff --git a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
> index 2c15955..aab8e25 100644
> --- a/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
> +++ b/src/gallium/drivers/nouveau/nouveau_vp3_video_bsp.c
> @@ -238,6 +238,7 @@ nouveau_vp3_bsp_begin(struct nouveau_vp3_decoder *dec)
> struct strparm_bsp *str_bsp = NULL;
>
> dec->bsp_ptr = bsp_bo->map;
> + dec->bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
>
> dec->bsp_ptr += 0x100;
>
> @@ -329,6 +330,7 @@ nouveau_vp3_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc)
> *(uint32_t *)dec->bsp_ptr = 0x00000000;
>
> dec->bsp_ptr = NULL;
> + dec->bsp_size = 0;
>
> return caps;
> }
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
> index 48ffac1..e28016a 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
> @@ -26,6 +26,24 @@
> #include "util/u_format.h"
>
> static void
> +nvc0_decoder_begin_frame(struct pipe_video_codec *decoder,
> + struct pipe_video_buffer *target,
> + struct pipe_picture_desc *picture)
> +{
> + struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
> + uint32_t comm_seq = ++dec->fence_seq;
> + unsigned ret = 0;
> +
> + assert(dec);
> + assert(target);
> + assert(target->buffer_format == PIPE_FORMAT_NV12);
> +
> + ret = nvc0_decoder_bsp_begin(dec, comm_seq);
> +
> + assert(ret == 2);
> +}
> +
> +static void
> nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
> struct pipe_video_buffer *video_target,
> struct pipe_picture_desc *picture,
> @@ -34,8 +52,24 @@ nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
> const unsigned *num_bytes)
> {
> struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
> + uint32_t comm_seq = dec->fence_seq;
> + unsigned ret = 0;
> +
> + assert(decoder);
> +
> + ret = nvc0_decoder_bsp_next(dec, comm_seq, num_buffers, data, num_bytes);
> +
> + assert(ret == 2);
> +}
> +
> +static void
> +nvc0_decoder_end_frame(struct pipe_video_codec *decoder,
> + struct pipe_video_buffer *video_target,
> + struct pipe_picture_desc *picture)
> +{
> + struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
> struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
> - uint32_t comm_seq = ++dec->fence_seq;
> + uint32_t comm_seq = dec->fence_seq;
> union pipe_desc desc;
>
> unsigned vp_caps, is_ref, ret;
> @@ -43,11 +77,7 @@ nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
>
> desc.base = picture;
>
> - assert(target->base.buffer_format == PIPE_FORMAT_NV12);
> -
> - ret = nvc0_decoder_bsp(dec, desc, target, comm_seq,
> - num_buffers, data, num_bytes,
> - &vp_caps, &is_ref, refs);
> + ret = nvc0_decoder_bsp_end(dec, desc, target, comm_seq, &vp_caps, &is_ref, refs);
>
> /* did we decode bitstream correctly? */
> assert(ret == 2);
> @@ -164,7 +194,9 @@ nvc0_create_decoder(struct pipe_context *context,
> PUSH_DATA (push[2], dec->ppp->handle);
>
> dec->base.context = context;
> + dec->base.begin_frame = nvc0_decoder_begin_frame;
> dec->base.decode_bitstream = nvc0_decoder_decode_bitstream;
> + dec->base.end_frame = nvc0_decoder_end_frame;
>
> for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
> ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
> index 9ee0280..cf3c942 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
> @@ -30,12 +30,18 @@
> #include "util/u_video.h"
>
> extern unsigned
> -nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> - struct nouveau_vp3_video_buffer *target,
> - unsigned comm_seq, unsigned num_buffers,
> - const void *const *data, const unsigned *num_bytes,
> - unsigned *vp_caps, unsigned *is_ref,
> - struct nouveau_vp3_video_buffer *refs[16]);
> +nvc0_decoder_bsp_begin(struct nouveau_vp3_decoder *dec, unsigned comm_seq);
> +
> +extern unsigned
> +nvc0_decoder_bsp_next(struct nouveau_vp3_decoder *dec,
> + unsigned comm_seq, unsigned num_buffers,
> + const void *const *data, const unsigned *num_bytes);
> +
> +extern unsigned
> +nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> + struct nouveau_vp3_video_buffer *target,
> + unsigned comm_seq, unsigned *vp_caps, unsigned *is_ref,
> + struct nouveau_vp3_video_buffer *refs[16]);
>
> extern void
> nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
> index b0ee4a4..04528d8 100644
> --- a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
> +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
> @@ -32,40 +32,34 @@ static void dump_comm_bsp(struct comm *comm)
> #endif
>
> unsigned
> -nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> - struct nouveau_vp3_video_buffer *target,
> - unsigned comm_seq, unsigned num_buffers,
> - const void *const *data, const unsigned *num_bytes,
> - unsigned *vp_caps, unsigned *is_ref,
> - struct nouveau_vp3_video_buffer *refs[16])
> +nvc0_decoder_bsp_begin(struct nouveau_vp3_decoder *dec, unsigned comm_seq)
> {
> - struct nouveau_pushbuf *push = dec->pushbuf[0];
> - enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
> - uint32_t bsp_addr, comm_addr, inter_addr;
> - uint32_t slice_size, bucket_size, ring_size, bsp_size;
> - uint32_t caps, i;
> - int ret;
> struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
> - struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
> - unsigned fence_extra = 0;
> - struct nouveau_pushbuf_refn bo_refs[] = {
> - { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
> - { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
> -#if NOUVEAU_VP3_DEBUG_FENCE
> - { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
> -#endif
> - { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
> - };
> - int num_refs = ARRAY_SIZE(bo_refs);
> + unsigned ret = 0;
>
> - if (!dec->bitplane_bo)
> - num_refs--;
> + ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
> + if (ret) {
> + debug_printf("map failed: %i %s\n", ret, strerror(-ret));
> + return -1;
> + }
>
> -#if NOUVEAU_VP3_DEBUG_FENCE
> - fence_extra = 4;
> -#endif
> + nouveau_vp3_bsp_begin(dec);
> +
> + return 2;
> +}
> +
> +unsigned
> +nvc0_decoder_bsp_next(struct nouveau_vp3_decoder *dec,
> + unsigned comm_seq, unsigned num_buffers,
> + const void *const *data, const unsigned *num_bytes)
> +{
> + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
> + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
> + uint32_t bsp_size = 0;
> + uint32_t i = 0;
> + unsigned ret = 0;
>
> - bsp_size = NOUVEAU_VP3_BSP_RESERVED_SIZE;
> + bsp_size = dec->bsp_size;
> for (i = 0; i < num_buffers; i++)
> bsp_size += num_bytes[i];
> bsp_size += 256; /* the 4 end markers */
> @@ -73,22 +67,36 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> if (!bsp_bo || bsp_size > bsp_bo->size) {
> union nouveau_bo_config cfg;
> struct nouveau_bo *tmp_bo = NULL;
> + uint32_t bsp_new_size = bsp_size;
Err, why not just use bsp_size as before. And not move it around the
cfg.* setting, to reduce churn?
> +
> + /* round up to the nearest mb */
> + bsp_new_size += (1 << 20) - 1;
> + bsp_new_size &= ~((1 << 20) - 1);
>
> cfg.nvc0.tile_mode = 0x10;
> cfg.nvc0.memtype = 0xfe;
>
> - /* round up to the nearest mb */
> - bsp_size += (1 << 20) - 1;
> - bsp_size &= ~((1 << 20) - 1);
> + ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_new_size, &cfg, &tmp_bo);
> + if (ret) {
> + debug_printf("resizing bsp %u -> %u failed with %i\n",
There's no resizing... the previous message was more accurate ("reallocating").
> + bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_new_size, ret);
> + return -1;
> + }
>
> - ret = nouveau_bo_new(dec->bitplane_bo->device, NOUVEAU_BO_VRAM, 0, bsp_size, &cfg, &tmp_bo);
> + ret = nouveau_bo_map(tmp_bo, NOUVEAU_BO_WR, dec->client);
> if (ret) {
> - debug_printf("reallocating bsp %u -> %u failed with %i\n",
> - bsp_bo ? (unsigned)bsp_bo->size : 0, bsp_size, ret);
> + debug_printf("map failed: %i %s\n", ret, strerror(-ret));
> return -1;
> }
> +
> + /* Preserve previous buffer. */
> + memcpy(tmp_bo->map, bsp_bo->map, bsp_bo->size);
> +
> nouveau_bo_ref(NULL, &bsp_bo);
> - bo_refs[0].bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;
> + dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH] = bsp_bo = tmp_bo;
> +
> + /* update position to current chunk */
> + dec->bsp_ptr = bsp_bo->map + dec->bsp_size;
Instead of keeping the bsp_size around (which I'm moderately sure
you're not doing *quite* right), you can just do (before freeing the
old bsp bo):
dec->bsp_ptr = tmp_bo->map + (dec->bsp_ptr - bsp_bo->map);
That will avoid the double bookkeeping between dec->bsp_ptr and dec->bsp_size.
> }
>
> if (!inter_bo || bsp_bo->size * 4 > inter_bo->size) {
> @@ -104,18 +112,62 @@ nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> inter_bo ? (unsigned)inter_bo->size : 0, (unsigned)bsp_bo->size * 4, ret);
> return -1;
> }
> +
> + ret = nouveau_bo_map(tmp_bo, NOUVEAU_BO_WR, dec->client);
> + if (ret) {
> + debug_printf("map failed: %i %s\n", ret, strerror(-ret));
> + return -1;
> + }
> +
> + /* Preserve previous buffer. */
> + memcpy(tmp_bo->map, inter_bo->map, bsp_bo->size * 4);
Isn't bsp_bo->size * 4 the new size? You want to copy over the old
size's worth of data. But AFAIK, the contents can be uninitialized for
the inter_bo... I don't think this is necessary in the first place.
> +
> nouveau_bo_ref(NULL, &inter_bo);
> - bo_refs[1].bo = dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;
> + dec->inter_bo[comm_seq & 1] = inter_bo = tmp_bo;
> }
>
> - ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
> - if (ret) {
> - debug_printf("map failed: %i %s\n", ret, strerror(-ret));
> - return -1;
> - }
> + dec->bsp_size = bsp_size - 256;
>
> - nouveau_vp3_bsp_begin(dec);
> nouveau_vp3_bsp_next(dec, num_buffers, data, num_bytes);
> +
> + return 2;
> +}
> +
> +
> +unsigned
> +nvc0_decoder_bsp_end(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
> + struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
> + unsigned *vp_caps, unsigned *is_ref,
> + struct nouveau_vp3_video_buffer *refs[16])
> +{
> + struct nouveau_pushbuf *push = dec->pushbuf[0];
> + enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
> + uint32_t bsp_addr, comm_addr, inter_addr;
> + uint32_t slice_size, bucket_size, ring_size;
> + uint32_t caps = 0;
> + struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
> + struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
> + unsigned fence_extra = 0;
> + struct nouveau_pushbuf_refn bo_refs[] = {
> + { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
> + { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
> +#if NOUVEAU_VP3_DEBUG_FENCE
> + { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
> +#endif
> + { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
> + };
> + int num_refs = ARRAY_SIZE(bo_refs);
> +
> + if (!dec->bitplane_bo)
> + num_refs--;
> +
> +#if NOUVEAU_VP3_DEBUG_FENCE
> + fence_extra = 4;
> +#endif
> +
> + bo_refs[0].bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
> + bo_refs[1].bo = dec->inter_bo[comm_seq & 1];
I don't get it... what's the point of these?
> +
> caps = nouveau_vp3_bsp_end(dec, desc);
>
> nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
> --
> 1.9.1
>
More information about the mesa-dev
mailing list