Mesa (master): radeon/vcn: implement dynamic dpb Tier2 support

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Feb 9 14:47:48 UTC 2021


Module: Mesa
Branch: master
Commit: c7a481872ef4cac04e28cc954e00d07073fdbfb3
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=c7a481872ef4cac04e28cc954e00d07073fdbfb3

Author: Leo Liu <leo.liu at amd.com>
Date:   Mon Feb  8 08:31:52 2021 -0500

radeon/vcn: implement dynamic dpb Tier2 support

Fill up the t2 message buffers based on reference lists, so to
avoid unnecessary allocation of the buffers.

Signed-off-by: Leo Liu <leo.liu at amd.com>
Reviewed-by: James Zhu <James.Zhu at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8916>

---

 src/gallium/drivers/radeon/radeon_vcn_dec.c | 162 ++++++++++++++++++++++++++--
 src/gallium/drivers/radeon/radeon_vcn_dec.h |  21 ++++
 2 files changed, 172 insertions(+), 11 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.c b/src/gallium/drivers/radeon/radeon_vcn_dec.c
index edc091a771b..de21f9d4932 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.c
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.c
@@ -552,6 +552,15 @@ static rvcn_dec_message_vp9_t get_vp9_msg(struct radeon_decoder *dec,
       }
    }
 
+   if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
+      dec->ref_codec.bts = (pic->base.profile == PIPE_VIDEO_PROFILE_VP9_PROFILE2) ?
+         CODEC_10_BITS : CODEC_8_BITS;
+      dec->ref_codec.index = result.curr_pic_idx;
+      dec->ref_codec.ref_size = 8;
+      memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
+      memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
+   }
+
    return result;
 }
 
@@ -943,6 +952,14 @@ static rvcn_dec_message_av1_t get_av1_msg(struct radeon_decoder *dec,
       result.tile_info[i].size = pic->slice_parameter.slice_data_size[i];
    }
 
+   if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
+      dec->ref_codec.bts = pic->picture_parameter.bit_depth_idx ? CODEC_10_BITS : CODEC_8_BITS;
+      dec->ref_codec.index = result.curr_pic_idx;
+      dec->ref_codec.ref_size = 8;
+      memset(dec->ref_codec.ref_list, 0x7f, sizeof(dec->ref_codec.ref_list));
+      memcpy(dec->ref_codec.ref_list, result.ref_frame_map, sizeof(result.ref_frame_map));
+   }
+
    return result;
 }
 
@@ -1308,6 +1325,99 @@ static void rvcn_dec_message_create(struct radeon_decoder *dec)
    create->height_in_samples = dec->base.height;
 }
 
+static unsigned rvcn_dec_dynamic_dpb_t2_message(struct radeon_decoder *dec, rvcn_dec_message_decode_t *decode,
+      rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2, unsigned db_alignment)
+{
+   struct rvcn_dec_dynamic_dpb_t2 *dpb = NULL;
+   unsigned width, height, size;
+   uint64_t addr;
+   int i;
+
+   width = align(decode->width_in_samples, db_alignment);
+   height = align(decode->height_in_samples, db_alignment);
+   size = align((width * height * 3) / 2, 256);
+   if (dec->ref_codec.bts == CODEC_10_BITS)
+      size = size * 3 / 2;
+
+   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
+      for (i = 0; i < dec->ref_codec.ref_size; ++i) {
+         if ((dec->ref_codec.ref_list[i] != 0x7f) && (d->index == (dec->ref_codec.ref_list[i] & 0x7f))) {
+            addr = dec->ws->buffer_get_virtual_address(d->dpb.res->buf);
+            dynamic_dpb_t2->dpbAddrLo[i] = addr;
+            dynamic_dpb_t2->dpbAddrHi[i] = addr >> 32;
+            ++dynamic_dpb_t2->dpbArraySize;
+            break;
+         }
+      }
+      if (i == dec->ref_codec.ref_size) {
+         list_del(&d->list);
+         list_addtail(&d->list, &dec->dpb_unref_list);
+      }
+   }
+
+   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
+      if (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 == size && d->index == dec->ref_codec.index) {
+         dpb = d;
+         break;
+      }
+   }
+
+   if (!dpb) {
+      list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_unref_list, list) {
+         if (d->dpb.res->b.b.width0 * d->dpb.res->b.b.height0 == size) {
+            d->index = dec->ref_codec.index;
+            list_del(&d->list);
+            list_addtail(&d->list, &dec->dpb_ref_list);
+            dpb = d;
+            break;
+         }
+      }
+   }
+
+   list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_unref_list, list) {
+      list_del(&d->list);
+      si_vid_destroy_buffer(&d->dpb);
+      FREE(d);
+   }
+
+   if (!dpb) {
+      dpb = CALLOC_STRUCT(rvcn_dec_dynamic_dpb_t2);
+      if (!dpb)
+         return 1;
+      dpb->index = dec->ref_codec.index;
+      if (!si_vid_create_buffer(dec->screen, &dpb->dpb, size, PIPE_USAGE_DEFAULT)) {
+         RVID_ERR("Can't allocated dpb buffer.\n");
+         FREE(dpb);
+         return 1;
+      }
+      list_addtail(&dpb->list, &dec->dpb_ref_list);
+   }
+
+   dec->ws->cs_add_buffer(&dec->cs, dpb->dpb.res->buf,
+      RADEON_USAGE_READWRITE | RADEON_USAGE_SYNCHRONIZED, RADEON_DOMAIN_VRAM, 0);
+   addr = dec->ws->buffer_get_virtual_address(dpb->dpb.res->buf);
+   dynamic_dpb_t2->dpbCurrLo = addr;
+   dynamic_dpb_t2->dpbCurrHi = addr >> 32;
+
+   decode->decode_flags = 1;
+   dynamic_dpb_t2->dpbConfigFlags = 0;
+   dynamic_dpb_t2->dpbLumaPitch = align(decode->width_in_samples, db_alignment);
+   dynamic_dpb_t2->dpbLumaAlignedHeight = align(decode->height_in_samples, db_alignment);
+   dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaPitch *
+      dynamic_dpb_t2->dpbLumaAlignedHeight;
+   dynamic_dpb_t2->dpbChromaPitch = dynamic_dpb_t2->dpbLumaPitch >> 1;
+   dynamic_dpb_t2->dpbChromaAlignedHeight = dynamic_dpb_t2->dpbLumaAlignedHeight >> 1;
+   dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaPitch *
+      dynamic_dpb_t2->dpbChromaAlignedHeight * 2;
+
+   if (dec->ref_codec.bts == CODEC_10_BITS) {
+      dynamic_dpb_t2->dpbLumaAlignedSize = dynamic_dpb_t2->dpbLumaAlignedSize * 3 / 2;
+      dynamic_dpb_t2->dpbChromaAlignedSize = dynamic_dpb_t2->dpbChromaAlignedSize * 3 / 2;
+   }
+
+   return 0;
+}
+
 static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
                                                  struct pipe_video_buffer *target,
                                                  struct pipe_picture_desc *picture)
@@ -1328,6 +1438,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
    void *codec;
    rvcn_dec_message_drm_t *drm = NULL;
    rvcn_dec_message_dynamic_dpb_t *dynamic_dpb = NULL;
+   rvcn_dec_message_dynamic_dpb_t2_t *dynamic_dpb_t2 = NULL;
    unsigned db_alignment;
 
    header = dec->msg;
@@ -1341,7 +1452,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
       sizes += sizeof(rvcn_dec_message_index_t);
    }
 
-   if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
+   if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
       index_dynamic_dpb = (void*)header + sizes;
       sizes += sizeof(rvcn_dec_message_index_t);
    }
@@ -1356,10 +1467,16 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
       sizes += sizeof(rvcn_dec_message_drm_t);
    }
 
-   if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
+   if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
       offset_dynamic_dpb = sizes;
-      dynamic_dpb = (void*)header + sizes;
-      sizes += sizeof(rvcn_dec_message_dynamic_dpb_t);
+      if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
+         dynamic_dpb = (void*)header + sizes;
+         sizes += sizeof(rvcn_dec_message_dynamic_dpb_t);
+      }
+      else if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
+         dynamic_dpb_t2 = (void*)header + sizes;
+         sizes += sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
+      }
    }
 
    offset_codec = sizes;
@@ -1391,12 +1508,15 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
       ++header->num_buffers;
    }
 
-   if (dec->dpb_type == DPB_DYNAMIC_TIER_1) {
+   if (dec->dpb_type >= DPB_DYNAMIC_TIER_1) {
       index_dynamic_dpb->message_id = RDECODE_MESSAGE_DYNAMIC_DPB;
       index_dynamic_dpb->offset = offset_dynamic_dpb;
-      index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t);
       index_dynamic_dpb->filled = 0;
       ++header->num_buffers;
+      if (dec->dpb_type == DPB_DYNAMIC_TIER_1)
+         index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t);
+      else if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
+         index_dynamic_dpb->size = sizeof(rvcn_dec_message_dynamic_dpb_t2_t);
    }
 
    decode->stream_type = dec->stream_type;
@@ -1408,9 +1528,10 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
 
    db_alignment = (((struct si_screen *)dec->screen)->info.family >= CHIP_RENOIR &&
                    dec->base.width > 32 && (dec->stream_type == RDECODE_CODEC_VP9 ||
+                   dec->stream_type == RDECODE_CODEC_AV1 ||
                    dec->base.profile == PIPE_VIDEO_PROFILE_HEVC_MAIN_10)) ? 64 : 32;
 
-   if (!dec->dpb.res) {
+   if (!dec->dpb.res && dec->dpb_type != DPB_DYNAMIC_TIER_2) {
       unsigned dpb_size = calc_dpb_size(dec, db_alignment);
       bool r;
       if (dpb_size) {
@@ -1510,7 +1631,7 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
       dec->ws->cs_flush(&dec->cs, RADEON_FLUSH_TOGGLE_SECURE_SUBMISSION, NULL);
    }
 
-   decode->dpb_size = dec->dpb.res->buf->size;
+   decode->dpb_size = (dec->dpb_type != DPB_DYNAMIC_TIER_2) ? dec->dpb.res->buf->size : 0;
    decode->dt_size = si_resource(((struct vl_video_buffer *)target)->resources[0])->buf->size +
                      si_resource(((struct vl_video_buffer *)target)->resources[1])->buf->size;
 
@@ -1680,6 +1801,10 @@ static struct pb_buffer *rvcn_dec_message_decode(struct radeon_decoder *dec,
    if (dec->ctx.res)
       decode->hw_ctxt_size = dec->ctx.res->buf->size;
 
+   if (dec->dpb_type == DPB_DYNAMIC_TIER_2)
+      if (rvcn_dec_dynamic_dpb_t2_message(dec, decode, dynamic_dpb_t2, db_alignment))
+         return NULL;
+
    return luma->buffer.buf;
 }
 
@@ -2018,7 +2143,15 @@ static void radeon_dec_destroy(struct pipe_video_codec *decoder)
       si_vid_destroy_buffer(&dec->bs_buffers[i]);
    }
 
-   si_vid_destroy_buffer(&dec->dpb);
+   if (dec->dpb_type != DPB_DYNAMIC_TIER_2) {
+      si_vid_destroy_buffer(&dec->dpb);
+   } else {
+      list_for_each_entry_safe(struct rvcn_dec_dynamic_dpb_t2, d, &dec->dpb_ref_list, list) {
+         list_del(&d->list);
+         si_vid_destroy_buffer(&d->dpb);
+         FREE(d);
+      }
+   }
    si_vid_destroy_buffer(&dec->ctx);
    si_vid_destroy_buffer(&dec->sessionctx);
 
@@ -2123,7 +2256,8 @@ void send_cmd_dec(struct radeon_decoder *dec, struct pipe_video_buffer *target,
    rvcn_dec_message_feedback(dec);
    send_msg_buf(dec);
 
-   send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,
+   if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
+      send_cmd(dec, RDECODE_CMD_DPB_BUFFER, dec->dpb.res->buf, 0, RADEON_USAGE_READWRITE,
             RADEON_DOMAIN_VRAM);
    if (dec->ctx.res)
       send_cmd(dec, RDECODE_CMD_CONTEXT_BUFFER, dec->ctx.res->buf, 0, RADEON_USAGE_READWRITE,
@@ -2346,6 +2480,11 @@ struct pipe_video_codec *radeon_create_decoder(struct pipe_context *context,
    else
       dec->dpb_type = DPB_MAX_RES;
 
+   if (dec->dpb_type == DPB_DYNAMIC_TIER_2) {
+      list_inithead(&dec->dpb_ref_list);
+      list_inithead(&dec->dpb_unref_list);
+   }
+
    return &dec->base;
 
 error:
@@ -2356,7 +2495,8 @@ error:
       si_vid_destroy_buffer(&dec->bs_buffers[i]);
    }
 
-   si_vid_destroy_buffer(&dec->dpb);
+   if (dec->dpb_type != DPB_DYNAMIC_TIER_2)
+      si_vid_destroy_buffer(&dec->dpb);
    si_vid_destroy_buffer(&dec->ctx);
    si_vid_destroy_buffer(&dec->sessionctx);
 
diff --git a/src/gallium/drivers/radeon/radeon_vcn_dec.h b/src/gallium/drivers/radeon/radeon_vcn_dec.h
index a283001e972..0e3443d1eae 100644
--- a/src/gallium/drivers/radeon/radeon_vcn_dec.h
+++ b/src/gallium/drivers/radeon/radeon_vcn_dec.h
@@ -29,6 +29,7 @@
 #define _RADEON_VCN_DEC_H
 
 #include "radeon_video.h"
+#include "util/list.h"
 
 #define RDECODE_PKT_TYPE_S(x)        (((unsigned)(x)&0x3) << 30)
 #define RDECODE_PKT_TYPE_G(x)        (((x) >> 30) & 0x3)
@@ -1066,6 +1067,12 @@ struct jpeg_params {
    bool direct_reg;
 };
 
+struct rvcn_dec_dynamic_dpb_t2 {
+   struct list_head list;
+   uint8_t index;
+   struct rvid_buffer dpb;
+};
+
 struct radeon_decoder {
    struct pipe_video_codec base;
 
@@ -1104,8 +1111,22 @@ struct radeon_decoder {
    enum {
       DPB_MAX_RES = 0,
       DPB_DYNAMIC_TIER_1,
+      DPB_DYNAMIC_TIER_2
    } dpb_type;
 
+   struct {
+      enum {
+         CODEC_8_BITS = 0,
+         CODEC_10_BITS
+      } bts;
+      uint8_t index;
+      unsigned ref_size;
+      uint8_t ref_list[16];
+   } ref_codec;
+
+   struct list_head dpb_ref_list;
+   struct list_head dpb_unref_list;
+
    void (*send_cmd)(struct radeon_decoder *dec, struct pipe_video_buffer *target,
                     struct pipe_picture_desc *picture);
 };



More information about the mesa-commit mailing list