[PATCH] drm/amdgpu/vcn: enable AV1 on both instances

Mon Sep 23 12:14:24 UTC 2024

[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Leo Liu <leo.liu at amd.com>

> -----Original Message-----
> From: Jamadar, Saleemkhan <Saleemkhan.Jamadar at amd.com>
> Sent: September 21, 2024 3:14 AM
> To: Jamadar, Saleemkhan <Saleemkhan.Jamadar at amd.com>; Liu, Leo
> <Leo.Liu at amd.com>; Rao, Srinath <Srinath.rao at amd.com>; Gopalakrishnan,
> Veerabadhran (Veera) <Veerabadhran.Gopalakrishnan at amd.com>;
> Sundararaju, Sathishkumar <Sathishkumar.Sundararaju at amd.com>; amd-
> gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>
> Cc: Koenig, Christian <Christian.Koenig at amd.com>
> Subject: [PATCH] drm/amdgpu/vcn: enable AV1 on both instances
>
> v1 - remove cs parse code (Christian)
>
> On VCN v4_0_6, AV1 is supported on both instances.
> Remove the cs IB parse code since explicit handling of AV1 scheduling is
> not required.
>
> Signed-off-by: Saleemkhan Jamadar <saleemkhan.jamadar at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c | 165 ------------------------
>  1 file changed, 165 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> index b1fd226b7efb..9d4f5352a62c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
> @@ -1395,170 +1395,6 @@ static void
> vcn_v4_0_5_unified_ring_set_wptr(struct amdgpu_ring *ring)
>       }
>  }
>
> -static int vcn_v4_0_5_limit_sched(struct amdgpu_cs_parser *p,
> -                             struct amdgpu_job *job)
> -{
> -     struct drm_gpu_scheduler **scheds;
> -
> -     /* The create msg must be in the first IB submitted */
> -     if (atomic_read(&job->base.entity->fence_seq))
> -             return -EINVAL;
> -
> -     /* if VCN0 is harvested, we can't support AV1 */
> -     if (p->adev->vcn.harvest_config & AMDGPU_VCN_HARVEST_VCN0)
> -             return -EINVAL;
> -
> -     scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_ENC]
> -             [AMDGPU_RING_PRIO_0].sched;
> -     drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
> -     return 0;
> -}
> -
> -static int vcn_v4_0_5_dec_msg(struct amdgpu_cs_parser *p, struct
> amdgpu_job *job,
> -                         uint64_t addr)
> -{
> -     struct ttm_operation_ctx ctx = { false, false };
> -     struct amdgpu_bo_va_mapping *map;
> -     uint32_t *msg, num_buffers;
> -     struct amdgpu_bo *bo;
> -     uint64_t start, end;
> -     unsigned int i;
> -     void *ptr;
> -     int r;
> -
> -     addr &= AMDGPU_GMC_HOLE_MASK;
> -     r = amdgpu_cs_find_mapping(p, addr, &bo, &map);
> -     if (r) {
> -             DRM_ERROR("Can't find BO for addr 0x%08llx\n", addr);
> -             return r;
> -     }
> -
> -     start = map->start * AMDGPU_GPU_PAGE_SIZE;
> -     end = (map->last + 1) * AMDGPU_GPU_PAGE_SIZE;
> -     if (addr & 0x7) {
> -             DRM_ERROR("VCN messages must be 8 byte aligned!\n");
> -             return -EINVAL;
> -     }
> -
> -     bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
> -     amdgpu_bo_placement_from_domain(bo, bo->allowed_domains);
> -     r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
> -     if (r) {
> -             DRM_ERROR("Failed validating the VCN message BO (%d)!\n", r);
> -             return r;
> -     }
> -
> -     r = amdgpu_bo_kmap(bo, &ptr);
> -     if (r) {
> -             DRM_ERROR("Failed mapping the VCN message (%d)!\n", r);
> -             return r;
> -     }
> -
> -     msg = ptr + addr - start;
> -
> -     /* Check length */
> -     if (msg[1] > end - addr) {
> -             r = -EINVAL;
> -             goto out;
> -     }
> -
> -     if (msg[3] != RDECODE_MSG_CREATE)
> -             goto out;
> -
> -     num_buffers = msg[2];
> -     for (i = 0, msg = &msg[6]; i < num_buffers; ++i, msg += 4) {
> -             uint32_t offset, size, *create;
> -
> -             if (msg[0] != RDECODE_MESSAGE_CREATE)
> -                     continue;
> -
> -             offset = msg[1];
> -             size = msg[2];
> -
> -             if (offset + size > end) {
> -                     r = -EINVAL;
> -                     goto out;
> -             }
> -
> -             create = ptr + addr + offset - start;
> -
> -             /* H264, HEVC and VP9 can run on any instance */
> -             if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
> -                     continue;
> -
> -             r = vcn_v4_0_5_limit_sched(p, job);
> -             if (r)
> -                     goto out;
> -     }
> -
> -out:
> -     amdgpu_bo_kunmap(bo);
> -     return r;
> -}
> -
> -#define RADEON_VCN_ENGINE_TYPE_ENCODE                        (0x00000002)
> -#define RADEON_VCN_ENGINE_TYPE_DECODE                        (0x00000003)
> -
> -#define RADEON_VCN_ENGINE_INFO                               (0x30000001)
> -#define RADEON_VCN_ENGINE_INFO_MAX_OFFSET            16
> -
> -#define RENCODE_ENCODE_STANDARD_AV1                  2
> -#define RENCODE_IB_PARAM_SESSION_INIT                        0x00000003
> -#define RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET     64
> -
> -/* return the offset in ib if id is found, -1 otherwise
> - * to speed up the searching we only search upto max_offset
> - */
> -static int vcn_v4_0_5_enc_find_ib_param(struct amdgpu_ib *ib, uint32_t id, int max_offset)
> -{
> -     int i;
> -
> -     for (i = 0; i < ib->length_dw && i < max_offset && ib->ptr[i] >= 8; i +=
> ib->ptr[i]/4) {
> -             if (ib->ptr[i + 1] == id)
> -                     return i;
> -     }
> -     return -1;
> -}
> -
> -static int vcn_v4_0_5_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
> -                                        struct amdgpu_job *job,
> -                                        struct amdgpu_ib *ib)
> -{
> -     struct amdgpu_ring *ring = amdgpu_job_ring(job);
> -     struct amdgpu_vcn_decode_buffer *decode_buffer;
> -     uint64_t addr;
> -     uint32_t val;
> -     int idx;
> -
> -     /* The first instance can decode anything */
> -     if (!ring->me)
> -             return 0;
> -
> -     /* RADEON_VCN_ENGINE_INFO is at the top of ib block */
> -     idx = vcn_v4_0_5_enc_find_ib_param(ib,
> RADEON_VCN_ENGINE_INFO,
> -                     RADEON_VCN_ENGINE_INFO_MAX_OFFSET);
> -     if (idx < 0) /* engine info is missing */
> -             return 0;
> -
> -     val = amdgpu_ib_get_value(ib, idx + 2); /*
> RADEON_VCN_ENGINE_TYPE */
> -     if (val == RADEON_VCN_ENGINE_TYPE_DECODE) {
> -             decode_buffer = (struct amdgpu_vcn_decode_buffer *)&ib-
> >ptr[idx + 6];
> -
> -             if (!(decode_buffer->valid_buf_flag  & 0x1))
> -                     return 0;
> -
> -             addr = ((u64)decode_buffer->msg_buffer_address_hi) << 32 |
> -                     decode_buffer->msg_buffer_address_lo;
> -             return vcn_v4_0_5_dec_msg(p, job, addr);
> -     } else if (val == RADEON_VCN_ENGINE_TYPE_ENCODE) {
> -             idx = vcn_v4_0_5_enc_find_ib_param(ib,
> RENCODE_IB_PARAM_SESSION_INIT,
> -                     RENCODE_IB_PARAM_SESSION_INIT_MAX_OFFSET);
> -             if (idx >= 0 && ib->ptr[idx + 2] ==
> RENCODE_ENCODE_STANDARD_AV1)
> -                     return vcn_v4_0_5_limit_sched(p, job);
> -     }
> -     return 0;
> -}
> -
>  static const struct amdgpu_ring_funcs vcn_v4_0_5_unified_ring_vm_funcs =
> {
>       .type = AMDGPU_RING_TYPE_VCN_ENC,
>       .align_mask = 0x3f,
> @@ -1566,7 +1402,6 @@ static const struct amdgpu_ring_funcs
> vcn_v4_0_5_unified_ring_vm_funcs = {
>       .get_rptr = vcn_v4_0_5_unified_ring_get_rptr,
>       .get_wptr = vcn_v4_0_5_unified_ring_get_wptr,
>       .set_wptr = vcn_v4_0_5_unified_ring_set_wptr,
> -     .patch_cs_in_place = vcn_v4_0_5_ring_patch_cs_in_place,
>       .emit_frame_size =
>               SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
>               SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
> --
> 2.34.1