[PATCH v3] drm/amdgpu: fix incorrect VCN revision in SRIOV
Lazar, Lijo
lijo.lazar at amd.com
Thu Dec 9 08:53:19 UTC 2021
On 12/9/2021 1:56 PM, Leslie Shi wrote:
> Guest OS will setup VCN instance 1 which is disabled as an enabled instance and
> execute initialization work on it, but this causes VCN ib ring test failure
> on the disabled VCN instance during modprobe:
>
> amdgpu 0000:00:08.0: amdgpu: ring vcn_enc_1.0 uses VM inv eng 5 on hub 1
> amdgpu 0000:00:08.0: [drm:amdgpu_ib_ring_tests [amdgpu]] *ERROR* IB test failed on vcn_dec_0 (-110).
> amdgpu 0000:00:08.0: [drm:amdgpu_ib_ring_tests [amdgpu]] *ERROR* IB test failed on vcn_enc_0.0 (-110).
> [drm:amdgpu_device_delayed_init_work_handler [amdgpu]] *ERROR* ib ring test failed (-110).
>
> v2: drop amdgpu_discovery_get_vcn_version and rename sriov_config to
> vcn_config
> v3: modify VCN's revision in SR-IOV and bare-metal
>
> Fixes: 36b7d5646476 ("drm/amdgpu: handle SRIOV VCN revision parsing")
> Signed-off-by: Leslie Shi <Yuliang.Shi at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c | 29 ++++++-------------
> drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h | 2 --
> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 15 +++-------
> drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h | 2 +-
> 4 files changed, 14 insertions(+), 34 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> index 552031950518..f31bc0187394 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.c
> @@ -380,18 +380,15 @@ int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
> ip->revision);
>
> if (le16_to_cpu(ip->hw_id) == VCN_HWID) {
> - if (amdgpu_sriov_vf(adev)) {
> - /* SR-IOV modifies each VCN’s revision (uint8)
> - * Bit [5:0]: original revision value
> - * Bit [7:6]: en/decode capability:
> - * 0b00 : VCN function normally
> - * 0b10 : encode is disabled
> - * 0b01 : decode is disabled
> - */
> - adev->vcn.sriov_config[adev->vcn.num_vcn_inst] =
> - (ip->revision & 0xc0) >> 6;
> - ip->revision &= ~0xc0;
> - }
> + /* Bit [5:0]: original revision value
> + * Bit [7:6]: en/decode capability:
> + * 0b00 : VCN function normally
> + * 0b10 : encode is disabled
> + * 0b01 : decode is disabled
> + */
> + adev->vcn.vcn_config[adev->vcn.num_vcn_inst] =
> + ip->revision & 0xc0;
> + ip->revision &= ~0xc0;
> adev->vcn.num_vcn_inst++;
> }
> if (le16_to_cpu(ip->hw_id) == SDMA0_HWID ||
> @@ -485,14 +482,6 @@ int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int n
> return -EINVAL;
> }
>
> -
> -int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
> - int *major, int *minor, int *revision)
> -{
> - return amdgpu_discovery_get_ip_version(adev, VCN_HWID,
> - vcn_instance, major, minor, revision);
> -}
> -
> void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev)
> {
> struct binary_header *bhdr;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> index 0ea029e3b850..14537cec19db 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_discovery.h
> @@ -33,8 +33,6 @@ void amdgpu_discovery_harvest_ip(struct amdgpu_device *adev);
> int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id, int number_instance,
> int *major, int *minor, int *revision);
>
> -int amdgpu_discovery_get_vcn_version(struct amdgpu_device *adev, int vcn_instance,
> - int *major, int *minor, int *revision);
> int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
> int amdgpu_discovery_set_ip_blocks(struct amdgpu_device *adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index 2658414c503d..38036cbf6203 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -284,20 +284,13 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
> bool amdgpu_vcn_is_disabled_vcn(struct amdgpu_device *adev, enum vcn_ring_type type, uint32_t vcn_instance)
> {
> bool ret = false;
> + int vcn_config = adev->vcn.vcn_config[vcn_instance];
I missed this earlier. I think there should also be a check for a valid
instance; otherwise this function could return false for a non-existent VCN
instance (by default, nothing is marked as disabled for such an instance).
If that check is not already done in the caller, and assuming the instance is
a 0-based index, something like:
if (vcn_instance >= adev->vcn.num_vcn_inst)
return true;
Thanks,
Lijo
>
> - int major;
> - int minor;
> - int revision;
> -
> - /* if cannot find IP data, then this VCN does not exist */
> - if (amdgpu_discovery_get_vcn_version(adev, vcn_instance, &major, &minor, &revision) != 0)
> - return true;
> -
> - if ((type == VCN_ENCODE_RING) && (revision & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
> + if ((type == VCN_ENCODE_RING) && (vcn_config & VCN_BLOCK_ENCODE_DISABLE_MASK)) {
> ret = true;
> - } else if ((type == VCN_DECODE_RING) && (revision & VCN_BLOCK_DECODE_DISABLE_MASK)) {
> + } else if ((type == VCN_DECODE_RING) && (vcn_config & VCN_BLOCK_DECODE_DISABLE_MASK)) {
> ret = true;
> - } else if ((type == VCN_UNIFIED_RING) && (revision & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
> + } else if ((type == VCN_UNIFIED_RING) && (vcn_config & VCN_BLOCK_QUEUE_DISABLE_MASK)) {
> ret = true;
> }
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index 938a5ead3f20..5d3728b027d3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -235,7 +235,7 @@ struct amdgpu_vcn {
>
> uint8_t num_vcn_inst;
> struct amdgpu_vcn_inst inst[AMDGPU_MAX_VCN_INSTANCES];
> - uint8_t sriov_config[AMDGPU_MAX_VCN_INSTANCES];
> + uint8_t vcn_config[AMDGPU_MAX_VCN_INSTANCES];
> struct amdgpu_vcn_reg internal;
> struct mutex vcn_pg_lock;
> struct mutex vcn1_jpeg1_workaround;
>
More information about the amd-gfx mailing list