[PATCH 2/2] drm/amdgpu: Add support for CPERs on virtualization
Zhou1, Tao
Tao.Zhou1 at amd.com
Fri Feb 28 09:03:40 UTC 2025
> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Tony Yi
> Sent: Thursday, February 27, 2025 11:12 PM
> To: Yi, Tony <Tony.Yi at amd.com>; Skvortsov, Victor <Victor.Skvortsov at amd.com>;
> amd-gfx at lists.freedesktop.org; Zhang, Hawking <Hawking.Zhang at amd.com>; Luo,
> Zhigang <Zhigang.Luo at amd.com>
> Cc: Yi, Tony <Tony.Yi at amd.com>
> Subject: [PATCH 2/2] drm/amdgpu: Add support for CPERs on virtualization
>
> Add support for CPERs on VFs.
>
> VFs do not receive PMFW messages directly; as such, they need to query them
> from the host. To avoid tripping the host's event guard, CPER queries must be
> rate limited. CPER queries share the same RAS telemetry buffer as the error
> count query, so a mutex protecting the shared buffer was added as well.
>
> For readability, amdgpu_detect_virtualization() was refactored into multiple
> individual functions.
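[Note] For context, the guest-side query path this patch adds boils down to
the following pattern (a simplified sketch of amdgpu_virt_req_ras_cper_dump()
from the diff below; the reset-domain trylock is elided):

	int query_cper_sketch(struct amdgpu_device *adev, bool force_update)
	{
		int ret;

		/* Rate limit guest->host mailbox traffic so the VF does
		 * not trip the host event guard (guest self-DOS prevention).
		 */
		if (!__ratelimit(&adev->virt.ras.ras_cper_dump_rs) && !force_update)
			return 0;

		/* The telemetry buffer is shared with the error count
		 * query, hence the mutex.
		 */
		mutex_lock(&adev->virt.ras.ras_telemetry_mutex);
		ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
		mutex_unlock(&adev->virt.ras.ras_telemetry_mutex);

		return ret;
	}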
>
> Signed-off-by: Tony Yi <Tony.Yi at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 7 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 31 ++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 138 ++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h | 18 ++-
> drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c | 14 +++
> 5 files changed, 195 insertions(+), 13 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 5e1d8f0039d0..198d29faa754 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -3099,7 +3099,8 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
>
> amdgpu_fru_get_product_info(adev);
>
> - r = amdgpu_cper_init(adev);
> + if (!amdgpu_sriov_vf(adev) || amdgpu_sriov_ras_cper_en(adev))
> + r = amdgpu_cper_init(adev);
>
> init_failed:
>
> @@ -4335,10 +4336,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> * for throttling interrupt) = 60 seconds.
> */
> ratelimit_state_init(&adev->throttling_logging_rs, (60 - 1) * HZ, 1);
> - ratelimit_state_init(&adev->virt.ras_telemetry_rs, 5 * HZ, 1);
>
> ratelimit_set_flags(&adev->throttling_logging_rs,
> RATELIMIT_MSG_ON_RELEASE);
> - ratelimit_set_flags(&adev->virt.ras_telemetry_rs,
> RATELIMIT_MSG_ON_RELEASE);
>
> /* Registers mapping */
> /* TODO: block userspace mapping of io register */
> @@ -4370,7 +4369,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
> return -ENOMEM;
>
> /* detect hw virtualization here */
> - amdgpu_detect_virtualization(adev);
> + amdgpu_virt_init(adev);
>
> amdgpu_device_get_pcie_info(adev);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 81a7d4faac9c..d55c8b7fdb59 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -578,12 +578,32 @@ static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
> return result;
> }
>
> +static ssize_t amdgpu_debugfs_virt_ring_read(struct file *f, char __user *buf,
> + size_t size, loff_t *pos)
> +{
> + struct amdgpu_ring *ring = file_inode(f)->i_private;
> +
> + if (*pos & 3 || size & 3)
> + return -EINVAL;
> +
> + if (ring->funcs->type == AMDGPU_RING_TYPE_CPER)
> + amdgpu_virt_req_ras_cper_dump(ring->adev, false);
> +
> +	return amdgpu_debugfs_ring_read(f, buf, size, pos);
> +}
> +
> static const struct file_operations amdgpu_debugfs_ring_fops = {
> .owner = THIS_MODULE,
> .read = amdgpu_debugfs_ring_read,
> .llseek = default_llseek
> };
>
> +static const struct file_operations amdgpu_debugfs_virt_ring_fops = {
> + .owner = THIS_MODULE,
> + .read = amdgpu_debugfs_virt_ring_read,
> + .llseek = default_llseek
> +};
> +
> static ssize_t amdgpu_debugfs_mqd_read(struct file *f, char __user *buf,
> size_t size, loff_t *pos)
> {
> @@ -671,9 +691,14 @@ void amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
> char name[32];
>
> sprintf(name, "amdgpu_ring_%s", ring->name);
> - debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
> - &amdgpu_debugfs_ring_fops,
> - ring->ring_size + 12);
> + if (amdgpu_sriov_vf(adev))
> + debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
> + &amdgpu_debugfs_virt_ring_fops,
> + ring->ring_size + 12);
> + else
> + debugfs_create_file_size(name, S_IFREG | 0444, root, ring,
> + &amdgpu_debugfs_ring_fops,
> + ring->ring_size + 12);
>
> if (ring->mqd_obj) {
> sprintf(name, "amdgpu_mqd_%s", ring->name); diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index e6f0152e5b08..3832513ec7bf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -739,7 +739,7 @@ void amdgpu_virt_exchange_data(struct amdgpu_device *adev)
> }
> }
>
> -void amdgpu_detect_virtualization(struct amdgpu_device *adev)
> +static u32 amdgpu_virt_init_detect_asic(struct amdgpu_device *adev)
> {
> uint32_t reg;
>
> @@ -775,8 +775,17 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
> adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE;
> }
>
> + return reg;
> +}
> +
> +static bool amdgpu_virt_init_req_data(struct amdgpu_device *adev, u32 reg)
> +{
> + bool is_sriov = false;
> +
> /* we have the ability to check now */
> if (amdgpu_sriov_vf(adev)) {
> + is_sriov = true;
> +
> switch (adev->asic_type) {
> case CHIP_TONGA:
> case CHIP_FIJI:
> @@ -805,10 +814,39 @@ void amdgpu_detect_virtualization(struct amdgpu_device *adev)
> amdgpu_virt_request_init_data(adev);
> break;
> default: /* other chip doesn't support SRIOV */
> + is_sriov = false;
> DRM_ERROR("Unknown asic type: %d!\n", adev-
> >asic_type);
> break;
> }
> }
> +
> + return is_sriov;
> +}
> +
> +static void amdgpu_virt_init_ras(struct amdgpu_device *adev)
> +{
> + ratelimit_state_init(&adev->virt.ras.ras_error_cnt_rs, 5 * HZ, 1);
> + ratelimit_state_init(&adev->virt.ras.ras_cper_dump_rs, 5 * HZ, 1);
> +
> + ratelimit_set_flags(&adev->virt.ras.ras_error_cnt_rs,
> + RATELIMIT_MSG_ON_RELEASE);
> + ratelimit_set_flags(&adev->virt.ras.ras_cper_dump_rs,
> + RATELIMIT_MSG_ON_RELEASE);
> +
> + mutex_init(&adev->virt.ras.ras_telemetry_mutex);
> +
> + adev->virt.ras.cper_rptr = 0;
> +}
> +
> +void amdgpu_virt_init(struct amdgpu_device *adev)
> +{
> + bool is_sriov = false;
> + uint32_t reg = amdgpu_virt_init_detect_asic(adev);
> +
> + is_sriov = amdgpu_virt_init_req_data(adev, reg);
> +
> + if (is_sriov)
> + amdgpu_virt_init_ras(adev);
> }
>
> static bool amdgpu_virt_access_debugfs_is_mmio(struct amdgpu_device *adev)
> @@ -1288,10 +1326,12 @@ static int amdgpu_virt_req_ras_err_count_internal(struct amdgpu_device *adev, bo
> * will ignore incoming guest messages. Ratelimit the guest messages to
> * prevent guest self DOS.
> */
> - if (__ratelimit(&adev->virt.ras_telemetry_rs) || force_update) {
> + if (__ratelimit(&virt->ras.ras_error_cnt_rs) || force_update) {
> + mutex_lock(&virt->ras.ras_telemetry_mutex);
> if (!virt->ops->req_ras_err_count(adev))
> amdgpu_virt_cache_host_error_counts(adev,
> - adev->virt.fw_reserve.ras_telemetry);
> + virt->fw_reserve.ras_telemetry);
> + mutex_unlock(&virt->ras.ras_telemetry_mutex);
> }
>
> return 0;
> @@ -1322,6 +1362,98 @@ int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_bl
> return 0;
> }
>
> +static int
> +amdgpu_virt_write_cpers_to_ring(struct amdgpu_device *adev,
> + struct amdsriov_ras_telemetry *host_telemetry,
> + u32 *more)
> +{
> + struct amd_sriov_ras_cper_dump *cper_dump = NULL;
> + struct cper_hdr *entry = NULL;
> + struct amdgpu_ring *ring = &adev->cper.ring_buf;
> + uint32_t checksum, used_size, i, j;
> + int ret = 0;
> +
> + checksum = host_telemetry->header.checksum;
> + used_size = host_telemetry->header.used_size;
> +
> + if (used_size > (AMD_SRIOV_RAS_TELEMETRY_SIZE_KB << 10))
> + return 0;
> +
> +	cper_dump = kmemdup(&host_telemetry->body.cper_dump, used_size, GFP_KERNEL);
> + if (!cper_dump)
> + return -ENOMEM;
> +
> + if (checksum != amd_sriov_msg_checksum(cper_dump, used_size, 0, 0))
> + goto out;
> +
> + *more = cper_dump->more;
> +
> + if (cper_dump->wptr < adev->virt.ras.cper_rptr) {
> + dev_warn(
> + adev->dev,
> + "guest specified rptr that was too high! guest rptr: 0x%llx, host
> rptr: 0x%llx\n",
> + adev->virt.ras.cper_rptr, cper_dump->wptr);
> +
> + adev->virt.ras.cper_rptr = cper_dump->wptr;
> + goto out;
> + }
> +
> + entry = (struct cper_hdr *)&cper_dump->buf[0];
> +
> + for (i = 0; i < cper_dump->count; i++) {
> + amdgpu_cper_ring_write(ring, entry, entry->record_length);
> + entry = (struct cper_hdr *)((char *)entry +
> + entry->record_length);
> + }
> +
> + if (cper_dump->overflow_count)
> + dev_warn(adev->dev,
> + "host reported CPER overflow of 0x%llx entries!\n",
> + cper_dump->overflow_count);
> +
> + adev->virt.ras.cper_rptr = cper_dump->wptr;
> +out:
> + kfree(cper_dump);
> +
> + return ret;
> +}
> +
> +static int amdgpu_virt_req_ras_cper_dump_internal(struct amdgpu_device *adev)
> +{
> + struct amdgpu_virt *virt = &adev->virt;
> + int ret = 0;
> + uint32_t more = 0;
> +
> + if (!amdgpu_sriov_ras_cper_en(adev))
> + return -EOPNOTSUPP;
> +
> + do {
> + if (!virt->ops->req_ras_cper_dump(adev, virt->ras.cper_rptr))
> + ret = amdgpu_virt_write_cpers_to_ring(
> + adev, virt->fw_reserve.ras_telemetry, &more);
> + else
> + ret = 0;
> + } while (more);
> +
> + return ret;
> +}
> +
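[Note] The do/while above implements a simple paging protocol: the shared
telemetry buffer may be too small to hold every pending CPER entry, so the
host sets a "more" flag when entries remain past the returned wptr, and the
guest re-requests from its updated rptr until the flag clears. Schematically
(illustrative pseudo-C only; host_fill_cper_dump() is a hypothetical stand-in
for the mailbox round trip):

	u64 rptr = 0;	/* guest read pointer, persisted in virt.ras.cper_rptr */
	u32 more;

	do {
		/* host copies entries in [rptr, wptr) into the shared buffer */
		host_fill_cper_dump(buf, rptr, &more);
		/* amdgpu_virt_write_cpers_to_ring() in the diff above */
		write_entries_to_cper_ring(buf);
		rptr = buf->wptr;	/* advance past what we consumed */
	} while (more);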
> +int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update)
> +{
> + struct amdgpu_virt *virt = &adev->virt;
> + int ret = 0;
> +
> + if ((__ratelimit(&virt->ras.ras_cper_dump_rs) || force_update) &&
> + down_read_trylock(&adev->reset_domain->sem)) {
> + mutex_lock(&virt->ras.ras_telemetry_mutex);
> + ret = amdgpu_virt_req_ras_cper_dump_internal(adev);
> + mutex_unlock(&virt->ras.ras_telemetry_mutex);
> + up_read(&adev->reset_domain->sem);
> + }
> +
> + return ret;
> +}
> +
> int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev)
> {
> 	unsigned long ue_count, ce_count;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 0f3ccae5c1ab..9f65487e60f5 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -96,6 +96,7 @@ struct amdgpu_virt_ops {
> enum amdgpu_ras_block block);
> bool (*rcvd_ras_intr)(struct amdgpu_device *adev);
> int (*req_ras_err_count)(struct amdgpu_device *adev);
> + int (*req_ras_cper_dump)(struct amdgpu_device *adev, u64 vf_rptr);
> };
>
> /*
> @@ -140,6 +141,7 @@ enum AMDGIM_FEATURE_FLAG {
> AMDGIM_FEATURE_MES_INFO_ENABLE = (1 << 8),
> AMDGIM_FEATURE_RAS_CAPS = (1 << 9),
> AMDGIM_FEATURE_RAS_TELEMETRY = (1 << 10),
> + AMDGIM_FEATURE_RAS_CPER = (1 << 11),
> };
>
> enum AMDGIM_REG_ACCESS_FLAG {
> @@ -242,6 +244,13 @@ struct amdgpu_virt_ras_err_handler_data {
> int last_reserved;
> };
>
> +struct amdgpu_virt_ras {
> + struct ratelimit_state ras_error_cnt_rs;
> + struct ratelimit_state ras_cper_dump_rs;
> + struct mutex ras_telemetry_mutex;
> + uint64_t cper_rptr;
> +};
> +
> /* GPU virtualization */
> struct amdgpu_virt {
> uint32_t caps;
> @@ -284,8 +293,7 @@ struct amdgpu_virt {
>
> union amd_sriov_ras_caps ras_en_caps;
> union amd_sriov_ras_caps ras_telemetry_en_caps;
> -
> - struct ratelimit_state ras_telemetry_rs;
> + struct amdgpu_virt_ras ras;
> 	struct amd_sriov_ras_telemetry_error_count count_cache;
> };
>
> @@ -340,6 +348,9 @@ struct amdgpu_video_codec_info;
> #define amdgpu_sriov_ras_telemetry_block_en(adev, sriov_blk) \
> 	(amdgpu_sriov_ras_telemetry_en((adev)) && (adev)->virt.ras_telemetry_en_caps.all & BIT(sriov_blk))
>
> +#define amdgpu_sriov_ras_cper_en(adev) \
> +	((adev)->virt.gim_feature & AMDGIM_FEATURE_RAS_CPER)
> +
> static inline bool is_virtual_machine(void)
> {
> #if defined(CONFIG_X86)
> @@ -378,7 +389,7 @@ void amdgpu_virt_release_ras_err_handler_data(struct amdgpu_device *adev);
> void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev);
> void amdgpu_virt_exchange_data(struct amdgpu_device *adev);
> void amdgpu_virt_fini_data_exchange(struct amdgpu_device *adev);
> -void amdgpu_detect_virtualization(struct amdgpu_device *adev);
> +void amdgpu_virt_init(struct amdgpu_device *adev);
>
> bool amdgpu_virt_can_access_debugfs(struct amdgpu_device *adev);
> int amdgpu_virt_enable_access_debugfs(struct amdgpu_device *adev);
> @@ -406,6 +417,7 @@ u32 amdgpu_virt_rlcg_reg_rw(struct amdgpu_device *adev, u32 offset, u32 v, u32 f
> bool amdgpu_virt_get_ras_capability(struct amdgpu_device *adev);
> int amdgpu_virt_req_ras_err_count(struct amdgpu_device *adev, enum amdgpu_ras_block block,
> 				  struct ras_err_data *err_data);
> +int amdgpu_virt_req_ras_cper_dump(struct amdgpu_device *adev, bool force_update);
> int amdgpu_virt_ras_telemetry_post_reset(struct amdgpu_device *adev);
> bool amdgpu_virt_ras_telemetry_block_en(struct amdgpu_device *adev,
> 					enum amdgpu_ras_block block);
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> index 4dcb72d1bdda..5aadf24cb202 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_nv.c
> @@ -184,6 +184,9 @@ static int xgpu_nv_send_access_requests_with_param(struct amdgpu_device *adev,
> case IDH_REQ_RAS_ERROR_COUNT:
> event = IDH_RAS_ERROR_COUNT_READY;
> break;
> + case IDH_REQ_RAS_CPER_DUMP:
> + event = IDH_RAS_CPER_DUMP_READY;
> + break;
> default:
> break;
> }
> @@ -467,6 +470,16 @@ static int xgpu_nv_req_ras_err_count(struct amdgpu_device *adev)
> 	return xgpu_nv_send_access_requests(adev, IDH_REQ_RAS_ERROR_COUNT);
> }
>
> +static int xgpu_nv_req_ras_cper_dump(struct amdgpu_device *adev, u64 vf_rptr)
> +{
> + uint32_t vf_rptr_hi, vf_rptr_lo;
> +
> + vf_rptr_hi = (uint32_t)(vf_rptr >> 32);
> + vf_rptr_lo = (uint32_t)(vf_rptr & 0xFFFFFFFF);
> + return xgpu_nv_send_access_requests_with_param(
> + adev, IDH_REQ_RAS_CPER_DUMP, vf_rptr_hi, vf_rptr_lo, 0);
[Tao] So the CPER rptr info will be sent to the host? Can the host handle the CPER work by itself?
> +}
> +
> const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
> .req_full_gpu = xgpu_nv_request_full_gpu_access,
> .rel_full_gpu = xgpu_nv_release_full_gpu_access,
> @@ -478,4 +491,5 @@ const struct amdgpu_virt_ops xgpu_nv_virt_ops = {
> .ras_poison_handler = xgpu_nv_ras_poison_handler,
> .rcvd_ras_intr = xgpu_nv_rcvd_ras_intr,
> .req_ras_err_count = xgpu_nv_req_ras_err_count,
> + .req_ras_cper_dump = xgpu_nv_req_ras_cper_dump,
> };
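[Note] The 64-bit guest read pointer does not fit in a single 32-bit mailbox
data register, hence the hi/lo split in xgpu_nv_req_ras_cper_dump() above. A
host-side consumer would mirror the reassembly, along these lines (illustrative
only; data1/data2 are hypothetical names for the two 32-bit mailbox
parameters):

	/* data1 carries the high word, data2 the low word */
	u64 vf_rptr = ((u64)data1 << 32) | (u64)data2;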
> --
> 2.34.1