[PATCH] drm/amdgpu: Support passing amdgpu critical error to host via GPU Mailbox.

Deucher, Alexander Alexander.Deucher at amd.com
Fri Jun 23 21:42:51 UTC 2017


> -----Original Message-----
> From: Gavin Wan [mailto:Gavin.Wan at amd.com]
> Sent: Friday, June 23, 2017 5:33 PM
> To: dl.gcr.gpu-virtual; brahma_sw_dev; amd-gfx at lists.freedesktop.org
> Cc: Wan, Gavin
> Subject: [PATCH] drm/amdgpu: Support passing amdgpu critical error to host
> via GPU Mailbox.
> 
> This feature works in an SR-IOV environment. In a non-SR-IOV environment,
> the trans_error function does nothing.
> 
> The error information includes error_code (16 bits), error_flags (16 bits)
> and error_data (64 bits). Since there are not many errors, we keep the
> errors in an array and transfer all of them to the host before the amdgpu
> initialization function (amdgpu_device_init) exits.
> 
> Change-Id: Ib20156130553b3c47046e6ca967e2c6c02b8e9ff
> Signed-off-by: Gavin Wan <Gavin.Wan at amd.com>

I noticed a few more minor issues; they are noted inline below.

Alex

> ---
>  drivers/gpu/drm/amd/amdgpu/Makefile        |   2 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c    |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c   |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  18 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c     |   3 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c    |   2 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c   |   9 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c     |   6 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c  |   5 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c    |   4 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c    |   6 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h   |   1 +
>  drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c      |  11 ++
>  drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c      |   2 +
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c      |  46 ++++---
>  drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h      |   4 +-
>  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c      |   1 +
>  drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h      |   4 +-
>  drivers/gpu/drm/amd/amdgpu/psp_v3_1.c      |   2 +
>  drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c      |   4 +
>  drivers/gpu/drm/amd/amdgpu/vce_v4_0.c      |   2 +
>  drivers/gpu/drm/amd/amdgpu/vf_error.c      | 210
> +++++++++++++++++++++++++++++
>  drivers/gpu/drm/amd/amdgpu/vf_error.h      | 120 +++++++++++++++++
>  24 files changed, 446 insertions(+), 26 deletions(-)
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/vf_error.c
>  create mode 100644 drivers/gpu/drm/amd/amdgpu/vf_error.h
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile
> b/drivers/gpu/drm/amd/amdgpu/Makefile
> index 2062127b03a8..0e45a5c7b0f0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/Makefile
> +++ b/drivers/gpu/drm/amd/amdgpu/Makefile
> @@ -31,7 +31,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
>  	amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
>  	amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
>  	amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o
> amdgpu_atomfirmware.o \
> -	amdgpu_queue_mgr.o
> +	amdgpu_queue_mgr.o vf_error.o
> 
>  # add asic specific block
>  amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o
> kv_dpm.o \
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
> index 0e512fa1e9ae..ac2062106ae0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
> @@ -34,6 +34,7 @@
>  #include "amdgpu_acp.h"
> 
>  #include "acp_gfx_if.h"
> +#include "vf_error.h"
> 
>  #define ACP_TILE_ON_MASK                0x03
>  #define ACP_TILE_OFF_MASK               0x02
> @@ -395,6 +396,7 @@ static int acp_hw_init(void *handle)
>  			r = pm_genpd_add_device(&adev->acp.acp_genpd-
> >gpd, dev);
>  			if (r) {
>  				dev_err(dev, "Failed to add dev to
> genpd\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_ADD_DEV_TO_GENPD_
> FAIL, 0, 0);
>  				return r;
>  			}
>  		}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> index 365e735f6647..b6e6342f322e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
> @@ -31,6 +31,7 @@
> 
>  #include <linux/slab.h>
>  #include <linux/acpi.h>
> +#include "vf_error.h"
>  /*
>   * BIOS.
>   */
> @@ -452,6 +453,7 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
>  		goto success;
> 
>  	DRM_ERROR("Unable to locate a BIOS ROM\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_UNLOCATE_BIOS_RO
> M, 0, 0);
>  	return false;
> 
>  success:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index 91133a132fb6..af886f10f556 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -54,6 +54,7 @@
>  #include "bif/bif_4_1_d.h"
>  #include <linux/pci.h>
>  #include <linux/firmware.h>
> +#include "vf_error.h"
> 
>  MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
>  MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
> @@ -2176,6 +2177,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	r = amdgpu_atombios_init(adev);
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
> 0, 0);
>  		goto failed;
>  	}
> 
> @@ -2186,6 +2188,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	if (amdgpu_vpost_needed(adev)) {
>  		if (!adev->bios) {
>  			dev_err(adev->dev, "no vBIOS found\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VBIOS, 0, 0);
>  			r = -EINVAL;
>  			goto failed;
>  		}
> @@ -2193,6 +2196,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  		r = amdgpu_atom_asic_init(adev-
> >mode_info.atom_context);
>  		if (r) {
>  			dev_err(adev->dev, "gpu post error!\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_POST_ERROR, 0,
> 0);
>  			goto failed;
>  		}
>  	} else {
> @@ -2204,6 +2208,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  		r = amdgpu_atombios_get_clock_info(adev);
>  		if (r) {
>  			dev_err(adev->dev,
> "amdgpu_atombios_get_clock_info failed\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOC
> K_FAIL, 0, 0);
>  			return r;
>  		}
>  		/* init i2c buses */
> @@ -2215,6 +2220,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	r = amdgpu_fence_driver_init(adev);
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_FENCE_INIT_FAIL, 0,
> 0);
>  		goto failed;
>  	}
> 
> @@ -2224,6 +2230,7 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	r = amdgpu_init(adev);
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_init failed\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL,
> 0, 0);
>  		amdgpu_fini(adev);
>  		goto failed;
>  	}
> @@ -2287,12 +2294,15 @@ int amdgpu_device_init(struct amdgpu_device
> *adev,
>  	r = amdgpu_late_init(adev);
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_late_init failed\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_
> FAIL, 0, r);
>  		goto failed;
>  	}
> 
> +	amdgpu_vf_error_trans_all(adev);
>  	return 0;
> 
>  failed:
> +	amdgpu_vf_error_trans_all(adev);
>  	if (runtime)
>  		vga_switcheroo_fini_domain_pm_ops(adev->dev);
>  	return r;
> @@ -3002,6 +3012,7 @@ int amdgpu_gpu_reset(struct amdgpu_device
> *adev)
>  		}
>  	} else {
>  		dev_err(adev->dev, "asic resume failed (%d).\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_ASIC_RESUME_FAIL, 0,
> r);
>  		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
>  			if (adev->rings[i] && adev->rings[i]->sched.thread) {
>  				kthread_unpark(adev->rings[i]-
> >sched.thread);
> @@ -3016,11 +3027,14 @@ int amdgpu_gpu_reset(struct amdgpu_device
> *adev)
>  		drm_helper_resume_force_mode(adev->ddev);
> 
>  	ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev,
> resched);
> -	if (r)
> +	if (r) {
>  		/* bad news, how to tell it to userspace ? */
>  		dev_info(adev->dev, "GPU reset failed\n");
> -	else
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0,
> r);
> +	}
> +	else {
>  		dev_info(adev->dev, "GPU reset successed!\n");
> +	}
> 
>  	return r;
>  }
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> index 3ab4c65ecc8b..1ff65d2a5bee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
> @@ -25,6 +25,7 @@
>  #include "amdgpu.h"
>  #include "amdgpu_ih.h"
>  #include "amdgpu_amdkfd.h"
> +#include "vf_error.h"
> 
>  /**
>   * amdgpu_ih_ring_alloc - allocate memory for the IH ring
> @@ -95,6 +96,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev,
> unsigned ring_size,
>  		r = amdgpu_wb_get(adev, &adev->irq.ih.wptr_offs);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) ih wptr_offs wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL,
> 0, r);
>  			return r;
>  		}
> 
> @@ -102,6 +104,7 @@ int amdgpu_ih_ring_init(struct amdgpu_device
> *adev, unsigned ring_size,
>  		if (r) {
>  			amdgpu_wb_free(adev, adev->irq.ih.wptr_offs);
>  			dev_err(adev->dev, "(%d) ih rptr_offs wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL,
> 0, r);
>  			return r;
>  		}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> index 00ef2fc8c30f..c10977c302c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> @@ -35,6 +35,7 @@
>  #include <linux/slab.h>
>  #include <linux/pm_runtime.h>
>  #include "amdgpu_amdkfd.h"
> +#include "vf_error.h"
> 
>  /**
>   * amdgpu_driver_unload_kms - Main unload function for KMS.
> @@ -144,6 +145,7 @@ int amdgpu_driver_load_kms(struct drm_device
> *dev, unsigned long flags)
>  	r = amdgpu_device_init(adev, dev, dev->pdev, flags);
>  	if (r) {
>  		dev_err(&dev->pdev->dev, "Fatal error during GPU init\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_GPU_INIT_FATAL_FAI
> L, 0, 0);
>  		goto out;
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> index 8ee69652be8c..3c33f4019a6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> @@ -36,7 +36,7 @@
>  #include <drm/drm_cache.h>
>  #include "amdgpu.h"
>  #include "amdgpu_trace.h"
> -
> +#include "vf_error.h"
> 
> 
>  static u64 amdgpu_get_vis_part_size(struct amdgpu_device *adev,
> @@ -246,18 +246,21 @@ int amdgpu_bo_create_kernel(struct
> amdgpu_device *adev,
>  			     NULL, NULL, bo_ptr);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to allocate kernel bo\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_ALLOC_K_FAIL, 0,
> r);
>  		return r;
>  	}
> 
>  	r = amdgpu_bo_reserve(*bo_ptr, false);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to reserve kernel bo\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_RESERVE_FAIL, 0,
> r);
>  		goto error_free;
>  	}
> 
>  	r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
> +		amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_PIN_FAIL,
> 0, r);
>  		goto error_unreserve;
>  	}
> 
> @@ -265,6 +268,7 @@ int amdgpu_bo_create_kernel(struct amdgpu_device
> *adev,
>  		r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) kernel bo map failed\n",
> r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_MAP_FAIL, 0, r);
>  			goto error_unreserve;
>  		}
>  	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index 75165e07b1cd..92e0eadfa4b2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -33,6 +33,7 @@
>  #include <drm/amdgpu_drm.h>
>  #include "amdgpu.h"
>  #include "atom.h"
> +#include "vf_error.h"
> 
>  /*
>   * Rings
> @@ -188,12 +189,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  		r = amdgpu_wb_get_64bit(adev, &ring->rptr_offs);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) ring rptr_offs wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_R_WB_ALLOC_F
> AIL, 0, r);
>  			return r;
>  		}
> 
>  		r = amdgpu_wb_get_64bit(adev, &ring->wptr_offs);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) ring wptr_offs wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_W_WB_ALLOC_
> FAIL, 0, r);
>  			return r;
>  		}
> 
> @@ -201,12 +204,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  		r = amdgpu_wb_get(adev, &ring->rptr_offs);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) ring rptr_offs wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_R_WB_ALLOC_F
> AIL, 0, r);
>  			return r;
>  		}
> 
>  		r = amdgpu_wb_get(adev, &ring->wptr_offs);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) ring wptr_offs wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_W_WB_ALLOC_
> FAIL, 0, r);
>  			return r;
>  		}
> 
> @@ -215,12 +220,14 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  	r = amdgpu_wb_get(adev, &ring->fence_offs);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) ring fence_offs wb alloc failed\n",
> r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_F_WB_ALLOC_F
> AIL, 0, r);
>  		return r;
>  	}
> 
>  	r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc
> failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_C_WB_ALLOC_F
> AIL, 0, r);
>  		return r;
>  	}
>  	ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring-
> >cond_exe_offs * 4);
> @@ -231,6 +238,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
>  	if (r) {
>  		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_INIT_FENCE_FAIL, 0,
> r);
>  		return r;
>  	}
> 
> @@ -249,6 +257,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev,
> struct amdgpu_ring *ring,
>  					    (void **)&ring->ring);
>  		if (r) {
>  			dev_err(adev->dev, "(%d) ring create failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RING_CREATE_FAIL, 0,
> r);
>  			return r;
>  		}
>  		amdgpu_ring_clear_ring(ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> index 5ca75a456ad2..e2a08d7c2a7d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
> @@ -43,6 +43,7 @@
>   */
>  #include <drm/drmP.h>
>  #include "amdgpu.h"
> +#include "vf_error.h"
> 
>  static void amdgpu_sa_bo_remove_locked(struct amdgpu_sa_bo *sa_bo);
>  static void amdgpu_sa_bo_try_free(struct amdgpu_sa_manager
> *sa_manager);
> @@ -67,6 +68,7 @@ int amdgpu_sa_bo_manager_init(struct
> amdgpu_device *adev,
>  			     0, NULL, NULL, &sa_manager->bo);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to allocate bo for
> manager\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_ALLOC_M_FAIL, 0,
> r);
>  		return r;
>  	}
> 
> @@ -99,6 +101,7 @@ int amdgpu_sa_bo_manager_start(struct
> amdgpu_device *adev,
> 
>  	if (sa_manager->bo == NULL) {
>  		dev_err(adev->dev, "no bo for sa manager\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_BO_FOR_SA, 0, 0);
>  		return -EINVAL;
>  	}
> 
> @@ -106,12 +109,14 @@ int amdgpu_sa_bo_manager_start(struct
> amdgpu_device *adev,
>  	r = amdgpu_bo_reserve(sa_manager->bo, false);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to reserve manager bo\n",
> r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_RESERVE_FAIL, 0,
> r);
>  		return r;
>  	}
>  	r = amdgpu_bo_pin(sa_manager->bo, sa_manager->domain,
> &sa_manager->gpu_addr);
>  	if (r) {
>  		amdgpu_bo_unreserve(sa_manager->bo);
>  		dev_err(adev->dev, "(%d) failed to pin manager bo\n", r);
> +		amdgpu_vf_error_put(AMDGIM_ERROR_VF_BO_PIN_FAIL,
> 0, r);
>  		return r;
>  	}
>  	r = amdgpu_bo_kmap(sa_manager->bo, &sa_manager->cpu_ptr);
> @@ -127,6 +132,7 @@ int amdgpu_sa_bo_manager_suspend(struct
> amdgpu_device *adev,
> 
>  	if (sa_manager->bo == NULL) {
>  		dev_err(adev->dev, "no bo for sa manager\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_BO_FOR_SA, 0, 0);
>  		return -EINVAL;
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> index 4f50eeb65855..039b1eefb115 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c
> @@ -27,6 +27,7 @@
>  #include <drm/drmP.h>
>  #include "amdgpu.h"
>  #include "amdgpu_ucode.h"
> +#include "vf_error.h"
> 
>  static void amdgpu_ucode_print_common_hdr(const struct
> common_firmware_header *hdr)
>  {
> @@ -383,12 +384,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device
> *adev)
>  				NULL, NULL, bo);
>  	if (err) {
>  		dev_err(adev->dev, "(%d) Firmware buffer allocate
> failed\n", err);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_ALLOC_FAIL, 0,
> err);
>  		goto failed;
>  	}
> 
>  	err = amdgpu_bo_reserve(*bo, false);
>  	if (err) {
>  		dev_err(adev->dev, "(%d) Firmware buffer reserve
> failed\n", err);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_RESERVE_FAIL, 0,
> err);
>  		goto failed_reserve;
>  	}
> 
> @@ -396,12 +399,14 @@ int amdgpu_ucode_init_bo(struct amdgpu_device
> *adev)
>  				&fw_mc_addr);
>  	if (err) {
>  		dev_err(adev->dev, "(%d) Firmware buffer pin failed\n",
> err);
> +		amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_PIN_FAIL,
> 0, err);
>  		goto failed_pin;
>  	}
> 
>  	err = amdgpu_bo_kmap(*bo, &fw_buf_ptr);
>  	if (err) {
>  		dev_err(adev->dev, "(%d) Firmware buffer kmap failed\n",
> err);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_FW_KMAP_FAIL, 0,
> err);
>  		goto failed_kmap;
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> index 2ca09f111f08..cf41b308f72e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
> @@ -38,6 +38,7 @@
>  #include "amdgpu_uvd.h"
>  #include "cikd.h"
>  #include "uvd/uvd_4_2_d.h"
> +#include "vf_error.h"
> 
>  /* 1 second timeout */
>  #define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)
> @@ -175,6 +176,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device
> *adev)
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_uvd: Can't load firmware
> \"%s\"\n",
>  			fw_name);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NOT_LOAD_FW,
> 0, 0);
>  		return r;
>  	}
> 
> @@ -182,6 +184,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device
> *adev)
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_uvd: Can't validate firmware
> \"%s\"\n",
>  			fw_name);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NOT_VALIDATE_
> FW, 0, 0);
>  		release_firmware(adev->uvd.fw);
>  		adev->uvd.fw = NULL;
>  		return r;
> @@ -226,6 +229,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device
> *adev)
>  				    &adev->uvd.gpu_addr, &adev-
> >uvd.cpu_addr);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_ALLOC_UVD_BO_FAIL,
> 0, 0);
>  		return r;
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> index b692ad402252..a6d9a0bdd762 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
> @@ -34,6 +34,7 @@
>  #include "amdgpu_pm.h"
>  #include "amdgpu_vce.h"
>  #include "cikd.h"
> +#include "vf_error.h"
> 
>  /* 1 second timeout */
>  #define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)
> @@ -142,6 +143,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device
> *adev, unsigned long size)
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_vce: Can't load firmware
> \"%s\"\n",
>  			fw_name);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_NOT_LOAD_FW,
> 0, 0);
>  		return r;
>  	}
> 
> @@ -149,6 +151,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device
> *adev, unsigned long size)
>  	if (r) {
>  		dev_err(adev->dev, "amdgpu_vce: Can't validate firmware
> \"%s\"\n",
>  			fw_name);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_NOT_VALIDATE_
> FW, 0, 0);
>  		release_firmware(adev->vce.fw);
>  		adev->vce.fw = NULL;
>  		return r;
> @@ -170,6 +173,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device
> *adev, unsigned long size)
>  				    &adev->vce.gpu_addr, &adev-
> >vce.cpu_addr);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to allocate VCE bo\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_ALLOC_VCE_BO_FAIL,
> 0, 0);
>  		return r;
>  	}
> 
> @@ -265,6 +269,7 @@ int amdgpu_vce_resume(struct amdgpu_device
> *adev)
>  	r = amdgpu_bo_reserve(adev->vce.vcpu_bo, false);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to reserve VCE bo\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_RESERVE_FAIL, 0,
> r);
>  		return r;
>  	}
> 
> @@ -272,6 +277,7 @@ int amdgpu_vce_resume(struct amdgpu_device
> *adev)
>  	if (r) {
>  		amdgpu_bo_unreserve(adev->vce.vcpu_bo);
>  		dev_err(adev->dev, "(%d) VCE map failed\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_VCE_KMAP_FAIL, 0, r);
>  		return r;
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> index 9e1062edb76e..e5b1baf387c1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h
> @@ -43,6 +43,7 @@ struct amdgpu_virt_ops {
>  	int (*req_full_gpu)(struct amdgpu_device *adev, bool init);
>  	int (*rel_full_gpu)(struct amdgpu_device *adev, bool init);
>  	int (*reset_gpu)(struct amdgpu_device *adev);
> +	void (*trans_msg)(struct amdgpu_device *adev, u32 req, u32 data1,
> u32 data2, u32 data3);
>  };
> 
>  /* GPU virtualization */
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> index d479a627f03e..7d43758b60fb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
> @@ -36,6 +36,7 @@
>  #include "soc15_common.h"
>  #include "clearstate_gfx9.h"
>  #include "v9_structs.h"
> +#include "vf_error.h"
> 
>  #define GFX9_NUM_GFX_RINGS     1
>  #define GFX9_MEC_HPD_SIZE 2048
> @@ -548,6 +549,7 @@ static int gfx_v9_0_init_microcode(struct
> amdgpu_device *adev)
>  		dev_err(adev->dev,
>  			"gfx9: Failed to load firmware \"%s\"\n",
>  			fw_name);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_GFX_LOAD_FW_FAIL,
> 0, 0);
>  		release_firmware(adev->gfx.pfp_fw);
>  		adev->gfx.pfp_fw = NULL;
>  		release_firmware(adev->gfx.me_fw);
> @@ -1081,6 +1083,7 @@ static int gfx_v9_0_ngg_create_buf(struct
> amdgpu_device *adev,
> 
>  	if (size_se < 0) {
>  		dev_err(adev->dev, "Buffer size is invalid: %d\n", size_se);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_BUFL_SIZE_INVALID,
> 0, size_se);
>  		return -EINVAL;
>  	}
>  	size_se = size_se ? size_se : default_size_se;
> @@ -1093,6 +1096,7 @@ static int gfx_v9_0_ngg_create_buf(struct
> amdgpu_device *adev,
>  				    NULL);
>  	if (r) {
>  		dev_err(adev->dev, "(%d) failed to create NGG buffer\n", r);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_BUF_F
> AIL, 0, r);
>  		return r;
>  	}
>  	ngg_buf->bo_size = amdgpu_bo_size(ngg_buf->bo);
> @@ -1137,6 +1141,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device
> *adev)
>  				    64 * 1024);
>  	if (r) {
>  		dev_err(adev->dev, "Failed to create Primitive Buffer\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_PR_BU
> F_FAIL, 0, r);
>  		goto err;
>  	}
> 
> @@ -1146,6 +1151,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device
> *adev)
>  				    256 * 1024);
>  	if (r) {
>  		dev_err(adev->dev, "Failed to create Position Buffer\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_PO_BU
> F_FAIL, 0, r);
>  		goto err;
>  	}
> 
> @@ -1155,6 +1161,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device
> *adev)
>  				    256);
>  	if (r) {
>  		dev_err(adev->dev, "Failed to create Control Sideband
> Buffer\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_CS_BUF
> _FAIL, 0, r);
>  		goto err;
>  	}
> 
> @@ -1167,6 +1174,7 @@ static int gfx_v9_0_ngg_init(struct amdgpu_device
> *adev)
>  				    512 * 1024);
>  	if (r) {
>  		dev_err(adev->dev, "Failed to create Parameter Cache\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NGG_CREATE_PC_BU
> F_FAIL, 0, r);
>  		goto err;
>  	}
> 
> @@ -1349,18 +1357,21 @@ static int gfx_v9_0_sw_init(void *handle)
>  	r = gfx_v9_0_init_microcode(adev);
>  	if (r) {
>  		DRM_ERROR("Failed to load gfx firmware!\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_LOAD_GFX_FIRMWAR
> E_FAIL, 0, 0);
>  		return r;
>  	}
> 
>  	r = gfx_v9_0_rlc_init(adev);
>  	if (r) {
>  		DRM_ERROR("Failed to init rlc BOs!\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_RLC_BO_INIT_FAIL, 0,
> 0);
>  		return r;
>  	}
> 
>  	r = gfx_v9_0_mec_init(adev);
>  	if (r) {
>  		DRM_ERROR("Failed to init MEC BOs!\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_MEC_BO_INIT_FAIL, 0,
> 0);
>  		return r;
>  	}
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> index 175ba5f9691c..91ad658dac13 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
> @@ -36,6 +36,7 @@
>  #include "nbio_v7_0.h"
>  #include "gfxhub_v1_0.h"
>  #include "mmhub_v1_0.h"
> +#include "vf_error.h"
> 
>  #define mmDF_CS_AON0_DramBaseAddress0
> 0x0044
>  #define mmDF_CS_AON0_DramBaseAddress0_BASE_IDX
> 0
> @@ -683,6 +684,7 @@ static int gmc_v9_0_gart_enable(struct
> amdgpu_device *adev)
> 
>  	if (adev->gart.robj == NULL) {
>  		dev_err(adev->dev, "No VRAM object for PCIE GART.\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_NO_VRAM_FOR_GAR
> T, 0, 0);
>  		return -EINVAL;
>  	}
>  	r = amdgpu_gart_table_vram_pin(adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> index bde3ca3c21c1..2812d88a8bdd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
> @@ -72,21 +72,6 @@ static void xgpu_ai_mailbox_set_valid(struct
> amdgpu_device *adev, bool val)
>  		      reg);
>  }
> 
> -static void xgpu_ai_mailbox_trans_msg(struct amdgpu_device *adev,
> -				      enum idh_request req)
> -{
> -	u32 reg;
> -
> -	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> -
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
> -	reg = REG_SET_FIELD(reg,
> BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0,
> -			    MSGBUF_DATA, req);
> -	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0),
> -		      reg);
> -
> -	xgpu_ai_mailbox_set_valid(adev, true);
> -}
> -
>  static int xgpu_ai_mailbox_rcv_msg(struct amdgpu_device *adev,
>  				   enum idh_event event)
>  {
> @@ -154,13 +139,25 @@ static int xgpu_ai_poll_msg(struct amdgpu_device
> *adev, enum idh_event event)
>  	return r;
>  }
> 
> -
> -static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
> -					enum idh_request req)
> -{
> +static void xgpu_ai_mailbox_trans_msg (struct amdgpu_device *adev,
> +	      enum idh_request req, u32 data1, u32 data2, u32 data3) {
> +	u32 reg;
>  	int r;
> 
> -	xgpu_ai_mailbox_trans_msg(adev, req);
> +	reg = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> +
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0));
> +	reg = REG_SET_FIELD(reg,
> BIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0,
> +			    MSGBUF_DATA, req);
> +	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW0),
> +		      reg);
> +	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW1),
> +				data1);
> +	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW2),
> +				data2);
> +	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
> mmBIF_BX_PF0_MAILBOX_MSGBUF_TRN_DW3),
> +				data3);
> +
> +	xgpu_ai_mailbox_set_valid(adev, true);
> 
>  	/* start to poll ack */
>  	r = xgpu_ai_poll_ack(adev);
> @@ -168,6 +165,14 @@ static int xgpu_ai_send_access_requests(struct
> amdgpu_device *adev,
>  		pr_err("Doesn't get ack from pf, continue\n");
> 
>  	xgpu_ai_mailbox_set_valid(adev, false);
> +}
> +
> +static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
> +					enum idh_request req)
> +{
> +	int r;
> +
> +	xgpu_ai_mailbox_trans_msg(adev, req, 0, 0, 0);
> 
>  	/* start to check msg if request is idh_req_gpu_init_access */
>  	if (req == IDH_REQ_GPU_INIT_ACCESS ||
> @@ -342,4 +347,5 @@ const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
>  	.req_full_gpu	= xgpu_ai_request_full_gpu_access,
>  	.rel_full_gpu	= xgpu_ai_release_full_gpu_access,
>  	.reset_gpu = xgpu_ai_request_reset,
> +	.trans_msg = xgpu_ai_mailbox_trans_msg,
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> index 9aefc44d2c34..1e91b9a1c591 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
> @@ -31,7 +31,9 @@ enum idh_request {
>  	IDH_REL_GPU_INIT_ACCESS,
>  	IDH_REQ_GPU_FINI_ACCESS,
>  	IDH_REL_GPU_FINI_ACCESS,
> -	IDH_REQ_GPU_RESET_ACCESS
> +	IDH_REQ_GPU_RESET_ACCESS,
> +
> +	IDH_LOG_VF_ERROR       = 200,
>  };
> 
>  enum idh_event {
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> index 171a658135b5..c25a831f94ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c
> @@ -613,4 +613,5 @@ const struct amdgpu_virt_ops xgpu_vi_virt_ops = {
>  	.req_full_gpu		= xgpu_vi_request_full_gpu_access,
>  	.rel_full_gpu		= xgpu_vi_release_full_gpu_access,
>  	.reset_gpu		= xgpu_vi_request_reset,
> +	.trans_msg		= NULL, /* Does not need to trans VF errors
> to host. */
>  };
> diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
> b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
> index 2db741131bc6..c791d73d2d54 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
> +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.h
> @@ -32,7 +32,9 @@ enum idh_request {
>  	IDH_REL_GPU_INIT_ACCESS,
>  	IDH_REQ_GPU_FINI_ACCESS,
>  	IDH_REL_GPU_FINI_ACCESS,
> -	IDH_REQ_GPU_RESET_ACCESS
> +	IDH_REQ_GPU_RESET_ACCESS,
> +
> +	IDH_LOG_VF_ERROR       = 200,
>  };
> 
>  /* VI mailbox messages data */
> diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> index 58ba3966f070..02a79ef6329c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
> @@ -37,6 +37,7 @@
>  #include "vega10/GC/gc_9_0_offset.h"
>  #include "vega10/SDMA0/sdma0_4_0_offset.h"
>  #include "vega10/NBIO/nbio_6_1_offset.h"
> +#include "vf_error.h"
> 
>  MODULE_FIRMWARE("amdgpu/vega10_sos.bin");
>  MODULE_FIRMWARE("amdgpu/vega10_asd.bin");
> @@ -153,6 +154,7 @@ int psp_v3_1_init_microcode(struct psp_context
> *psp)
>  		dev_err(adev->dev,
>  			"psp v3.1: Failed to load firmware \"%s\"\n",
>  			fw_name);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_PSP_LOAD_FW_FAIL,
> 0, 0);
>  		release_firmware(adev->psp.sos_fw);
>  		adev->psp.sos_fw = NULL;
>  		release_firmware(adev->psp.asd_fw);
> diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> index 987b958368ac..8eef64f16085 100644
> --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
> @@ -39,6 +39,7 @@
>  #include "vega10/HDP/hdp_4_0_offset.h"
>  #include "vega10/MMHUB/mmhub_1_0_offset.h"
>  #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
> +#include "vf_error.h"
> 
>  static void uvd_v7_0_set_ring_funcs(struct amdgpu_device *adev);
>  static void uvd_v7_0_set_enc_ring_funcs(struct amdgpu_device *adev);
> @@ -700,6 +701,7 @@ static int uvd_v7_0_mmsch_start(struct
> amdgpu_device *adev,
> 
>  	if (!loop) {
>  		dev_err(adev->dev, "failed to init MMSCH,
> mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_MMSCH_INIT_FAIL, 0,
> data);
>  		return -EBUSY;
>  	}
>  	WDOORBELL32(adev->uvd.ring_enc[0].doorbell_index, 0);
> @@ -1001,6 +1003,7 @@ static int uvd_v7_0_start(struct amdgpu_device
> *adev)
>  			break;
> 
>  		DRM_ERROR("UVD not responding, trying to reset the
> VCPU!!!\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NORESP_RESET,
> 0, 0);
>  		WREG32_P(SOC15_REG_OFFSET(UVD, 0,
> mmUVD_SOFT_RESET),
> 
> 	UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK,
> 
> 	~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
> @@ -1013,6 +1016,7 @@ static int uvd_v7_0_start(struct amdgpu_device
> *adev)
> 
>  	if (r) {
>  		DRM_ERROR("UVD not responding, giving up!!!\n");
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_UVD_NORESP_GIVEU
> P, 0, 0);
>  		return r;
>  	}
>  	/* enable master interrupt */
> diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> index 1ecd6bb90c1f..5d5aa1cbd140 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
> @@ -38,6 +38,7 @@
>  #include "vega10/VCE/vce_4_0_sh_mask.h"
>  #include "vega10/MMHUB/mmhub_1_0_offset.h"
>  #include "vega10/MMHUB/mmhub_1_0_sh_mask.h"
> +#include "vf_error.h"
> 
>  #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
> 
> @@ -188,6 +189,7 @@ static int vce_v4_0_mmsch_start(struct
> amdgpu_device *adev,
> 
>  	if (!loop) {
>  		dev_err(adev->dev, "failed to init MMSCH,
> mmVCE_MMSCH_VF_MAILBOX_RESP = %x\n", data);
> +
> 	amdgpu_vf_error_put(AMDGIM_ERROR_VF_INIT_MMSCH_FAIL, 0,
> data);
>  		return -EBUSY;
>  	}
>  	WDOORBELL32(adev->vce.ring[0].doorbell_index, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/vf_error.c
> b/drivers/gpu/drm/amd/amdgpu/vf_error.c
> new file mode 100644
> index 000000000000..9ae58f3237d1
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/vf_error.c
> @@ -0,0 +1,210 @@
> +/*
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#include "amdgpu.h"
> +#include "vf_error.h"
> +/* Needs enum IDH_LOG_VF_ERROR, it is defined in both mxgpu_ai.h and
> mxgpu_vi.h. */
> +#include "mxgpu_ai.h"
> +
> +#define AMDGPU_VF_ERROR_ENTRY_SIZE     32
> +
> +/* struct error_entry - amdgpu VF error information. */
> +struct amdgpu_vf_error_buffer {
> +	int read_count;
> +	int write_count;
> +	uint16_t code[AMDGPU_VF_ERROR_ENTRY_SIZE];
> +	uint16_t flags[AMDGPU_VF_ERROR_ENTRY_SIZE];
> +	uint64_t data[AMDGPU_VF_ERROR_ENTRY_SIZE];
> +};
> +
> +struct amdgpu_vf_error_buffer admgpu_vf_errors;
> +
> +#if 0 /* amdgpu driver does not need following code, but we should keep
> them same as the code of AMD GIM driver */

You can probably drop these big chunks of disabled (#if 0) code.  Otherwise someone will likely send a patch to drop them eventually.

> +struct error_text
> +{
> +	uint8_t arg_type;
> +	char* text;
> +};
> +
> +enum error_data_type
> +{
> +	ERROR_DATA_ARG_NONE = 0,  // No error data
> +	ERROR_DATA_ARG_64,        // 64-bit
> +	ERROR_DATA_ARG_32_32,     // 32bit 32bit
> +	ERROR_DATA_ARG_16_16_32,  // 16bit 16bit 32bit
> +};
> +
> +static const struct error_text amdgim_error_vf
> [AMDGIM_ERROR_VF_MAX] =
> +{
> +	/* AMDGIM_ERROR_VF_GPU_INIT_FATAL_FAIL */
> {ERROR_DATA_ARG_NONE, "Fatal error during GPU init."},
> +	/* AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL */
> {ERROR_DATA_ARG_NONE, "amdgpu_atombios_init failed."},
> +	/* AMDGIM_ERROR_VF_UNLOCATE_BIOS_ROM */
> {ERROR_DATA_ARG_NONE, "Unable to locate a BIOS ROM."},
> +	/* AMDGIM_ERROR_VF_NO_VBIOS */
> {ERROR_DATA_ARG_NONE, "no vBIOS found"},
> +	/* AMDGIM_ERROR_VF_GPU_POST_ERROR */
> {ERROR_DATA_ARG_NONE, "gpu post error."},
> +
> +	/* AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL */
> {ERROR_DATA_ARG_NONE, "amdgpu_atombios_get_clock_info failed."},
> +	/* AMDGIM_ERROR_VF_FENCE_INIT_FAIL */
> {ERROR_DATA_ARG_NONE, "amdgpu_fence_driver_init failed."},
> +	/* AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL */
> {ERROR_DATA_ARG_NONE, "amdgpu_init failed."},
> +	/* AMDGIM_ERROR_VF_IB_INIT_FAIL */
> {ERROR_DATA_ARG_64,   "IB initialization failed (%d)."},
> +	/* AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL */
> {ERROR_DATA_ARG_NONE, "amdgpu_late_init failed."},
> +
> +	/* AMDGIM_ERROR_VF_ASIC_RESUME_FAIL */
> {ERROR_DATA_ARG_64,   "asic resume failed (%d)."},
> +	/* AMDGIM_ERROR_VF_GPU_RESET_FAIL */
> {ERROR_DATA_ARG_NONE, "GPU reset failed."},
> +	/* AMDGIM_ERROR_VF_MMSCH_INIT_FAIL */
> {ERROR_DATA_ARG_64,   "failed to init MMSCH,
> mmVCE_MMSCH_VF_MAILBOX_RESP = 0x%x."},
> +	/* AMDGIM_ERROR_VF_UVD_NORESP_GIVEUP */
> {ERROR_DATA_ARG_NONE, "UVD not responding, giving up."},
> +	/* AMDGIM_ERROR_VF_UVD_NORESP_RESET */
> {ERROR_DATA_ARG_NONE, "UVD not responding, trying to reset the
> VCPU."},
> +
> +	/* AMDGIM_ERROR_VF_LOAD_GFX_FIRMWARE_FAIL */
> {ERROR_DATA_ARG_NONE, "Failed to load gfx firmware."},
> +	/* AMDGIM_ERROR_VF_MEC_BO_INIT_FAIL */
> {ERROR_DATA_ARG_NONE, "Failed to init MEC BOs."},
> +	/* AMDGIM_ERROR_VF_ADD_DEV_TO_GENPD_FAIL */
> {ERROR_DATA_ARG_NONE, "Failed to add dev to genpd."},
> +	/* AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL */
> {ERROR_DATA_ARG_64,   "IH wptr_offs wb alloc failed (%d)."},
> +	/* AMDGIM_ERROR_VF_BO_ALLOC_K_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to allocate kernel bo (%d)."},
> +
> +	/* AMDGIM_ERROR_VF_BO_RESERVE_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to reserve kernel bo (%d)."},
> +	/* AMDGIM_ERROR_VF_BO_PIN_FAIL */
> {ERROR_DATA_ARG_64,   "Kernel bo pin failed (%d)."},
> +	/* AMDGIM_ERROR_VF_BO_MAP_FAIL */
> {ERROR_DATA_ARG_64,   "Kernel bo map failed (%d)."},
> +	/* AMDGIM_ERROR_VF_RING_R_WB_ALLOC_FAIL */
> {ERROR_DATA_ARG_64,   "Ring rptr_offs wb alloc failed (%d)."},
> +	/* AMDGIM_ERROR_VF_RING_W_WB_ALLOC_FAIL */
> {ERROR_DATA_ARG_64,   "Ring wptr_offs wb alloc failed (%d)."},
> +
> +	/* AMDGIM_ERROR_VF_RING_F_WB_ALLOC_FAIL */
> {ERROR_DATA_ARG_64,   "Ring fence_offs wb alloc failed (%d)."},
> +	/* AMDGIM_ERROR_VF_RING_C_WB_ALLOC_FAIL */
> {ERROR_DATA_ARG_64,   "Ring cond_exec_polling wb alloc failed (%d)."},
> +	/* AMDGIM_ERROR_VF_INIT_FENCE_FAIL */
> {ERROR_DATA_ARG_64,   "failed initializing fences (%d)."},
> +	/* AMDGIM_ERROR_VF_RING_CREATE_FAIL */
> {ERROR_DATA_ARG_64,   "Ring create failed (%d)."},
> +	/* AMDGIM_ERROR_VF_BO_ALLOC_M_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to allocate bo for manager (%d)."},
> +
> +	/* AMDGIM_ERROR_VF_NO_BO_FOR_SA */
> {ERROR_DATA_ARG_NONE, "No bo for sa manager."},
> +	/* AMDGIM_ERROR_VF_FW_ALLOC_FAIL */
> {ERROR_DATA_ARG_64,   "Firmware buffer allocate failed (%d)."},
> +	/* AMDGIM_ERROR_VF_FW_RESERVE_FAIL */
> {ERROR_DATA_ARG_64,   "Firmware buffer reserve failed (%d)."},
> +	/* AMDGIM_ERROR_VF_FW_PIN_FAIL */
> {ERROR_DATA_ARG_64,   "Firmware buffer pin failed (%d)."},
> +	/* AMDGIM_ERROR_VF_FW_KMAP_FAIL */
> {ERROR_DATA_ARG_64,   "Firmware buffer kmap failed (%d)."},
> +
> +	/* AMDGIM_ERROR_VF_UVD_NOT_LOAD_FW */
> {ERROR_DATA_ARG_NONE, "amdgpu_uvd: Can't load firmware."},
> +	/* AMDGIM_ERROR_VF_UVD_NOT_VALIDATE_FW */
> {ERROR_DATA_ARG_NONE, "amdgpu_uvd: Can't validate firmware."},
> +	/* AMDGIM_ERROR_VF_ALLOC_UVD_BO_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to allocate UVD bo (%d)."},
> +	/* AMDGIM_ERROR_VF_VCE_NOT_LOAD_FW */
> {ERROR_DATA_ARG_NONE, "amdgpu_vce: Can't load firmware."},
> +	/* AMDGIM_ERROR_VF_VCE_NOT_VALIDATE_FW */
> {ERROR_DATA_ARG_NONE, "amdgpu_vce: Can't validate firmware."},
> +
> +	/* AMDGIM_ERROR_VF_ALLOC_VCE_BO_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to allocate VCE bo (%d)."},
> +	/* AMDGIM_ERROR_VF_VCE_RESERVE_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to reserve VCE bo (%d)."},
> +	/* AMDGIM_ERROR_VF_VCE_KMAP_FAIL */
> {ERROR_DATA_ARG_64,   "VCE kmap failed (%d)."},
> +	/* AMDGIM_ERROR_VF_NO_VRAM_FOR_GART */
> {ERROR_DATA_ARG_NONE, "No VRAM object for PCIE GART."},
> +	/* AMDGIM_ERROR_VF_PSP_LOAD_FW_FAIL */
> {ERROR_DATA_ARG_NONE, "PSP: Failed to load firmware."},
> +
> +	/* AMDGIM_ERROR_VF_INIT_MMSCH_FAIL */
> {ERROR_DATA_ARG_64,   "failed to init MMSCH,
> mmVCE_MMSCH_VF_MAILBOX_RESP = %x."},
> +	/* AMDGIM_ERROR_VF_GFX_LOAD_FW_FAIL */
> {ERROR_DATA_ARG_NONE, "gfx: Failed to load firmware."},
> +	/* AMDGIM_ERROR_VF_NGG_CREATE_BUF_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to create NGG buffer (%d)."},
> +	/* AMDGIM_ERROR_VF_NGG_CREATE_PR_BUF_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to create Primitive Buffer (%d)."},
> +	/* AMDGIM_ERROR_VF_NGG_CREATE_PO_BUF_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to create Position Buffer (%d)."},
> +
> +	/* AMDGIM_ERROR_VF_NGG_CREATE_CS_BUF_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to create Control Sideband Buffer (%d)."},
> +	/* AMDGIM_ERROR_VF_NGG_CREATE_PC_BUF_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to create Parameter Cache (%d)."},
> +	/* AMDGIM_ERROR_VF_BUFL_SIZE_INVALID */
> {ERROR_DATA_ARG_64,   "Buffer size is invalid: %d"},
> +	/* AMDGIM_ERROR_VF_RLC_BO_INIT_FAIL */
> {ERROR_DATA_ARG_64,   "Failed to init rlc BOs (%d)"},
> +
> +	/* AMDGIM_ERROR_VF_TEST */                     {ERROR_DATA_ARG_64,
> "This is error log collect test for VF component (test count %llu)."}
> +};
> +
> +int get_vf_error_text (uint32_t error_code, uint64_t error_data, char*
> error_msg, int buf_size)
> +{
> +	int error_catagory = AMDGIM_ERROR_CATAGORY(error_code);
> +	int error_sub_code = AMDGIM_ERROR_SUBCODE(error_code);
> +
> +	if (AMDGIM_ERROR_CATEGORY_VF != error_catagory) {
> +		return 0;
> +	}
> +	if (error_sub_code >= AMDGIM_ERROR_VF_MAX) {
> +		return 0;
> +	}
> +	switch (amdgim_error_vf[error_sub_code].arg_type) {
> +	case ERROR_DATA_ARG_NONE:
> +		snprintf (error_msg, buf_size - 1,
> amdgim_error_vf[error_sub_code].text);
> +		break;
> +	case ERROR_DATA_ARG_64:
> +		snprintf (error_msg, buf_size - 1,
> amdgim_error_vf[error_sub_code].text,
> +				error_data);
> +		break;
> +	case ERROR_DATA_ARG_32_32:
> +		snprintf (error_msg, buf_size - 1,
> amdgim_error_vf[error_sub_code].text,
> +				(uint32_t)(error_data >> 32),
> +				(uint32_t)(error_data & 0xFFFFFFFF));
> +		break;
> +	case ERROR_DATA_ARG_16_16_32:
> +		snprintf (error_msg, buf_size - 1,
> amdgim_error_vf[error_sub_code].text,
> +				(uint16_t)(error_data >> 48),
> +				(uint16_t)((error_data >> 32) & 0xFFFF),
> +				(uint32_t)(error_data & 0xFFFFFFFF));
> +		break;
> +	default:
> +		return 0;
> +		break;
> +	}
> +	return strlen (error_msg);
> +}
> +
> +#endif
> +
> +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags,
> uint64_t error_data)
> +{
> +	int index;
> +	uint16_t error_code =
> AMDGIM_ERROR_CODE(AMDGIM_ERROR_CATEGORY_VF,
> sub_error_code);
> +
> +	index = admgpu_vf_errors.write_count %
> AMDGPU_VF_ERROR_ENTRY_SIZE;
> +	admgpu_vf_errors.code [index] = error_code;
> +	admgpu_vf_errors.flags [index] = error_flags;
> +	admgpu_vf_errors.data [index] = error_data;
> +	admgpu_vf_errors.write_count ++;
> +}
> +
> +
> +void amdgpu_vf_error_trans_all(struct amdgpu_device *adev)
> +{
> +	//u32 pf2vf_flags = 0;

Please use C-style comments (/* */) to comment out code.

> +	u32 data1, data2, data3;
> +	int index;
> +
> +	if ((NULL == adev) || (!amdgpu_sriov_vf(adev)) || (!adev->virt.ops)
> || (!adev->virt.ops->trans_msg)){
> +		return;
> +	}
> +/*
> + 	TODO: Enable these code when pv2vf_info is merged
> +	AMDGPU_FW_VRAM_PF2VF_READ (adev, feature_flags,
> &pf2vf_flags);
> +	if (!(pf2vf_flags & AMDGIM_FEATURE_ERROR_LOG_COLLECT))
> +	{
> +		return;
> +	}

Sorry I missed this before: either move the { onto the same line as the if, or drop the braces, since the body is a single statement.

> +*/
> +	/* The errors are overlay of array, correct read_count as full. */
> +	if (admgpu_vf_errors.write_count - admgpu_vf_errors.read_count >
> AMDGPU_VF_ERROR_ENTRY_SIZE) {
> +		admgpu_vf_errors.read_count =
> admgpu_vf_errors.write_count - AMDGPU_VF_ERROR_ENTRY_SIZE;
> +	}
> +
> +	while (admgpu_vf_errors.read_count <
> admgpu_vf_errors.write_count) {
> +		index =admgpu_vf_errors.read_count %
> AMDGPU_VF_ERROR_ENTRY_SIZE;
> +		data1 = AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX
> (admgpu_vf_errors.code[index], admgpu_vf_errors.flags[index]);
> +		data2 = admgpu_vf_errors.data[index] & 0xFFFFFFFF;
> +		data3 = (admgpu_vf_errors.data[index] >> 32) & 0xFFFFFFFF;
> +
> +		adev->virt.ops->trans_msg(adev, IDH_LOG_VF_ERROR,
> data1, data2, data3);
> +		admgpu_vf_errors.read_count ++;
> +	}
> +}
> diff --git a/drivers/gpu/drm/amd/amdgpu/vf_error.h
> b/drivers/gpu/drm/amd/amdgpu/vf_error.h
> new file mode 100644
> index 000000000000..612ed246aa7c
> --- /dev/null
> +++ b/drivers/gpu/drm/amd/amdgpu/vf_error.h
> @@ -0,0 +1,120 @@
> +/*
> + * Copyright 2017 Advanced Micro Devices, Inc.
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice shall be included in
> + * all copies or substantial portions of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO
> EVENT SHALL
> + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM,
> DAMAGES OR
> + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
> OTHERWISE,
> + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
> THE USE OR
> + * OTHER DEALINGS IN THE SOFTWARE.
> + *
> + */
> +
> +#ifndef __VF_ERROR_H__
> +#define __VF_ERROR_H__
> +
> +#define AMDGIM_ERROR_CODE_FLAGS_TO_MAILBOX(c,f)    (((c & 0xFFFF)
> << 16) | (f & 0xFFFF))
> +#define AMDGIM_ERROR_CODE(t,c)       (((t&0xF)<<12)|(c&0xFFF))
> +#define AMDGIM_ERROR_CATAGORY(c)     ((c>>12) & 0xF)
> +#define AMDGIM_ERROR_SUBCODE(c)      (c&0xFFF)
> +#define AMDGIM_GPU_ERROR_MSG_SIZE         256        /* The length of
> error text should be less than this number. */
> +#define AMDGIM_GPU_ERROR_BUF_SAFE_SIZE
> (AMDGIM_GPU_ERROR_MSG_SIZE + 120) /* The safe length of printing out
> the full error message to buffer. */
> +
> +
> +/* Please keep enum same as AMD GIM driver */
> +enum {

Please use named enums.  Some compilers complain about anonymous enums.

> +	AMDGIM_ERROR_VF_GPU_INIT_FATAL_FAIL = 0,
> +	AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL,
> +	AMDGIM_ERROR_VF_UNLOCATE_BIOS_ROM,
> +	AMDGIM_ERROR_VF_NO_VBIOS,
> +	AMDGIM_ERROR_VF_GPU_POST_ERROR,
> +
> +	AMDGIM_ERROR_VF_ATOMBIOS_GET_CLOCK_FAIL,
> +	AMDGIM_ERROR_VF_FENCE_INIT_FAIL,
> +	AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL,
> +	AMDGIM_ERROR_VF_IB_INIT_FAIL,
> +	AMDGIM_ERROR_VF_AMDGPU_LATE_INIT_FAIL,
> +
> +	AMDGIM_ERROR_VF_ASIC_RESUME_FAIL,
> +	AMDGIM_ERROR_VF_GPU_RESET_FAIL,
> +	AMDGIM_ERROR_VF_MMSCH_INIT_FAIL,
> +	AMDGIM_ERROR_VF_UVD_NORESP_GIVEUP,
> +	AMDGIM_ERROR_VF_UVD_NORESP_RESET,
> +
> +	AMDGIM_ERROR_VF_LOAD_GFX_FIRMWARE_FAIL,
> +	AMDGIM_ERROR_VF_MEC_BO_INIT_FAIL,
> +	AMDGIM_ERROR_VF_ADD_DEV_TO_GENPD_FAIL,
> +	AMDGIM_ERROR_VF_IH_WB_ALLOC_FAIL,
> +	AMDGIM_ERROR_VF_BO_ALLOC_K_FAIL,
> +
> +	AMDGIM_ERROR_VF_BO_RESERVE_FAIL,
> +	AMDGIM_ERROR_VF_BO_PIN_FAIL,
> +	AMDGIM_ERROR_VF_BO_MAP_FAIL,
> +	AMDGIM_ERROR_VF_RING_R_WB_ALLOC_FAIL,
> +	AMDGIM_ERROR_VF_RING_W_WB_ALLOC_FAIL,
> +
> +	AMDGIM_ERROR_VF_RING_F_WB_ALLOC_FAIL,
> +	AMDGIM_ERROR_VF_RING_C_WB_ALLOC_FAIL,
> +	AMDGIM_ERROR_VF_INIT_FENCE_FAIL,
> +	AMDGIM_ERROR_VF_RING_CREATE_FAIL,
> +	AMDGIM_ERROR_VF_BO_ALLOC_M_FAIL,
> +
> +	AMDGIM_ERROR_VF_NO_BO_FOR_SA,
> +	AMDGIM_ERROR_VF_FW_ALLOC_FAIL,
> +	AMDGIM_ERROR_VF_FW_RESERVE_FAIL,
> +	AMDGIM_ERROR_VF_FW_PIN_FAIL,
> +	AMDGIM_ERROR_VF_FW_KMAP_FAIL,
> +
> +	AMDGIM_ERROR_VF_UVD_NOT_LOAD_FW,
> +	AMDGIM_ERROR_VF_UVD_NOT_VALIDATE_FW,
> +	AMDGIM_ERROR_VF_ALLOC_UVD_BO_FAIL,
> +	AMDGIM_ERROR_VF_VCE_NOT_LOAD_FW,
> +	AMDGIM_ERROR_VF_VCE_NOT_VALIDATE_FW,
> +
> +	AMDGIM_ERROR_VF_ALLOC_VCE_BO_FAIL,
> +	AMDGIM_ERROR_VF_VCE_RESERVE_FAIL,
> +	AMDGIM_ERROR_VF_VCE_KMAP_FAIL,
> +	AMDGIM_ERROR_VF_NO_VRAM_FOR_GART,
> +	AMDGIM_ERROR_VF_PSP_LOAD_FW_FAIL,
> +
> +	AMDGIM_ERROR_VF_INIT_MMSCH_FAIL,
> +	AMDGIM_ERROR_VF_GFX_LOAD_FW_FAIL,
> +	AMDGIM_ERROR_VF_NGG_CREATE_BUF_FAIL,
> +	AMDGIM_ERROR_VF_NGG_CREATE_PR_BUF_FAIL,
> +	AMDGIM_ERROR_VF_NGG_CREATE_PO_BUF_FAIL,
> +
> +	AMDGIM_ERROR_VF_NGG_CREATE_CS_BUF_FAIL,
> +	AMDGIM_ERROR_VF_NGG_CREATE_PC_BUF_FAIL,
> +	AMDGIM_ERROR_VF_BUFL_SIZE_INVALID,
> +	AMDGIM_ERROR_VF_RLC_BO_INIT_FAIL,
> +
> +	AMDGIM_ERROR_VF_TEST,
> +	AMDGIM_ERROR_VF_MAX
> +};
> +
> +enum {

Same here.

> +    AMDGIM_ERROR_CATEGORY_NON_USED = 0,
> +    AMDGIM_ERROR_CATEGORY_GIM,
> +    AMDGIM_ERROR_CATEGORY_PF,
> +    AMDGIM_ERROR_CATEGORY_VF,
> +    AMDGIM_ERROR_CATEGORY_VBIOS,
> +    AMDGIM_ERROR_CATEGORY_MONITOR,
> +
> +    AMDGIM_ERROR_CATEGORY_MAX
> +};
> +
> +void amdgpu_vf_error_put(uint16_t sub_error_code, uint16_t error_flags,
> uint64_t error_data);
> +void amdgpu_vf_error_trans_all (struct amdgpu_device *adev);
> +
> +#endif /* __VF_ERROR_H__ */
> --
> 2.11.0



More information about the amd-gfx mailing list