[PATCH] drm/amdgpu: Try using WARN() instead of BUG() to avoid kernel panic

Deucher, Alexander Alexander.Deucher at amd.com
Fri Dec 17 16:39:15 UTC 2021


[Public]

I think these are pretty fundamental errors.  You should never hit them in practice and if you do, I think a BUG is fine.

Alex

________________________________
From: ZhiJie.Zhang <zhangzhijie at loongson.cn>
Sent: Thursday, December 16, 2021 9:38 PM
To: Koenig, Christian <Christian.Koenig at amd.com>; Deucher, Alexander <Alexander.Deucher at amd.com>; amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
Cc: zhangzhijie at loongson.cn <zhangzhijie at loongson.cn>; botton_zhang at 163.com <botton_zhang at 163.com>; airlied at linux.ie <airlied at linux.ie>; daniel at ffwll.ch <daniel at ffwll.ch>; Jack.Zhang1 at amd.com <Jack.Zhang1 at amd.com>
Subject: [PATCH] drm/amdgpu: Try using WARN() instead of BUG() to avoid kernel panic

Signed-off-by: ZhiJie.Zhang <zhangzhijie at loongson.cn>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c    |  4 +--
 drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 31 +++++++++++++++-------
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c      |  5 +++-
 drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c      | 18 +++++++++----
 4 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index f1a050379190..edf2de4cec8c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -76,7 +76,7 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
                 DRM_ERROR("audio endpt register access not implemented.\n");
                 return 0;
         default:
-               BUG();
+               adev->accel_working = false;
         }
         WARN(1, "Invalid indirect register space");
         return 0;
@@ -104,9 +104,9 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
                 DRM_ERROR("audio endpt register access not implemented.\n");
                 return;
         default:
-               BUG();
         }
         WARN(1, "Invalid indirect register space");
+       adev->accel_working = false;
 }

 static uint32_t fw_type_convert(struct cgs_device *cgs_device, uint32_t fw_type)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 188accb71249..b9ecf7f70409 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -488,7 +488,11 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset)

         if (offset < adev->rmmio_size)
                 return (readb(adev->rmmio + offset));
-       BUG();
+
+       WARN(1, "Invalid indirect register space");
+       adev->accel_working = false;
+
+       return 0;
 }

 /*
@@ -513,8 +517,10 @@ void amdgpu_mm_wreg8(struct amdgpu_device *adev, uint32_t offset, uint8_t value)

         if (offset < adev->rmmio_size)
                 writeb(value, adev->rmmio + offset);
-       else
-               BUG();
+       else {
+               WARN(1, "Invalid indirect register space");
+               adev->accel_working = false;
+       }
 }

 /**
@@ -803,7 +809,8 @@ void amdgpu_device_indirect_wreg64(struct amdgpu_device *adev,
 static uint32_t amdgpu_invalid_rreg(struct amdgpu_device *adev, uint32_t reg)
 {
         DRM_ERROR("Invalid callback to read register 0x%04X\n", reg);
-       BUG();
+
+       adev->accel_working = false;
         return 0;
 }

@@ -821,7 +828,8 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
 {
         DRM_ERROR("Invalid callback to write register 0x%04X with 0x%08X\n",
                   reg, v);
-       BUG();
+
+       adev->accel_working = false;
 }

 /**
@@ -837,7 +845,8 @@ static void amdgpu_invalid_wreg(struct amdgpu_device *adev, uint32_t reg, uint32
 static uint64_t amdgpu_invalid_rreg64(struct amdgpu_device *adev, uint32_t reg)
 {
         DRM_ERROR("Invalid callback to read 64 bit register 0x%04X\n", reg);
-       BUG();
+
+       adev->accel_working = false;
         return 0;
 }

@@ -855,7 +864,8 @@ static void amdgpu_invalid_wreg64(struct amdgpu_device *adev, uint32_t reg, uint
 {
         DRM_ERROR("Invalid callback to write 64 bit register 0x%04X with 0x%08llX\n",
                   reg, v);
-       BUG();
+
+       adev->accel_working = false;
 }

 /**
@@ -874,7 +884,9 @@ static uint32_t amdgpu_block_invalid_rreg(struct amdgpu_device *adev,
 {
         DRM_ERROR("Invalid callback to read register 0x%04X in block 0x%04X\n",
                   reg, block);
-       BUG();
+
+       adev->accel_working = false;
+
         return 0;
 }

@@ -895,7 +907,8 @@ static void amdgpu_block_invalid_wreg(struct amdgpu_device *adev,
 {
         DRM_ERROR("Invalid block callback to write register 0x%04X in block 0x%04X with 0x%08X\n",
                   reg, block, v);
-       BUG();
+
+       adev->accel_working = false;
 }

 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index c8ebd108548d..957169142e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -129,7 +129,10 @@ static int cik_sdma_init_microcode(struct amdgpu_device *adev)
         case CHIP_MULLINS:
                 chip_name = "mullins";
                 break;
-       default: BUG();
+       default:
+               DRM_ERROR("Invalid CHIPS");
+               err = -EINVAL;
+               goto out;
         }

         for (i = 0; i < adev->sdma.num_instances; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index 6a8dadea40f9..e312a2146f6f 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -334,7 +334,10 @@ static int gfx_v6_0_init_microcode(struct amdgpu_device *adev)
         case CHIP_HAINAN:
                 chip_name = "hainan";
                 break;
-       default: BUG();
+       default:
+               DRM_ERROR("Invalid CHIPS");
+               err = -EINVAL;
+               goto out;
         }

         snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_pfp.bin", chip_name);
@@ -1668,8 +1671,8 @@ static void gfx_v6_0_constants_init(struct amdgpu_device *adev)
                 gb_addr_config = HAINAN_GB_ADDR_CONFIG_GOLDEN;
                 break;
         default:
-               BUG();
-               break;
+               DRM_ERROR("Invalid CHIPS");
+               return;
         }

         WREG32(mmGRBM_CNTL, (0xff << GRBM_CNTL__READ_TIMEOUT__SHIFT));
@@ -2153,7 +2156,10 @@ static u64 gfx_v6_0_ring_get_wptr(struct amdgpu_ring *ring)
         else if (ring == &adev->gfx.compute_ring[1])
                 return RREG32(mmCP_RB2_WPTR);
         else
-               BUG();
+               WARN(1, "Invalid Ring Buffer");
+
+       adev->accel_working = false;
+       return -EINVAL;
 }

 static void gfx_v6_0_ring_set_wptr_gfx(struct amdgpu_ring *ring)
@@ -2175,7 +2181,9 @@ static void gfx_v6_0_ring_set_wptr_compute(struct amdgpu_ring *ring)
                 WREG32(mmCP_RB2_WPTR, lower_32_bits(ring->wptr));
                 (void)RREG32(mmCP_RB2_WPTR);
         } else {
-               BUG();
+               WARN(1, "Invalid Ring Buffer");
+
+               adev->accel_working = false;
         }

 }
--
2.34.0

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20211217/00d34845/attachment-0001.htm>


More information about the amd-gfx mailing list