[PATCH] drm/amdgpu: Add recovery_lock to save bad pages function

Candice Li candice.li at amd.com
Tue Nov 16 08:01:46 UTC 2021


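Hold con->recovery_lock in amdgpu_ras_save_bad_pages() while the number of
unsaved bad pages is computed and the records are written to the EEPROM
table. This fixes a race condition failure seen during UMC UE injection.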

Signed-off-by: Candice Li <candice.li at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 08133de21fdd63..711b5fb26d47d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1931,10 +1931,12 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
 	struct ras_err_handler_data *data;
 	struct amdgpu_ras_eeprom_control *control;
 	int save_count;
+	int ret = 0;
 
 	if (!con || !con->eh_data)
 		return 0;
 
+	mutex_lock(&con->recovery_lock);
 	control = &con->eeprom_control;
 	data = con->eh_data;
 	save_count = data->count - control->ras_num_recs;
@@ -1944,13 +1946,16 @@ int amdgpu_ras_save_bad_pages(struct amdgpu_device *adev)
 					     &data->bps[control->ras_num_recs],
 					     save_count)) {
 			dev_err(adev->dev, "Failed to save EEPROM table data!");
-			return -EIO;
+			ret = -EIO;
+			goto out;
 		}
 
 		dev_info(adev->dev, "Saved %d pages to EEPROM table.\n", save_count);
 	}
 
-	return 0;
+out:
+	mutex_unlock(&con->recovery_lock);
+	return ret;
 }
 
 /*
-- 
2.17.1



More information about the amd-gfx mailing list