[PATCH] drm/amdgpu: revert VRAM lost handling
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Oct 9 08:16:00 UTC 2017
From: Christian König <christian.koenig at amd.com>
Revert "drm/amdgpu: skip all jobs of guilty vm" and
"drm/amdgpu: return -ENODEV to user space when vram is lost v2"
Forcing userspace to restart without a chance to recover in case of a GPU reset
doesn't make much sense and just completely breaks GPU reset handling and makes
the system unuseable after a reset.
Signed-off-by: Christian König <christian.koenig at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ----
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 14 --------------
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 +---
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 5 -----
drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 15 ++++-----------
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 10 ----------
6 files changed, 5 insertions(+), 47 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 71e971f..81dd5ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -772,7 +772,6 @@ struct amdgpu_fpriv {
struct mutex bo_list_lock;
struct idr bo_list_handles;
struct amdgpu_ctx_mgr ctx_mgr;
- u32 vram_lost_counter;
};
/*
@@ -1501,7 +1500,6 @@ struct amdgpu_device {
atomic64_t num_evictions;
atomic64_t num_vram_cpu_page_faults;
atomic_t gpu_reset_counter;
- atomic_t vram_lost_counter;
/* data for buffer migration throttling */
struct {
@@ -1845,8 +1843,6 @@ static inline bool amdgpu_has_atpx(void) { return false; }
extern const struct drm_ioctl_desc amdgpu_ioctls_kms[];
extern const int amdgpu_max_kms_ioctl;
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
- struct amdgpu_fpriv *fpriv);
int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags);
void amdgpu_driver_unload_kms(struct drm_device *dev);
void amdgpu_driver_lastclose_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index ab83dfc..adb0c1c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -1189,7 +1189,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_cs *cs = data;
struct amdgpu_cs_parser parser = {};
bool reserved_buffers = false;
@@ -1197,8 +1196,6 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (!adev->accel_working)
return -EBUSY;
- if (amdgpu_kms_vram_lost(adev, fpriv))
- return -ENODEV;
parser.adev = adev;
parser.filp = filp;
@@ -1257,16 +1254,12 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data,
{
union drm_amdgpu_wait_cs *wait = data;
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout);
struct amdgpu_ring *ring = NULL;
struct amdgpu_ctx *ctx;
struct dma_fence *fence;
long r;
- if (amdgpu_kms_vram_lost(adev, fpriv))
- return -ENODEV;
-
ctx = amdgpu_ctx_get(filp->driver_priv, wait->in.ctx_id);
if (ctx == NULL)
return -EINVAL;
@@ -1335,16 +1328,12 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_fence_to_handle *info = data;
struct dma_fence *fence;
struct drm_syncobj *syncobj;
struct sync_file *sync_file;
int fd, r;
- if (amdgpu_kms_vram_lost(adev, fpriv))
- return -ENODEV;
-
fence = amdgpu_cs_get_fence(adev, filp, &info->in.fence);
if (IS_ERR(fence))
return PTR_ERR(fence);
@@ -1506,15 +1495,12 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_wait_fences *wait = data;
uint32_t fence_count = wait->in.fence_count;
struct drm_amdgpu_fence *fences_user;
struct drm_amdgpu_fence *fences;
int r;
- if (amdgpu_kms_vram_lost(adev, fpriv))
- return -ENODEV;
/* Get the fences from userspace */
fences = kmalloc_array(fence_count, sizeof(struct drm_amdgpu_fence),
GFP_KERNEL);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 24f6e3c..6d641e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2951,10 +2951,8 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
if (r)
goto out;
vram_lost = amdgpu_check_vram_lost(adev);
- if (vram_lost) {
+ if (vram_lost)
DRM_ERROR("VRAM is lost!\n");
- atomic_inc(&adev->vram_lost_counter);
- }
r = amdgpu_ttm_recover_gart(adev);
if (r)
goto out;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index b0d45c8..c69048c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -577,11 +577,6 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
args->operation);
return -EINVAL;
}
- if ((args->operation == AMDGPU_VA_OP_MAP) ||
- (args->operation == AMDGPU_VA_OP_REPLACE)) {
- if (amdgpu_kms_vram_lost(adev, fpriv))
- return -ENODEV;
- }
INIT_LIST_HEAD(&list);
INIT_LIST_HEAD(&duplicates);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index 4510627..63bd372 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -178,7 +178,6 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
{
struct dma_fence *fence = NULL;
struct amdgpu_job *job;
- struct amdgpu_fpriv *fpriv = NULL;
int r;
if (!sched_job) {
@@ -190,16 +189,10 @@ static struct dma_fence *amdgpu_job_run(struct amd_sched_job *sched_job)
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
trace_amdgpu_sched_run_job(job);
- if (job->vm)
- fpriv = container_of(job->vm, struct amdgpu_fpriv, vm);
- /* skip ib schedule when vram is lost */
- if (fpriv && amdgpu_kms_vram_lost(job->adev, fpriv))
- DRM_ERROR("Skip scheduling IBs!\n");
- else {
- r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
- if (r)
- DRM_ERROR("Error scheduling IBs (%d)\n", r);
- }
+ r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job, &fence);
+ if (r)
+ DRM_ERROR("Error scheduling IBs (%d)\n", r);
+
/* if gpu reset, hw fence will be replaced here */
dma_fence_put(job->fence);
job->fence = dma_fence_get(fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 8c6fd56..0fc36b2 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -269,7 +269,6 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
- struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct drm_amdgpu_info *info = data;
struct amdgpu_mode_info *minfo = &adev->mode_info;
void __user *out = (void __user *)(uintptr_t)info->return_pointer;
@@ -282,8 +281,6 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
if (!info->return_size || !info->return_pointer)
return -EINVAL;
- if (amdgpu_kms_vram_lost(adev, fpriv))
- return -ENODEV;
switch (info->query) {
case AMDGPU_INFO_ACCEL_WORKING:
@@ -791,12 +788,6 @@ void amdgpu_driver_lastclose_kms(struct drm_device *dev)
vga_switcheroo_process_delayed_switch();
}
-bool amdgpu_kms_vram_lost(struct amdgpu_device *adev,
- struct amdgpu_fpriv *fpriv)
-{
- return fpriv->vram_lost_counter != atomic_read(&adev->vram_lost_counter);
-}
-
/**
* amdgpu_driver_open_kms - drm callback for open
*
@@ -853,7 +844,6 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
amdgpu_ctx_mgr_init(&fpriv->ctx_mgr);
- fpriv->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
file_priv->driver_priv = fpriv;
out_suspend:
--
2.7.4
More information about the amd-gfx
mailing list