[PATCH 1/2] drm/amdgpu: don't return when sdma ring not ready(v2)
Monk Liu
Monk.Liu at amd.com
Thu Mar 1 07:04:45 UTC 2018
because the SDMA engine may be under GPU reset at this point, its ring->ready
can be false (e.g. if the IB test failed during the GPU reset); just keep
going, and the GPU scheduler will reschedule this job if it fails.
v2:
only treat a not-ready ring as an error when the GPU is not undergoing a GPU reset
handle all places in amdgpu_ttm.c as well
Signed-off-by: Monk Liu <Monk.Liu at amd.com>
Change-Id: I241036e0ba54c3aadc573d507c7bd615b8b978f9
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 17 +++++++++++++----
1 file changed, 13 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e38e6db..9c9e596 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -215,7 +215,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case TTM_PL_VRAM:
if (adev->mman.buffer_funcs &&
adev->mman.buffer_funcs_ring &&
- adev->mman.buffer_funcs_ring->ready == false) {
+ (adev->mman.buffer_funcs_ring->ready == false && !adev->in_gpu_reset) ) {
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
@@ -331,7 +331,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev,
const uint64_t GTT_MAX_BYTES = (AMDGPU_GTT_MAX_TRANSFER_SIZE *
AMDGPU_GPU_PAGE_SIZE);
- if (!ring->ready) {
+ if (!ring->ready && !adev->in_gpu_reset) {
DRM_ERROR("Trying to move memory with ring turned off.\n");
return -EINVAL;
}
@@ -579,7 +579,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
}
if (adev->mman.buffer_funcs == NULL ||
adev->mman.buffer_funcs_ring == NULL ||
- !adev->mman.buffer_funcs_ring->ready) {
+ (!adev->mman.buffer_funcs_ring->ready && !adev->in_gpu_reset)) {
/* use memcpy */
goto memcpy;
}
@@ -1656,6 +1656,11 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
if (direct_submit) {
+ if (!ring->ready) {
+ r = -EINVAL;
+ goto error_free;
+ }
+
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
NULL, fence);
job->fence = dma_fence_get(*fence);
@@ -1663,6 +1668,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
DRM_ERROR("Error scheduling IBs (%d)\n", r);
amdgpu_job_free(job);
} else {
+ if (!ring->ready && !adev->in_gpu_reset) {
+ r = -EINVAL;
+ goto error_free;
+ }
r = amdgpu_job_submit(job, ring, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
@@ -1692,7 +1701,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
struct amdgpu_job *job;
int r;
- if (!ring->ready) {
+ if (!ring->ready && !adev->in_gpu_reset) {
DRM_ERROR("Trying to clear memory with ring turned off.\n");
return -EINVAL;
}
--
2.7.4
More information about the amd-gfx
mailing list