[Mesa-dev] [PATCH 1/7] winsys/amdgpu: report a rejected IB as a lost context

Marek Olšák maraeo at gmail.com
Fri Jan 20 19:07:06 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.c     | 11 +++++++++++
 src/gallium/winsys/amdgpu/drm/amdgpu_cs.h     |  2 ++
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  1 +
 3 files changed, 14 insertions(+)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
index 87246f7..d63ff36 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.c
@@ -171,20 +171,21 @@ static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
    struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx);
    int r;
    struct amdgpu_bo_alloc_request alloc_buffer = {};
    amdgpu_bo_handle buf_handle;
 
    if (!ctx)
       return NULL;
 
    ctx->ws = amdgpu_winsys(ws);
    ctx->refcount = 1;
+   ctx->initial_num_total_rejected_cs = ctx->ws->num_total_rejected_cs;
 
    r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx);
    if (r) {
       fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
       goto error_create;
    }
 
    alloc_buffer.alloc_size = ctx->ws->info.gart_page_size;
    alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size;
    alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;
@@ -220,20 +221,27 @@ static void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
    amdgpu_ctx_unref((struct amdgpu_ctx*)rwctx);
 }
 
 static enum pipe_reset_status
 amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
 {
    struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
    uint32_t result, hangs;
    int r;
 
+   /* Return a failure due to a rejected command submission. */
+   if (ctx->ws->num_total_rejected_cs > ctx->initial_num_total_rejected_cs) {
+      return ctx->num_rejected_cs ? PIPE_GUILTY_CONTEXT_RESET :
+                                    PIPE_INNOCENT_CONTEXT_RESET;
+   }
+
+   /* Return a failure due to a GPU hang. */
    r = amdgpu_cs_query_reset_state(ctx->ctx, &result, &hangs);
    if (r) {
       fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r);
       return PIPE_NO_RESET;
    }
 
    switch (result) {
    case AMDGPU_CTX_GUILTY_RESET:
       return PIPE_GUILTY_CONTEXT_RESET;
    case AMDGPU_CTX_INNOCENT_RESET:
@@ -1034,20 +1042,23 @@ void amdgpu_cs_submit_ib(void *job, int thread_index)
    r = amdgpu_cs_submit(acs->ctx->ctx, 0, &cs->request, 1);
    cs->error_code = r;
    if (r) {
       if (r == -ENOMEM)
          fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
       else
          fprintf(stderr, "amdgpu: The CS has been rejected, "
                  "see dmesg for more information (%i).\n", r);
 
       amdgpu_fence_signalled(cs->fence);
+
+      acs->ctx->num_rejected_cs++;
+      ws->num_total_rejected_cs++;
    } else {
       /* Success. */
       uint64_t *user_fence = NULL;
       if (amdgpu_cs_has_user_fence(cs))
          user_fence = acs->ctx->user_fence_cpu_address_base +
                       cs->request.fence_info.offset;
       amdgpu_fence_submitted(cs->fence, &cs->request, user_fence);
    }
 
    /* Cleanup. */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
index 5f181a5..90b9e83 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_cs.h
@@ -34,20 +34,22 @@
 
 #include "amdgpu_bo.h"
 #include "util/u_memory.h"
 
 struct amdgpu_ctx {
    struct amdgpu_winsys *ws;
    amdgpu_context_handle ctx;
    amdgpu_bo_handle user_fence_bo;
    uint64_t *user_fence_cpu_address_base;
    int refcount;
+   unsigned initial_num_total_rejected_cs; /* ws counter at ctx creation */
+   unsigned num_rejected_cs; /* submissions that failed on this ctx */
 };
 
 struct amdgpu_cs_buffer {
    struct amdgpu_winsys_bo *bo;
    union {
       struct {
          uint64_t priority_usage;
       } real;
       struct {
          uint32_t real_idx; /* index of underlying real BO */
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 2a7900a..c56c342 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
@@ -48,20 +48,21 @@ struct amdgpu_winsys {
    struct radeon_winsys base;
    struct pipe_reference reference;
    struct pb_cache bo_cache;
    struct pb_slabs bo_slabs;
 
    amdgpu_device_handle dev;
 
    pipe_mutex bo_fence_lock;
 
    int num_cs; /* The number of command streams created. */
+   unsigned num_total_rejected_cs;
    uint32_t next_bo_unique_id;
    uint64_t allocated_vram;
    uint64_t allocated_gtt;
    uint64_t mapped_vram;
    uint64_t mapped_gtt;
    uint64_t buffer_wait_time; /* time spent in buffer_wait in ns */
    uint64_t num_gfx_IBs;
    uint64_t num_sdma_IBs;
 
    struct radeon_info info;
-- 
2.7.4



More information about the mesa-dev mailing list