Mesa (staging/21.3): radv: fix a sync issue on GFX9+ by clearing the upload BO fence

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Nov 15 21:11:00 UTC 2021


Module: Mesa
Branch: staging/21.3
Commit: 59f40822adbf8f16bfb008539bc254e3acdbbcc0
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=59f40822adbf8f16bfb008539bc254e3acdbbcc0

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Fri Nov 12 15:40:43 2021 +0100

radv: fix a sync issue on GFX9+ by clearing the upload BO fence

If the same cmdbuf is submitted more than once, every submission waits
on the same fence value. Fix this by clearing the value when beginning
a new command buffer.

This might fix spurious GPU hangs, especially on GFX9.

Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5401
Cc: 21.3 mesa-stable
Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/13777>
(cherry picked from commit e94a899c0ef411c19d82a072a4aa70aa8d149522)

---

 .pick_status.json                |  2 +-
 src/amd/vulkan/radv_cmd_buffer.c | 51 +++++++++++++++++++++++++---------------
 2 files changed, 33 insertions(+), 20 deletions(-)

diff --git a/.pick_status.json b/.pick_status.json
index 1d81e580314..70f5a30c7cd 100644
--- a/.pick_status.json
+++ b/.pick_status.json
@@ -211,7 +211,7 @@
         "description": "radv: fix a sync issue on GFX9+ by clearing the upload BO fence",
         "nominated": true,
         "nomination_type": 0,
-        "resolution": 0,
+        "resolution": 1,
         "main_sha": null,
         "because_sha": null
     },
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index dd2ed5078fa..c9b6f8135bb 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -389,6 +389,30 @@ radv_queue_family_to_ring(int f)
    }
 }
 
+static void
+radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
+                            unsigned count, const uint32_t *data)
+{
+   struct radeon_cmdbuf *cs = cmd_buffer->cs;
+
+   radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
+
+   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
+   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
+   radeon_emit(cs, va);
+   radeon_emit(cs, va >> 32);
+   radeon_emit_array(cs, data, count);
+}
+
+static void
+radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
+                     unsigned size)
+{
+   uint32_t *zeroes = alloca(size);
+   memset(zeroes, 0, size);
+   radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes);
+}
+
 static void
 radv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -516,12 +540,16 @@ radv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
       cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
       cmd_buffer->gfx9_fence_va += fence_offset;
 
+      radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8);
+
       if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {
          /* Allocate a buffer for the EOP bug on GFX9. */
          radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr);
          memset(fence_ptr, 0, 16 * num_db);
          cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
          cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
+
+         radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db);
       }
    }
 
@@ -624,21 +652,6 @@ radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, c
    return true;
 }
 
-static void
-radv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t va, unsigned count,
-                            const uint32_t *data)
-{
-   struct radeon_cmdbuf *cs = cmd_buffer->cs;
-
-   radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
-
-   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
-   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
-   radeon_emit(cs, va);
-   radeon_emit(cs, va >> 32);
-   radeon_emit_array(cs, data, count);
-}
-
 void
 radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
 {
@@ -651,7 +664,7 @@ radv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
       va += 4;
 
    ++cmd_buffer->state.trace_id;
-   radv_emit_write_data_packet(cmd_buffer, va, 1, &cmd_buffer->state.trace_id);
+   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id);
 
    radeon_check_space(cmd_buffer->device->ws, cs, 2);
 
@@ -712,7 +725,7 @@ radv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pip
    data[0] = pipeline_address;
    data[1] = pipeline_address >> 32;
 
-   radv_emit_write_data_packet(cmd_buffer, va, 2, data);
+   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
 }
 
 static void
@@ -728,7 +741,7 @@ radv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr
    data[0] = vb_ptr;
    data[1] = vb_ptr >> 32;
 
-   radv_emit_write_data_packet(cmd_buffer, va, 2, data);
+   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
 }
 
 void
@@ -761,7 +774,7 @@ radv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bi
       data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
    }
 
-   radv_emit_write_data_packet(cmd_buffer, va, MAX_SETS * 2, data);
+   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data);
 }
 
 struct radv_userdata_info *



More information about the mesa-commit mailing list