[RFC PATCH v3 1/2] drm/amdgpu: add wave limit functionality for gfx8, 9
Nirmoy Das
nirmoy.das at amd.com
Thu Jan 28 14:35:53 UTC 2021
Wave limiting can be use to load balance high priority
compute jobs along with gfx jobs. When enabled, this will reserve
~75% of waves for compute jobs.
v3: Updated emit_frame_size.
v2: Use amdgpu_ring_emit_wreg() in gfx8.
Updated comments.
Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 20 +++++++++++++++++++-
drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 21 ++++++++++++++++++++-
3 files changed, 40 insertions(+), 2 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 7112137689db..220eec886f23 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -197,6 +197,7 @@ struct amdgpu_ring_funcs {
void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid);
int (*preempt_ib)(struct amdgpu_ring *ring);
void (*emit_mem_sync)(struct amdgpu_ring *ring);
+ void (*emit_wave_limit)(struct amdgpu_ring *ring, bool enable);
};
struct amdgpu_ring {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 37639214cbbb..9140e3f75a26 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -29,6 +29,7 @@
#include "amdgpu.h"
#include "amdgpu_gfx.h"
+#include "amdgpu_ring.h"
#include "vi.h"
#include "vi_structs.h"
#include "vid.h"
@@ -6847,6 +6848,21 @@ static void gfx_v8_0_emit_mem_sync_compute(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x0000000A); /* poll interval */
}
+#define mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT 0x07ffffff
+static void gfx_v8_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+
+
+ /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+ amdgpu_ring_emit_wreg(ring, mmSPI_WCL_PIPE_PERCENT_GFX, val);
+}
+
static const struct amd_ip_funcs gfx_v8_0_ip_funcs = {
.name = "gfx_v8_0",
.early_init = gfx_v8_0_early_init,
@@ -6930,7 +6946,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7 + /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
- 7, /* gfx_v8_0_emit_mem_sync_compute */
+ 7 + /* gfx_v8_0_emit_mem_sync_compute */
+ 5, /* gfx_v8_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
@@ -6944,6 +6961,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.pad_ib = amdgpu_ring_generic_pad_ib,
.emit_wreg = gfx_v8_0_ring_emit_wreg,
.emit_mem_sync = gfx_v8_0_emit_mem_sync_compute,
+ .emit_wave_limit = gfx_v8_0_emit_wave_limit,
};
static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index a896e3d0fcf8..3022aff50ebc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -52,6 +52,7 @@
#include "asic_reg/pwr/pwr_10_0_offset.h"
#include "asic_reg/pwr/pwr_10_0_sh_mask.h"
+#include "asic_reg/gc/gc_9_0_default.h"
#define GFX9_NUM_GFX_RINGS 1
#define GFX9_MEC_HPD_SIZE 4096
@@ -6670,6 +6671,22 @@ static void gfx_v9_0_emit_mem_sync(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
}
+static void gfx_v9_0_emit_wave_limit(struct amdgpu_ring *ring, bool enable)
+{
+ struct amdgpu_device *adev = ring->adev;
+ uint32_t val;
+
+
+ /* mmSPI_WCL_PIPE_PERCENT_GFX is 7 bit multiplier register to limit
+ * number of gfx waves. Setting 5 bit will make sure gfx only gets
+ * around 25% of gpu resources.
+ */
+ val = enable ? 0x1f : mmSPI_WCL_PIPE_PERCENT_GFX_DEFAULT;
+ amdgpu_ring_emit_wreg(ring,
+ SOC15_REG_OFFSET(GC, 0, mmSPI_WCL_PIPE_PERCENT_GFX),
+ val);
+}
+
static const struct amd_ip_funcs gfx_v9_0_ip_funcs = {
.name = "gfx_v9_0",
.early_init = gfx_v9_0_early_init,
@@ -6759,7 +6776,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8 + /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
- 7, /* gfx_v9_0_emit_mem_sync */
+ 7 + /* gfx_v9_0_emit_mem_sync */
+ 5, /* gfx_v9_0_emit_wave_limit for updating mmSPI_WCL_PIPE_PERCENT_GFX register */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
@@ -6775,6 +6793,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
.emit_reg_wait = gfx_v9_0_ring_emit_reg_wait,
.emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait,
.emit_mem_sync = gfx_v9_0_emit_mem_sync,
+ .emit_wave_limit = gfx_v9_0_emit_wave_limit,
};
static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
--
2.30.0
More information about the amd-gfx
mailing list