[PATCH 11/12] drm/amdgpu: Convert GFX v10.0 to variadic amdgpu_ring_write()
Tvrtko Ursulin
tursulin at igalia.com
Fri Dec 27 11:19:37 UTC 2024
From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
Convert the GFX v10.0 ring helpers to use the variadic
amdgpu_ring_write().
Also apply small cleanups in gfx_v10_0_cp_gfx_start() (replace the
open-coded register copy loop with amdgpu_ring_write_multiple()) and in
gfx_v10_0_ring_emit_ce_meta() and gfx_v10_0_ring_emit_de_meta()
(consolidate the resume and non-resume payload source selection).
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
Cc: Christian König <christian.koenig at amd.com>
Cc: Sunil Khatri <sunil.khatri at amd.com>
---
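Reviewer note (kept below the ---, so it will not land in the commit
message): as the amdgpu_ring.h hunk shows, the variadic
amdgpu_ring_write() macro counts its arguments and dispatches to a
fixed-arity helper, up to amdgpu_ring_write11(). A minimal,
self-contained sketch of that argument-counting pattern follows; the
helper names and the maximum arity here are illustrative only, not the
actual amdgpu implementation:

#include <stdio.h>

struct ring { unsigned int wptr; };

static inline void ring_write1(struct ring *r, unsigned int a)
{
	r->wptr++;
	printf("dw%u: %#x\n", r->wptr, a);
}

static inline void ring_write2(struct ring *r, unsigned int a,
			       unsigned int b)
{
	ring_write1(r, a);
	ring_write1(r, b);
}

static inline void ring_write3(struct ring *r, unsigned int a,
			       unsigned int b, unsigned int c)
{
	ring_write2(r, a, b);
	ring_write1(r, c);
}

/* Pick the helper matching the argument count; the leading ring
 * pointer shifts the selection by one slot. */
#define RING_WRITE_PICK(_1, _2, _3, _4, fn, ...) fn
#define ring_write(...) \
	RING_WRITE_PICK(__VA_ARGS__, ring_write3, ring_write2, \
			ring_write1, NULL)(__VA_ARGS__)

int main(void)
{
	struct ring r = { 0 };

	ring_write(&r, 0x1);		/* expands to ring_write1() */
	ring_write(&r, 0x1, 0x2, 0x3);	/* expands to ring_write3() */

	return 0;
}
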
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 2 +-
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 399 ++++++++++++-----------
2 files changed, 204 insertions(+), 197 deletions(-)
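
The gfx10_kiq_unmap_queues() hunk also replaces three zero writes with
amdgpu_ring_fill(kiq_ring, 0, 3), a helper introduced earlier in this
series. For readers without the earlier patches applied, the assumed
shape of that helper is roughly the sketch below (a guess for
illustration, not a quote of the real code):

static inline void amdgpu_ring_fill(struct amdgpu_ring *ring,
				    uint32_t value, unsigned int count)
{
	/* emit 'count' copies of 'value' to the ring */
	while (count--)
		amdgpu_ring_write(ring, value);
}
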
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4f467864ed09..1b428dda706a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -611,7 +611,7 @@ amdgpu_ring_write11(struct amdgpu_ring *ring,
NULL)(__VA_ARGS__)
static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
- void *src, int count_dw)
+ const void *src, int count_dw)
{
unsigned occupied, chunk1, chunk2;
u32 buf_mask = ring->buf_mask;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 003522c2d902..63fc94c5d989 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -3683,15 +3683,16 @@ static void gfx10_kiq_set_resources(struct amdgpu_ring *kiq_ring, uint64_t queue
/* Cleaner shader MC address */
shader_mc_addr = adev->gfx.cleaner_shader_gpu_addr >> 8;
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_RESOURCES, 6));
- amdgpu_ring_write(kiq_ring, PACKET3_SET_RESOURCES_VMID_MASK(0) |
- PACKET3_SET_RESOURCES_QUEUE_TYPE(0)); /* vmid_mask:0 queue_type:0 (KIQ) */
- amdgpu_ring_write(kiq_ring, lower_32_bits(queue_mask)); /* queue mask lo */
- amdgpu_ring_write(kiq_ring, upper_32_bits(queue_mask)); /* queue mask hi */
- amdgpu_ring_write(kiq_ring, lower_32_bits(shader_mc_addr)); /* cleaner shader addr lo */
- amdgpu_ring_write(kiq_ring, upper_32_bits(shader_mc_addr)); /* cleaner shader addr hi */
- amdgpu_ring_write(kiq_ring, 0); /* oac mask */
- amdgpu_ring_write(kiq_ring, 0); /* gds heap base:0, gds heap size:0 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3(PACKET3_SET_RESOURCES, 6),
+ PACKET3_SET_RESOURCES_VMID_MASK(0) |
+ PACKET3_SET_RESOURCES_QUEUE_TYPE(0), /* vmid_mask:0 queue_type:0 (KIQ) */
+ lower_32_bits(queue_mask), /* queue mask lo */
+ upper_32_bits(queue_mask), /* queue mask hi */
+ lower_32_bits(shader_mc_addr), /* cleaner shader addr lo */
+ upper_32_bits(shader_mc_addr), /* cleaner shader addr hi */
+ 0, /* oac mask */
+ 0); /* gds heap base:0, gds heap size:0 */
}
static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
@@ -3715,10 +3716,9 @@ static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
WARN_ON(1);
}
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_MAP_QUEUES, 5));
- /* Q_sel:0, vmid:0, vidmem: 1, engine:0, num_Q:1*/
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
- PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Queue_Sel */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3(PACKET3_MAP_QUEUES, 5),
+ PACKET3_MAP_QUEUES_QUEUE_SEL(0) | /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_MAP_QUEUES_VMID(0) | /* VMID */
PACKET3_MAP_QUEUES_QUEUE(ring->queue) |
PACKET3_MAP_QUEUES_PIPE(ring->pipe) |
@@ -3726,12 +3726,12 @@ static void gfx10_kiq_map_queues(struct amdgpu_ring *kiq_ring,
PACKET3_MAP_QUEUES_QUEUE_TYPE(0) | /*queue_type: normal compute queue */
PACKET3_MAP_QUEUES_ALLOC_FORMAT(0) | /* alloc format: all_on_one_pipe */
PACKET3_MAP_QUEUES_ENGINE_SEL(eng_sel) |
- PACKET3_MAP_QUEUES_NUM_QUEUES(1)); /* num_queues: must be 1 */
- amdgpu_ring_write(kiq_ring, PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index));
- amdgpu_ring_write(kiq_ring, lower_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(mqd_addr));
- amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
+ PACKET3_MAP_QUEUES_NUM_QUEUES(1), /* num_queues: must be 1 */
+ PACKET3_MAP_QUEUES_DOORBELL_OFFSET(ring->doorbell_index),
+ lower_32_bits(mqd_addr),
+ upper_32_bits(mqd_addr),
+ lower_32_bits(wptr_addr),
+ upper_32_bits(wptr_addr));
}
static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
@@ -3741,23 +3741,21 @@ static void gfx10_kiq_unmap_queues(struct amdgpu_ring *kiq_ring,
{
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4));
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ amdgpu_ring_write(kiq_ring,
+ PACKET3(PACKET3_UNMAP_QUEUES, 4),
PACKET3_UNMAP_QUEUES_ACTION(action) |
PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) |
PACKET3_UNMAP_QUEUES_ENGINE_SEL(eng_sel) |
- PACKET3_UNMAP_QUEUES_NUM_QUEUES(1));
- amdgpu_ring_write(kiq_ring,
- PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
+ PACKET3_UNMAP_QUEUES_NUM_QUEUES(1), /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index));
if (action == PREEMPT_QUEUES_NO_UNMAP) {
- amdgpu_ring_write(kiq_ring, lower_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(gpu_addr));
- amdgpu_ring_write(kiq_ring, seq);
+ amdgpu_ring_write(kiq_ring,
+ lower_32_bits(gpu_addr),
+ upper_32_bits(gpu_addr),
+ seq);
} else {
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
- amdgpu_ring_write(kiq_ring, 0);
+ amdgpu_ring_fill(kiq_ring, 0, 3);
}
}
@@ -3768,18 +3766,17 @@ static void gfx10_kiq_query_status(struct amdgpu_ring *kiq_ring,
{
uint32_t eng_sel = ring->funcs->type == AMDGPU_RING_TYPE_GFX ? 4 : 0;
- amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_QUERY_STATUS, 5));
amdgpu_ring_write(kiq_ring,
+ PACKET3(PACKET3_QUERY_STATUS, 5),
PACKET3_QUERY_STATUS_CONTEXT_ID(0) |
PACKET3_QUERY_STATUS_INTERRUPT_SEL(0) |
- PACKET3_QUERY_STATUS_COMMAND(2));
- amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
+ PACKET3_QUERY_STATUS_COMMAND(2), /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */
PACKET3_QUERY_STATUS_DOORBELL_OFFSET(ring->doorbell_index) |
- PACKET3_QUERY_STATUS_ENG_SEL(eng_sel));
- amdgpu_ring_write(kiq_ring, lower_32_bits(addr));
- amdgpu_ring_write(kiq_ring, upper_32_bits(addr));
- amdgpu_ring_write(kiq_ring, lower_32_bits(seq));
- amdgpu_ring_write(kiq_ring, upper_32_bits(seq));
+ PACKET3_QUERY_STATUS_ENG_SEL(eng_sel),
+ lower_32_bits(addr),
+ upper_32_bits(addr),
+ lower_32_bits(seq),
+ upper_32_bits(seq));
}
static void gfx10_kiq_invalidate_tlbs(struct amdgpu_ring *kiq_ring,
@@ -3918,12 +3915,13 @@ static void gfx_v10_0_init_golden_registers(struct amdgpu_device *adev)
static void gfx_v10_0_write_data_to_reg(struct amdgpu_ring *ring, int eng_sel,
bool wc, uint32_t reg, uint32_t val)
{
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, WRITE_DATA_ENGINE_SEL(eng_sel) |
- WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0));
- amdgpu_ring_write(ring, reg);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, val);
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WRITE_DATA, 3),
+ WRITE_DATA_ENGINE_SEL(eng_sel) |
+ WRITE_DATA_DST_SEL(0) | (wc ? WR_CONFIRM : 0),
+ reg,
+ 0,
+ val);
}
static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
@@ -3931,21 +3929,21 @@ static void gfx_v10_0_wait_reg_mem(struct amdgpu_ring *ring, int eng_sel,
uint32_t addr1, uint32_t ref, uint32_t mask,
uint32_t inv)
{
- amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ if (mem_space)
+ BUG_ON(addr0 & 0x3); /* Dword align */
+
amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WAIT_REG_MEM, 5),
/* memory (1) or register (0) */
(WAIT_REG_MEM_MEM_SPACE(mem_space) |
WAIT_REG_MEM_OPERATION(opt) | /* wait */
WAIT_REG_MEM_FUNCTION(3) | /* equal */
- WAIT_REG_MEM_ENGINE(eng_sel)));
-
- if (mem_space)
- BUG_ON(addr0 & 0x3); /* Dword align */
- amdgpu_ring_write(ring, addr0);
- amdgpu_ring_write(ring, addr1);
- amdgpu_ring_write(ring, ref);
- amdgpu_ring_write(ring, mask);
- amdgpu_ring_write(ring, inv); /* poll interval */
+ WAIT_REG_MEM_ENGINE(eng_sel)),
+ addr0,
+ addr1,
+ ref,
+ mask,
+ inv); /* poll interval */
}
static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
@@ -3964,10 +3962,11 @@ static int gfx_v10_0_ring_test_ring(struct amdgpu_ring *ring)
return r;
}
- amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1));
- amdgpu_ring_write(ring, scratch -
- PACKET3_SET_UCONFIG_REG_START);
- amdgpu_ring_write(ring, 0xDEADBEEF);
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_UCONFIG_REG, 1),
+ scratch - PACKET3_SET_UCONFIG_REG_START,
+ 0xDEADBEEF);
+
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
@@ -6239,8 +6238,8 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
struct amdgpu_ring *ring;
const struct cs_section_def *sect = NULL;
const struct cs_extent_def *ext = NULL;
- int r, i;
int ctx_reg_offset;
+ int r;
/* init the CP */
WREG32_SOC15(GC, 0, mmCP_MAX_CONTEXT,
@@ -6256,43 +6255,46 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
return r;
}
- amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- amdgpu_ring_write(ring, PACKET3_PREAMBLE_BEGIN_CLEAR_STATE);
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_PREAMBLE_CNTL, 0),
+ PACKET3_PREAMBLE_BEGIN_CLEAR_STATE,
- amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- amdgpu_ring_write(ring, 0x80000000);
- amdgpu_ring_write(ring, 0x80000000);
+ PACKET3(PACKET3_CONTEXT_CONTROL, 1),
+ 0x80000000,
+ 0x80000000);
for (sect = gfx10_cs_data; sect->section != NULL; ++sect) {
for (ext = sect->section; ext->extent != NULL; ++ext) {
if (sect->id == SECT_CONTEXT) {
amdgpu_ring_write(ring,
PACKET3(PACKET3_SET_CONTEXT_REG,
- ext->reg_count));
- amdgpu_ring_write(ring, ext->reg_index -
+ ext->reg_count),
+ ext->reg_index -
PACKET3_SET_CONTEXT_REG_START);
- for (i = 0; i < ext->reg_count; i++)
- amdgpu_ring_write(ring, ext->extent[i]);
+ amdgpu_ring_write_multiple(ring, ext->extent,
+ ext->reg_count);
}
}
}
ctx_reg_offset =
SOC15_REG_OFFSET(GC, 0, mmPA_SC_TILE_STEERING_OVERRIDE) - PACKET3_SET_CONTEXT_REG_START;
- amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONTEXT_REG, 1));
- amdgpu_ring_write(ring, ctx_reg_offset);
- amdgpu_ring_write(ring, adev->gfx.config.pa_sc_tile_steering_override);
- amdgpu_ring_write(ring, PACKET3(PACKET3_PREAMBLE_CNTL, 0));
- amdgpu_ring_write(ring, PACKET3_PREAMBLE_END_CLEAR_STATE);
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONTEXT_REG, 1),
+ ctx_reg_offset,
+ adev->gfx.config.pa_sc_tile_steering_override,
- amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
- amdgpu_ring_write(ring, 0);
+ PACKET3(PACKET3_PREAMBLE_CNTL, 0),
+ PACKET3_PREAMBLE_END_CLEAR_STATE,
- amdgpu_ring_write(ring, PACKET3(PACKET3_SET_BASE, 2));
- amdgpu_ring_write(ring, PACKET3_BASE_INDEX(CE_PARTITION_BASE));
- amdgpu_ring_write(ring, 0x8000);
- amdgpu_ring_write(ring, 0x8000);
+ PACKET3(PACKET3_CLEAR_STATE, 0),
+ 0,
+
+ PACKET3(PACKET3_SET_BASE, 2),
+ PACKET3_BASE_INDEX(CE_PARTITION_BASE),
+ 0x8000,
+ 0x8000);
amdgpu_ring_commit(ring);
@@ -6306,9 +6308,7 @@ static int gfx_v10_0_cp_gfx_start(struct amdgpu_device *adev)
return r;
}
- amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0));
- amdgpu_ring_write(ring, 0);
-
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CLEAR_STATE, 0), 0);
amdgpu_ring_commit(ring);
}
return 0;
@@ -8564,6 +8564,8 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+
if (ib->flags & AMDGPU_IB_FLAG_CE)
header = PACKET3(PACKET3_INDIRECT_BUFFER_CNST, 2);
else
@@ -8582,15 +8584,14 @@ static void gfx_v10_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
}
- amdgpu_ring_write(ring, header);
- BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
+ header,
#ifdef __BIG_ENDIAN
- (2 << 0) |
+ (2 << 0) |
#endif
- lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, control);
+ lower_32_bits(ib->gpu_addr),
+ upper_32_bits(ib->gpu_addr),
+ control);
}
static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
@@ -8601,6 +8602,8 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
unsigned int vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
+ BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
+
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
@@ -8612,20 +8615,20 @@ static void gfx_v10_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
* GDS to 0 for this ring (me/pipe).
*/
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
- amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
- amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
- amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_SET_CONFIG_REG, 1),
+ mmGDS_COMPUTE_MAX_WAVE_ID,
+ ring->adev->gds.gds_compute_max_wave_id);
}
- amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
- BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
+ PACKET3(PACKET3_INDIRECT_BUFFER, 2),
#ifdef __BIG_ENDIAN
- (2 << 0) |
+ (2 << 0) |
#endif
- lower_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr));
- amdgpu_ring_write(ring, control);
+ lower_32_bits(ib->gpu_addr),
+ upper_32_bits(ib->gpu_addr),
+ control);
}
static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
@@ -8634,18 +8637,6 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
bool write64bit = flags & AMDGPU_FENCE_FLAG_64BIT;
bool int_sel = flags & AMDGPU_FENCE_FLAG_INT;
- /* RELEASE_MEM - flush caches, send int */
- amdgpu_ring_write(ring, PACKET3(PACKET3_RELEASE_MEM, 6));
- amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_GCR_SEQ |
- PACKET3_RELEASE_MEM_GCR_GL2_WB |
- PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
- PACKET3_RELEASE_MEM_GCR_GLM_WB |
- PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
- PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
- PACKET3_RELEASE_MEM_EVENT_INDEX(5)));
- amdgpu_ring_write(ring, (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
- PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)));
-
/*
* the address should be Qword aligned if 64bit write, Dword
* aligned if only send 32bit data low (discard data high)
@@ -8654,11 +8645,24 @@ static void gfx_v10_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
BUG_ON(addr & 0x7);
else
BUG_ON(addr & 0x3);
- amdgpu_ring_write(ring, lower_32_bits(addr));
- amdgpu_ring_write(ring, upper_32_bits(addr));
- amdgpu_ring_write(ring, lower_32_bits(seq));
- amdgpu_ring_write(ring, upper_32_bits(seq));
- amdgpu_ring_write(ring, 0);
+
+ amdgpu_ring_write(ring,
+ /* RELEASE_MEM - flush caches, send int */
+ PACKET3(PACKET3_RELEASE_MEM, 6),
+ (PACKET3_RELEASE_MEM_GCR_SEQ |
+ PACKET3_RELEASE_MEM_GCR_GL2_WB |
+ PACKET3_RELEASE_MEM_GCR_GLM_INV | /* must be set with GLM_WB */
+ PACKET3_RELEASE_MEM_GCR_GLM_WB |
+ PACKET3_RELEASE_MEM_CACHE_POLICY(3) |
+ PACKET3_RELEASE_MEM_EVENT_TYPE(CACHE_FLUSH_AND_INV_TS_EVENT) |
+ PACKET3_RELEASE_MEM_EVENT_INDEX(5)),
+ (PACKET3_RELEASE_MEM_DATA_SEL(write64bit ? 2 : 1) |
+ PACKET3_RELEASE_MEM_INT_SEL(int_sel ? 2 : 0)),
+ lower_32_bits(addr),
+ upper_32_bits(addr),
+ lower_32_bits(seq),
+ upper_32_bits(seq),
+ 0);
}
static void gfx_v10_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
@@ -8675,8 +8679,8 @@ static void gfx_v10_0_ring_invalidate_tlbs(struct amdgpu_ring *ring,
uint16_t pasid, uint32_t flush_type,
bool all_hub, uint8_t dst_sel)
{
- amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
amdgpu_ring_write(ring,
+ PACKET3(PACKET3_INVALIDATE_TLBS, 0),
PACKET3_INVALIDATE_TLBS_DST_SEL(dst_sel) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(all_hub) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
@@ -8691,8 +8695,7 @@ static void gfx_v10_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
/* compute doesn't have PFP */
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX) {
/* sync PFP to ME, otherwise we might get invalid PFP reads */
- amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0));
- amdgpu_ring_write(ring, 0x0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_PFP_SYNC_ME, 0), 0);
}
}
@@ -8705,40 +8708,42 @@ static void gfx_v10_0_ring_emit_fence_kiq(struct amdgpu_ring *ring, u64 addr,
BUG_ON(flags & AMDGPU_FENCE_FLAG_64BIT);
/* write fence seq to the "addr" */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(5) | WR_CONFIRM));
- amdgpu_ring_write(ring, lower_32_bits(addr));
- amdgpu_ring_write(ring, upper_32_bits(addr));
- amdgpu_ring_write(ring, lower_32_bits(seq));
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WRITE_DATA, 3),
+ (WRITE_DATA_ENGINE_SEL(0) | WRITE_DATA_DST_SEL(5) |
+ WR_CONFIRM),
+ lower_32_bits(addr),
+ upper_32_bits(addr),
+ lower_32_bits(seq));
if (flags & AMDGPU_FENCE_FLAG_INT) {
/* set register to trigger INT */
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
- WRITE_DATA_DST_SEL(0) | WR_CONFIRM));
- amdgpu_ring_write(ring, SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, 0x20000000); /* src_id is 178 */
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WRITE_DATA, 3),
+ (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) |
+ WR_CONFIRM),
+ SOC15_REG_OFFSET(GC, 0, mmCPC_INT_STATUS),
+ 0,
+ 0x20000000); /* src_id is 178 */
}
}
static void gfx_v10_0_ring_emit_sb(struct amdgpu_ring *ring)
{
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0), 0);
}
static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
uint32_t flags)
{
- uint32_t dw2 = 0;
+ uint32_t dw2;
if (ring->adev->gfx.mcbp)
gfx_v10_0_ring_emit_ce_meta(ring,
(!amdgpu_sriov_vf(ring->adev) && flags & AMDGPU_IB_PREEMPTED) ? true : false);
- dw2 |= 0x80000000; /* set load_enable otherwise this package is just NOPs */
+ dw2 = 0x80000000; /* set load_enable otherwise this package is just NOPs */
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
/* set load_global_config & load_global_uconfig */
dw2 |= 0x8001;
@@ -8758,9 +8763,7 @@ static void gfx_v10_0_ring_emit_cntxcntl(struct amdgpu_ring *ring,
dw2 |= 0x10000000;
}
- amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1));
- amdgpu_ring_write(ring, dw2);
- amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_CONTEXT_CONTROL, 1), dw2, 0);
}
static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
@@ -8768,11 +8771,12 @@ static unsigned int gfx_v10_0_ring_emit_init_cond_exec(struct amdgpu_ring *ring,
{
unsigned int ret;
- amdgpu_ring_write(ring, PACKET3(PACKET3_COND_EXEC, 3));
- amdgpu_ring_write(ring, lower_32_bits(addr));
- amdgpu_ring_write(ring, upper_32_bits(addr));
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_COND_EXEC, 3),
+ lower_32_bits(addr),
+ upper_32_bits(addr),
/* discard following DWs if *cond_exec_gpu_addr==0 */
- amdgpu_ring_write(ring, 0);
+ 0);
ret = ring->wptr & ring->buf_mask;
/* patch dummy value later */
amdgpu_ring_write(ring, 0);
@@ -8839,22 +8843,21 @@ static void gfx_v10_0_ring_emit_ce_meta(struct amdgpu_ring *ring, bool resume)
offset = offsetof(struct v10_gfx_meta_data, ce_payload);
ce_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
-
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(2) |
- WRITE_DATA_DST_SEL(8) |
- WR_CONFIRM) |
- WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(ce_payload_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(ce_payload_gpu_addr));
if (resume)
- amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
- sizeof(ce_payload) >> 2);
+ ce_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
else
- amdgpu_ring_write_multiple(ring, (void *)&ce_payload,
- sizeof(ce_payload) >> 2);
+ ce_payload_cpu_addr = (void *)&ce_payload;
+
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WRITE_DATA, cnt),
+ (WRITE_DATA_ENGINE_SEL(2) | WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0),
+ lower_32_bits(ce_payload_gpu_addr),
+ upper_32_bits(ce_payload_gpu_addr));
+
+ amdgpu_ring_write_multiple(ring, ce_payload_cpu_addr,
+ sizeof(ce_payload) >> 2);
}
static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
@@ -8867,7 +8870,10 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
offset = offsetof(struct v10_gfx_meta_data, de_payload);
de_payload_gpu_addr = amdgpu_csa_vaddr(ring->adev) + offset;
- de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ if (resume)
+ de_payload_cpu_addr = adev->virt.csa_cpu_addr + offset;
+ else
+ de_payload_cpu_addr = (void *)&de_payload;
gds_addr = ALIGN(amdgpu_csa_vaddr(ring->adev) +
AMDGPU_CSA_SIZE - adev->gds.gds_size,
@@ -8877,20 +8883,15 @@ static void gfx_v10_0_ring_emit_de_meta(struct amdgpu_ring *ring, bool resume)
de_payload.gds_backup_addrhi = upper_32_bits(gds_addr);
cnt = (sizeof(de_payload) >> 2) + 4 - 2;
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, cnt));
- amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(1) |
- WRITE_DATA_DST_SEL(8) |
- WR_CONFIRM) |
- WRITE_DATA_CACHE_POLICY(0));
- amdgpu_ring_write(ring, lower_32_bits(de_payload_gpu_addr));
- amdgpu_ring_write(ring, upper_32_bits(de_payload_gpu_addr));
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WRITE_DATA, cnt),
+ (WRITE_DATA_ENGINE_SEL(1) | WRITE_DATA_DST_SEL(8) |
+ WR_CONFIRM) | WRITE_DATA_CACHE_POLICY(0),
+ lower_32_bits(de_payload_gpu_addr),
+ upper_32_bits(de_payload_gpu_addr));
- if (resume)
- amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
- sizeof(de_payload) >> 2);
- else
- amdgpu_ring_write_multiple(ring, (void *)&de_payload,
- sizeof(de_payload) >> 2);
+ amdgpu_ring_write_multiple(ring, de_payload_cpu_addr,
+ sizeof(de_payload) >> 2);
}
static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
@@ -8898,31 +8899,32 @@ static void gfx_v10_0_ring_emit_frame_cntl(struct amdgpu_ring *ring, bool start,
{
uint32_t v = secure ? FRAME_TMZ : 0;
- amdgpu_ring_write(ring, PACKET3(PACKET3_FRAME_CONTROL, 0));
- amdgpu_ring_write(ring, v | FRAME_CMD(start ? 0 : 1));
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_FRAME_CONTROL, 0),
+ v | FRAME_CMD(start ? 0 : 1));
}
static void gfx_v10_0_ring_emit_rreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t reg_val_offs)
{
struct amdgpu_device *adev = ring->adev;
+ u64 gpu_addr = adev->wb.gpu_addr + reg_val_offs * 4;
- amdgpu_ring_write(ring, PACKET3(PACKET3_COPY_DATA, 4));
- amdgpu_ring_write(ring, 0 | /* src: register*/
- (5 << 8) | /* dst: memory */
- (1 << 20)); /* write confirm */
- amdgpu_ring_write(ring, reg);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, lower_32_bits(adev->wb.gpu_addr +
- reg_val_offs * 4));
- amdgpu_ring_write(ring, upper_32_bits(adev->wb.gpu_addr +
- reg_val_offs * 4));
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_COPY_DATA, 4),
+ 0 | /* src: register */
+ (5 << 8) | /* dst: memory */
+ (1 << 20), /* write confirm */
+ reg,
+ 0,
+ lower_32_bits(gpu_addr),
+ upper_32_bits(gpu_addr));
}
static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
uint32_t val)
{
- uint32_t cmd = 0;
+ uint32_t cmd;
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
@@ -8935,11 +8937,13 @@ static void gfx_v10_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
cmd = WR_CONFIRM;
break;
}
- amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
- amdgpu_ring_write(ring, cmd);
- amdgpu_ring_write(ring, reg);
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, val);
+
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_WRITE_DATA, 3),
+ cmd,
+ reg,
+ 0,
+ val);
}
static void gfx_v10_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg,
@@ -9416,15 +9420,17 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLK_INV(1) |
PACKET3_ACQUIRE_MEM_GCR_CNTL_GLI_INV(1);
- /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
- amdgpu_ring_write(ring, PACKET3(PACKET3_ACQUIRE_MEM, 6));
- amdgpu_ring_write(ring, 0); /* CP_COHER_CNTL */
- amdgpu_ring_write(ring, 0xffffffff); /* CP_COHER_SIZE */
- amdgpu_ring_write(ring, 0xffffff); /* CP_COHER_SIZE_HI */
- amdgpu_ring_write(ring, 0); /* CP_COHER_BASE */
- amdgpu_ring_write(ring, 0); /* CP_COHER_BASE_HI */
- amdgpu_ring_write(ring, 0x0000000A); /* POLL_INTERVAL */
- amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
+
+ amdgpu_ring_write(ring,
+ /* ACQUIRE_MEM - make one or more surfaces valid for use by the subsequent operations */
+ PACKET3(PACKET3_ACQUIRE_MEM, 6),
+ 0, /* CP_COHER_CNTL */
+ 0xffffffff, /* CP_COHER_SIZE */
+ 0xffffff, /* CP_COHER_SIZE_HI */
+ 0, /* CP_COHER_BASE */
+ 0, /* CP_COHER_BASE_HI */
+ 0x0000000A, /* POLL_INTERVAL */
+ gcr_cntl); /* GCR_CNTL */
}
static void gfx_v10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t num_nop)
@@ -9726,8 +9732,9 @@ static void gfx_v10_ip_dump(struct amdgpu_ip_block *ip_block)
static void gfx_v10_0_ring_emit_cleaner_shader(struct amdgpu_ring *ring)
{
/* Emit the cleaner shader */
- amdgpu_ring_write(ring, PACKET3(PACKET3_RUN_CLEANER_SHADER, 0));
- amdgpu_ring_write(ring, 0); /* RESERVED field, programmed to zero */
+ amdgpu_ring_write(ring,
+ PACKET3(PACKET3_RUN_CLEANER_SHADER, 0),
+ 0); /* RESERVED field, programmed to zero */
}
static const struct amd_ip_funcs gfx_v10_0_ip_funcs = {
--
2.47.1