[PATCH] drm/amdgpu: reserve GDS resources on the gfx ring for gfx10
Marek Olšák
maraeo at gmail.com
Wed Jul 17 00:06:06 UTC 2019
From: Marek Olšák <marek.olsak at amd.com>
Hopefully we'll only ever use one gfx ring, because otherwise we'd need a
separate GDS buffer for each gfx ring.
This is a workaround to ensure the stability of transform feedback: without
it, shaders hang waiting for a GDS instruction (ds_sub, not ds_ordered_count).
The disadvantage is that compute IBs might get a different VMID, because
gfx now always has GDS attached and compute doesn't.
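
Background for the VMID note: the VMID manager only reuses an idle VMID
for a job whose GDS/GWS/OA ranges match the ones cached for that VMID,
and with this patch gfx jobs always carry a GDS range while compute jobs
usually carry none. A rough sketch of that check; struct and field names
are from my reading of amdgpu_ids.c and should be treated as approximate:

        /* Sketch only, not part of this patch: a VMID is reused only if
         * its cached GDS/GWS/OA ranges match what the job was built with. */
        static bool vmid_gds_compatible(struct amdgpu_vmid *id,
                                        struct amdgpu_job *job)
        {
                return id->gds_base == job->gds_base &&
                       id->gds_size == job->gds_size &&
                       id->gws_base == job->gws_base &&
                       id->gws_size == job->gws_size &&
                       id->oa_base == job->oa_base &&
                       id->oa_size == job->oa_size;
        }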
Signed-off-by: Marek Olšák <marek.olsak at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 10 ++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h | 6 ++++++
drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 20 ++++++++++++++++++++
4 files changed, 37 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 4b514a44184c..cbd55d061b72 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -456,20 +456,21 @@ struct amdgpu_cs_parser {
        struct drm_file *filp;
        struct amdgpu_ctx *ctx;

        /* chunks */
        unsigned nchunks;
        struct amdgpu_cs_chunk *chunks;

        /* scheduler job object */
        struct amdgpu_job *job;
        struct drm_sched_entity *entity;
+       unsigned hw_ip;

        /* buffer objects */
        struct ww_acquire_ctx ticket;
        struct amdgpu_bo_list *bo_list;
        struct amdgpu_mn *mn;
        struct amdgpu_bo_list_entry vm_pd;
        struct list_head validated;
        struct dma_fence *fence;
        uint64_t bytes_moved_threshold;
        uint64_t bytes_moved_vis_threshold;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index c691df6f7a57..9125cd69a124 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -678,20 +678,28 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
        if (r)
                goto error_validate;

        amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
                                     p->bytes_moved_vis);

        gds = p->bo_list->gds_obj;
        gws = p->bo_list->gws_obj;
        oa = p->bo_list->oa_obj;

+       if (p->hw_ip == AMDGPU_HW_IP_GFX) {
+               /* Only gfx10 allocates these. */
+               if (!gds)
+                       gds = p->adev->gds.gds_gfx_bo;
+               if (!oa)
+                       oa = p->adev->gds.oa_gfx_bo;
+       }
+
        amdgpu_bo_list_for_each_entry(e, p->bo_list) {
                struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);

                /* Make sure we use the exclusive slot for shared BOs */
                if (bo->prime_shared_count)
                        e->tv.num_shared = 0;
                e->bo_va = amdgpu_vm_bo_find(vm, bo);
        }

        if (gds) {
if (gds) {
@@ -954,20 +962,22 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
                struct drm_amdgpu_cs_chunk_ib *chunk_ib;
                struct drm_sched_entity *entity;

                chunk = &parser->chunks[i];
                ib = &parser->job->ibs[j];
                chunk_ib = (struct drm_amdgpu_cs_chunk_ib *)chunk->kdata;

                if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
                        continue;

+               parser->hw_ip = chunk_ib->ip_type;
+
                if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
                    (amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
                        if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
                                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
                                        ce_preempt++;
                                else
                                        de_preempt++;
                        }

                        /* each GFX command submit allows 0 or 1 IB preemptible for CE & DE */
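
For context, not part of this diff: a few lines below the first hunk,
amdgpu_cs_parser_bos() already converts whatever ended up in gds/oa into
the job's GDS/OA ranges, which is why substituting the reserved BOs above
is sufficient. Roughly (paraphrased from memory; whether the offsets are
in bytes or pages has changed across kernel versions, so check your tree):

        if (gds) {
                p->job->gds_base = amdgpu_bo_gpu_offset(gds);
                p->job->gds_size = amdgpu_bo_size(gds);
        }
        if (oa) {
                p->job->oa_base = amdgpu_bo_gpu_offset(oa);
                p->job->oa_size = amdgpu_bo_size(oa);
        }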
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
index df8a23554831..0943b8e1d97e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h
@@ -26,20 +26,26 @@
struct amdgpu_ring;
struct amdgpu_bo;

struct amdgpu_gds {
        uint32_t gds_size;
        uint32_t gws_size;
        uint32_t oa_size;
        uint32_t gds_compute_max_wave_id;
        uint32_t vgt_gs_max_wave_id;
+
+       /* Reserved GDS and OA for the gfx ring. (gfx10) */
+       uint32_t gds_gfx_reservation_size;
+       uint32_t oa_gfx_reservation_size;
+       struct amdgpu_bo *gds_gfx_bo;
+       struct amdgpu_bo *oa_gfx_bo;
};

struct amdgpu_gds_reg_offset {
        uint32_t mem_base;
        uint32_t mem_size;
        uint32_t gws;
        uint32_t oa;
};

#endif /* __AMDGPU_GDS_H__ */
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
index 618291df659b..3952754c04ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
@@ -1314,20 +1314,33 @@ static int gfx_v10_0_sw_init(void *handle)
        kiq = &adev->gfx.kiq;
        r = amdgpu_gfx_kiq_init_ring(adev, &kiq->ring, &kiq->irq);
        if (r)
                return r;

        r = amdgpu_gfx_mqd_sw_init(adev, sizeof(struct v10_compute_mqd));
        if (r)
                return r;

+       /* allocate reserved GDS resources for transform feedback */
+       r = amdgpu_bo_create_kernel(adev, adev->gds.gds_gfx_reservation_size,
+                                   4, AMDGPU_GEM_DOMAIN_GDS,
+                                   &adev->gds.gds_gfx_bo, NULL, NULL);
+       if (r)
+               return r;
+
+       r = amdgpu_bo_create_kernel(adev, adev->gds.oa_gfx_reservation_size,
+                                   1, AMDGPU_GEM_DOMAIN_OA,
+                                   &adev->gds.oa_gfx_bo, NULL, NULL);
+       if (r)
+               return r;
+
        /* allocate visible FB for rlc auto-loading fw */
        if (adev->firmware.load_type == AMDGPU_FW_LOAD_RLC_BACKDOOR_AUTO) {
                r = gfx_v10_0_rlc_backdoor_autoload_buffer_init(adev);
                if (r)
                        return r;
        }

        adev->gfx.ce_ram_size = F32_CE_PROGRAM_RAM_SIZE;

        gfx_v10_0_gpu_early_init(adev);
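
The two BOs above are tiny, live for the whole device lifetime, and are
paired with the amdgpu_bo_free_kernel() calls added to gfx_v10_0_sw_fini()
below. For reference, the helper's signature as I read it in
amdgpu_object.c (verify against your tree):

        int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
                                    unsigned long size, int align, u32 domain,
                                    struct amdgpu_bo **bo_ptr,
                                    u64 *gpu_addr, void **cpu_addr);

Passing NULL for gpu_addr and cpu_addr skips the address pin and CPU
mapping, which the GDS and OA domains don't need; amdgpu_bo_free_kernel()
also checks for and resets a NULL BO pointer, so the teardown is safe
even on a partially initialized device.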
@@ -1354,20 +1367,23 @@ static void gfx_v10_0_me_fini(struct amdgpu_device *adev)
        amdgpu_bo_free_kernel(&adev->gfx.me.me_fw_obj,
                              &adev->gfx.me.me_fw_gpu_addr,
                              (void **)&adev->gfx.me.me_fw_ptr);
}

static int gfx_v10_0_sw_fini(void *handle)
{
        int i;
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;

+       amdgpu_bo_free_kernel(&adev->gds.gds_gfx_bo, NULL, NULL);
+       amdgpu_bo_free_kernel(&adev->gds.oa_gfx_bo, NULL, NULL);
+
        for (i = 0; i < adev->gfx.num_gfx_rings; i++)
                amdgpu_ring_fini(&adev->gfx.gfx_ring[i]);
        for (i = 0; i < adev->gfx.num_compute_rings; i++)
                amdgpu_ring_fini(&adev->gfx.compute_ring[i]);

        amdgpu_gfx_mqd_sw_fini(adev);
        amdgpu_gfx_kiq_free_ring(&adev->gfx.kiq.ring, &adev->gfx.kiq.irq);
        amdgpu_gfx_kiq_fini(adev);

        gfx_v10_0_pfp_fini(adev);
@@ -5181,20 +5197,24 @@ static void gfx_v10_0_set_gds_init(struct amdgpu_device *adev)
        case CHIP_NAVI10:
        default:
                adev->gds.gds_size = 0x10000;
                adev->gds.gds_compute_max_wave_id = 0x4ff;
                adev->gds.vgt_gs_max_wave_id = 0x3ff;
                break;
        }

        adev->gds.gws_size = 64;
        adev->gds.oa_size = 16;
+
+       /* Reserved for transform feedback. */
+       adev->gds.gds_gfx_reservation_size = 256;
+       adev->gds.oa_gfx_reservation_size = 4;
}

static void gfx_v10_0_set_user_wgp_inactive_bitmap_per_sh(struct amdgpu_device *adev,
                                                          u32 bitmap)
{
        u32 data;

        if (!bitmap)
                return;
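
To put the reservation sizes in perspective: with gds_size = 0x10000 this
sets aside 256 of 65536 GDS bytes (~0.4%) and, with oa_size = 16, 4 of the
16 OA counters (25%) for the gfx ring; userspace GDS/OA allocations should
come out of the remainder of each domain.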
--
2.17.1