[PATCH 3/3] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources
Zhao, Yong
Yong.Zhao at amd.com
Thu Jan 31 22:27:28 UTC 2019
We can directly calculate the sdma doorbell index in the process doorbell
pages through the doorbell_index structure in amdgpu_device, so no need
to cache them in kgd2kfd_shared_resources any more, resulting in more
portable code.
Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 55 ++++++-------------
.../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 ++++--
.../gpu/drm/amd/include/kgd_kfd_interface.h | 2 +-
3 files changed, 31 insertions(+), 44 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 13710f34191a..f050adc3f5da 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
- int i, n;
+ int i;
int last_valid_bit;
if (adev->kfd.dev) {
@@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GMC_HOLE_START),
- .drm_render_minor = adev->ddev->render->index
+ .drm_render_minor = adev->ddev->render->index,
+ .sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
+
};
/* this is going to have a few of the MSBs set that we need to
@@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
- if (adev->asic_type < CHIP_VEGA10) {
- kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
- return;
- }
-
- n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
-
- for (i = 0; i < n; i += 2) {
- /* On SOC15 the BIF is involved in routing
- * doorbells using the low 12 bits of the
- * address. Communicate the assignments to
- * KFD. KFD uses two doorbell pages per
- * process in case of 64-bit doorbells so we
- * can use each doorbell assignment twice.
+ if (adev->asic_type >= CHIP_VEGA10) {
+ /* Because of the setting in registers like
+ * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
+ * lower 12 bits of doorbell address for routing, in
+ * order to route the CP queue doorbells to CP engine,
+ * the doorbells allocated to CP queues have to be
+ * outside the range set for SDMA, VCN, and IH blocks
+ * Prior to SOC15, all queues use queue ID to
+ * determine doorbells.
*/
- gpu_resources.sdma_doorbell[0][i] =
- adev->doorbell_index.sdma_engine[0] + (i >> 1);
- gpu_resources.sdma_doorbell[0][i+1] =
- adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
- gpu_resources.sdma_doorbell[1][i] =
- adev->doorbell_index.sdma_engine[1] + (i >> 1);
- gpu_resources.sdma_doorbell[1][i+1] =
- adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
+ gpu_resources.reserved_doorbells_start =
+ adev->doorbell_index.sdma_engine[0];
+ gpu_resources.reserved_doorbells_end =
+ adev->doorbell_index.last_non_cp;
}
- /* Because of the setting in registers like
- * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
- * lower 12 bits of doorbell address for routing, in
- * order to route the CP queue doorbells to CP engine,
- * the doorbells allocated to CP queues have to be
- * outside the range set for SDMA, VCN, and IH blocks
- * Prior to SOC15, all queues use queue ID to
- * determine doorbells.
- */
- gpu_resources.reserved_doorbells_start =
- adev->doorbell_index.sdma_engine[0];
- gpu_resources.reserved_doorbells_end =
- adev->doorbell_index.last_non_cp;
-
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 8372556b52eb..81280ce5aa27 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -134,12 +134,20 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
*/
q->doorbell_id = q->properties.queue_id;
} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
- /* For SDMA queues on SOC15, use static doorbell
- * assignments based on the engine and queue.
+ /* For SDMA queues on SOC15 with 8-byte doorbell, use static
+ * doorbell assignments based on the engine and queue id.
+ * The doobell index distance between RLC (2*i) and (2*i+1)
+ * for a SDMA engine is 512.
+ * 512 8-byte doorbell distance (i.e. one page away) ensures
+ * that SDMA RLC (2*i+1) doorbell lies exactly in the doorbell
+ * OFFSET and SIZE set in register BIF_SDMA0_DOORBELL_RANGE.
*/
- q->doorbell_id = dev->shared_resources.sdma_doorbell
- [q->properties.sdma_engine_id]
- [q->properties.sdma_queue_id];
+ unsigned int *idx_offset =
+ dev->shared_resources.sdma_doorbell_idx;
+ q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
+ + (q->properties.sdma_queue_id >> 1)
+ + (q->properties.sdma_queue_id % 2)
+ * KFD_QUEUE_DOORBELL_MIRROR_OFFSET;
} else {
/* For CP queues on SOC15 reserve a free doorbell ID */
unsigned int found;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index b1bf45419d93..3559170f6fb3 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -137,7 +137,7 @@ struct kgd2kfd_shared_resources {
/* Bit n == 1 means Queue n is available for KFD */
DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
- unsigned int sdma_doorbell[2][8];
+ unsigned int *sdma_doorbell_idx;
/* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,
* and VCN
--
2.17.1
More information about the amd-gfx
mailing list