[PATCH 10/13] drm/amdkfd: allow split HQD split on per-queue granularity
Andres Rodriguez
andresx7 at gmail.com
Sat Feb 4 04:51:39 UTC 2017
Update the KGD to KFD interface to allow sharing pipes with queue
granularity instead of pipe granularity.
This allows for more interesting pipe/queue splits.
Signed-off-by: Andres Rodriguez <andresx7 at gmail.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 22 ++++-
drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 +
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 100 ++++++++++++++-------
.../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h | 10 +--
.../drm/amd/amdkfd/kfd_device_queue_manager_cik.c | 2 +-
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c | 3 +-
.../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 2 +-
drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 17 ++--
drivers/gpu/drm/radeon/radeon_kfd.c | 21 ++++-
9 files changed, 126 insertions(+), 55 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 3200ff9..8fc5aa3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -95,14 +95,30 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
+ int i;
+ int last_valid_bit;
if (adev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
- .first_compute_pipe = 1,
- .compute_pipe_count = 4 - 1,
+ .num_mec = adev->gfx.mec.num_mec,
+ .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
+ .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe
};
+ /* this is going to have a few of the MSBs set that we need to
+ * clear */
+ bitmap_complement(gpu_resources.queue_bitmap,
+ adev->gfx.mec.queue_bitmap,
+ KGD_MAX_QUEUES);
+
+ /* According to linux/bitmap.h we shouldn't use bitmap_clear if
+ * nbits is not compile time constant */
+ last_valid_bit = adev->gfx.mec.num_mec
+ * adev->gfx.mec.num_pipe_per_mec
+ * adev->gfx.mec.num_queue_per_pipe;
+ for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
+ clear_bit(i, gpu_resources.queue_bitmap);
+
amdgpu_doorbell_get_kfd_info(adev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 3f95f7c..88187bf 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -226,6 +226,10 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
kfd->shared_resources = *gpu_resources;
+ /* We only use the first MEC */
+ if (kfd->shared_resources.num_mec > 1)
+ kfd->shared_resources.num_mec = 1;
+
/* calculate max size of mqds needed for queues */
size = max_num_of_queues_per_device *
kfd->device_info->mqd_size_aligned;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index c064dea..5321d18 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -63,21 +63,44 @@ enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
return KFD_MQD_TYPE_CP;
}
-unsigned int get_first_pipe(struct device_queue_manager *dqm)
+static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
+{
+ int i;
+ int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
+ + pipe * dqm->dev->shared_resources.num_queue_per_pipe;
+
+ /* queue is available for KFD usage if bit is 0 */
+ for (i = 0; i < dqm->dev->shared_resources.num_queue_per_pipe; ++i)
+ if (test_bit(pipe_offset + i,
+ dqm->dev->shared_resources.queue_bitmap))
+ return true;
+ return false;
+}
+
+unsigned int get_mec_num(struct device_queue_manager *dqm)
+{
+ BUG_ON(!dqm || !dqm->dev);
+
+ return dqm->dev->shared_resources.num_mec;
+}
+
+unsigned int get_queues_num(struct device_queue_manager *dqm)
{
BUG_ON(!dqm || !dqm->dev);
- return dqm->dev->shared_resources.first_compute_pipe;
+ return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
+ KGD_MAX_QUEUES);
}
-unsigned int get_pipes_num(struct device_queue_manager *dqm)
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
BUG_ON(!dqm || !dqm->dev);
- return dqm->dev->shared_resources.compute_pipe_count;
+ return dqm->dev->shared_resources.num_queue_per_pipe;
}
-static inline unsigned int get_pipes_num_cpsch(void)
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
- return PIPE_PER_ME_CP_SCHEDULING;
+ BUG_ON(!dqm || !dqm->dev);
+ return dqm->dev->shared_resources.num_pipe_per_mec;
}
void program_sh_mem_settings(struct device_queue_manager *dqm,
@@ -200,12 +223,16 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
set = false;
- for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_num(dqm);
- pipe = ((pipe + 1) % get_pipes_num(dqm)), ++i) {
+ for (pipe = dqm->next_pipe_to_allocate, i = 0; i < get_pipes_per_mec(dqm);
+ pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {
+
+ if (!is_pipe_enabled(dqm, 0, pipe))
+ continue;
+
if (dqm->allocated_queues[pipe] != 0) {
bit = find_first_bit(
(unsigned long *)&dqm->allocated_queues[pipe],
- QUEUES_PER_PIPE);
+ get_queues_per_pipe(dqm));
clear_bit(bit,
(unsigned long *)&dqm->allocated_queues[pipe]);
@@ -222,7 +249,7 @@ static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
pr_debug("kfd: DQM %s hqd slot - pipe (%d) queue(%d)\n",
__func__, q->pipe, q->queue);
/* horizontal hqd allocation */
- dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_num(dqm);
+ dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);
return 0;
}
@@ -469,36 +496,25 @@ set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
vmid);
}
-int init_pipelines(struct device_queue_manager *dqm,
- unsigned int pipes_num, unsigned int first_pipe)
-{
- BUG_ON(!dqm || !dqm->dev);
-
- pr_debug("kfd: In func %s\n", __func__);
-
- return 0;
-}
-
static void init_interrupts(struct device_queue_manager *dqm)
{
unsigned int i;
BUG_ON(dqm == NULL);
- for (i = 0 ; i < get_pipes_num(dqm) ; i++)
- dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd,
- i + get_first_pipe(dqm));
+ for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
+ if (is_pipe_enabled(dqm, 0, i))
+ dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}
static int init_scheduler(struct device_queue_manager *dqm)
{
- int retval;
+ int retval = 0;
BUG_ON(!dqm);
pr_debug("kfd: In %s\n", __func__);
- retval = init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
return retval;
}
@@ -509,21 +525,21 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
BUG_ON(!dqm);
pr_debug("kfd: In func %s num of pipes: %d\n",
- __func__, get_pipes_num(dqm));
+ __func__, get_pipes_per_mec(dqm));
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
dqm->queue_count = dqm->next_pipe_to_allocate = 0;
dqm->sdma_queue_count = 0;
- dqm->allocated_queues = kcalloc(get_pipes_num(dqm),
+ dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
sizeof(unsigned int), GFP_KERNEL);
if (!dqm->allocated_queues) {
mutex_destroy(&dqm->lock);
return -ENOMEM;
}
- for (i = 0; i < get_pipes_num(dqm); i++)
- dqm->allocated_queues[i] = (1 << QUEUES_PER_PIPE) - 1;
+ for (i = 0; i < get_pipes_per_mec(dqm); i++)
+ dqm->allocated_queues[i] = (1 << get_queues_per_pipe(dqm)) - 1;
dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;
@@ -630,18 +646,34 @@ static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
static int set_sched_resources(struct device_queue_manager *dqm)
{
+ int i;
struct scheduling_resources res;
- unsigned int queue_num, queue_mask;
BUG_ON(!dqm);
pr_debug("kfd: In func %s\n", __func__);
- queue_num = get_pipes_num_cpsch() * QUEUES_PER_PIPE;
- queue_mask = (1 << queue_num) - 1;
res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
res.vmid_mask <<= KFD_VMID_START_OFFSET;
- res.queue_mask = queue_mask << (get_first_pipe(dqm) * QUEUES_PER_PIPE);
+
+ /* Avoid touching the internal representation queue_bitmap directly.
+ * Even though doing a simple memcpy might sound tempting, it would
+ * silently break if the implementation of bitmaps is changed */
+ res.queue_mask = 0;
+ for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+ if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
+ continue;
+
+ /* This situation may be hit in the future if a new HW
+ * generation exposes more than 64 queues. If so, the
+ * definition of res.queue_mask needs updating */
+ if (WARN_ON(i > sizeof(res.queue_mask))) {
+ pr_err("Invalid queue enabled by amdgpu: %d\n", i);
+ break;
+ }
+
+ res.queue_mask |= 1 << i;
+ }
res.gws_mask = res.oac_mask = res.gds_heap_base =
res.gds_heap_size = 0;
@@ -660,7 +692,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
BUG_ON(!dqm);
pr_debug("kfd: In func %s num of pipes: %d\n",
- __func__, get_pipes_num_cpsch());
+ __func__, get_pipes_per_mec(dqm));
mutex_init(&dqm->lock);
INIT_LIST_HEAD(&dqm->queues);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index a625b91..66b9615 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -30,8 +30,6 @@
#include "kfd_mqd_manager.h"
#define QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS (500)
-#define QUEUES_PER_PIPE (8)
-#define PIPE_PER_ME_CP_SCHEDULING (3)
#define CIK_VMID_NUM (8)
#define KFD_VMID_START_OFFSET (8)
#define VMID_PER_DEVICE CIK_VMID_NUM
@@ -182,10 +180,10 @@ void device_queue_manager_init_cik(struct device_queue_manager_asic_ops *ops);
void device_queue_manager_init_vi(struct device_queue_manager_asic_ops *ops);
void program_sh_mem_settings(struct device_queue_manager *dqm,
struct qcm_process_device *qpd);
-int init_pipelines(struct device_queue_manager *dqm,
- unsigned int pipes_num, unsigned int first_pipe);
-unsigned int get_first_pipe(struct device_queue_manager *dqm);
-unsigned int get_pipes_num(struct device_queue_manager *dqm);
+unsigned int get_mec_num(struct device_queue_manager *dqm);
+unsigned int get_queues_num(struct device_queue_manager *dqm);
+unsigned int get_queues_per_pipe(struct device_queue_manager *dqm);
+unsigned int get_pipes_per_mec(struct device_queue_manager *dqm);
static inline unsigned int get_sh_mem_bases_32(struct kfd_process_device *pdd)
{
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
index c6f435a..48dc056 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager_cik.c
@@ -151,5 +151,5 @@ static void init_sdma_vm(struct device_queue_manager *dqm, struct queue *q,
static int initialize_cpsch_cik(struct device_queue_manager *dqm)
{
- return init_pipelines(dqm, get_pipes_num(dqm), get_first_pipe(dqm));
+ return 0;
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index ca8c093..7131998 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -65,8 +65,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
/* check if there is over subscription*/
*over_subscription = false;
- if ((process_count > 1) ||
- queue_count > PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE) {
+ if ((process_count > 1) || queue_count > get_queues_num(pm->dqm)) {
*over_subscription = true;
pr_debug("kfd: over subscribed runlist\n");
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index e1fb40b..32cdf2b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -209,7 +209,7 @@ int pqm_create_queue(struct process_queue_manager *pqm,
/* check if there is over subscription */
if ((sched_policy == KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION) &&
((dev->dqm->processes_count >= VMID_PER_DEVICE) ||
- (dev->dqm->queue_count >= PIPE_PER_ME_CP_SCHEDULING * QUEUES_PER_PIPE))) {
+ (dev->dqm->queue_count >= get_queues_num(dev->dqm)))) {
pr_err("kfd: over-subscription is not allowed in radeon_kfd.sched_policy == 1\n");
retval = -EPERM;
goto err_create_queue;
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 67f6d19..91ef148 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -29,10 +29,11 @@
#define KGD_KFD_INTERFACE_H_INCLUDED
#include <linux/types.h>
+#include <linux/bitmap.h>
struct pci_dev;
-#define KFD_INTERFACE_VERSION 1
+#define KFD_INTERFACE_VERSION 2
#define KGD_MAX_QUEUES 128
struct kfd_dev;
@@ -62,11 +63,17 @@ struct kgd2kfd_shared_resources {
/* Bit n == 1 means VMID n is available for KFD. */
unsigned int compute_vmid_bitmap;
- /* Compute pipes are counted starting from MEC0/pipe0 as 0. */
- unsigned int first_compute_pipe;
+ /* number of mec available from the hardware */
+ uint32_t num_mec;
- /* Number of MEC pipes available for KFD. */
- unsigned int compute_pipe_count;
+ /* number of pipes per mec */
+ uint32_t num_pipe_per_mec;
+
+ /* number of queues per pipe */
+ uint32_t num_queue_per_pipe;
+
+ /* Bit n == 1 means Queue n is available for KFD */
+ DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
/* Base address of doorbell aperture. */
phys_addr_t doorbell_physical_address;
diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
index a06e3b1..699fe7f 100644
--- a/drivers/gpu/drm/radeon/radeon_kfd.c
+++ b/drivers/gpu/drm/radeon/radeon_kfd.c
@@ -179,14 +179,29 @@ void radeon_kfd_device_probe(struct radeon_device *rdev)
void radeon_kfd_device_init(struct radeon_device *rdev)
{
+ int i, queue, pipe, mec;
+
if (rdev->kfd) {
struct kgd2kfd_shared_resources gpu_resources = {
.compute_vmid_bitmap = 0xFF00,
-
- .first_compute_pipe = 1,
- .compute_pipe_count = 4 - 1,
+ .num_mec = 1,
+ .num_pipe_per_mec = 4,
+ .num_queue_per_pipe = 8
};
+ bitmap_zero(gpu_resources.queue_bitmap, KGD_MAX_QUEUES);
+
+ for (i = 0; i < KGD_MAX_QUEUES; ++i) {
+ queue = i % gpu_resources.num_queue_per_pipe;
+ pipe = (i / gpu_resources.num_queue_per_pipe)
+ % gpu_resources.num_pipe_per_mec;
+ mec = (i / gpu_resources.num_queue_per_pipe)
+ / gpu_resources.num_pipe_per_mec;
+
+ if (mec == 0 && pipe > 0)
+ set_bit(i, gpu_resources.queue_bitmap);
+ }
+
radeon_doorbell_get_kfd_info(rdev,
&gpu_resources.doorbell_physical_address,
&gpu_resources.doorbell_aperture_size,
--
2.9.3
More information about the amd-gfx
mailing list