[PATCH v2 1/4] drm/amdgpu: set compute queue priority at mqd_init
Nirmoy
nirmodas at amd.com
Fri Feb 28 15:22:27 UTC 2020
Thanks Christian, I will send a updated one soon.
On 2/28/20 3:44 PM, Christian König wrote:
> Am 28.02.20 um 15:39 schrieb Nirmoy Das:
>> We were changing compute ring priority while rings were being used
>> before every job submission which is not recommended. This patch
>> sets compute queue priority at mqd initialization for gfx8, gfx9 and
>> gfx10.
>>
>> Policy: make queue 0 of each pipe as high priority compute queue
>>
>> High/normal priority compute sched lists are generated from set of
>> high/normal
>> priority compute queues. At context creation, entity of compute queue
>> get a sched list from high or normal priority depending on ctx->priority
>>
>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 4 ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c | 40 +++++++++++++++++++-----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 8 +++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h | 13 +++++++-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_job.c | 6 ----
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
>> drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 19 +++++++++++
>> drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c | 23 ++++++++++++--
>> drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c | 20 ++++++++++++
>> 9 files changed, 113 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> index f397ff97b4e4..8304d0c87899 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
>> @@ -1205,7 +1205,6 @@ static int amdgpu_cs_submit(struct
>> amdgpu_cs_parser *p,
>> struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
>> struct drm_sched_entity *entity = p->entity;
>> enum drm_sched_priority priority;
>> - struct amdgpu_ring *ring;
>> struct amdgpu_bo_list_entry *e;
>> struct amdgpu_job *job;
>> uint64_t seq;
>> @@ -1258,9 +1257,6 @@ static int amdgpu_cs_submit(struct
>> amdgpu_cs_parser *p,
>> priority = job->base.s_priority;
>> drm_sched_entity_push_job(&job->base, entity);
>>
>> - ring = to_amdgpu_ring(entity->rq->sched);
>> - amdgpu_ring_priority_get(ring, priority);
>> -
>> amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm);
>>
>> ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> index 94a6c42f29ea..b21771b37300 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
>> @@ -85,8 +85,8 @@ static int amdgpu_ctx_init_entity(struct amdgpu_ctx
>> *ctx, const u32 hw_ip, const
>> num_scheds = 1;
>> break;
>> case AMDGPU_HW_IP_COMPUTE:
>> - scheds = adev->gfx.compute_sched;
>> - num_scheds = adev->gfx.num_compute_sched;
>> + scheds = adev->gfx.compute_prio_sched[priority];
>> + num_scheds = adev->gfx.num_compute_sched[priority];
>> break;
>> case AMDGPU_HW_IP_DMA:
>> scheds = adev->sdma.sdma_sched;
>> @@ -628,20 +628,46 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr
>> *mgr)
>> mutex_destroy(&mgr->lock);
>> }
>>
>> +
>> +static void amdgpu_ctx_init_compute_sched(struct amdgpu_device *adev)
>> +{
>> + int num_compute_sched_normal = 0;
>> + int num_compute_sched_high = AMDGPU_MAX_COMPUTE_RINGS - 1;
>> + int i;
>> +
>> +
>> + for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>> + if (adev->gfx.compute_ring[i].high_priority)
>> + adev->gfx.compute_sched[num_compute_sched_normal++] =
>> + &adev->gfx.compute_ring[i].sched;
>> + else
>> + adev->gfx.compute_sched[num_compute_sched_high--] =
>> + &adev->gfx.compute_ring[i].sched;
>> + }
>> +
>> + for (i = DRM_SCHED_PRIORITY_MIN; i <= DRM_SCHED_PRIORITY_NORMAL;
>> i++) {
>> + adev->gfx.compute_prio_sched[i] = &adev->gfx.compute_sched[0];
>> + adev->gfx.num_compute_sched[i] = num_compute_sched_normal;
>> + }
>> +
>> + for (i = DRM_SCHED_PRIORITY_NORMAL + 1; i <
>> DRM_SCHED_PRIORITY_MAX; i++) {
>> + adev->gfx.compute_prio_sched[i] =
>> + &adev->gfx.compute_sched[num_compute_sched_high - 1];
>> + adev->gfx.num_compute_sched[i] =
>> + adev->gfx.num_compute_rings - num_compute_sched_normal;
>> + }
>> +}
>> +
>> void amdgpu_ctx_init_sched(struct amdgpu_device *adev)
>> {
>> int i, j;
>>
>> + amdgpu_ctx_init_compute_sched(adev);
>> for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
>> adev->gfx.gfx_sched[i] = &adev->gfx.gfx_ring[i].sched;
>> adev->gfx.num_gfx_sched++;
>> }
>>
>> - for (i = 0; i < adev->gfx.num_compute_rings; i++) {
>> - adev->gfx.compute_sched[i] = &adev->gfx.compute_ring[i].sched;
>> - adev->gfx.num_compute_sched++;
>> - }
>> -
>> for (i = 0; i < adev->sdma.num_instances; i++) {
>> adev->sdma.sdma_sched[i] = &adev->sdma.instance[i].ring.sched;
>> adev->sdma.num_sdma_sched++;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 7403588684b3..952725e7243c 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -192,6 +192,14 @@ static bool
>> amdgpu_gfx_is_multipipe_capable(struct amdgpu_device *adev)
>> return adev->gfx.mec.num_mec > 1;
>> }
>>
>> +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device
>> *adev,
>> + int queue)
>> +{
>> + /* Policy: make queue 0 of each pipe as high priority compute
>> queue */
>> + return (queue == 0);
>> +
>> +}
>> +
>> void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
>> {
>> int i, queue, pipe, mec;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> index 37ba05b63b2a..f87b6df67694 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
>> @@ -41,6 +41,14 @@
>> #define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
>> #define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
>>
>> +#define AMDGPU_GFX_PIPE_PRIO_LOW 0
>> +#define AMDGPU_GFX_PIPE_PRIO_NORMAL 1
>> +#define AMDGPU_GFX_PIPE_PRIO_HIGH 2
>> +
>> +#define AMDGPU_GFX_QUEUE_PRIORITY_MINIMUM 0
>> +#define AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM 15
>> +
>> +
>> struct amdgpu_mec {
>> struct amdgpu_bo *hpd_eop_obj;
>> u64 hpd_eop_gpu_addr;
>> @@ -280,8 +288,9 @@ struct amdgpu_gfx {
>> uint32_t num_gfx_sched;
>> unsigned num_gfx_rings;
>> struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS];
>> + struct drm_gpu_scheduler
>> **compute_prio_sched[DRM_SCHED_PRIORITY_MAX];
>
> Mhm, that mixes the middle layer implementation with the hardware
> backend.
>
> Can you instead use an enum for AMDGPU_GFX_PIPE_PRIO_* and a switch to
> map the scheduler priority to the hardware priority?
>
> Alternatively using something like compute_lo, compute, compute_high
> should work as well.
>
> Regards,
> Christian.
>
>> struct drm_gpu_scheduler *compute_sched[AMDGPU_MAX_COMPUTE_RINGS];
>> - uint32_t num_compute_sched;
>> + uint32_t num_compute_sched[DRM_SCHED_PRIORITY_MAX];
>> unsigned num_compute_rings;
>> struct amdgpu_irq_src eop_irq;
>> struct amdgpu_irq_src priv_reg_irq;
>> @@ -363,6 +372,8 @@ void amdgpu_gfx_bit_to_mec_queue(struct
>> amdgpu_device *adev, int bit,
>> int *mec, int *pipe, int *queue);
>> bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
>> int mec,
>> int pipe, int queue);
>> +bool amdgpu_gfx_is_high_priority_compute_queue(struct amdgpu_device
>> *adev,
>> + int queue);
>> int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
>> int pipe, int queue);
>> void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> index d42be880a236..4981e443a884 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
>> @@ -117,12 +117,10 @@ void amdgpu_job_free_resources(struct
>> amdgpu_job *job)
>>
>> static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
>> {
>> - struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
>> struct amdgpu_job *job = to_amdgpu_job(s_job);
>>
>> drm_sched_job_cleanup(s_job);
>>
>> - amdgpu_ring_priority_put(ring, s_job->s_priority);
>> dma_fence_put(job->fence);
>> amdgpu_sync_free(&job->sync);
>> amdgpu_sync_free(&job->sched_sync);
>> @@ -143,7 +141,6 @@ int amdgpu_job_submit(struct amdgpu_job *job,
>> struct drm_sched_entity *entity,
>> void *owner, struct dma_fence **f)
>> {
>> enum drm_sched_priority priority;
>> - struct amdgpu_ring *ring;
>> int r;
>>
>> if (!f)
>> @@ -158,9 +155,6 @@ int amdgpu_job_submit(struct amdgpu_job *job,
>> struct drm_sched_entity *entity,
>> priority = job->base.s_priority;
>> drm_sched_entity_push_job(&job->base, entity);
>>
>> - ring = to_amdgpu_ring(entity->rq->sched);
>> - amdgpu_ring_priority_get(ring, priority);
>> -
>> return 0;
>> }
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index 24caff085d00..34fcd467f18d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -222,6 +222,7 @@ struct amdgpu_ring {
>> struct mutex priority_mutex;
>> /* protected by priority_mutex */
>> int priority;
>> + bool high_priority;
>>
>> #if defined(CONFIG_DEBUG_FS)
>> struct dentry *ent;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> index 94ca9ffa0ccb..0e6ce65e1e54 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
>> @@ -3211,6 +3211,22 @@ static int
>> gfx_v10_0_cp_async_gfx_ring_resume(struct amdgpu_device *adev)
>> return r;
>> }
>>
>> +static void gfx_v10_0_compute_mqd_set_priority(struct amdgpu_ring
>> *ring, struct v10_compute_mqd *mqd)
>> +{
>> + struct amdgpu_device *adev = ring->adev;
>> +
>> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
>> + if (amdgpu_gfx_is_high_priority_compute_queue(adev,
>> ring->queue)) {
>> + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
>> + ring->high_priority = true;
>> + mqd->cp_hqd_queue_priority =
>> + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
>> + }
>> + else
>> + ring->high_priority = false;
>> + }
>> +}
>> +
>> static int gfx_v10_0_compute_mqd_init(struct amdgpu_ring *ring)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> @@ -3336,6 +3352,9 @@ static int gfx_v10_0_compute_mqd_init(struct
>> amdgpu_ring *ring)
>> tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
>> mqd->cp_hqd_ib_control = tmp;
>>
>> + /* set static priority for a queue/ring */
>> + gfx_v10_0_compute_mqd_set_priority(ring, mqd);
>> +
>> /* map_queues packet doesn't need activate the queue,
>> * so only kiq need set this field.
>> */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> index 393a1324daa9..6c4b7e49f97f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
>> @@ -4430,6 +4430,22 @@ static int gfx_v8_0_deactivate_hqd(struct
>> amdgpu_device *adev, u32 req)
>> return r;
>> }
>>
>> +static void gfx_v8_0_mqd_set_priority(struct amdgpu_ring *ring,
>> struct vi_mqd *mqd)
>> +{
>> + struct amdgpu_device *adev = ring->adev;
>> +
>> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
>> + if (amdgpu_gfx_is_high_priority_compute_queue(adev,
>> ring->queue)) {
>> + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
>> + ring->high_priority = true;
>> + mqd->cp_hqd_queue_priority =
>> + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
>> + }
>> + else
>> + ring->high_priority = false;
>> + }
>> +}
>> +
>> static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> @@ -4553,9 +4569,6 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring
>> *ring)
>> /* defaults */
>> mqd->cp_hqd_eop_rptr = RREG32(mmCP_HQD_EOP_RPTR);
>> mqd->cp_hqd_eop_wptr = RREG32(mmCP_HQD_EOP_WPTR);
>> - mqd->cp_hqd_pipe_priority = RREG32(mmCP_HQD_PIPE_PRIORITY);
>> - mqd->cp_hqd_queue_priority = RREG32(mmCP_HQD_QUEUE_PRIORITY);
>> - mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>> mqd->cp_hqd_ctx_save_base_addr_lo =
>> RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_LO);
>> mqd->cp_hqd_ctx_save_base_addr_hi =
>> RREG32(mmCP_HQD_CTX_SAVE_BASE_ADDR_HI);
>> mqd->cp_hqd_cntl_stack_offset =
>> RREG32(mmCP_HQD_CNTL_STACK_OFFSET);
>> @@ -4567,6 +4580,10 @@ static int gfx_v8_0_mqd_init(struct
>> amdgpu_ring *ring)
>> mqd->cp_hqd_eop_wptr_mem = RREG32(mmCP_HQD_EOP_WPTR_MEM);
>> mqd->cp_hqd_eop_dones = RREG32(mmCP_HQD_EOP_DONES);
>>
>> + /* set static priority for a queue/ring */
>> + gfx_v8_0_mqd_set_priority(ring, mqd);
>> + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>> +
>> /* map_queues packet doesn't need activate the queue,
>> * so only kiq need set this field.
>> */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> index 015647959d69..ff5e913f244d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
>> @@ -3314,6 +3314,22 @@ static void gfx_v9_0_kiq_setting(struct
>> amdgpu_ring *ring)
>> WREG32_SOC15_RLC(GC, 0, mmRLC_CP_SCHEDULERS, tmp);
>> }
>>
>> +static void gfx_v9_0_mqd_set_priority(struct amdgpu_ring *ring,
>> struct v9_mqd *mqd)
>> +{
>> + struct amdgpu_device *adev = ring->adev;
>> +
>> + if (ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE) {
>> + if (amdgpu_gfx_is_high_priority_compute_queue(adev,
>> ring->queue)) {
>> + mqd->cp_hqd_pipe_priority = AMDGPU_GFX_PIPE_PRIO_HIGH;
>> + ring->high_priority = true;
>> + mqd->cp_hqd_queue_priority =
>> + AMDGPU_GFX_QUEUE_PRIORITY_MAXIMUM;
>> + }
>> + else
>> + ring->high_priority = false;
>> + }
>> +}
>> +
>> static int gfx_v9_0_mqd_init(struct amdgpu_ring *ring)
>> {
>> struct amdgpu_device *adev = ring->adev;
>> @@ -3450,6 +3466,10 @@ static int gfx_v9_0_mqd_init(struct
>> amdgpu_ring *ring)
>> tmp = REG_SET_FIELD(tmp, CP_HQD_IB_CONTROL, MIN_IB_AVAIL_SIZE, 3);
>> mqd->cp_hqd_ib_control = tmp;
>>
>> + /* set static priority for a queue/ring */
>> + gfx_v9_0_mqd_set_priority(ring, mqd);
>> + mqd->cp_hqd_quantum = RREG32(mmCP_HQD_QUANTUM);
>> +
>> /* map_queues packet doesn't need activate the queue,
>> * so only kiq need set this field.
>> */
>> --
>> 2.25.0
>>
>
More information about the amd-gfx
mailing list