[PATCH v2] drm/amdgpu: Fix KFD oversubscription by tracking queues correctly
Felix Kuehling
felix.kuehling at amd.com
Thu Jul 13 21:16:24 UTC 2017
People don't seem to like cross-component changes if they can be
avoided. I'd prefer separate commits for KFD, radeon, amdgpu, and
finally the kgd2kfd interface.
With that fixed, Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>
On 17-07-13 04:36 PM, Andres Rodriguez wrote:
>
> Reviewed-by: Andres Rodriguez <andresx7 at gmail.com>
>
> On 2017-07-13 04:23 PM, Jay Cornwall wrote:
>> The number of compute queues available to the KFD was erroneously
>> calculated as 64. Only the first MEC can execute compute queues and
>> it has 32 queue slots.
>>
>> This caused the oversubscription limit to be calculated incorrectly,
>> leading to a missing chained runlist command at the end of an
>> oversubscribed runlist.
>>
>> v2: Remove unused num_mec field to avoid duplicate logic
>>
>> Change-Id: Ic4a139c04b8a6d025fbb831a0a67e98728bfe461
>> Signed-off-by: Jay Cornwall <Jay.Cornwall at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 3 +--
>> drivers/gpu/drm/amd/amdkfd/kfd_device.c | 4 ----
>> drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 7 -------
>> drivers/gpu/drm/amd/include/kgd_kfd_interface.h | 3 ---
>> drivers/gpu/drm/radeon/radeon_kfd.c | 1 -
>> 5 files changed, 1 insertion(+), 17 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> index 7060daf..8c710f7 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
>> @@ -116,7 +116,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>> if (adev->kfd) {
>> struct kgd2kfd_shared_resources gpu_resources = {
>> .compute_vmid_bitmap = global_compute_vmid_bitmap,
>> - .num_mec = adev->gfx.mec.num_mec,
>> .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
>> .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe,
>> .gpuvm_size = (uint64_t)amdgpu_vm_size << 30
>> @@ -140,7 +139,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>> /* According to linux/bitmap.h we shouldn't use
>> bitmap_clear if
>> * nbits is not compile time constant
>> */
>> - last_valid_bit = adev->gfx.mec.num_mec
>> + last_valid_bit = 1 /* only first MEC can have compute queues */
>> * adev->gfx.mec.num_pipe_per_mec
>> * adev->gfx.mec.num_queue_per_pipe;
>> for (i = last_valid_bit; i < KGD_MAX_QUEUES; ++i)
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> index 1cf00d4..95f9396 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
>> @@ -494,10 +494,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
>> } else
>> kfd->max_proc_per_quantum = hws_max_conc_proc;
>> - /* We only use the first MEC */
>> - if (kfd->shared_resources.num_mec > 1)
>> - kfd->shared_resources.num_mec = 1;
>> -
>> /* calculate max size of mqds needed for queues */
>> size = max_num_of_queues_per_device *
>> kfd->device_info->mqd_size_aligned;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> index 7607989..306144f 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> @@ -82,13 +82,6 @@ static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
>> return false;
>> }
>> -unsigned int get_mec_num(struct device_queue_manager *dqm)
>> -{
>> - BUG_ON(!dqm || !dqm->dev);
>> -
>> - return dqm->dev->shared_resources.num_mec;
>> -}
>> -
>> unsigned int get_queues_num(struct device_queue_manager *dqm)
>> {
>> BUG_ON(!dqm || !dqm->dev);
>> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>> index a4d2fee..10794b3 100644
>> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
>> @@ -107,9 +107,6 @@ struct kgd2kfd_shared_resources {
>> /* Bit n == 1 means VMID n is available for KFD. */
>> unsigned int compute_vmid_bitmap;
>> - /* number of mec available from the hardware */
>> - uint32_t num_mec;
>> -
>> /* number of pipes per mec */
>> uint32_t num_pipe_per_mec;
>> diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c
>> index 719ea51..8f8c7c1 100644
>> --- a/drivers/gpu/drm/radeon/radeon_kfd.c
>> +++ b/drivers/gpu/drm/radeon/radeon_kfd.c
>> @@ -251,7 +251,6 @@ void radeon_kfd_device_init(struct radeon_device *rdev)
>> if (rdev->kfd) {
>> struct kgd2kfd_shared_resources gpu_resources = {
>> .compute_vmid_bitmap = 0xFF00,
>> - .num_mec = 1,
>> .num_pipe_per_mec = 4,
>> .num_queue_per_pipe = 8,
>> .gpuvm_size = (uint64_t)radeon_vm_size << 30
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx mailing list