[PATCH 5/5] drm/amdkfd: Optimize out sdma doorbell array in kgd2kfd_shared_resources

Kuehling, Felix Felix.Kuehling at amd.com
Fri Feb 8 22:06:01 UTC 2019


Some nit-picks inline. Looks good otherwise.

On 2019-02-05 3:31 p.m., Zhao, Yong wrote:
> We can directly calculate the sdma doorbell index in the process doorbell
> pages through the doorbell_index structure in amdgpu_device, so no need
> to cache them in kgd2kfd_shared_resources any more, resulting in more
> portable code.

What do you mean by "portable" here? Portable to what? Other 
architectures? Kernels? GPUs?


>
> Change-Id: Ic657799856ed0256f36b01e502ef0cab263b1f49
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 55 ++++++-------------
>   .../drm/amd/amdkfd/kfd_device_queue_manager.c | 18 ++++--
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   |  2 +-
>   3 files changed, 31 insertions(+), 44 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index ee8527701731..e62c3703169a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -131,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
>   
>   void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   {
> -	int i, n;
> +	int i;
>   	int last_valid_bit;
>   
>   	if (adev->kfd.dev) {
> @@ -142,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   			.gpuvm_size = min(adev->vm_manager.max_pfn
>   					  << AMDGPU_GPU_PAGE_SHIFT,
>   					  AMDGPU_GMC_HOLE_START),
> -			.drm_render_minor = adev->ddev->render->index
> +			.drm_render_minor = adev->ddev->render->index,
> +			.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
> +
>   		};
>   
>   		/* this is going to have a few of the MSBs set that we need to
> @@ -172,45 +174,22 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   				&gpu_resources.doorbell_aperture_size,
>   				&gpu_resources.doorbell_start_offset);
>   
> -		if (adev->asic_type < CHIP_VEGA10) {
> -			kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
> -			return;
> -		}
> -
> -		n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
> -
> -		for (i = 0; i < n; i += 2) {
> -			/* On SOC15 the BIF is involved in routing
> -			 * doorbells using the low 12 bits of the
> -			 * address. Communicate the assignments to
> -			 * KFD. KFD uses two doorbell pages per
> -			 * process in case of 64-bit doorbells so we
> -			 * can use each doorbell assignment twice.
> +		if (adev->asic_type >= CHIP_VEGA10) {
> +			/* Because of the setting in registers like
> +			 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
> +			 * lower 12 bits of doorbell address for routing. In
> +			 * order to route the CP queue doorbells to CP engine,
> +			 * the doorbells allocated to CP queues have to be
> +			 * outside the range set for SDMA, VCN, and IH blocks
> +			 * Prior to SOC15, all queues use queue ID to
> +			 * determine doorbells.
>   			 */
> -			gpu_resources.sdma_doorbell[0][i] =
> -				adev->doorbell_index.sdma_engine[0] + (i >> 1);
> -			gpu_resources.sdma_doorbell[0][i+1] =
> -				adev->doorbell_index.sdma_engine[0] + 0x200 + (i >> 1);
> -			gpu_resources.sdma_doorbell[1][i] =
> -				adev->doorbell_index.sdma_engine[1] + (i >> 1);
> -			gpu_resources.sdma_doorbell[1][i+1] =
> -				adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
> +			gpu_resources.reserved_doorbells_start =
> +					adev->doorbell_index.sdma_engine[0];
> +			gpu_resources.reserved_doorbells_end =
> +					adev->doorbell_index.last_non_cp;
>   		}
>   
> -		/* Because of the setting in registers like
> -		 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
> -		 * lower 12 bits of doorbell address for routing. In
> -		 * order to route the CP queue doorbells to CP engine,
> -		 * the doorbells allocated to CP queues have to be
> -		 * outside the range set for SDMA, VCN, and IH blocks
> -		 * Prior to SOC15, all queues use queue ID to
> -		 * determine doorbells.
> -		 */
> -		gpu_resources.reserved_doorbells_start =
> -				adev->doorbell_index.sdma_engine[0];
> -		gpu_resources.reserved_doorbells_end =
> -				adev->doorbell_index.last_non_cp;
> -
>   		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
>   	}
>   }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index 8372556b52eb..81280ce5aa27 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -134,12 +134,20 @@ static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
>   		 */
>   		q->doorbell_id = q->properties.queue_id;
>   	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
> -		/* For SDMA queues on SOC15, use static doorbell
> -		 * assignments based on the engine and queue.
> +		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
> +		 * doorbell assignments based on the engine and queue id.
> +		 * The doobell index distance between RLC (2*i) and (2*i+1)
> +		 * for a SDMA engine is 512.
> +		 * 512 8-byte doorbell distance (i.e. one page away) ensures
> +		 * that SDMA RLC (2*i+1) doorbell lies exactly in the doorbell
> +		 * OFFSET and SIZE set in register BIF_SDMA0_DOORBELL_RANGE.
>   		 */
> -		q->doorbell_id = dev->shared_resources.sdma_doorbell
> -			[q->properties.sdma_engine_id]
> -			[q->properties.sdma_queue_id];
> +		unsigned int *idx_offset =
> +				dev->shared_resources.sdma_doorbell_idx;
The type in struct amdgpu_doorbell_index is uint32_t. I'd prefer 
matching the type of the pointer here.


> +		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
> +			+ (q->properties.sdma_queue_id >> 1)
> +			+ (q->properties.sdma_queue_id % 2)

Be consistent with the use of bit-wise operations (x >> 1 and x & 1) or 
division (x / 2 and x %2). Here you're mixing them. I prefer the 
bit-wise operations for division and modulo with powers of two.


> +			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET;
>   	} else {
>   		/* For CP queues on SOC15 reserve a free doorbell ID */
>   		unsigned int found;
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index b1bf45419d93..3559170f6fb3 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -137,7 +137,7 @@ struct kgd2kfd_shared_resources {
>   	/* Bit n == 1 means Queue n is available for KFD */
>   	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
>   
> -	unsigned int sdma_doorbell[2][8];
> +	unsigned int *sdma_doorbell_idx;

The type in struct amdgpu_doorbell_index is uint32_t. I'd prefer 
matching the type of the pointer here.

Regards,
   Felix


>   
>   	/* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,
>   	 * and VCN


More information about the amd-gfx mailing list