[PATCH 2/3] drm/amdkfd: Fix bugs regarding CP user queue doorbells mask on SOC15

Kuehling, Felix Felix.Kuehling at amd.com
Fri Feb 1 20:03:35 UTC 2019


On 2019-01-31 5:27 p.m., Zhao, Yong wrote:
> Reserved doorbells for SDMA IH and VCN were not properly masked out
> when allocating doorbells for CP user queues. This patch fixed that.
>
> Change-Id: I670adfc3fd7725d2ed0bd9665cb7f69f8b9023c2
> Signed-off-by: Yong Zhao <Yong.Zhao at amd.com>

One mostly cosmetic comment inline. With that fixed, the series is 
Reviewed-by: Felix Kuehling <Felix.Kuehling at amd.com>


> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c    | 17 ++++++++++---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h  |  4 +++
>   drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c  |  3 +++
>   drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c  |  3 +++
>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  9 +++++++
>   drivers/gpu/drm/amd/amdkfd/kfd_process.c      | 25 +++++++++++++------
>   .../gpu/drm/amd/include/kgd_kfd_interface.h   | 19 +++++---------
>   7 files changed, 56 insertions(+), 24 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index e957e42c539a..13710f34191a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -196,11 +196,20 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>   			gpu_resources.sdma_doorbell[1][i+1] =
>   				adev->doorbell_index.sdma_engine[1] + 0x200 + (i >> 1);
>   		}
> -		/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
> -		 * SDMA, IH and VCN. So don't use them for the CP.
> +
> +		/* Because of the setting in registers like
> +		 * SDMA0_DOORBELL_RANGE etc., BIF statically uses the
> +		 * lower 12 bits of doorbell address for routing, in
> +		 * order to route the CP queue doorbells to CP engine,
> +		 * the doorbells allocated to CP queues have to be
> +		 * outside the range set for SDMA, VCN, and IH blocks
> +		 * Prior to SOC15, all queues use queue ID to
> +		 * determine doorbells.
>   		 */
> -		gpu_resources.reserved_doorbell_mask = 0x1e0;
> -		gpu_resources.reserved_doorbell_val  = 0x0e0;
> +		gpu_resources.reserved_doorbells_start =
> +				adev->doorbell_index.sdma_engine[0];
> +		gpu_resources.reserved_doorbells_end =
> +				adev->doorbell_index.last_non_cp;
>   
>   		kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
>   	}
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> index 1cfec06f81d4..4de431f7f380 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h
> @@ -71,6 +71,7 @@ struct amdgpu_doorbell_index {
>   			uint32_t vce_ring6_7;
>   		} uvd_vce;
>   	};
> +	uint32_t last_non_cp;
>   	uint32_t max_assignment;
>   	/* Per engine SDMA doorbell size in dword */
>   	uint32_t sdma_doorbell_range;
> @@ -143,6 +144,7 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
>   	AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3             = 0x18D,
>   	AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5             = 0x18E,
>   	AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7             = 0x18F,
> +	AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP             = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
>   	AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT            = 0x18F,
>   	AMDGPU_VEGA20_DOORBELL_INVALID                   = 0xFFFF
>   } AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
> @@ -222,6 +224,8 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
>   	AMDGPU_DOORBELL64_VCE_RING4_5             = 0xFE,
>   	AMDGPU_DOORBELL64_VCE_RING6_7             = 0xFF,
>   
> +	AMDGPU_DOORBELL64_LAST_NON_CP             = AMDGPU_DOORBELL64_VCE_RING6_7,
> +
>   	AMDGPU_DOORBELL64_MAX_ASSIGNMENT          = 0xFF,
>   	AMDGPU_DOORBELL64_INVALID                 = 0xFFFF
>   } AMDGPU_DOORBELL64_ASSIGNMENT;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
> index 4b5d60ea3e78..fa0433199215 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c
> @@ -81,6 +81,9 @@ void vega10_doorbell_index_init(struct amdgpu_device *adev)
>   	adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3;
>   	adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5;
>   	adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7;
> +
> +	adev->doorbell_index.last_non_cp = AMDGPU_DOORBELL64_LAST_NON_CP;
> +
>   	/* In unit of dword doorbell */
>   	adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1;
>   	adev->doorbell_index.sdma_doorbell_range = 4;
> diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> index 53716c593b2b..b1052caaff5e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
> @@ -85,6 +85,9 @@ void vega20_doorbell_index_init(struct amdgpu_device *adev)
>   	adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3;
>   	adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5;
>   	adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7;
> +
> +	adev->doorbell_index.last_non_cp = AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP;
> +
>   	adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1;
>   	adev->doorbell_index.sdma_doorbell_range = 20;
>   }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index e5ebcca7f031..6b8459f852cc 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -103,6 +103,15 @@
>   
>   #define KFD_KERNEL_QUEUE_SIZE 2048
>   
> +/* 512 = 0x200
> + * On SOC15, the doorbell index distance for SDMA RLC i and (i + 1) in the
> + * same SDMA engine, where i is a even number.
> + * For 8-bytes doorbells, it ensures that the mirror doorbell range (in terms
> + * of low 12 bit address for each HW engine) on the second doorbell page is
> + * the same as the range of the first doorbell page.*/
> +#define KFD_QUEUE_DOORBELL_MIRROR_OFFSET 512
> +
> +
>   /*
>    * Kernel module parameter to specify maximum number of supported queues per
>    * device
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index 80b36e860a0a..c7b67c65d62d 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -607,13 +607,24 @@ static int init_doorbell_bitmap(struct qcm_process_device *qpd,
>   	if (!qpd->doorbell_bitmap)
>   		return -ENOMEM;
>   
> -	/* Mask out any reserved doorbells */
> -	for (i = 0; i < KFD_MAX_NUM_OF_QUEUES_PER_PROCESS; i++)
> -		if ((dev->shared_resources.reserved_doorbell_mask & i) ==
> -		    dev->shared_resources.reserved_doorbell_val) {
> -			set_bit(i, qpd->doorbell_bitmap);
> -			pr_debug("reserved doorbell 0x%03x\n", i);
> -		}
> +	/* Mask out all reserved doorbells for SDMA, IH, and VCN on SOC15.
> +	 * Because of the setting in registers like SDMA0_DOORBELL_RANGE etc.,
> +	 * BIF statically uses the lower 12 bits of doorbell address for
> +	 * routing. In order to route the CP queue doorbells to CP engine,
> +	 * the doorbells allocated to CP queues have to be outside the range
> +	 * set for SDMA, VCN, and IH blocks.
> +	 * Prior to SOC15, all queues use queue ID to
> +	 * determine doorbells. */
> +	i = dev->shared_resources.reserved_doorbells_start;
> +	while (i <= dev->shared_resources.reserved_doorbells_end) {

This should be a for-loop to be more obvious:

         for (i = dev->shared_resources.reserved_doorbells_start;
              i <= dev->shared_resources.reserved_doorbells_end;
              i++) {
                 ...
         }


> +		set_bit(i, qpd->doorbell_bitmap);
> +		set_bit(i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET,
> +				qpd->doorbell_bitmap);
> +		pr_debug("reserved doorbell 0x%03x and 0x%03x\n", i,
> +			i + KFD_QUEUE_DOORBELL_MIRROR_OFFSET);
> +
> +		i++;
> +	}
>   
>   	return 0;
>   }
> diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> index 83d960110d23..b1bf45419d93 100644
> --- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> +++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
> @@ -137,20 +137,13 @@ struct kgd2kfd_shared_resources {
>   	/* Bit n == 1 means Queue n is available for KFD */
>   	DECLARE_BITMAP(queue_bitmap, KGD_MAX_QUEUES);
>   
> -	/* Doorbell assignments (SOC15 and later chips only). Only
> -	 * specific doorbells are routed to each SDMA engine. Others
> -	 * are routed to IH and VCN. They are not usable by the CP.
> -	 *
> -	 * Any doorbell number D that satisfies the following condition
> -	 * is reserved: (D & reserved_doorbell_mask) == reserved_doorbell_val
> -	 *
> -	 * KFD currently uses 1024 (= 0x3ff) doorbells per process. If
> -	 * doorbells 0x0e0-0x0ff and 0x2e0-0x2ff are reserved, that means
> -	 * mask would be set to 0x1e0 and val set to 0x0e0.
> -	 */
>   	unsigned int sdma_doorbell[2][8];
> -	unsigned int reserved_doorbell_mask;
> -	unsigned int reserved_doorbell_val;
> +
> +	/* From SOC15 onwards, the doorbell indexes reserved for SDMA, IH,
> +	 * and VCN
> +	 */
> +	unsigned int reserved_doorbells_start;
> +	unsigned int reserved_doorbells_end;
>   
>   	/* Base address of doorbell aperture. */
>   	phys_addr_t doorbell_physical_address;


More information about the amd-gfx mailing list