[PATCH] drm/amdgpu: Disable cwsr for vega10 and Sienna_Cichlid in sriov

Felix Kuehling felix.kuehling at amd.com
Wed May 19 15:54:59 UTC 2021


Am 2021-05-19 um 5:02 a.m. schrieb Chengzhe Liu:
> In sriov, cwsr is not stable
NAK. Without CWSR, ROCm is not stable. Any compute application with
long-running waves can cause a hang.

Regards,
  Felix

>
> Signed-off-by: Chengzhe Liu <ChengZhe.Liu at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c | 24 +++++++++++++++++++++---
>  1 file changed, 21 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 80015e866498..89bd0059329b 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -335,7 +335,7 @@ static const struct kfd_device_info vega10_vf_device_info = {
>  	.event_interrupt_class = &event_interrupt_class_v9,
>  	.num_of_watch_points = 4,
>  	.mqd_size_aligned = MQD_SIZE_ALIGNED,
> -	.supports_cwsr = true,
> +	.supports_cwsr = false,
>  	.needs_iommu_device = false,
>  	.needs_pci_atomics = false,
>  	.num_sdma_engines = 2,
> @@ -505,6 +505,24 @@ static const struct kfd_device_info sienna_cichlid_device_info = {
>  	.num_sdma_queues_per_engine = 8,
>  };
>  
> +static const struct kfd_device_info sienna_cichlid_vf_device_info = {
> +	.asic_family = CHIP_SIENNA_CICHLID,
> +	.asic_name = "sienna_cichlid",
> +	.max_pasid_bits = 16,
> +	.max_no_of_hqd  = 24,
> +	.doorbell_size  = 8,
> +	.ih_ring_entry_size = 8 * sizeof(uint32_t),
> +	.event_interrupt_class = &event_interrupt_class_v10,
> +	.num_of_watch_points = 4,
> +	.mqd_size_aligned = MQD_SIZE_ALIGNED,
> +	.needs_iommu_device = false,
> +	.supports_cwsr = false,
> +	.needs_pci_atomics = true,
> +	.num_sdma_engines = 4,
> +	.num_xgmi_sdma_engines = 0,
> +	.num_sdma_queues_per_engine = 8,
> +};
> +
>  static const struct kfd_device_info navy_flounder_device_info = {
>  	.asic_family = CHIP_NAVY_FLOUNDER,
>  	.asic_name = "navy_flounder",
> @@ -601,7 +619,7 @@ static const struct kfd_device_info *kfd_supported_devices[][2] = {
>  	[CHIP_NAVI10] = {&navi10_device_info, NULL},
>  	[CHIP_NAVI12] = {&navi12_device_info, &navi12_device_info},
>  	[CHIP_NAVI14] = {&navi14_device_info, NULL},
> -	[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_device_info},
> +	[CHIP_SIENNA_CICHLID] = {&sienna_cichlid_device_info, &sienna_cichlid_vf_device_info},
>  	[CHIP_NAVY_FLOUNDER] = {&navy_flounder_device_info, &navy_flounder_device_info},
>  	[CHIP_VANGOGH] = {&vangogh_device_info, NULL},
>  	[CHIP_DIMGREY_CAVEFISH] = {&dimgrey_cavefish_device_info, &dimgrey_cavefish_device_info},
> @@ -674,7 +692,7 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd,
>  
>  static void kfd_cwsr_init(struct kfd_dev *kfd)
>  {
> -	if (cwsr_enable && kfd->device_info->supports_cwsr) {
> +	if ((cwsr_enable && kfd->device_info->supports_cwsr) || cwsr_enable == 2) {
>  		if (kfd->device_info->asic_family < CHIP_VEGA10) {
>  			BUILD_BUG_ON(sizeof(cwsr_trap_gfx8_hex) > PAGE_SIZE);
>  			kfd->cwsr_isa = cwsr_trap_gfx8_hex;


More information about the amd-gfx mailing list