[PATCH v2 1/2] drm/amdgpu: Merge debug module parameters

Christian König christian.koenig at amd.com
Thu Aug 31 06:29:30 UTC 2023



Am 31.08.23 um 00:08 schrieb André Almeida:
> Merge all developer debug options available as separated module
> parameters in one, making it obvious that are for developers.
>
> Drop the obsolete module options in favor of the new ones.
>
> Signed-off-by: André Almeida <andrealmeid at igalia.com>
> ---
> v2:
> - drop old module params
> - use BIT() macros
> - replace global var with adev-> vars
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h      |  4 ++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c   |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c  | 48 ++++++++++++++----------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c  |  2 +-
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   |  2 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c |  2 +-
>   drivers/gpu/drm/amd/amdkfd/kfd_crat.c    |  2 +-
>   drivers/gpu/drm/amd/include/amd_shared.h |  8 ++++
>   8 files changed, 45 insertions(+), 25 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 4de074243c4d..82eaccfce347 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1101,6 +1101,10 @@ struct amdgpu_device {
>   	bool                            dc_enabled;
>   	/* Mask of active clusters */
>   	uint32_t			aid_mask;
> +
> +	/* Debug */
> +	bool                            debug_vm;
> +	bool                            debug_largebar;
>   };
>   
>   static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index fb78a8f47587..8a26bed76505 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -1191,7 +1191,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>   		job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.bo);
>   	}
>   
> -	if (amdgpu_vm_debug) {
> +	if (adev->debug_vm) {
>   		/* Invalidate all BOs to test for userspace bugs */
>   		amdgpu_bo_list_for_each_entry(e, p->bo_list) {
>   			struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index f5856b82605e..0cd48c025433 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -140,7 +140,6 @@ int amdgpu_vm_size = -1;
>   int amdgpu_vm_fragment_size = -1;
>   int amdgpu_vm_block_size = -1;
>   int amdgpu_vm_fault_stop;
> -int amdgpu_vm_debug;
>   int amdgpu_vm_update_mode = -1;
>   int amdgpu_exp_hw_support;
>   int amdgpu_dc = -1;
> @@ -194,6 +193,7 @@ int amdgpu_use_xgmi_p2p = 1;
>   int amdgpu_vcnfw_log;
>   int amdgpu_sg_display = -1; /* auto */
>   int amdgpu_user_partt_mode = AMDGPU_AUTO_COMPUTE_PARTITION_MODE;
> +uint amdgpu_debug_mask;
>   
>   static void amdgpu_drv_delayed_reset_work_handler(struct work_struct *work);
>   
> @@ -405,13 +405,6 @@ module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
>   MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
>   module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
>   
> -/**
> - * DOC: vm_debug (int)
> - * Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
> - */
> -MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
> -module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
> -
>   /**
>    * DOC: vm_update_mode (int)
>    * Override VM update mode. VM updated by using CPU (0 = never, 1 = Graphics only, 2 = Compute only, 3 = Both). The default
> @@ -743,18 +736,6 @@ module_param(send_sigterm, int, 0444);
>   MODULE_PARM_DESC(send_sigterm,
>   	"Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)");
>   
> -/**
> - * DOC: debug_largebar (int)
> - * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar
> - * system. This limits the VRAM size reported to ROCm applications to the visible
> - * size, usually 256MB.
> - * Default value is 0, diabled.
> - */
> -int debug_largebar;
> -module_param(debug_largebar, int, 0444);
> -MODULE_PARM_DESC(debug_largebar,
> -	"Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)");
> -
>   /**
>    * DOC: halt_if_hws_hang (int)
>    * Halt if HWS hang is detected. Default value, 0, disables the halt on hang.
> @@ -938,6 +919,18 @@ module_param_named(user_partt_mode, amdgpu_user_partt_mode, uint, 0444);
>   module_param(enforce_isolation, bool, 0444);
>   MODULE_PARM_DESC(enforce_isolation, "enforce process isolation between graphics and compute . enforce_isolation = on");
>   
> +/**
> + * DOC: debug_mask (uint)
> + * Debug options for amdgpu, work as a binary mask with the following options:
> + *
> + * - 0x1: Debug VM handling
> + * - 0x2: Enable simulating large-bar capability on non-large bar system. This
> + *   limits the VRAM size reported to ROCm applications to the visible
> + *   size, usually 256MB.
> + */
> +MODULE_PARM_DESC(debug_mask, "debug options for amdgpu, disabled by default");
> +module_param_named(debug_mask, amdgpu_debug_mask, uint, 0444);
> +
>   /* These devices are not supported by amdgpu.
>    * They are supported by the mach64, r128, radeon drivers
>    */
> @@ -2042,6 +2035,19 @@ static void amdgpu_get_secondary_funcs(struct amdgpu_device *adev)
>   	}
>   }
>   
> +static void amdgpu_init_debug_options(struct amdgpu_device *adev)
> +{
> +	if (amdgpu_debug_mask & AMDGPU_DEBUG_VM) {
> +		pr_info("debug: VM handling debug enabled\n");
> +		adev->debug_vm = true;
> +	}
> +
> +	if (amdgpu_debug_mask & AMDGPU_DEBUG_LARGEBAR) {
> +		pr_info("debug: enabled simulating large-bar capability on non-large bar system\n");
> +		adev->debug_largebar = true;
> +	}
> +}
> +
>   static int amdgpu_pci_probe(struct pci_dev *pdev,
>   			    const struct pci_device_id *ent)
>   {
> @@ -2220,6 +2226,8 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>   			amdgpu_get_secondary_funcs(adev);
>   	}
>   
> +	amdgpu_init_debug_options(adev);
> +
>   	return 0;
>   
>   err_pci:
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index 09203e22b026..548e65f2db5f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -794,7 +794,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>   	default:
>   		break;
>   	}
> -	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
> +	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm)
>   		amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
>   					args->operation);
>   
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 74380b21e7a5..d483cd9c612a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1407,7 +1407,7 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
>   		spin_unlock(&vm->status_lock);
>   
>   		/* Try to reserve the BO to avoid clearing its ptes */
> -		if (!amdgpu_vm_debug && dma_resv_trylock(resv))
> +		if (!adev->debug_vm && dma_resv_trylock(resv))
>   			clear = false;
>   		/* Somebody else is using the BO right now */
>   		else
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> index 3b8f592384fa..41ac2ec936c3 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
> @@ -1021,7 +1021,7 @@ static int kfd_ioctl_acquire_vm(struct file *filep, struct kfd_process *p,
>   
>   bool kfd_dev_is_large_bar(struct kfd_node *dev)
>   {
> -	if (debug_largebar) {
> +	if (dev->kfd->adev->debug_largebar) {
>   		pr_debug("Simulate large-bar allocation on non large-bar machine\n");
>   		return true;
>   	}
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> index 2e9612cf56ae..b05e06f89814 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
> @@ -2115,7 +2115,7 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
>   	sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
>   			sub_type_hdr->length);
>   
> -	if (debug_largebar)
> +	if (kdev->adev->debug_largebar)
>   		local_mem_info.local_mem_size_private = 0;
>   
>   	if (local_mem_info.local_mem_size_private == 0)
> diff --git a/drivers/gpu/drm/amd/include/amd_shared.h b/drivers/gpu/drm/amd/include/amd_shared.h
> index 67d7b7ee8a2a..2fd6af2183cc 100644
> --- a/drivers/gpu/drm/amd/include/amd_shared.h
> +++ b/drivers/gpu/drm/amd/include/amd_shared.h
> @@ -257,6 +257,14 @@ enum DC_DEBUG_MASK {
>   
>   enum amd_dpm_forced_level;
>   
> +/*
> + * amdgpu.debug module options. Are all disabled by default
> + */
> +enum AMDGPU_DEBUG_MASK {
> +	AMDGPU_DEBUG_VM = BIT(0),
> +	AMDGPU_DEBUG_LARGEBAR = BIT(1),
> +};
> +

That is probably not the right place for this.

Better put this into drivers/gpu/drm/amd/amdgpu/amdgpu_drv.h

Apart from that really good work. With the location of the defines fixed 
feel free to add my rb before sending it out the next time.

Regards,
Christian.

>   /**
>    * struct amd_ip_funcs - general hooks for managing amdgpu IP Blocks
>    * @name: Name of IP block



More information about the amd-gfx mailing list