[PATCH 15/32] drm/amdkfd: prepare trap workaround for gfx11

Felix Kuehling felix.kuehling at amd.com
Thu Mar 23 14:00:30 UTC 2023


Am 2023-03-23 um 09:50 schrieb Kim, Jonathan:
> [Public]
>
>> -----Original Message-----
>> From: Kuehling, Felix <Felix.Kuehling at amd.com>
>> Sent: Monday, March 20, 2023 5:50 PM
>> To: Kim, Jonathan <Jonathan.Kim at amd.com>; amd-
>> gfx at lists.freedesktop.org; dri-devel at lists.freedesktop.org
>> Subject: Re: [PATCH 15/32] drm/amdkfd: prepare trap workaround for gfx11
>>
>>
>> On 2023-01-25 14:53, Jonathan Kim wrote:
>>> Due to a HW bug, waves in only half the shader arrays can enter trap.
>>>
>>> When starting a debug session, relocate all waves to the first shader
>>> array of each shader engine and mask off the 2nd shader array as
>>> unavailable.
>>>
>>> When ending a debug session, re-enable the 2nd shader array per
>>> shader engine.
>>>
>>> User CU masking per queue cannot be guaranteed to remain functional
>>> if requested during debugging (e.g. user cu mask requests only 2nd shader
>>> array as an available resource leading to zero HW resources available)
>>> nor can runtime be alerted of any of these changes during execution.
>>>
>>> Make user CU masking and debugging mutually exclusive with respect to
>>> availability.
>>>
>>> If the debugger tries to attach to a process with a user cu masked
>>> queue, return the runtime status as enabled but busy.
>>>
>>> If the debugger tries to attach and fails to reallocate queue waves to
>>> the first shader array of each shader engine, return the runtime status
>>> as enabled but with an error.
>>>
>>> In addition, like any other multi-process debug supported devices,
>>> disable trap temporary setup per-process to avoid performance impact
>> from
>>> setup overhead.
>>>
>>> Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
>>> ---
>>>    drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h       |  2 +
>>>    drivers/gpu/drm/amd/amdgpu/mes_v11_0.c        |  7 +-
>>>    drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      |  2 -
>>>    drivers/gpu/drm/amd/amdkfd/kfd_debug.c        | 64
>> +++++++++++++++++++
>>>    drivers/gpu/drm/amd/amdkfd/kfd_debug.h        |  3 +-
>>>    .../drm/amd/amdkfd/kfd_device_queue_manager.c |  7 ++
>>>    .../gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c  |  3 +-
>>>    .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c  |  3 +-
>>>    .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c  | 42 ++++++++----
>>>    .../gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c   |  3 +-
>>>    .../gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c   |  3 +-
>>>    drivers/gpu/drm/amd/amdkfd/kfd_priv.h         |  5 +-
>>>    .../amd/amdkfd/kfd_process_queue_manager.c    |  9 ++-
>>>    13 files changed, 124 insertions(+), 29 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>>> index d20df0cf0d88..b5f5eed2b5ef 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
>>> @@ -219,6 +219,8 @@ struct mes_add_queue_input {
>>>      uint32_t        gws_size;
>>>      uint64_t        tba_addr;
>>>      uint64_t        tma_addr;
>>> +   uint32_t        trap_en;
>>> +   uint32_t        skip_process_ctx_clear;
>>>      uint32_t        is_kfd_process;
>>>      uint32_t        is_aql_queue;
>>>      uint32_t        queue_size;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
>> b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
>>> index fbacdc42efac..38c7a0cbf264 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
>>> @@ -197,17 +197,14 @@ static int mes_v11_0_add_hw_queue(struct
>> amdgpu_mes *mes,
>>>      mes_add_queue_pkt.gws_size = input->gws_size;
>>>      mes_add_queue_pkt.trap_handler_addr = input->tba_addr;
>>>      mes_add_queue_pkt.tma_addr = input->tma_addr;
>>> +   mes_add_queue_pkt.trap_en = input->trap_en;
>>> +   mes_add_queue_pkt.skip_process_ctx_clear = input-
>>> skip_process_ctx_clear;
>>>      mes_add_queue_pkt.is_kfd_process = input->is_kfd_process;
>>>
>>>      /* For KFD, gds_size is re-used for queue size (needed in MES for AQL
>> queues) */
>>>      mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
>>>      mes_add_queue_pkt.gds_size = input->queue_size;
>>>
>>> -   if (!(((adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >=
>> 4) &&
>>> -             (adev->ip_versions[GC_HWIP][0] >= IP_VERSION(11, 0, 0))
>> &&
>>> -             (adev->ip_versions[GC_HWIP][0] <= IP_VERSION(11, 0, 3))))
>>> -           mes_add_queue_pkt.trap_en = 1;
>>> -
>>>      /* For KFD, gds_size is re-used for queue size (needed in MES for AQL
>> queues) */
>>>      mes_add_queue_pkt.is_aql_queue = input->is_aql_queue;
>>>      mes_add_queue_pkt.gds_size = input->queue_size;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index ee05c2e54ef6..f5f639de28f0 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -530,8 +530,6 @@ static int kfd_ioctl_set_cu_mask(struct file *filp,
>> struct kfd_process *p,
>>>              goto out;
>>>      }
>>>
>>> -   minfo.update_flag = UPDATE_FLAG_CU_MASK;
>>> -
>>>      mutex_lock(&p->mutex);
>>>
>>>      retval = pqm_update_mqd(&p->pqm, args->queue_id, &minfo);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
>>> index f6ea6db266b4..6e99a0160275 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
>>> @@ -37,6 +37,70 @@ void debug_event_write_work_handler(struct
>> work_struct *work)
>>>      kernel_write(process->dbg_ev_file, &write_data, 1, &pos);
>>>    }
>>>
>>> +static int kfd_dbg_set_queue_workaround(struct queue *q, bool enable)
>>> +{
>>> +   struct mqd_update_info minfo = {0};
>>> +   int err;
>>> +
>>> +   if (!q || (!q->properties.is_dbg_wa && !enable))
>> Should this condition be:
>>
>>       if (!q || q->properties.is_dbg_wa != enable)
> The latter part should probably be q->properties.is_dbg_wa == enable.  q->properties.is_dbg_wa != enable would always skip a request to change the queue's current workaround state.

Yeah, that's what I meant.


> I think we can just drop the latter half of this test condition as a redundant queue workaround update is harmless.
> It's a static call from a process wide call and the process wide call is static itself and only gets called twice, once on attach and once on detach.

Sounds good.

Thanks,
   Felix


>
> Thanks,
>
> Jon
>
>>> +           return 0;
>>> +
>>> +   if (KFD_GC_VERSION(q->device) < IP_VERSION(11, 0, 0) ||
>>> +                   KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0, 0))
>> Indentation. It would be more readable if the KFD_GC_VERSIONs were
>> aligned.
>>
>>
>>> +           return 0;
>>> +
>>> +   if (enable && q->properties.is_user_cu_masked)
>>> +           return -EBUSY;
>>> +
>>> +   minfo.update_flag = enable ? UPDATE_FLAG_DBG_WA_ENABLE :
>> UPDATE_FLAG_DBG_WA_DISABLE;
>>> +
>>> +   q->properties.is_dbg_wa = enable;
>>> +   err = q->device->dqm->ops.update_queue(q->device->dqm, q,
>> &minfo);
>>> +   if (err)
>>> +           q->properties.is_dbg_wa = false;
>>> +
>>> +   return err;
>>> +}
>>> +
>>> +static int kfd_dbg_set_workaround(struct kfd_process *target, bool
>> enable)
>>> +{
>>> +   struct process_queue_manager *pqm = &target->pqm;
>>> +   struct process_queue_node *pqn;
>>> +   int r = 0;
>>> +
>>> +   list_for_each_entry(pqn, &pqm->queues, process_queue_list) {
>>> +           r = kfd_dbg_set_queue_workaround(pqn->q, enable);
>>> +           if (enable && r)
>>> +                   goto unwind;
>>> +   }
>>> +
>>> +   return 0;
>>> +
>>> +unwind:
>>> +   list_for_each_entry(pqn, &pqm->queues, process_queue_list)
>>> +           kfd_dbg_set_queue_workaround(pqn->q, false);
>>> +
>>> +   if (enable) {
>>> +           target->runtime_info.runtime_state = r == -EBUSY ?
>>> +                           DEBUG_RUNTIME_STATE_ENABLED_BUSY :
>>> +                           DEBUG_RUNTIME_STATE_ENABLED_ERROR;
>>> +   }
>> Braces are not needed here.
>>
>>
>>> +
>>> +   return r;
>>> +}
>>> +
>>> +static int kfd_dbg_set_mes_debug_mode(struct kfd_process_device *pdd)
>>> +{
>>> +   uint32_t spi_dbg_cntl = pdd->spi_dbg_override | pdd-
>>> spi_dbg_launch_mode;
>>> +   uint32_t flags = pdd->process->dbg_flags;
>>> +
>>> +   if (!kfd_dbg_is_per_vmid_supported(pdd->dev))
>>> +           return 0;
>>> +
>>> +   return amdgpu_mes_set_shader_debugger(pdd->dev->adev, pdd-
>>> proc_ctx_gpu_addr, spi_dbg_cntl,
>>> +                                           pdd->watch_points, flags);
>>> +}
>>> +
>> You're adding some unused static functions here. This will cause compile
>> warnings until the patch that starts using them. You could avoid this by
>> reordering this and the next patch and moving the function calls into
>> this patch. That would also make it more obvious where the workaround
>> plugs into the debug code.
>>
>> Regards,
>>     Felix
>>
>>
>>>    int kfd_dbg_trap_disable(struct kfd_process *target)
>>>    {
>>>      if (!target->debug_trap_enabled)
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
>>> index 53c5a3e55bd2..0c09f1729325 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.h
>>> @@ -35,7 +35,8 @@ int kfd_dbg_trap_enable(struct kfd_process *target,
>> uint32_t fd,
>>>    static inline bool kfd_dbg_is_per_vmid_supported(struct kfd_dev *dev)
>>>    {
>>> -   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2);
>>> +   return KFD_GC_VERSION(dev) == IP_VERSION(9, 4, 2) ||
>>> +          KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0);
>>>    }
>>>
>>>    void debug_event_write_work_handler(struct work_struct *work);
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> index 2517716d7cbc..be1985b87ea7 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
>>> @@ -214,6 +214,10 @@ static int add_queue_mes(struct
>> device_queue_manager *dqm, struct queue *q,
>>>      queue_input.paging = false;
>>>      queue_input.tba_addr = qpd->tba_addr;
>>>      queue_input.tma_addr = qpd->tma_addr;
>>> +   queue_input.trap_en = KFD_GC_VERSION(q->device) <
>> IP_VERSION(11, 0, 0) ||
>>> +                         KFD_GC_VERSION(q->device) >= IP_VERSION(12, 0,
>> 0) ||
>>> +                         q->properties.is_dbg_wa;
>>> +   queue_input.skip_process_ctx_clear = qpd->pqm->process-
>>> debug_trap_enabled;
>>>
>>>      queue_type = convert_to_mes_queue_type(q->properties.type);
>>>      if (queue_type < 0) {
>>> @@ -1679,6 +1683,9 @@ static int create_queue_cpsch(struct
>> device_queue_manager *dqm, struct queue *q,
>>>       * updates the is_evicted flag but is a no-op otherwise.
>>>       */
>>>      q->properties.is_evicted = !!qpd->evicted;
>>> +   q->properties.is_dbg_wa = qpd->pqm->process-
>>> debug_trap_enabled &&
>>> +                   KFD_GC_VERSION(q->device) >= IP_VERSION(11, 0, 0)
>> &&
>>> +                   KFD_GC_VERSION(q->device) < IP_VERSION(12, 0, 0);
>>>
>>>      if (qd)
>>>              mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q-
>>> mqd_mem_obj, &q->gart_mqd_addr,
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>>> index 4889865c725c..c2a7226fc588 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
>>> @@ -48,8 +48,7 @@ static void update_cu_mask(struct mqd_manager
>> *mm, void *mqd,
>>>      struct cik_mqd *m;
>>>      uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
>>>
>>> -   if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
>>> -       !minfo->cu_mask.ptr)
>>> +   if (!minfo || !minfo->cu_mask.ptr)
>>>              return;
>>>
>>>      mqd_symmetrically_map_cu_mask(mm,
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>>> index cb484ace17de..8248e77751e7 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v10.c
>>> @@ -48,8 +48,7 @@ static void update_cu_mask(struct mqd_manager
>> *mm, void *mqd,
>>>      struct v10_compute_mqd *m;
>>>      uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
>>>
>>> -   if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
>>> -       !minfo->cu_mask.ptr)
>>> +   if (!minfo || !minfo->cu_mask.ptr)
>>>              return;
>>>
>>>      mqd_symmetrically_map_cu_mask(mm,
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
>>> index ac7c8fc83c94..18ab613e787c 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v11.c
>>> @@ -46,15 +46,33 @@ static void update_cu_mask(struct mqd_manager
>> *mm, void *mqd,
>>>    {
>>>      struct v11_compute_mqd *m;
>>>      uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
>>> +   bool has_wa_flag = minfo && (minfo->update_flag &
>> (UPDATE_FLAG_DBG_WA_ENABLE |
>>> +                   UPDATE_FLAG_DBG_WA_DISABLE));
>>>
>>> -   if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
>>> -       !minfo->cu_mask.ptr)
>>> +   if (!minfo || !(has_wa_flag || minfo->cu_mask.ptr))
>>>              return;
>>>
>>> +   m = get_mqd(mqd);
>>> +
>>> +   if (has_wa_flag) {
>>> +           uint32_t wa_mask = minfo->update_flag ==
>> UPDATE_FLAG_DBG_WA_ENABLE ?
>>> +                                           0xffff : 0xffffffff;
>>> +
>>> +           m->compute_static_thread_mgmt_se0 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se1 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se2 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se3 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se4 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se5 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se6 = wa_mask;
>>> +           m->compute_static_thread_mgmt_se7 = wa_mask;
>>> +
>>> +           return;
>>> +   }
>>> +
>>>      mqd_symmetrically_map_cu_mask(mm,
>>>              minfo->cu_mask.ptr, minfo->cu_mask.count, se_mask);
>>>
>>> -   m = get_mqd(mqd);
>>>      m->compute_static_thread_mgmt_se0 = se_mask[0];
>>>      m->compute_static_thread_mgmt_se1 = se_mask[1];
>>>      m->compute_static_thread_mgmt_se2 = se_mask[2];
>>> @@ -109,6 +127,7 @@ static void init_mqd(struct mqd_manager *mm,
>> void **mqd,
>>>      uint64_t addr;
>>>      struct v11_compute_mqd *m;
>>>      int size;
>>> +   uint32_t wa_mask = q->is_dbg_wa ? 0xffff : 0xffffffff;
>>>
>>>      m = (struct v11_compute_mqd *) mqd_mem_obj->cpu_ptr;
>>>      addr = mqd_mem_obj->gpu_addr;
>>> @@ -122,14 +141,15 @@ static void init_mqd(struct mqd_manager *mm,
>> void **mqd,
>>>      m->header = 0xC0310800;
>>>      m->compute_pipelinestat_enable = 1;
>>> -   m->compute_static_thread_mgmt_se0 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se1 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se2 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se3 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se4 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se5 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se6 = 0xFFFFFFFF;
>>> -   m->compute_static_thread_mgmt_se7 = 0xFFFFFFFF;
>>> +
>>> +   m->compute_static_thread_mgmt_se0 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se1 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se2 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se3 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se4 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se5 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se6 = wa_mask;
>>> +   m->compute_static_thread_mgmt_se7 = wa_mask;
>>>
>>>      m->cp_hqd_persistent_state =
>> CP_HQD_PERSISTENT_STATE__PRELOAD_REQ_MASK |
>>>                      0x55 <<
>> CP_HQD_PERSISTENT_STATE__PRELOAD_SIZE__SHIFT;
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> index 86f1cf090246..50da16dd4c96 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
>>> @@ -49,8 +49,7 @@ static void update_cu_mask(struct mqd_manager
>> *mm, void *mqd,
>>>      struct v9_mqd *m;
>>>      uint32_t se_mask[KFD_MAX_NUM_SE] = {0};
>>>
>>> -   if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
>>> -       !minfo->cu_mask.ptr)
>>> +   if (!minfo || !minfo->cu_mask.ptr)
>>>              return;
>>>
>>>      mqd_symmetrically_map_cu_mask(mm,
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>>> index 530ba6f5b57e..58b40bff3e0c 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
>>> @@ -51,8 +51,7 @@ static void update_cu_mask(struct mqd_manager
>> *mm, void *mqd,
>>>      struct vi_mqd *m;
>>>      uint32_t se_mask[4] = {0}; /* 4 is the max # of SEs */
>>>
>>> -   if (!minfo || (minfo->update_flag != UPDATE_FLAG_CU_MASK) ||
>>> -       !minfo->cu_mask.ptr)
>>> +   if (!minfo || !minfo->cu_mask.ptr)
>>>              return;
>>>
>>>      mqd_symmetrically_map_cu_mask(mm,
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> index 8f1e2f9023db..75521d96e937 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>> @@ -479,6 +479,8 @@ struct queue_properties {
>>>      bool is_evicted;
>>>      bool is_active;
>>>      bool is_gws;
>>> +   bool is_dbg_wa;
>>> +   bool is_user_cu_masked;
>>>      /* Not relevant for user mode queues in cp scheduling */
>>>      unsigned int vmid;
>>>      /* Relevant only for sdma queues*/
>>> @@ -501,7 +503,8 @@ struct queue_properties {
>>>                          !(q).is_evicted)
>>>
>>>    enum mqd_update_flag {
>>> -   UPDATE_FLAG_CU_MASK = 0,
>>> +   UPDATE_FLAG_DBG_WA_ENABLE = 1,
>>> +   UPDATE_FLAG_DBG_WA_DISABLE = 2,
>>>    };
>>>
>>>    struct mqd_update_info {
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>> b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>>> index 5137476ec18e..d8f032214481 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
>>> @@ -498,8 +498,12 @@ int pqm_update_mqd(struct
>> process_queue_manager *pqm,
>>>              return -EFAULT;
>>>      }
>>>
>>> +   /* CUs are masked for debugger requirements so deny user mask  */
>>> +   if (pqn->q->properties.is_dbg_wa && minfo && minfo->cu_mask.ptr)
>>> +           return -EBUSY;
>>> +
>>>      /* ASICs that have WGPs must enforce pairwise enabled mask
>> checks. */
>>> -   if (minfo && minfo->update_flag == UPDATE_FLAG_CU_MASK &&
>> minfo->cu_mask.ptr &&
>>> +   if (minfo && minfo->cu_mask.ptr &&
>>>                      KFD_GC_VERSION(pqn->q->device) >= IP_VERSION(10,
>> 0, 0)) {
>>>              int i;
>>>
>>> @@ -518,6 +522,9 @@ int pqm_update_mqd(struct
>> process_queue_manager *pqm,
>>>      if (retval != 0)
>>>              return retval;
>>>
>>> +   if (minfo && minfo->cu_mask.ptr)
>>> +           pqn->q->properties.is_user_cu_masked = true;
>>> +
>>>      return 0;
>>>    }
>>>


More information about the dri-devel mailing list