[PATCH 18/24] drm/amdkfd: enable pc sampling start

James Zhu jamesz at amd.com
Thu Nov 23 22:00:11 UTC 2023


On 2023-11-23 15:21, Felix Kuehling wrote:
>
> On 2023-11-23 15:01, James Zhu wrote:
>>
>> On 2023-11-22 17:27, Felix Kuehling wrote:
>>>
>>> On 2023-11-03 09:11, James Zhu wrote:
>>>> Enable pc sampling start.
>>>>
>>>> Signed-off-by: James Zhu <James.Zhu at amd.com>
>>>> ---
>>>>   drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c | 26 
>>>> +++++++++++++++++---
>>>>   drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  2 ++
>>>>   2 files changed, 25 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c 
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
>>>> index 60b29b245db5..33d003ca0093 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_pc_sampling.c
>>>> @@ -83,9 +83,29 @@ static int kfd_pc_sample_query_cap(struct 
>>>> kfd_process_device *pdd,
>>>>       return 0;
>>>>   }
>>>>   -static int kfd_pc_sample_start(struct kfd_process_device *pdd)
>>>> +static int kfd_pc_sample_start(struct kfd_process_device *pdd,
>>>> +                    struct pc_sampling_entry *pcs_entry)
>>>>   {
>>>> -    return -EINVAL;
>>>> +    bool pc_sampling_start = false;
>>>> +
>>>> +    pcs_entry->enabled = true;
>>>> +    mutex_lock(&pdd->dev->pcs_data.mutex);
>>>> +    if (!pdd->dev->pcs_data.hosttrap_entry.base.active_count)
>>>> +        pc_sampling_start = true;
>>>> + pdd->dev->pcs_data.hosttrap_entry.base.active_count++;
>>>> +    mutex_unlock(&pdd->dev->pcs_data.mutex);
>>>> +
>>>> +    while (pc_sampling_start) {
>>>> +        if 
>>>> (READ_ONCE(pdd->dev->pcs_data.hosttrap_entry.base.stop_enable)) {
>>>> +            usleep_range(1000, 2000);
>>>
>>> I don't understand why you need this synchronization through 
>>> stop_enable. Why can't you do both the start and stop while holding 
>>> the mutex? It's just setting a flag in the TMA, so it's not a 
>>> time-consuming operation, and I don't see any potential for deadlocks.
>> [JZ] for stop, not just set TMA. need wait for current pc sampling 
>> completely stop and reset some initial setting.
>
> I think that's being obfuscated by how you split up this patch series. 
> Maybe if you squash the queue remapping patch into this one, it would 
> be more obvious what's really happening when you stop sampling and 
> would make it easier to review the synchronization and locking strategy.
[JZ] Sure
>
> Regards,
>   Felix
>
>
>>>
>>> Regards,
>>>   Felix
>>>
>>>
>>>> +        } else {
>>>> + kfd_process_set_trap_pc_sampling_flag(&pdd->qpd,
>>>> + pdd->dev->pcs_data.hosttrap_entry.base.pc_sample_info.method, true);
>>>> +            break;
>>>> +        }
>>>> +    }
>>>> +
>>>> +    return 0;
>>>>   }
>>>>     static int kfd_pc_sample_stop(struct kfd_process_device *pdd)
>>>> @@ -225,7 +245,7 @@ int kfd_pc_sample(struct kfd_process_device *pdd,
>>>>           if (pcs_entry->enabled)
>>>>               return -EALREADY;
>>>>           else
>>>> -            return kfd_pc_sample_start(pdd);
>>>> +            return kfd_pc_sample_start(pdd, pcs_entry);
>>>>         case KFD_IOCTL_PCS_OP_STOP:
>>>>           if (!pcs_entry->enabled)
>>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h 
>>>> b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> index 6670534f47b8..613910e0d440 100644
>>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>>>> @@ -258,6 +258,8 @@ struct kfd_dev;
>>>>     struct kfd_dev_pc_sampling_data {
>>>>       uint32_t use_count;         /* Num of PC sampling sessions */
>>>> +    uint32_t active_count;      /* Num of active sessions */
>>>> +    bool stop_enable;           /* pc sampling stop in process */
>>>>       struct idr pc_sampling_idr;
>>>>       struct kfd_pc_sample_info pc_sample_info;
>>>>   };


More information about the amd-gfx mailing list