[PATCH] drm/amdkfd: fix debug watchpoints for logical devices

Kasiviswanathan, Harish Harish.Kasiviswanathan at amd.com
Tue Jul 30 16:23:04 UTC 2024


[AMD Official Use Only - AMD Internal Distribution Only]

One minor comment below. Either way, Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>

-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Jonathan Kim
Sent: Monday, July 22, 2024 2:57 PM
To: amd-gfx at lists.freedesktop.org
Cc: Kim, Jonathan <Jonathan.Kim at amd.com>; Kim, Jonathan <Jonathan.Kim at amd.com>
Subject: [PATCH] drm/amdkfd: fix debug watchpoints for logical devices

The number of watchpoints should be set and constrained per logical
partition device, not by the socket device.

Signed-off-by: Jonathan Kim <jonathan.kim at amd.com>
---
 drivers/gpu/drm/amd/amdkfd/kfd_debug.c  | 20 ++++++++++----------
 drivers/gpu/drm/amd/amdkfd/kfd_device.c |  4 ++--
 drivers/gpu/drm/amd/amdkfd/kfd_priv.h   |  8 ++++----
 3 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
index 34a282540c7e..312dfa84f29f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_debug.c
@@ -365,47 +365,47 @@ static int kfd_dbg_get_dev_watch_id(struct kfd_process_device *pdd, int *watch_i

        *watch_id = KFD_DEBUGGER_INVALID_WATCH_POINT_ID;

-       spin_lock(&pdd->dev->kfd->watch_points_lock);
+       spin_lock(&pdd->dev->watch_points_lock);

        for (i = 0; i < MAX_WATCH_ADDRESSES; i++) {
                /* device watchpoint in use so skip */
-               if ((pdd->dev->kfd->alloc_watch_ids >> i) & 0x1)
+               if ((pdd->dev->alloc_watch_ids >> i) & 0x1)
                        continue;

                pdd->alloc_watch_ids |= 0x1 << i;
-               pdd->dev->kfd->alloc_watch_ids |= 0x1 << i;
+               pdd->dev->alloc_watch_ids |= 0x1 << i;
                *watch_id = i;
-               spin_unlock(&pdd->dev->kfd->watch_points_lock);
+               spin_unlock(&pdd->dev->watch_points_lock);
                return 0;
        }

-       spin_unlock(&pdd->dev->kfd->watch_points_lock);
+       spin_unlock(&pdd->dev->watch_points_lock);

        return -ENOMEM;
 }

 static void kfd_dbg_clear_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
 {
-       spin_lock(&pdd->dev->kfd->watch_points_lock);
+       spin_lock(&pdd->dev->watch_points_lock);

        /* process owns device watch point so safe to clear */
        if ((pdd->alloc_watch_ids >> watch_id) & 0x1) {
                pdd->alloc_watch_ids &= ~(0x1 << watch_id);
-               pdd->dev->kfd->alloc_watch_ids &= ~(0x1 << watch_id);
+               pdd->dev->alloc_watch_ids &= ~(0x1 << watch_id);
        }

-       spin_unlock(&pdd->dev->kfd->watch_points_lock);
+       spin_unlock(&pdd->dev->watch_points_lock);
 }

 static bool kfd_dbg_owns_dev_watch_id(struct kfd_process_device *pdd, int watch_id)
 {
        bool owns_watch_id = false;

-       spin_lock(&pdd->dev->kfd->watch_points_lock);
+       spin_lock(&pdd->dev->watch_points_lock);
        owns_watch_id = watch_id < MAX_WATCH_ADDRESSES &&
                        ((pdd->alloc_watch_ids >> watch_id) & 0x1);

-       spin_unlock(&pdd->dev->kfd->watch_points_lock);
+       spin_unlock(&pdd->dev->watch_points_lock);

        return owns_watch_id;
 }
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index f4d20adaa068..f91a9b6ce3fb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -885,12 +885,12 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
                        goto node_init_error;
                }

Wouldn't it be better to initialize node->watch_points_lock here?
spin_lock_init(&node->watch_points_lock);

                kfd->nodes[i] = node;
+
+               spin_lock_init(&kfd->nodes[i]->watch_points_lock);
        }

        svm_range_set_max_pages(kfd->adev);

-       spin_lock_init(&kfd->watch_points_lock);
-
        kfd->init_complete = true;
        dev_info(kfd_device, "added device %x:%x\n", kfd->adev->pdev->vendor,
                 kfd->adev->pdev->device);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 2b3ec92981e8..653e1f934107 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -310,6 +310,10 @@ struct kfd_node {
        struct kfd_local_mem_info local_mem_info;

        struct kfd_dev *kfd;
+
+       /* Track per device allocated watch points */
+       uint32_t alloc_watch_ids;
+       spinlock_t watch_points_lock;
 };

 struct kfd_dev {
@@ -362,10 +366,6 @@ struct kfd_dev {
        struct kfd_node *nodes[MAX_KFD_NODES];
        unsigned int num_nodes;

-       /* Track per device allocated watch points */
-       uint32_t alloc_watch_ids;
-       spinlock_t watch_points_lock;
-
        /* Kernel doorbells for KFD device */
        struct amdgpu_bo *doorbells;

--
2.34.1



More information about the amd-gfx mailing list