[PATCH 1/2] drm/amdgpu: Implement get num of hops between two xgmi device
Liu, Shaoyun
Shaoyun.Liu at amd.com
Tue Apr 23 20:21:31 UTC 2019
KFD need to provide the info for upper level to determine the data path
Change-Id: Idc809e8f3381b9222dd7be96539522d440f3ee7d
Signed-off-by: shaoyunl <shaoyun.liu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 15 +++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h | 26 ++++++++++++++------------
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 23 ++++++++++++++++++-----
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 3 ++-
5 files changed, 50 insertions(+), 18 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index acf8ae0..8f8523a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -27,6 +27,7 @@
#include "amdgpu_gfx.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
+#include "amdgpu_xgmi.h"
static const unsigned int compute_vmid_bitmap = 0xFF00;
@@ -481,6 +482,20 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
return adev->gmc.xgmi.hive_id;
}
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
+{
+ struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
+ struct amdgpu_device *adev = (struct amdgpu_device *)dst;
+ int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
+
+ if (ret < 0) {
+ DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
+ adev->gmc.xgmi.physical_node_id,
+ peer_adev->gmc.xgmi.physical_node_id, ret);
+ ret = 0;
+ }
+ return (uint8_t)ret;
+}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index e6a5037..b0cb94d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -154,6 +154,7 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
+uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
#define read_user_wptr(mmptr, wptr, dst) \
({ \
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index cde113f..acbc18b5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -95,12 +95,26 @@ struct psp_funcs
int (*ras_cure_posion)(struct psp_context *psp, uint64_t *mode_ptr);
};
+#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
+struct psp_xgmi_node_info {
+ uint64_t node_id;
+ uint8_t num_hops;
+ uint8_t is_sharing_enabled;
+ enum ta_xgmi_assigned_sdma_engine sdma_engine;
+};
+
+struct psp_xgmi_topology_info {
+ uint32_t num_nodes;
+ struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
+};
+
struct psp_xgmi_context {
uint8_t initialized;
uint32_t session_id;
struct amdgpu_bo *xgmi_shared_bo;
uint64_t xgmi_shared_mc_addr;
void *xgmi_shared_buf;
+ struct psp_xgmi_topology_info top_info;
};
struct psp_ras_context {
@@ -181,18 +195,6 @@ struct amdgpu_psp_funcs {
enum AMDGPU_UCODE_ID);
};
-#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64
-struct psp_xgmi_node_info {
- uint64_t node_id;
- uint8_t num_hops;
- uint8_t is_sharing_enabled;
- enum ta_xgmi_assigned_sdma_engine sdma_engine;
-};
-
-struct psp_xgmi_topology_info {
- uint32_t num_nodes;
- struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
-};
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index a48c84c..04dfc8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -238,7 +238,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
/* Each psp need to set the latest topology */
ret = psp_xgmi_set_topology_info(&adev->psp,
hive->number_devices,
- &hive->topology_info);
+ &adev->psp.xgmi_context.top_info);
if (ret)
dev_err(adev->dev,
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
@@ -248,9 +248,22 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
return ret;
}
+
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev)
+{
+ struct psp_xgmi_topology_info *top = &adev->psp.xgmi_context.top_info;
+ int i;
+
+ for (i = 0 ; i < top->num_nodes; ++i)
+ if (top->nodes[i].node_id == peer_adev->gmc.xgmi.node_id)
+ return top->nodes[i].num_hops;
+ return -EINVAL;
+}
+
int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
{
- struct psp_xgmi_topology_info *hive_topology;
+ struct psp_xgmi_topology_info *top_info;
struct amdgpu_hive_info *hive;
struct amdgpu_xgmi *entry;
struct amdgpu_device *tmp_adev = NULL;
@@ -283,16 +296,16 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
goto exit;
}
- hive_topology = &hive->topology_info;
+ top_info = &adev->psp.xgmi_context.top_info;
list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
list_for_each_entry(entry, &hive->device_list, head)
- hive_topology->nodes[count++].node_id = entry->node_id;
+ top_info->nodes[count++].node_id = entry->node_id;
hive->number_devices = count;
/* Each psp need to get the latest topology */
list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
- ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
+ ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, top_info);
if (ret) {
dev_err(tmp_adev->dev,
"XGMI: Get topology failure on device %llx, hive %llx, ret %d",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 3e9c91e..fbcee31 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -27,7 +27,6 @@
struct amdgpu_hive_info {
uint64_t hive_id;
struct list_head device_list;
- struct psp_xgmi_topology_info topology_info;
int number_devices;
struct mutex hive_lock, reset_lock;
struct kobject *kobj;
@@ -41,6 +40,8 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
int amdgpu_xgmi_set_pstate(struct amdgpu_device *adev, int pstate);
+int amdgpu_xgmi_get_hops_count(struct amdgpu_device *adev,
+ struct amdgpu_device *peer_adev);
static inline bool amdgpu_xgmi_same_hive(struct amdgpu_device *adev,
struct amdgpu_device *bo_adev)
--
2.7.4
More information about the amd-gfx
mailing list