[PATCH 3/5] drm/amdgpu: Refactor amdgpu_xgmi_add_device
Alex Deucher
alexdeucher at gmail.com
Wed Nov 21 19:38:41 UTC 2018
On Wed, Nov 21, 2018 at 2:36 PM Grodzovsky, Andrey
<Andrey.Grodzovsky at amd.com> wrote:
>
>
>
> On 11/21/2018 02:29 PM, Alex Deucher wrote:
> > On Wed, Nov 21, 2018 at 1:11 PM Andrey Grodzovsky
> > <andrey.grodzovsky at amd.com> wrote:
> >> This is prep work for updating each PSP FW in hive after
> >> GPU reset.
> >> Split into build topology SW state and update each PSP FW in the hive.
> >> Save topology and count of XGMI devices for reuse.
> >>
> >> Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky at amd.com>
> >> ---
> >> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +++
> >> drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 55 +++++++++++++++++++-------------
> >> 2 files changed, 38 insertions(+), 22 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> index 2c80453..3e5bede 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> >> @@ -1226,6 +1226,11 @@ long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd,
> >> /*
> >> * functions used by amdgpu_xgmi.c
> >> */
> >> +
> >> +struct amdgpu_hive_info;
> >> +
> >> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
> >> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive);
> >> int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
> > We should move these to their own header, amdgpu_xgmi.h, rather than
> > dumping them in amdgpu.h
> >
> >> /*
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> index 909216a..23e4e16 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
> >> @@ -34,12 +34,14 @@ static DEFINE_MUTEX(xgmi_mutex);
> >> struct amdgpu_hive_info {
> >> uint64_t hive_id;
> >> struct list_head device_list;
> >> + struct psp_xgmi_topology_info topology_info;
> >> + int number_devices;
> >> };
> >>
> >> static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE];
> >> static unsigned hive_count = 0;
> >>
> >> -static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> >> +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> > Any reason to make this public?
>
> It is used in Patch 5.
Ok. thanks.
Alex
>
> Andrey
>
> >
> >> {
> >> int i;
> >> struct amdgpu_hive_info *tmp;
> >> @@ -61,12 +63,33 @@ static struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
> >> return tmp;
> >> }
> >>
> >> +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev)
> >> +{
> >> + int ret = -EINVAL;
> >> +
> >> + /* Each psp need to set the latest topology */
> >> + ret = psp_xgmi_set_topology_info(&adev->psp,
> >> + hive->number_devices,
> >> + &hive->topology_info);
> >> + if (ret)
> >> + dev_err(adev->dev,
> >> + "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
> >> + adev->gmc.xgmi.node_id,
> >> + adev->gmc.xgmi.hive_id, ret);
> >> + else
> >> + dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
> >> + adev->gmc.xgmi.physical_node_id,
> >> + adev->gmc.xgmi.hive_id);
> >> +
> >> + return ret;
> >> +}
> > Indentation in this function looks wrong.
> >
> >> +
> >> int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >> {
> >> - struct psp_xgmi_topology_info *tmp_topology;
> >> + struct psp_xgmi_topology_info *hive_topology;
> >> struct amdgpu_hive_info *hive;
> >> struct amdgpu_xgmi *entry;
> >> - struct amdgpu_device *tmp_adev;
> >> + struct amdgpu_device *tmp_adev = NULL;
> >>
> >> int count = 0, ret = -EINVAL;
> >>
> >> @@ -76,21 +99,21 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >> adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
> >> adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
> >>
> >> - tmp_topology = kzalloc(sizeof(struct psp_xgmi_topology_info), GFP_KERNEL);
> >> - if (!tmp_topology)
> >> - return -ENOMEM;
> >> mutex_lock(&xgmi_mutex);
> >> hive = amdgpu_get_xgmi_hive(adev);
> >> if (!hive)
> >> goto exit;
> >>
> >> + hive_topology = &hive->topology_info;
> >> +
> >> list_add_tail(&adev->gmc.xgmi.head, &hive->device_list);
> >> list_for_each_entry(entry, &hive->device_list, head)
> >> - tmp_topology->nodes[count++].node_id = entry->node_id;
> >> + hive_topology->nodes[count++].node_id = entry->node_id;
> >> + hive->number_devices = count;
> >>
> >> /* Each psp need to get the latest topology */
> >> list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> >> - ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, tmp_topology);
> >> + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology);
> >> if (ret) {
> >> dev_err(tmp_adev->dev,
> >> "XGMI: Get topology failure on device %llx, hive %llx, ret %d",
> >> @@ -101,25 +124,13 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
> >> }
> >> }
> >>
> >> - /* Each psp need to set the latest topology */
> >> list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) {
> >> - ret = psp_xgmi_set_topology_info(&tmp_adev->psp, count, tmp_topology);
> >> - if (ret) {
> >> - dev_err(tmp_adev->dev,
> >> - "XGMI: Set topology failure on device %llx, hive %llx, ret %d",
> >> - tmp_adev->gmc.xgmi.node_id,
> >> - tmp_adev->gmc.xgmi.hive_id, ret);
> >> - /* To do : continue with some node failed or disable the whole hive */
> >> + ret = amdgpu_xgmi_update_topology(hive, tmp_adev);
> >> + if (ret)
> >> break;
> >> - }
> >> }
> >> - if (!ret)
> >> - dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
> >> - adev->gmc.xgmi.physical_node_id,
> >> - adev->gmc.xgmi.hive_id);
> >>
> >> exit:
> >> mutex_unlock(&xgmi_mutex);
> >> - kfree(tmp_topology);
> >> return ret;
> >> }
> >> --
> >> 2.7.4
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx at lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
More information about the amd-gfx
mailing list