[PATCH libdrm] amdgpu: dynamically detect number of drm devices

xiyuan xiyuan at amd.com
Thu Apr 19 13:45:45 UTC 2018



On 04/19/2018 09:11 PM, Christian König wrote:
> Am 19.04.2018 um 15:06 schrieb xiyuan:
>> Hi Christian,
>>
>> The problem I encountered is that amdgpu_test throws segment fault on 
>> a platform with 1 iGPU + 4 dGPU. The root cause is that 'drm_count' 
>> returned by drmGetDevices() is 5 which exceeds MAX_CARDS_SUPPORTED, 
>> so I decided to make it more flexible to get the actual number of drm 
>> devices.
>>
>> Setting MAX_CARDS_SUPPORTED to 128 is fine to solve my problem but is 
>> it a bit large for traversal?
>
> No, not really. MAX_CARDS_SUPPORTED only affects the size of 
> drm_amdgpu array. And so we would use 128*4=512 bytes at most.
Agree. I glanced at code again, traversal for drm_amdgpu array only 
occurs at amdgpu_print_devices() and amdgpu_find_devices() and neither 
are hot codepath.
>
> 128 is the maximum the kernel can support at the moment, so that 
> should work for a while.
Makes more sense. Could you just point out where the 128 limit is set?
I'll prepare another patch soon.

Regards,
Xiaojie
>
> Christian.
>
>>
>> Regards,
>> Xiaojie
>>
>>
>> On 04/19/2018 07:06 PM, Christian König wrote:
>>> Wouldn't it be simpler to just set MAX_CARDS_SUPPORTED to 128?
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 19.04.2018 um 12:12 schrieb Xiaojie Yuan:
>>>> Change-Id: I36764951bebbcbf06cf84dd43ee946a34ec7b100
>>>> Signed-off-by: Xiaojie Yuan <Xiaojie.Yuan at amd.com>
>>>> ---
>>>>   tests/amdgpu/amdgpu_test.c | 44 
>>>> ++++++++++++++++++++++++++++----------
>>>>   tests/amdgpu/amdgpu_test.h |  7 +-----
>>>>   2 files changed, 34 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
>>>> index 96fcd687..f7ac4ab4 100644
>>>> --- a/tests/amdgpu/amdgpu_test.c
>>>> +++ b/tests/amdgpu/amdgpu_test.c
>>>> @@ -61,7 +61,8 @@
>>>>    *  Open handles for amdgpu devices
>>>>    *
>>>>    */
>>>> -int drm_amdgpu[MAX_CARDS_SUPPORTED];
>>>> +int *drm_amdgpu;
>>>> +size_t num_drm_devices;
>>>>     /** Open render node to test */
>>>>   int open_render_node = 0;    /* By default run most tests on 
>>>> primary node */
>>>> @@ -238,16 +239,16 @@ static const char options[]   = "hlrps:t:b:d:f";
>>>>    */
>>>>   static int amdgpu_open_devices(int open_render_node)
>>>>   {
>>>> -    drmDevicePtr devices[MAX_CARDS_SUPPORTED];
>>>> +    drmDevicePtr *devices;
>>>>       int i;
>>>>       int drm_node;
>>>>       int amd_index = 0;
>>>>       int drm_count;
>>>> +    int drm_count2;
>>>>       int fd;
>>>>       drmVersionPtr version;
>>>>   -    drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED);
>>>> -
>>>> +    drm_count = drmGetDevices2(0, NULL, 0);
>>>>       if (drm_count < 0) {
>>>>           fprintf(stderr,
>>>>               "drmGetDevices2() returned an error %d\n",
>>>> @@ -255,6 +256,27 @@ static int amdgpu_open_devices(int 
>>>> open_render_node)
>>>>           return 0;
>>>>       }
>>>>   +    devices = calloc(drm_count, sizeof(drmDevicePtr));
>>>> +    if (!devices) {
>>>> +        goto end;
>>>> +    }
>>>> +
>>>> +    drm_amdgpu = calloc(drm_count, sizeof(int));
>>>> +    if (!drm_amdgpu) {
>>>> +        goto end;
>>>> +    }
>>>> +
>>>> +    for (i = 0; i < drm_count; i++)
>>>> +        drm_amdgpu[i] = -1;
>>>> +
>>>> +    drm_count2 = drmGetDevices2(0, devices, drm_count);
>>>> +    if (drm_count2 != drm_count) {
>>>> +        fprintf(stderr, "number of drm devices changed");
>>>> +        goto end;
>>>> +    }
>>>> +
>>>> +    num_drm_devices = drm_count;
>>>> +
>>>>       for (i = 0; i < drm_count; i++) {
>>>>           /* If this is not PCI device, skip*/
>>>>           if (devices[i]->bustype != DRM_BUS_PCI)
>>>> @@ -302,7 +324,9 @@ static int amdgpu_open_devices(int 
>>>> open_render_node)
>>>>           amd_index++;
>>>>       }
>>>>   +end:
>>>>       drmFreeDevices(devices, drm_count);
>>>> +    free(devices);
>>>>       return amd_index;
>>>>   }
>>>>   @@ -311,9 +335,11 @@ static int amdgpu_open_devices(int 
>>>> open_render_node)
>>>>   static void amdgpu_close_devices()
>>>>   {
>>>>       int i;
>>>> -    for (i = 0; i < MAX_CARDS_SUPPORTED; i++)
>>>> +    for (i = 0; i < num_drm_devices; i++)
>>>>           if (drm_amdgpu[i] >=0)
>>>>               close(drm_amdgpu[i]);
>>>> +
>>>> +    free(drm_amdgpu);
>>>>   }
>>>>     /* Print AMD devices information */
>>>> @@ -339,7 +365,7 @@ static void amdgpu_print_devices()
>>>>         /* Display information of AMD devices */
>>>>       printf("Devices:\n");
>>>> -    for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >=0; i++)
>>>> +    for (i = 0; i < num_drm_devices && drm_amdgpu[i] >=0; i++)
>>>>           if (drmGetDevice2(drm_amdgpu[i],
>>>>               DRM_DEVICE_GET_PCI_REVISION,
>>>>               &device) == 0) {
>>>> @@ -377,7 +403,7 @@ static int amdgpu_find_device(uint8_t bus, 
>>>> uint16_t dev)
>>>>       int i;
>>>>       drmDevicePtr device;
>>>>   -    for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0; 
>>>> i++) {
>>>> +    for (i = 0; i < num_drm_devices && drm_amdgpu[i] >= 0; i++) {
>>>>           if (drmGetDevice2(drm_amdgpu[i],
>>>>               DRM_DEVICE_GET_PCI_REVISION,
>>>>               &device) == 0) {
>>>> @@ -456,10 +482,6 @@ int main(int argc, char **argv)
>>>>       int display_list = 0;
>>>>       int force_run = 0;
>>>>   -    for (i = 0; i < MAX_CARDS_SUPPORTED; i++)
>>>> -        drm_amdgpu[i] = -1;
>>>> -
>>>> -
>>>>       /* Parse command line string */
>>>>       opterr = 0;        /* Do not print error messages from getopt */
>>>>       while ((c = getopt(argc, argv, options)) != -1) {
>>>> diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
>>>> index 62875736..8a604fe4 100644
>>>> --- a/tests/amdgpu/amdgpu_test.h
>>>> +++ b/tests/amdgpu/amdgpu_test.h
>>>> @@ -27,13 +27,8 @@
>>>>   #include "amdgpu.h"
>>>>   #include "amdgpu_drm.h"
>>>>   -/**
>>>> - * Define max. number of card in system which we are able to handle
>>>> - */
>>>> -#define MAX_CARDS_SUPPORTED     4
>>>> -
>>>>   /* Forward reference for array to keep "drm" handles */
>>>> -extern int drm_amdgpu[MAX_CARDS_SUPPORTED];
>>>> +extern int *drm_amdgpu;
>>>>     /* Global variables */
>>>>   extern int open_render_node;
>>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>



More information about the amd-gfx mailing list