[PATCH libdrm] amdgpu: dynamically detect number of drm devices
xiyuan
xiyuan at amd.com
Thu Apr 19 13:45:45 UTC 2018
On 04/19/2018 09:11 PM, Christian König wrote:
> Am 19.04.2018 um 15:06 schrieb xiyuan:
>> Hi Christian,
>>
>> The problem I encountered is that amdgpu_test throws segment fault on
>> a platform with 1 iGPU + 4 dGPU. The root cause is that 'drm_count'
>> returned by drmGetDevices() is 5 which exceeds MAX_CARDS_SUPPORTED,
>> so I decided to make it more flexible to get the actual number of drm
>> devices.
>>
>> Setting MAX_CARDS_SUPPORTED to 128 is fine to solve my problem but is
>> it a bit large for traversal?
>
> No, not really. MAX_CARDS_SUPPORTED only affects the size of
> drm_amdgpu array. And so we would use 128*4=512 bytes at most.
Agree. I glanced at code again, traversal for drm_amdgpu array only
occurs at amdgpu_print_devices() and amdgpu_find_devices() and neither
are hot codepath.
>
> 128 is the maximum the kernel can support at the moment, so that
> should work for a while.
Makes more sense. Could you just point out where the 128 limit is set?
I'll prepare another patch soon.
Regards,
Xiaojie
>
> Christian.
>
>>
>> Regards,
>> Xiaojie
>>
>>
>> On 04/19/2018 07:06 PM, Christian König wrote:
>>> Wouldn't it be simpler to just set MAX_CARDS_SUPPORTED to 128?
>>>
>>> Regards,
>>> Christian.
>>>
>>> Am 19.04.2018 um 12:12 schrieb Xiaojie Yuan:
>>>> Change-Id: I36764951bebbcbf06cf84dd43ee946a34ec7b100
>>>> Signed-off-by: Xiaojie Yuan <Xiaojie.Yuan at amd.com>
>>>> ---
>>>> tests/amdgpu/amdgpu_test.c | 44
>>>> ++++++++++++++++++++++++++++----------
>>>> tests/amdgpu/amdgpu_test.h | 7 +-----
>>>> 2 files changed, 34 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/tests/amdgpu/amdgpu_test.c b/tests/amdgpu/amdgpu_test.c
>>>> index 96fcd687..f7ac4ab4 100644
>>>> --- a/tests/amdgpu/amdgpu_test.c
>>>> +++ b/tests/amdgpu/amdgpu_test.c
>>>> @@ -61,7 +61,8 @@
>>>> * Open handles for amdgpu devices
>>>> *
>>>> */
>>>> -int drm_amdgpu[MAX_CARDS_SUPPORTED];
>>>> +int *drm_amdgpu;
>>>> +size_t num_drm_devices;
>>>> /** Open render node to test */
>>>> int open_render_node = 0; /* By default run most tests on
>>>> primary node */
>>>> @@ -238,16 +239,16 @@ static const char options[] = "hlrps:t:b:d:f";
>>>> */
>>>> static int amdgpu_open_devices(int open_render_node)
>>>> {
>>>> - drmDevicePtr devices[MAX_CARDS_SUPPORTED];
>>>> + drmDevicePtr *devices;
>>>> int i;
>>>> int drm_node;
>>>> int amd_index = 0;
>>>> int drm_count;
>>>> + int drm_count2;
>>>> int fd;
>>>> drmVersionPtr version;
>>>> - drm_count = drmGetDevices2(0, devices, MAX_CARDS_SUPPORTED);
>>>> -
>>>> + drm_count = drmGetDevices2(0, NULL, 0);
>>>> if (drm_count < 0) {
>>>> fprintf(stderr,
>>>> "drmGetDevices2() returned an error %d\n",
>>>> @@ -255,6 +256,27 @@ static int amdgpu_open_devices(int
>>>> open_render_node)
>>>> return 0;
>>>> }
>>>> + devices = calloc(drm_count, sizeof(drmDevicePtr));
>>>> + if (!devices) {
>>>> + goto end;
>>>> + }
>>>> +
>>>> + drm_amdgpu = calloc(drm_count, sizeof(int));
>>>> + if (!drm_amdgpu) {
>>>> + goto end;
>>>> + }
>>>> +
>>>> + for (i = 0; i < drm_count; i++)
>>>> + drm_amdgpu[i] = -1;
>>>> +
>>>> + drm_count2 = drmGetDevices2(0, devices, drm_count);
>>>> + if (drm_count2 != drm_count) {
>>>> + fprintf(stderr, "number of drm devices changed");
>>>> + goto end;
>>>> + }
>>>> +
>>>> + num_drm_devices = drm_count;
>>>> +
>>>> for (i = 0; i < drm_count; i++) {
>>>> /* If this is not PCI device, skip*/
>>>> if (devices[i]->bustype != DRM_BUS_PCI)
>>>> @@ -302,7 +324,9 @@ static int amdgpu_open_devices(int
>>>> open_render_node)
>>>> amd_index++;
>>>> }
>>>> +end:
>>>> drmFreeDevices(devices, drm_count);
>>>> + free(devices);
>>>> return amd_index;
>>>> }
>>>> @@ -311,9 +335,11 @@ static int amdgpu_open_devices(int
>>>> open_render_node)
>>>> static void amdgpu_close_devices()
>>>> {
>>>> int i;
>>>> - for (i = 0; i < MAX_CARDS_SUPPORTED; i++)
>>>> + for (i = 0; i < num_drm_devices; i++)
>>>> if (drm_amdgpu[i] >=0)
>>>> close(drm_amdgpu[i]);
>>>> +
>>>> + free(drm_amdgpu);
>>>> }
>>>> /* Print AMD devices information */
>>>> @@ -339,7 +365,7 @@ static void amdgpu_print_devices()
>>>> /* Display information of AMD devices */
>>>> printf("Devices:\n");
>>>> - for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >=0; i++)
>>>> + for (i = 0; i < num_drm_devices && drm_amdgpu[i] >=0; i++)
>>>> if (drmGetDevice2(drm_amdgpu[i],
>>>> DRM_DEVICE_GET_PCI_REVISION,
>>>> &device) == 0) {
>>>> @@ -377,7 +403,7 @@ static int amdgpu_find_device(uint8_t bus,
>>>> uint16_t dev)
>>>> int i;
>>>> drmDevicePtr device;
>>>> - for (i = 0; i < MAX_CARDS_SUPPORTED && drm_amdgpu[i] >= 0;
>>>> i++) {
>>>> + for (i = 0; i < num_drm_devices && drm_amdgpu[i] >= 0; i++) {
>>>> if (drmGetDevice2(drm_amdgpu[i],
>>>> DRM_DEVICE_GET_PCI_REVISION,
>>>> &device) == 0) {
>>>> @@ -456,10 +482,6 @@ int main(int argc, char **argv)
>>>> int display_list = 0;
>>>> int force_run = 0;
>>>> - for (i = 0; i < MAX_CARDS_SUPPORTED; i++)
>>>> - drm_amdgpu[i] = -1;
>>>> -
>>>> -
>>>> /* Parse command line string */
>>>> opterr = 0; /* Do not print error messages from getopt */
>>>> while ((c = getopt(argc, argv, options)) != -1) {
>>>> diff --git a/tests/amdgpu/amdgpu_test.h b/tests/amdgpu/amdgpu_test.h
>>>> index 62875736..8a604fe4 100644
>>>> --- a/tests/amdgpu/amdgpu_test.h
>>>> +++ b/tests/amdgpu/amdgpu_test.h
>>>> @@ -27,13 +27,8 @@
>>>> #include "amdgpu.h"
>>>> #include "amdgpu_drm.h"
>>>> -/**
>>>> - * Define max. number of card in system which we are able to handle
>>>> - */
>>>> -#define MAX_CARDS_SUPPORTED 4
>>>> -
>>>> /* Forward reference for array to keep "drm" handles */
>>>> -extern int drm_amdgpu[MAX_CARDS_SUPPORTED];
>>>> +extern int *drm_amdgpu;
>>>> /* Global variables */
>>>> extern int open_render_node;
>>>
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
More information about the amd-gfx
mailing list