[PATCH umr v2] Add PCI device based GPU selection with --pci

Tom St Denis tom.stdenis at amd.com
Sat Jun 24 17:09:24 UTC 2017


Applied, thanks!


Cheers,
Tom

On 23/06/17 04:33 PM, Jean-Francois Thibert wrote:
> This allows selecting the GPU by its PCI device both with and
> without kernel mode support. The instance is populated automatically
> so that the proper corresponding debugfs files are used if present.
> 
> Signed-off-by: Jean-Francois Thibert <jfthibert at google.com>
> ---
>   doc/umr.1          |  4 +++
>   src/app/main.c     |  9 ++++++
>   src/lib/discover.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>   src/umr.h          |  6 ++++
>   4 files changed, 103 insertions(+), 3 deletions(-)
> 
> diff --git a/doc/umr.1 b/doc/umr.1
> index 5c4bd01..a03dfb8 100644
> --- a/doc/umr.1
> +++ b/doc/umr.1
> @@ -17,6 +17,10 @@ Select a GFX INSTANCE/SH/SE bank in decimal.  Can use 'x' to denote a broadcast
>   .IP "--force -f <number>"
>   Force a PCIE Device ID in hex or by asic name.  This is used in case the amdgpu driver
>   is not yet loaded or a display is not yet attached.
> +.IP "--pci <device>"
> +Force a specific PCI device using the domain:bus:slot.function format in hex.
> +This is useful when more than one GPU is available. If the amdgpu driver is
> +loaded the corresponding instance will be automatically detected.
>   .IP "--print, -p"
>   Enable scanning and printing all registers.  Defaults to off as it can
>   be very verbose.
> diff --git a/src/app/main.c b/src/app/main.c
> index 1d9ef9e..067b472 100644
> --- a/src/app/main.c
> +++ b/src/app/main.c
> @@ -174,6 +174,15 @@ int main(int argc, char **argv)
>   				printf("--force requires a number/name\n");
>   				return EXIT_FAILURE;
>   			}
> +		} else if (!strcmp(argv[i], "--pci")) {
> +			if (i + 1 < argc && sscanf(argv[i+1], "%04x:%02x:%02x.%01x",
> +				&options.pci.domain, &options.pci.bus, &options.pci.slot,
> +				&options.pci.func ) >= 4) {
> +				++i;
> +			} else {
> +				printf("--pci requires domain:bus:slot.function\n");
> +				return EXIT_FAILURE;
> +			}
>   		} else if (!strcmp(argv[i], "--print") || !strcmp(argv[i], "-p")) {
>   			options.print = 1;
>   			options.need_scan = 1;
> diff --git a/src/lib/discover.c b/src/lib/discover.c
> index 9662d05..a95fe57 100644
> --- a/src/lib/discover.c
> +++ b/src/lib/discover.c
> @@ -22,6 +22,9 @@
>    * Authors: Tom St Denis <tom.stdenis at amd.com>
>    *
>    */
> +#include <dirent.h>
> +#include <sys/types.h>
> +
>   #include "umr.h"
>   
>   static int is_did_match(struct umr_asic *asic, unsigned did)
> @@ -44,6 +47,44 @@ static int is_did_match(struct umr_asic *asic, unsigned did)
>   	return r;
>   }
>   
> +static int find_pci_instance(const char* pci_string) {
> +	DIR* dir;
> +	dir = opendir("/sys/kernel/debug/dri");
> +	if (dir == NULL) {
> +		perror("Couldn't open DRI under debugfs");
> +		return -1;
> +	}
> +	struct dirent *dir_entry;
> +	while ((dir_entry = readdir(dir)) != NULL) {
> +		char device[256], name[256];
> +		int parsed_device;
> +		// ignore . and ..
> +		if (strcmp(dir_entry->d_name, ".") == 0 || strcmp(dir_entry->d_name,
> +			"..") == 0) {
> +			continue;
> +		}
> +		snprintf(name, sizeof(name), "/sys/kernel/debug/dri/%s/name",
> +			dir_entry->d_name);
> +		FILE *f = fopen(name, "r");
> +		if (!f) {
> +			continue;
> +		}
> +		device[sizeof(device) - 1] = 0;
> +		parsed_device = fscanf(f, "%*s %255s", device);
> +		fclose(f);
> +		if (parsed_device != 1)
> +			continue;
> +		// strip off dev= for kernels > 4.7
> +		if (strstr(device, "dev="))
> +			memmove(device, device+4, strlen(device)-3);
> +		if (strcmp(pci_string, device) == 0) {
> +			closedir(dir);
> +			return atoi(dir_entry->d_name);
> +		}
> +	}
> +	closedir(dir);
> +	return -1;
> +}
>   
>   struct umr_asic *umr_discover_asic(struct umr_options *options)
>   {
> @@ -53,6 +94,30 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   	struct umr_asic *asic;
>   	long trydid = options->forcedid;
>   
> +	// Try to map to instance if we have a specific pci device
> +	if (options->pci.domain || options->pci.bus ||
> +		options->pci.slot || options->pci.func) {
> +		char pci_string[16];
> +		int parsed_did;
> +		snprintf(pci_string, sizeof(pci_string), "%04x:%02x:%02x.%x",
> +			options->pci.domain, options->pci.bus, options->pci.slot,
> +			options->pci.func);
> +		if (!options->no_kernel) {
> +			options->instance = find_pci_instance(pci_string);
> +		}
> +		snprintf(driver, sizeof(driver), "/sys/bus/pci/devices/%s/device", pci_string);
> +		f = fopen(driver, "r");
> +		if (!f) {
> +			if (!options->quiet) perror("Cannot open PCI device name under sysfs (is a display attached?)");
> +			return NULL;
> +		}
> +		parsed_did = fscanf(f, "0x%04lx", &trydid);
> +		fclose(f);
> +		if (parsed_did != 1) {
> +			if (!options->quiet) printf("Could not read device id");
> +			return NULL;
> +		}
> +	}
>   	// try to scan via debugfs
>   	asic = calloc(1, sizeof *asic);
>   	if (asic) {
> @@ -64,7 +129,6 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   		umr_free_asic(asic);
>   		asic = NULL;
>   	}
> -
>   	if (trydid < 0) {
>   		snprintf(name, sizeof(name)-1, "/sys/kernel/debug/dri/%d/name", options->instance);
>   		f = fopen(name, "r");
> @@ -86,8 +150,12 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   			}
>   			return NULL;
>   		}
> -		fscanf(f, "%s %s %s\n", driver, name, driver);
> +		int parsed_pci_id = fscanf(f, "%*s %s", name);
>   		fclose(f);
> +		if (parsed_pci_id != 1) {
> +			if (!options->quiet) printf("Cannot read pci device id\n");
> +			return NULL;
> +		}
>   
>   		// strip off dev= for kernels > 4.7
>   		if (strstr(name, "dev="))
> @@ -99,8 +167,12 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   			if (!options->quiet) perror("Cannot open PCI device name under sysfs (is a display attached?)");
>   			return NULL;
>   		}
> -		fscanf(f, "0x%04x", &did);
> +		int parsed_did = fscanf(f, "0x%04x", &did);
>   		fclose(f);
> +		if (parsed_did != 1) {
> +			if (!options->quiet) printf("Could not read device id");
> +			return NULL;
> +		}
>   		asic = umr_discover_asic_by_did(options, did);
>   	} else {
>   		if (options->dev_name[0])
> @@ -158,6 +230,15 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   			}
>   			do {
>   				asic->pci.pdevice = pci_device_next(pci_iter);
> +				if (options->pci.domain || options->pci.bus || options->pci.slot || options->pci.func) {
> +					while (asic->pci.pdevice && (
> +						options->pci.domain != asic->pci.pdevice->domain ||
> +						options->pci.bus != asic->pci.pdevice->bus ||
> +						options->pci.slot != asic->pci.pdevice->dev ||
> +						options->pci.func != asic->pci.pdevice->func)) {
> +						asic->pci.pdevice = pci_device_next(pci_iter);
> +					}
> +				}
>   			} while (asic->pci.pdevice && !(asic->pci.pdevice->vendor_id == 0x1002 && is_did_match(asic, asic->pci.pdevice->device_id)));
>   
>   			if (!asic->pci.pdevice) {
> diff --git a/src/umr.h b/src/umr.h
> index ccfac5d..2a69017 100644
> --- a/src/umr.h
> +++ b/src/umr.h
> @@ -181,6 +181,12 @@ struct umr_options {
>   	long forcedid;
>   	char *scanblock;
>   	char dev_name[32];
> +	struct {
> +		int domain,
> +		    bus,
> +		    slot,
> +		    func;
> +	} pci;
>   };
>   
>   struct umr_asic {
> 



More information about the amd-gfx mailing list