[PATCH] Add PCI device based GPU selection with --pci

Tom St Denis tom.stdenis at amd.com
Fri Jun 23 11:20:35 UTC 2017


Some style/flow issues inline below.


On 22/06/17 04:36 PM, Jean-Francois Thibert wrote:
> This allows selecting the GPU by its PCI device both with and
> without kernel mode support. The instance is populated automatically
> so that the proper corresponding debugfs files are used if present.
> 
> Signed-off-by: Jean-Francois Thibert <jfthibert at google.com>
> ---
>   src/app/main.c     |  9 ++++++
>   src/lib/discover.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>   src/umr.h          |  6 +++-
>   3 files changed, 96 insertions(+), 4 deletions(-)
> 
> diff --git a/src/app/main.c b/src/app/main.c
> index 1d9ef9e..aa630f5 100644
> --- a/src/app/main.c
> +++ b/src/app/main.c
> @@ -174,6 +174,15 @@ int main(int argc, char **argv)
>   				printf("--force requires a number/name\n");
>   				return EXIT_FAILURE;
>   			}
> +		} else if (!strcmp(argv[i], "--pci")) {
> +			if (i + 1 < argc && sscanf(argv[i+1], "%04x:%02x:%02x.%01x",
> +				&options.pci_domain, &options.pci_bus, &options.pci_slot,
> +				&options.pci_func ) >= 4) {
> +				++i;
> +			} else {
> +				printf("--pci requires domain:bus:slot.function\n");
> +				return EXIT_FAILURE;
> +			}

Could use man/help text updates.  I can do that though in a followup patch.

>   		} else if (!strcmp(argv[i], "--print") || !strcmp(argv[i], "-p")) {
>   			options.print = 1;
>   			options.need_scan = 1;
> diff --git a/src/lib/discover.c b/src/lib/discover.c
> index 9662d05..8ab321f 100644
> --- a/src/lib/discover.c
> +++ b/src/lib/discover.c
> @@ -22,6 +22,9 @@
>    * Authors: Tom St Denis <tom.stdenis at amd.com>
>    *
>    */
> +#include <dirent.h>
> +#include <sys/types.h>
> +
>   #include "umr.h"
>   
>   static int is_did_match(struct umr_asic *asic, unsigned did)
> @@ -44,6 +47,43 @@ static int is_did_match(struct umr_asic *asic, unsigned did)
>   	return r;
>   }
>   
> +static int find_pci_instance(const char* pci_string) {
> +	DIR* dir;
> +	dir = opendir("/sys/kernel/debug/dri");
> +	if (dir == NULL) {
> +		perror("Couldn't open DRI under debugfs");
> +		return -1;
> +	}
> +	struct dirent *dir_entry;
> +	while ((dir_entry = readdir(dir)) != NULL) {
> +		// ignore . and ..
> +		if (strcmp(dir_entry->d_name, ".") == 0 || strcmp(dir_entry->d_name,
> +			"..") == 0) {
> +			continue;
> +		}
> +		char name[256];

I try not to use C99/C++ style declaration-in-scope (except at the start 
of scope) variables.  It makes the code easier to follow, e.g. "how big 
is name, oh look at the start of the block).

Also I use kernel style {} rules where single statement blocks don't 
receive {} unless they're paired with another related (e.g. if/else) 
block which has multiple statements.

> +		snprintf(name, sizeof(name) - 1, "/sys/kernel/debug/dri/%s/name",
> +			dir_entry->d_name);
> +		FILE *f = fopen(name, "r");
> +		if (!f) {
> +			continue;
> +		}
> +		char device[256] = {};
> +		int parsed_device = fscanf(f, "%*s %255s", device);
> +		fclose(f);
> +		if (parsed_device != 1)
> +			continue;
> +		// strip off dev= for kernels > 4.7
> +		if (strstr(device, "dev="))
> +			memmove(device, device+4, strlen(device)-3);
> +		if (strcmp(pci_string, device) == 0) {
> +			closedir(dir);
> +			return atoi(dir_entry->d_name);
> +		}
> +	}
> +	closedir(dir);
> +	return -1;
> +}
>   
>   struct umr_asic *umr_discover_asic(struct umr_options *options)
>   {
> @@ -53,6 +93,29 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   	struct umr_asic *asic;
>   	long trydid = options->forcedid;
>   
> +	// Try to map to instance if we have a specific pci device
> +	if (options->pci_domain || options->pci_bus ||
> +		options->pci_slot || options->pci_func) {
> +		char pci_string[16];
> +		snprintf(pci_string, sizeof(pci_string) - 1, "%04x:%02x:%02x.%x",
> +			options->pci_domain, options->pci_bus, options->pci_slot,
> +			options->pci_func);
> +		if (!options->no_kernel) {
> +			options->instance = find_pci_instance(pci_string);
> +		}
> +		snprintf(driver, sizeof(driver)-1, "/sys/bus/pci/devices/%s/device", pci_string);
> +		f = fopen(driver, "r");
> +		if (!f) {
> +			if (!options->quiet) perror("Cannot open PCI device name under sysfs (is a display attached?)");
> +			return NULL;
> +		}
> +		int found_did = fscanf(f, "0x%04lx", &trydid);
> +		fclose(f);
> +		if (found_did != 1) {
> +			if (!options->quiet) printf("Could not read device id");
> +			return NULL;
> +		}

Maybe set trydid here instead for consistency.

> +	}
>   	// try to scan via debugfs
>   	asic = calloc(1, sizeof *asic);
>   	if (asic) {


Not part of your patch but we should gate all of this behind "if 
(!options->no_kernel)" since the debugfs entry cannot be used in that 
mode.  I can do that in a followup patch.

> @@ -64,7 +127,6 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   		umr_free_asic(asic);
>   		asic = NULL;
>   	}
> -
>   	if (trydid < 0) {
>   		snprintf(name, sizeof(name)-1, "/sys/kernel/debug/dri/%d/name", options->instance);
>   		f = fopen(name, "r");
> @@ -86,8 +148,12 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   			}
>   			return NULL;
>   		}
> -		fscanf(f, "%s %s %s\n", driver, name, driver);
> +		int parsed_pci_id = fscanf(f, "%*s %s", name);
>   		fclose(f);
> +		if (parsed_pci_id != 1) {
> +			if (!options->quiet) printf("Cannot read pci device id\n");
> +			return NULL;
> +		}
>   
>   		// strip off dev= for kernels > 4.7
>   		if (strstr(name, "dev="))
> @@ -99,8 +165,12 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   			if (!options->quiet) perror("Cannot open PCI device name under sysfs (is a display attached?)");
>   			return NULL;
>   		}
> -		fscanf(f, "0x%04x", &did);
> +		int parsed_did = fscanf(f, "0x%04x", &did);
>   		fclose(f);
> +		if (parsed_did != 1) {
> +			if (!options->quiet) printf("Could not read device id");
> +			return NULL;
> +		}
>   		asic = umr_discover_asic_by_did(options, did);
>   	} else {
>   		if (options->dev_name[0])
> @@ -158,6 +228,15 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
>   			}
>   			do {
>   				asic->pci.pdevice = pci_device_next(pci_iter);
> +				if (options->pci_domain || options->pci_bus || options->pci_slot || options->pci_func) {
> +					while (asic->pci.pdevice && (
> +						options->pci_domain != asic->pci.pdevice->domain ||
> +						options->pci_bus != asic->pci.pdevice->bus ||
> +						options->pci_slot != asic->pci.pdevice->dev ||
> +						options->pci_func != asic->pci.pdevice->func)) {
> +						asic->pci.pdevice = pci_device_next(pci_iter);
> +					}
> +				}
>   			} while (asic->pci.pdevice && !(asic->pci.pdevice->vendor_id == 0x1002 && is_did_match(asic, asic->pci.pdevice->device_id)));
>   
>   			if (!asic->pci.pdevice) {
> diff --git a/src/umr.h b/src/umr.h
> index ccfac5d..391c5e7 100644
> --- a/src/umr.h
> +++ b/src/umr.h
> @@ -173,7 +173,11 @@ struct umr_options {
>   	    read_smc,
>   	    quiet,
>   	    follow_ib,
> -	    no_kernel;
> +	    no_kernel,
> +	    pci_domain,
> +	    pci_bus,
> +	    pci_slot,
> +	    pci_func;

As I commented in private maybe switch this to

struct pci {
	int domain,
	    bus,
	    slot,
	    func;
};

Inside the options structure since it will make the code easier to read 
and follow.

With the style issues fixed up: Reviewed-by: Tom St Denis 
<tom.stdenis at amd.com>.

Tom


More information about the amd-gfx mailing list