[PATCH] Add PCI device based GPU selection with --pci
Tom St Denis
tom.stdenis at amd.com
Fri Jun 23 11:20:35 UTC 2017
Some style/flow issues inline below.
On 22/06/17 04:36 PM, Jean-Francois Thibert wrote:
> This allows selecting the GPU by its PCI device both with and
> without kernel mode support. The instance is populated automatically
> so that the proper corresponding debugfs files are used if present.
>
> Signed-off-by: Jean-Francois Thibert <jfthibert at google.com>
> ---
> src/app/main.c | 9 ++++++
> src/lib/discover.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
> src/umr.h | 6 +++-
> 3 files changed, 96 insertions(+), 4 deletions(-)
>
> diff --git a/src/app/main.c b/src/app/main.c
> index 1d9ef9e..aa630f5 100644
> --- a/src/app/main.c
> +++ b/src/app/main.c
> @@ -174,6 +174,15 @@ int main(int argc, char **argv)
> printf("--force requires a number/name\n");
> return EXIT_FAILURE;
> }
> + } else if (!strcmp(argv[i], "--pci")) {
> + if (i + 1 < argc && sscanf(argv[i+1], "%04x:%02x:%02x.%01x",
> + &options.pci_domain, &options.pci_bus, &options.pci_slot,
> + &options.pci_func ) >= 4) {
> + ++i;
> + } else {
> + printf("--pci requires domain:bus:slot.function\n");
> + return EXIT_FAILURE;
> + }
Could use man/help text updates. I can do that though in a followup patch.
> } else if (!strcmp(argv[i], "--print") || !strcmp(argv[i], "-p")) {
> options.print = 1;
> options.need_scan = 1;
> diff --git a/src/lib/discover.c b/src/lib/discover.c
> index 9662d05..8ab321f 100644
> --- a/src/lib/discover.c
> +++ b/src/lib/discover.c
> @@ -22,6 +22,9 @@
> * Authors: Tom St Denis <tom.stdenis at amd.com>
> *
> */
> +#include <dirent.h>
> +#include <sys/types.h>
> +
> #include "umr.h"
>
> static int is_did_match(struct umr_asic *asic, unsigned did)
> @@ -44,6 +47,43 @@ static int is_did_match(struct umr_asic *asic, unsigned did)
> return r;
> }
>
> +static int find_pci_instance(const char* pci_string) {
> + DIR* dir;
> + dir = opendir("/sys/kernel/debug/dri");
> + if (dir == NULL) {
> + perror("Couldn't open DRI under debugfs");
> + return -1;
> + }
> + struct dirent *dir_entry;
> + while ((dir_entry = readdir(dir)) != NULL) {
> + // ignore . and ..
> + if (strcmp(dir_entry->d_name, ".") == 0 || strcmp(dir_entry->d_name,
> + "..") == 0) {
> + continue;
> + }
> + char name[256];
I try not to use C99/C++ style declaration-in-scope (except at the start
of scope) variables. It makes the code easier to follow, e.g. "how big
is name, oh look at the start of the block).
Also I use kernel style {} rules where single statement blocks don't
receive {} unless they're paired with another related (e.g. if/else)
block which has multiple statements.
> + snprintf(name, sizeof(name) - 1, "/sys/kernel/debug/dri/%s/name",
> + dir_entry->d_name);
> + FILE *f = fopen(name, "r");
> + if (!f) {
> + continue;
> + }
> + char device[256] = {};
> + int parsed_device = fscanf(f, "%*s %255s", device);
> + fclose(f);
> + if (parsed_device != 1)
> + continue;
> + // strip off dev= for kernels > 4.7
> + if (strstr(device, "dev="))
> + memmove(device, device+4, strlen(device)-3);
> + if (strcmp(pci_string, device) == 0) {
> + closedir(dir);
> + return atoi(dir_entry->d_name);
> + }
> + }
> + closedir(dir);
> + return -1;
> +}
>
> struct umr_asic *umr_discover_asic(struct umr_options *options)
> {
> @@ -53,6 +93,29 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
> struct umr_asic *asic;
> long trydid = options->forcedid;
>
> + // Try to map to instance if we have a specific pci device
> + if (options->pci_domain || options->pci_bus ||
> + options->pci_slot || options->pci_func) {
> + char pci_string[16];
> + snprintf(pci_string, sizeof(pci_string) - 1, "%04x:%02x:%02x.%x",
> + options->pci_domain, options->pci_bus, options->pci_slot,
> + options->pci_func);
> + if (!options->no_kernel) {
> + options->instance = find_pci_instance(pci_string);
> + }
> + snprintf(driver, sizeof(driver)-1, "/sys/bus/pci/devices/%s/device", pci_string);
> + f = fopen(driver, "r");
> + if (!f) {
> + if (!options->quiet) perror("Cannot open PCI device name under sysfs (is a display attached?)");
> + return NULL;
> + }
> + int found_did = fscanf(f, "0x%04lx", &trydid);
> + fclose(f);
> + if (found_did != 1) {
> + if (!options->quiet) printf("Could not read device id");
> + return NULL;
> + }
Maybe set trydid here instead for consistency.
> + }
> // try to scan via debugfs
> asic = calloc(1, sizeof *asic);
> if (asic) {
Not part of your patch but we should gate all of this behind "if
(!options->no_kernel)" since the debugfs entry cannot be used in that
mode. I can do that in a followup patch.
> @@ -64,7 +127,6 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
> umr_free_asic(asic);
> asic = NULL;
> }
> -
> if (trydid < 0) {
> snprintf(name, sizeof(name)-1, "/sys/kernel/debug/dri/%d/name", options->instance);
> f = fopen(name, "r");
> @@ -86,8 +148,12 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
> }
> return NULL;
> }
> - fscanf(f, "%s %s %s\n", driver, name, driver);
> + int parsed_pci_id = fscanf(f, "%*s %s", name);
> fclose(f);
> + if (parsed_pci_id != 1) {
> + if (!options->quiet) printf("Cannot read pci device id\n");
> + return NULL;
> + }
>
> // strip off dev= for kernels > 4.7
> if (strstr(name, "dev="))
> @@ -99,8 +165,12 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
> if (!options->quiet) perror("Cannot open PCI device name under sysfs (is a display attached?)");
> return NULL;
> }
> - fscanf(f, "0x%04x", &did);
> + int parsed_did = fscanf(f, "0x%04x", &did);
> fclose(f);
> + if (parsed_did != 1) {
> + if (!options->quiet) printf("Could not read device id");
> + return NULL;
> + }
> asic = umr_discover_asic_by_did(options, did);
> } else {
> if (options->dev_name[0])
> @@ -158,6 +228,15 @@ struct umr_asic *umr_discover_asic(struct umr_options *options)
> }
> do {
> asic->pci.pdevice = pci_device_next(pci_iter);
> + if (options->pci_domain || options->pci_bus || options->pci_slot || options->pci_func) {
> + while (asic->pci.pdevice && (
> + options->pci_domain != asic->pci.pdevice->domain ||
> + options->pci_bus != asic->pci.pdevice->bus ||
> + options->pci_slot != asic->pci.pdevice->dev ||
> + options->pci_func != asic->pci.pdevice->func)) {
> + asic->pci.pdevice = pci_device_next(pci_iter);
> + }
> + }
> } while (asic->pci.pdevice && !(asic->pci.pdevice->vendor_id == 0x1002 && is_did_match(asic, asic->pci.pdevice->device_id)));
>
> if (!asic->pci.pdevice) {
> diff --git a/src/umr.h b/src/umr.h
> index ccfac5d..391c5e7 100644
> --- a/src/umr.h
> +++ b/src/umr.h
> @@ -173,7 +173,11 @@ struct umr_options {
> read_smc,
> quiet,
> follow_ib,
> - no_kernel;
> + no_kernel,
> + pci_domain,
> + pci_bus,
> + pci_slot,
> + pci_func;
As I commented in private maybe switch this to
struct pci {
int domain,
bus,
slot,
func;
};
Inside the options structure since it will make the code easier to read
and follow.
With the style issues fixed up: Reviewed-by: Tom St Denis
<tom.stdenis at amd.com>.
Tom
More information about the amd-gfx
mailing list