[igt-dev] [PATCH i-g-t] gem_wsim: Distinguish particular engines during calculating nop calibration.

Anna Karas anna.karas at intel.com
Fri Jan 24 13:54:08 UTC 2020


Extend handling -n parameter by accepting multiple values of per
engine nop calibration. Add raw numbers handling to set default
calibration values. Print copyable and pastable string with
calibrations. Allow to switch between calculating in parallel
or doing it sequentially.

Accepted input values:
-n 123456
All calibrations will be set to 123456.

-n ENG=value,ENG2=value2,value3
i.e.
-n RCS=123456,BCS=345678,999999
RCS engine's value is set to 123456, BCS engine's value is set to
345678, 999999 is copied to rest of engines. All engines must be set;
you can either provide values for each of the engines, or you can set
specific values and provide a default value for the others.

-n value,ENG1=value1,ENG2=value2
First, value is copied to all engines, then value1 overrides ENG1, and
finally value2 overrides ENG2.

New output follows the pattern:
Nop calibrations for 1000us delay is: <eng1>=<v1>,<eng2>=<v2>,...
So you can easily copy-paste it to the next invocation.

Switching between calculation modes:
Run program with -T parameter to calculate calibrations in parallel.
The calculations are performed sequentially by default.

v2: Get rid of trailing whitespaces. Skip DEFAULT and VCS engines
when printing out calibrations. Reject them in the string passed
to -n. Re-align rest of help text. Fix accepting unknown engines.

v3: Consider all cases of arguments
for -n (Tvrtko).

-n 10 (raw number)
-n RCS (engine without calib)
-n AA (neither the engine nor the number)
-n RCS=500 (valid eng=val pair)
-n RCS=AA (calib is not a number)
-n XYZ=10 (engine is not an engine)
-n XYZ=AA (combo)

v4: Print calculated values (Chris). Do not make any assumptions
about the order of the engines (Tvrtko).

v5: Suppress the output with -q (Chris).

Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Anna Karas <anna.karas at intel.com>
---
 benchmarks/gem_wsim.c | 319 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 261 insertions(+), 58 deletions(-)

diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
index 9156fdc9..887e82cf 100644
--- a/benchmarks/gem_wsim.c
+++ b/benchmarks/gem_wsim.c
@@ -23,6 +23,7 @@
  */
 
 #include <unistd.h>
+#include <stdbool.h>
 #include <stdlib.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -55,6 +56,7 @@
 #include "i915/gem_mman.h"
 
 #include "ewma.h"
+#include "i915/gem_engine_topology.h"
 
 enum intel_engine_id {
 	DEFAULT,
@@ -240,7 +242,8 @@ struct workload
 
 struct intel_mmio_data mmio_data;
 static const unsigned int nop_calibration_us = 1000;
-static unsigned long nop_calibration;
+static bool has_nop_calibration = false;
+static bool sequential = true;
 
 static unsigned int master_prng;
 
@@ -281,6 +284,61 @@ static const char *ring_str_map[NUM_ENGINES] = {
 	[VECS] = "VECS",
 };
 
+/* stores calibrations for particular engines */
+static unsigned long engine_calib_map[NUM_ENGINES];
+
+static enum intel_engine_id
+ci_to_engine_id(int class, int instance)
+{
+	static const struct {
+		int class;
+		int instance;
+		unsigned int id;
+	} map[] = {
+		{ I915_ENGINE_CLASS_RENDER, 0, RCS },
+		{ I915_ENGINE_CLASS_COPY, 0, BCS },
+		{ I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
+		{ I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
+		{ I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
+		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
+	};
+
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(map); i++) {
+		if (class == map[i].class && instance == map[i].instance)
+			return map[i].id;
+	}
+	return -1;
+}
+
+static void
+apply_unset_calibrations(unsigned long raw_number)
+{
+	for (int i = 0; i < NUM_ENGINES; i++)
+		engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
+}
+
+static void
+print_engine_calibrations(void)
+{
+	bool first_entry = true;
+
+	printf("Nop calibrations for %uus delay is: ", nop_calibration_us);
+	for (int i = 0; i < NUM_ENGINES; i++) {
+		/* skip DEFAULT and VCS engines */
+		if (i != DEFAULT && i != VCS) {
+			if (first_entry) {
+				printf("%s=%lu", ring_str_map[i], engine_calib_map[i]);
+				first_entry = false;
+			} else {
+				printf(",%s=%lu", ring_str_map[i], engine_calib_map[i]);
+			}
+		}
+	}
+	printf("\n");
+}
+
 static int
 parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
 {
@@ -1082,17 +1140,17 @@ static unsigned int get_duration(struct workload *wrk, struct w_step *w)
 		       (dur->max + 1 - dur->min);
 }
 
-static unsigned long get_bb_sz(unsigned int duration)
+static unsigned long get_bb_sz(unsigned int duration, enum intel_engine_id engine)
 {
-	return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
-		     nop_calibration_us, sizeof(uint32_t));
+	return ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
+		nop_calibration_us, sizeof(uint32_t));
 }
 
 static void
 init_bb(struct w_step *w, unsigned int flags)
 {
 	const unsigned int arb_period =
-			get_bb_sz(w->preempt_us) / sizeof(uint32_t);
+			get_bb_sz(w->preempt_us, w->engine) / sizeof(uint32_t);
 	const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
 	unsigned int i;
 	uint32_t *ptr;
@@ -1319,10 +1377,11 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
 
 	if (w->unbound_duration)
 		/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
-		w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) +
+		w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us, w->engine)) +
 			   (1 + 3) * sizeof(uint32_t);
 	else
-		w->bb_sz = get_bb_sz(w->duration.max);
+		w->bb_sz = get_bb_sz(w->duration.max, w->engine);
+
 	w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
 	init_bb(w, flags);
 	w->obj[j].relocation_count = terminate_bb(w, flags);
@@ -2622,7 +2681,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
 	w->eb.batch_start_offset =
 		w->unbound_duration ?
 		0 :
-		ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w)),
+		ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w), w->engine),
 		      2 * sizeof(uint32_t));
 
 	for (i = 0; i < w->fence_deps.nr; i++) {
@@ -2899,15 +2958,18 @@ static void fini_workload(struct workload *wrk)
 	free(wrk);
 }
 
-static unsigned long calibrate_nop(unsigned int tolerance_pct)
+static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
 {
 	const uint32_t bbe = 0xa << 23;
 	unsigned int loops = 17;
 	unsigned int usecs = nop_calibration_us;
 	struct drm_i915_gem_exec_object2 obj = {};
-	struct drm_i915_gem_execbuffer2 eb =
-		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
-	long size, last_size;
+	struct drm_i915_gem_execbuffer2 eb = {
+		.buffer_count = 1,
+		.buffers_ptr = (uintptr_t)&obj,
+		.flags = engine->flags
+	};
+	unsigned long size, last_size;
 	struct timespec t_0, t_end;
 
 	clock_gettime(CLOCK_MONOTONIC, &t_0);
@@ -2939,6 +3001,70 @@ static unsigned long calibrate_nop(unsigned int tolerance_pct)
 	return size / sizeof(uint32_t);
 }
 
+static void
+calibrate_sequentially(void)
+{
+	struct intel_execution_engine2 *engine;
+	enum intel_engine_id eng_id;
+
+	__for_each_physical_engine(fd, engine) {
+		eng_id = ci_to_engine_id(engine->class, engine->instance);
+		igt_assert(eng_id >= 0);
+		engine_calib_map[eng_id] = calibrate_nop(fd, engine);
+	}
+}
+
+struct thread_data {
+	struct intel_execution_engine2 *eng;
+	pthread_t thr;
+	unsigned long calib;
+};
+
+static void *
+engine_calibration_thread(void *data)
+{
+	struct thread_data *thr_d = (struct thread_data *) data;
+
+	thr_d->calib = calibrate_nop(fd, thr_d->eng);
+	return NULL;
+}
+
+static void
+calibrate_in_parallel(void)
+{
+	struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
+	struct intel_execution_engine2 *engine;
+	enum intel_engine_id id;
+	int ret;
+
+	__for_each_physical_engine(fd, engine) {
+		id = ci_to_engine_id(engine->class, engine->instance);
+		thr_d[id].eng = engine;
+		ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
+		igt_assert_eq(ret, 0);
+	}
+
+	__for_each_physical_engine(fd, engine) {
+		id = ci_to_engine_id(engine->class, engine->instance);
+		igt_assert(id >= 0);
+
+		ret = pthread_join(thr_d[id].thr, NULL);
+		igt_assert_eq(ret, 0);
+		engine_calib_map[id] = thr_d[id].calib;
+	}
+
+	free(thr_d);
+}
+
+static void
+calibrate_engines(void)
+{
+	if (sequential)
+		calibrate_sequentially();
+	else
+		calibrate_in_parallel();
+}
+
 static void print_help(void)
 {
 	unsigned int i;
@@ -2951,50 +3077,53 @@ static void print_help(void)
 "be provided when running the simulation in subsequent invocations.\n"
 "\n"
 "Options:\n"
-"  -h              This text.\n"
-"  -q              Be quiet - do not output anything to stdout.\n"
-"  -n <n>          Nop calibration value.\n"
-"  -t <n>          Nop calibration tolerance percentage.\n"
-"                  Use when there is a difficulty obtaining calibration with the\n"
-"                  default settings.\n"
-"  -I <n>          Initial randomness seed.\n"
-"  -p <n>          Context priority to use for the following workload on the\n"
-"                  command line.\n"
-"  -w <desc|path>  Filename or a workload descriptor.\n"
-"                  Can be given multiple times.\n"
-"  -W <desc|path>  Filename or a master workload descriptor.\n"
-"                  Only one master workload can be optinally specified in which\n"
-"                  case all other workloads become background ones and run as\n"
-"                  long as the master.\n"
-"  -a <desc|path>  Append a workload to all other workloads.\n"
-"  -r <n>          How many times to emit the workload.\n"
-"  -c <n>          Fork N clients emitting the workload simultaneously.\n"
-"  -x              Swap VCS1 and VCS2 engines in every other client.\n"
-"  -b <n>          Load balancing to use.\n"
-"                  Available load balancers are:"
+"  -h                This text.\n"
+"  -q                Be quiet - do not output anything to stdout.\n"
+"  -n <n |           Nop calibration value - single value is set to all engines\n"
+"  e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
+"                    particular engines.\n"
+"  -t <n>            Nop calibration tolerance percentage.\n"
+"  -T                Disable sequential calibration and perform calibration in parallel.\n"
+"                    Use when there is a difficulty obtaining calibration with the\n"
+"                    default settings.\n"
+"  -I <n>            Initial randomness seed.\n"
+"  -p <n>            Context priority to use for the following workload on the\n"
+"                    command line.\n"
+"  -w <desc|path>    Filename or a workload descriptor.\n"
+"                    Can be given multiple times.\n"
+"  -W <desc|path>    Filename or a master workload descriptor.\n"
+"                    Only one master workload can be optinally specified in which\n"
+"                    case all other workloads become background ones and run as\n"
+"                    long as the master.\n"
+"  -a <desc|path>    Append a workload to all other workloads.\n"
+"  -r <n>            How many times to emit the workload.\n"
+"  -c <n>            Fork N clients emitting the workload simultaneously.\n"
+"  -x                Swap VCS1 and VCS2 engines in every other client.\n"
+"  -b <n>            Load balancing to use.\n"
+"                    Available load balancers are:"
 	);
 
 	for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
 		igt_assert(all_balancers[i].desc);
 		printf(
-"                     %s (%u): %s\n",
+"                       %s (%u): %s\n",
 		       all_balancers[i].name, all_balancers[i].id,
 		       all_balancers[i].desc);
 	}
 	puts(
-"                  Balancers can be specified either as names or as their id\n"
-"                  number as listed above.\n"
-"  -2              Remap VCS2 to BCS.\n"
-"  -R              Round-robin initial VCS assignment per client.\n"
-"  -H              Send heartbeat on synchronisation points with seqno based\n"
-"                  balancers. Gives better engine busyness view in some cases.\n"
-"  -s              Turn on small SSEU config for the next workload on the\n"
-"                  command line. Subsequent -s switches it off.\n"
-"  -S              Synchronize the sequence of random batch durations between\n"
-"                  clients.\n"
-"  -G              Global load balancing - a single load balancer will be shared\n"
-"                  between all clients and there will be a single seqno domain.\n"
-"  -d              Sync between data dependencies in userspace."
+"                     Balancers can be specified either as names or as their id\n"
+"                     number as listed above.\n"
+"  -2                 Remap VCS2 to BCS.\n"
+"  -R                 Round-robin initial VCS assignment per client.\n"
+"  -H                 Send heartbeat on synchronisation points with seqno based\n"
+"                     balancers. Gives better engine busyness view in some cases.\n"
+"  -s                 Turn on small SSEU config for the next workload on the\n"
+"                     command line. Subsequent -s switches it off.\n"
+"  -S                 Synchronize the sequence of random batch durations between\n"
+"                     clients.\n"
+"  -G                 Global load balancing - a single load balancer will be shared\n"
+"                     between all clients and there will be a single seqno domain.\n"
+"  -d                 Sync between data dependencies in userspace."
 	);
 }
 
@@ -3117,6 +3246,10 @@ int main(int argc, char **argv)
 	int prio = 0;
 	double t;
 	int i, c;
+	char *subopts, *value;
+	int raw_number = 0;
+	long calib_val;
+	int eng;
 
 	/*
 	 * Open the device via the low-level API so we can do the GPU quiesce
@@ -3134,7 +3267,7 @@ int main(int argc, char **argv)
 	master_prng = time(NULL);
 
 	while ((c = getopt(argc, argv,
-			   "hqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
+			   "Thqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
 		switch (c) {
 		case 'W':
 			if (master_workload >= 0) {
@@ -3163,8 +3296,62 @@ int main(int argc, char **argv)
 		case 't':
 			tolerance_pct = strtol(optarg, NULL, 0);
 			break;
+		case 'T':
+			sequential = false;
+			break;
+
 		case 'n':
-			nop_calibration = strtol(optarg, NULL, 0);
+			subopts = optarg;
+			while (*subopts != '\0') {
+				eng = getsubopt(&subopts, (char **)ring_str_map, &value);
+				if (!value) {
+					/* only engine name was given */
+					wsim_err("Missing calibration value for '%s'!\n",
+						ring_str_map[eng]);
+					goto err;
+				}
+
+				calib_val = atol(value);
+
+				if (eng >= 0 && eng < NUM_ENGINES) {
+				/* engine name with some value were given */
+
+					if (eng == DEFAULT || eng == VCS) {
+						wsim_err("'%s' not allowed in engine calibrations!\n",
+							ring_str_map[eng]);
+						goto err;
+					} else if (calib_val <= 0) {
+						wsim_err("Invalid calibration for engine '%s' - value "
+						"is either non-positive or is not a number!\n",
+							ring_str_map[eng]);
+						goto err;
+					} else if (engine_calib_map[eng]) {
+						wsim_err("Invalid repeated calibration of '%s'!\n",
+							ring_str_map[eng]);
+						goto err;
+					} else {
+						engine_calib_map[eng] = calib_val;
+						has_nop_calibration = true;
+					}
+				} else {
+					/* raw number was given */
+
+					if (!calib_val) {
+						wsim_err("Invalid engine or zero calibration!\n");
+						goto err;
+					} else if (calib_val < 0) {
+						wsim_err("Invalid negative calibration!\n");
+						goto err;
+					} else if (raw_number) {
+						wsim_err("Default engine calibration provided more than once!\n");
+						goto err;
+					} else {
+						raw_number = calib_val;
+						apply_unset_calibrations(raw_number);
+						has_nop_calibration = true;
+					}
+				}
+			}
 			break;
 		case 'r':
 			repeat = strtol(optarg, NULL, 0);
@@ -3242,16 +3429,33 @@ int main(int argc, char **argv)
 		goto err;
 	}
 
-	if (!nop_calibration) {
-		if (verbose > 1)
-			printf("Calibrating nop delay with %u%% tolerance...\n",
+	if (!has_nop_calibration) {
+		if (verbose > 1) {
+			printf("Calibrating nop delays with %u%% tolerance...\n",
 				tolerance_pct);
-		nop_calibration = calibrate_nop(tolerance_pct);
-		if (verbose)
-			printf("Nop calibration for %uus delay is %lu.\n",
-			       nop_calibration_us, nop_calibration);
+		}
 
+		calibrate_engines();
+
+		if (verbose)
+			print_engine_calibrations();
 		goto out;
+	} else {
+		bool missing = false;
+
+		for (i = 0; i < NUM_ENGINES; i++) {
+			if (i == DEFAULT || i == VCS)
+				continue;
+
+			if (!engine_calib_map[i]) {
+				wsim_err("Missing calibration for '%s'!\n",
+					 ring_str_map[i]);
+				missing = true;
+			}
+		}
+
+		if (missing)
+			goto err;
 	}
 
 	if (!nr_w_args) {
@@ -3309,8 +3513,7 @@ int main(int argc, char **argv)
 
 	if (verbose > 1) {
 		printf("Random seed is %u.\n", master_prng);
-		printf("Using %lu nop calibration for %uus delay.\n",
-		       nop_calibration, nop_calibration_us);
+		print_engine_calibrations();
 		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
 		if (flags & SWAPVCS)
 			printf("Swapping VCS rings between clients.\n");
-- 
2.19.0



More information about the igt-dev mailing list