[igt-dev] [PATCH i-g-t] Distinguish particular engines during calculating nop calibration.

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jan 16 17:38:59 UTC 2020


On 16/01/2020 13:55, Anna Karas wrote:
> Extend handling -n parameter by accepting multiple values of per
> engine nop calibration. Add raw numbers handling to set default
> calibration values. Print copyable and pastable string with
> calibrations. Allow to switch between calculating in parallel
> or doing it sequentially.
> 
> Accepted input values:
> -n 123456
> All calibrations will be set to 123456.
> 
> -n ENG=value,ENG2=value2,value3
> i.e.
> -n RCS=123456,BCS=345678,999999
> RCS engine's value is set to 123456, BCS engine's value is set to
> 345678, 999999 is copied to rest of engines. All engines must be set;
> you can either provide values for each of the engines, or you can set
> specific values and provide a default value for the others.
> 
> -n value,ENG1=value1,ENG2=value2
> First, value is copied to all engines, then value1 overrides ENG1, and
> finally value2 overrides ENG2.
> 
> New output follows the pattern:
> Nop calibrations for 1000us delay is: <eng1>=<v1>,<eng2>=<v2>,...
> So you can easily copy-paste it to the next invocation.
> 
> Switching between calculation modes:
> Run program with -T parameter to calculate calibrations in parallel.
> The calculations are performed sequentially by default.

Put "gem_wsim:" prefix in patch title and fix this please:

$ git am ...
Applying: Distinguish particular engines during calculating nop calibration.
.git/rebase-apply/patch:61: trailing whitespace.
                 if (class == map[i].class && instance == map[i].instance)
.git/rebase-apply/patch:67: trailing whitespace.
static void
.git/rebase-apply/patch:70: trailing whitespace.
         for (int i = 0; i < NUM_ENGINES; i++)
.git/rebase-apply/patch:76: trailing whitespace.
static void
.git/rebase-apply/patch:77: trailing whitespace.
print_engines_calibrations(void)
warning: squelched 12 whitespace errors
warning: 17 lines add whitespace errors.

Then when printing out calibrations:

Nop calibrations for 1000us delay is: 
DEFAULT=0,RCS=813060,BCS=381189,VCS=0,VCS1=846222,VCS2=846409,VECS=890365

Please skip DEFAULT and VCS since those have special meaning in 
gem_wsim. It will be bit ugly but it will do for now. At some later 
point I need to refactor how tool think about engines in a bit smarter way.

Also please reject DEFAULT and VCS in the string passed to -n.

> 
> Signed-off-by: Anna Karas <anna.karas at intel.com>
> ---
>   benchmarks/gem_wsim.c | 251 ++++++++++++++++++++++++++++++++++--------
>   1 file changed, 207 insertions(+), 44 deletions(-)
> 
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index 9156fdc9..faac01b0 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -23,6 +23,7 @@
>    */
>   
>   #include <unistd.h>
> +#include <stdbool.h>
>   #include <stdlib.h>
>   #include <stdint.h>
>   #include <stdio.h>
> @@ -55,6 +56,7 @@
>   #include "i915/gem_mman.h"
>   
>   #include "ewma.h"
> +#include "i915/gem_engine_topology.h"
>   
>   enum intel_engine_id {
>   	DEFAULT,
> @@ -240,7 +242,8 @@ struct workload
>   
>   struct intel_mmio_data mmio_data;
>   static const unsigned int nop_calibration_us = 1000;
> -static unsigned long nop_calibration;
> +static bool has_nop_calibration = false;
> +static bool sequential = true;
>   
>   static unsigned int master_prng;
>   
> @@ -281,6 +284,54 @@ static const char *ring_str_map[NUM_ENGINES] = {
>   	[VECS] = "VECS",
>   };
>   
> +/* stores calibrations for particular engines */
> +static unsigned long engine_calib_map[NUM_ENGINES];
> +
> +static enum intel_engine_id
> +ci_to_engine_id(int class, int instance)
> +{
> +	static const struct {
> +		int class;
> +		int instance;
> +		unsigned int id;
> +	} map[] = {
> +		{ I915_ENGINE_CLASS_RENDER, 0, RCS },
> +		{ I915_ENGINE_CLASS_COPY, 0, BCS },
> +		{ I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
> +		{ I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
> +		{ I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
> +		{ I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
> +	};
> +
> +	unsigned int i;
> +
> +	for (i = 0; i < ARRAY_SIZE(map); i++) {
> +		if (class == map[i].class && instance == map[i].instance)
> +			return map[i].id;
> +	}
> +	return -1;
> +}
> +
> +static void
> +apply_unset_calibrations(unsigned long raw_number)
> +{
> +	for (int i = 0; i < NUM_ENGINES; i++)
> +		engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
> +
> +	has_nop_calibration = true;
> +}
> +
> +static void
> +print_engines_calibrations(void)
> +{
> +	printf("Nop calibrations for %uus delay is: ", nop_calibration_us);
> +
> +	for (int i = 0; i < NUM_ENGINES; i++) {
> +		printf("%s%s=%lu", i > 0 ? "," : "", ring_str_map[i], engine_calib_map[i]);
> +	}
> +	printf("\n");
> +}
> +
>   static int
>   parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
>   {
> @@ -1082,17 +1133,17 @@ static unsigned int get_duration(struct workload *wrk, struct w_step *w)
>   		       (dur->max + 1 - dur->min);
>   }
>   
> -static unsigned long get_bb_sz(unsigned int duration)
> +static unsigned long get_bb_sz(unsigned int duration, enum intel_engine_id engine)
>   {
> -	return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
> -		     nop_calibration_us, sizeof(uint32_t));
> +	return ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
> +		nop_calibration_us, sizeof(uint32_t));
>   }
>   
>   static void
>   init_bb(struct w_step *w, unsigned int flags)
>   {
>   	const unsigned int arb_period =
> -			get_bb_sz(w->preempt_us) / sizeof(uint32_t);
> +			get_bb_sz(w->preempt_us, w->engine) / sizeof(uint32_t);
>   	const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
>   	unsigned int i;
>   	uint32_t *ptr;
> @@ -1319,10 +1370,11 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
>   
>   	if (w->unbound_duration)
>   		/* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
> -		w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) +
> +		w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us, w->engine)) +
>   			   (1 + 3) * sizeof(uint32_t);
>   	else
> -		w->bb_sz = get_bb_sz(w->duration.max);
> +		w->bb_sz = get_bb_sz(w->duration.max, w->engine);
> +
>   	w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
>   	init_bb(w, flags);
>   	w->obj[j].relocation_count = terminate_bb(w, flags);
> @@ -2622,7 +2674,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
>   	w->eb.batch_start_offset =
>   		w->unbound_duration ?
>   		0 :
> -		ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w)),
> +		ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w), w->engine),
>   		      2 * sizeof(uint32_t));
>   
>   	for (i = 0; i < w->fence_deps.nr; i++) {
> @@ -2899,15 +2951,18 @@ static void fini_workload(struct workload *wrk)
>   	free(wrk);
>   }
>   
> -static unsigned long calibrate_nop(unsigned int tolerance_pct)
> +static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
>   {
>   	const uint32_t bbe = 0xa << 23;
>   	unsigned int loops = 17;
>   	unsigned int usecs = nop_calibration_us;
>   	struct drm_i915_gem_exec_object2 obj = {};
> -	struct drm_i915_gem_execbuffer2 eb =
> -		{ .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> -	long size, last_size;
> +	struct drm_i915_gem_execbuffer2 eb = {
> +		.buffer_count = 1,
> +		.buffers_ptr = (uintptr_t)&obj,
> +		.flags = engine->flags
> +	};
> +	unsigned long size, last_size;
>   	struct timespec t_0, t_end;
>   
>   	clock_gettime(CLOCK_MONOTONIC, &t_0);
> @@ -2939,6 +2994,77 @@ static unsigned long calibrate_nop(unsigned int tolerance_pct)
>   	return size / sizeof(uint32_t);
>   }
>   
> +static void
> +calibrate_sequentially(void)
> +{
> +	struct intel_execution_engine2 *engine;
> +	enum intel_engine_id eng_id;
> +
> +	__for_each_physical_engine(fd, engine) {
> +		eng_id = ci_to_engine_id(engine->class, engine->instance);
> +		igt_assert(eng_id >= 0);
> +		engine_calib_map[eng_id] = calibrate_nop(fd, engine);
> +	}
> +}
> +
> +struct thread_data {
> +	struct intel_execution_engine2 *eng;
> +	pthread_t thr;
> +	unsigned long calib;
> +};
> +
> +static void *
> +engine_calibration_thread(void *data)
> +{
> +	struct thread_data *thr_d = (struct thread_data *) data;
> +
> +	thr_d->calib = calibrate_nop(fd, thr_d->eng);
> +	return NULL;
> +}
> +
> +static void
> +calibrate_in_parallel(void)
> +{
> +	struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
> +	struct intel_execution_engine2 *engine;
> +	enum intel_engine_id id;
> +	int ret;
> +
> +	__for_each_physical_engine(fd, engine) {
> +		id = ci_to_engine_id(engine->class, engine->instance);
> +		thr_d[id].eng = engine;
> +		ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
> +		igt_assert_eq(ret, 0);
> +	}
> +
> +	__for_each_physical_engine(fd, engine) {
> +		id = ci_to_engine_id(engine->class, engine->instance);
> +		igt_assert(id >= 0);
> +
> +		ret = pthread_join(thr_d[id].thr, NULL);
> +		igt_assert_eq(ret, 0);
> +		engine_calib_map[id] = thr_d[id].calib;
> +	}
> +
> +	free(thr_d);
> +}
> +
> +static void
> +calibrate_engines(void)
> +{
> +	if (sequential)
> +		calibrate_sequentially();
> +	else
> +		calibrate_in_parallel();
> +
> +	has_nop_calibration = true;
> +
> +	if (verbose > 1)
> +		print_engines_calibrations();
> +
> +}
> +
> +
>   static void print_help(void)
>   {
>   	unsigned int i;
> @@ -2951,27 +3077,30 @@ static void print_help(void)
>   "be provided when running the simulation in subsequent invocations.\n"
>   "\n"
>   "Options:\n"
> -"  -h              This text.\n"
> -"  -q              Be quiet - do not output anything to stdout.\n"
> -"  -n <n>          Nop calibration value.\n"
> -"  -t <n>          Nop calibration tolerance percentage.\n"
> -"                  Use when there is a difficulty obtaining calibration with the\n"
> -"                  default settings.\n"
> -"  -I <n>          Initial randomness seed.\n"
> -"  -p <n>          Context priority to use for the following workload on the\n"
> -"                  command line.\n"
> -"  -w <desc|path>  Filename or a workload descriptor.\n"
> -"                  Can be given multiple times.\n"
> -"  -W <desc|path>  Filename or a master workload descriptor.\n"
> -"                  Only one master workload can be optinally specified in which\n"
> -"                  case all other workloads become background ones and run as\n"
> -"                  long as the master.\n"
> -"  -a <desc|path>  Append a workload to all other workloads.\n"
> -"  -r <n>          How many times to emit the workload.\n"
> -"  -c <n>          Fork N clients emitting the workload simultaneously.\n"
> -"  -x              Swap VCS1 and VCS2 engines in every other client.\n"
> -"  -b <n>          Load balancing to use.\n"
> -"                  Available load balancers are:"
> +"  -h                This text.\n"
> +"  -q                Be quiet - do not output anything to stdout.\n"
> +"  -n <n |           Nop calibration value - single value is set to all engines\n"
> +"  e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
> +"                    particular engines.\n"
> +"  -t <n>            Nop calibration tolerance percentage.\n"
> +"  -T                Disable sequential calibration and perform calibration in parallel.\n"
> +"                    Use when there is a difficulty obtaining calibration with the\n"
> +"                    default settings.\n"
> +"  -I <n>            Initial randomness seed.\n"
> +"  -p <n>            Context priority to use for the following workload on the\n"
> +"                    command line.\n"
> +"  -w <desc|path>    Filename or a workload descriptor.\n"
> +"                    Can be given multiple times.\n"
> +"  -W <desc|path>    Filename or a master workload descriptor.\n"
> +"                    Only one master workload can be optinally specified in which\n"
> +"                    case all other workloads become background ones and run as\n"
> +"                    long as the master.\n"
> +"  -a <desc|path>    Append a workload to all other workloads.\n"
> +"  -r <n>            How many times to emit the workload.\n"
> +"  -c <n>            Fork N clients emitting the workload simultaneously.\n"
> +"  -x                Swap VCS1 and VCS2 engines in every other client.\n"
> +"  -b <n>            Load balancing to use.\n"
> +"                    Available load balancers are:"

You need to re-align the second part of the help text as well. Run the 
tool with -h to check.

>   	);
>   
>   	for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
> @@ -3117,6 +3246,9 @@ int main(int argc, char **argv)
>   	int prio = 0;
>   	double t;
>   	int i, c;
> +	char *subopts, *value;
> +	enum intel_engine_id res;
> +	int raw_number = 0;
>   
>   	/*
>   	 * Open the device via the low-level API so we can do the GPU quiesce
> @@ -3134,7 +3266,7 @@ int main(int argc, char **argv)
>   	master_prng = time(NULL);
>   
>   	while ((c = getopt(argc, argv,
> -			   "hqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
> +			   "Thqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
>   		switch (c) {
>   		case 'W':
>   			if (master_workload >= 0) {
> @@ -3163,8 +3295,41 @@ int main(int argc, char **argv)
>   		case 't':
>   			tolerance_pct = strtol(optarg, NULL, 0);
>   			break;
> +		case 'T':
> +			sequential = false;
> +			break;
> +
>   		case 'n':
> -			nop_calibration = strtol(optarg, NULL, 0);
> +			subopts = optarg;
> +			while (*subopts != '\0') {
> +				res = getsubopt(&subopts, (char **)ring_str_map, &value);
> +				switch (res) {
> +				case RCS ... VECS:
> +					engine_calib_map[res] = atol(value);
> +					has_nop_calibration = true;
> +					break;
> +				default:
> +					/* raw number was given - two cases: */
> +
> +					/* raw number is already set  */
> +					if (raw_number) {
> +						wsim_err("Default engine calibration provided more than once.\n");
> +						goto err;
> +					}
> +
> +					/* raw number has been set for the first time */
> +					raw_number = atol(value);
> +
> +					if (value) {
> +						apply_unset_calibrations(raw_number);
> +					} else {
> +						/* not engine=value pair, not raw number, so it's just an error */
> +						wsim_err("Unknown engine name: %s/\n", value);
> +						goto err;
> +					}

I was able to pass "-n 10,XYZ=10" and did not get the "Unknown engine 
name" error. Please check what's happening with that.

> +					break;
> +				}
> +			}
>   			break;
>   		case 'r':
>   			repeat = strtol(optarg, NULL, 0);
> @@ -3242,14 +3407,13 @@ int main(int argc, char **argv)
>   		goto err;
>   	}
>   
> -	if (!nop_calibration) {
> -		if (verbose > 1)
> -			printf("Calibrating nop delay with %u%% tolerance...\n",
> +	if (!has_nop_calibration) {
> +		if (verbose > 1) {
> +			printf("Calibrating nop delays with %u%% tolerance...\n",
>   				tolerance_pct);
> -		nop_calibration = calibrate_nop(tolerance_pct);
> -		if (verbose)
> -			printf("Nop calibration for %uus delay is %lu.\n",
> -			       nop_calibration_us, nop_calibration);
> +		}
> +
> +		calibrate_engines();
>   
>   		goto out;
>   	}
> @@ -3309,8 +3473,7 @@ int main(int argc, char **argv)
>   
>   	if (verbose > 1) {
>   		printf("Random seed is %u.\n", master_prng);
> -		printf("Using %lu nop calibration for %uus delay.\n",
> -		       nop_calibration, nop_calibration_us);
> +		print_engines_calibrations();
>   		printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
>   		if (flags & SWAPVCS)
>   			printf("Swapping VCS rings between clients.\n");
> 

Almost there. :)

Regards,

Tvrtko


More information about the igt-dev mailing list