[igt-dev] [PATCH i-g-t] Distinguish particular engines during calculating nop calibration.
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Jan 16 17:38:59 UTC 2020
On 16/01/2020 13:55, Anna Karas wrote:
> Extend handling -n parameter by accepting multiple values of per
> engine nop calibration. Add raw numbers handling to set default
> calibration values. Print copyable and pastable string with
> calibrations. Allow to switch between calculating in parallel
> or doing it sequentially.
>
> Accepted input values:
> -n 123456
> All calibrations will be set to 123456.
>
> -n ENG=value,ENG2=value2,value3
> i.e.
> -n RCS=123456,BCS=345678,999999
> RCS engine's value is set to 123456, BCS engine's value is set to
> 345678, 999999 is copied to rest of engines. All engines must be set;
> you can either provide values for each of the engines, or you can set
> specific values and provide a default value for the others.
>
> -n value,ENG1=value1,ENG2=value2
> First, value is copied to all engines, then value1 overrides ENG1, and
> finally value2 overrides ENG2.
>
> New output follows the pattern:
> Nop calibrations for 1000us delay is: <eng1>=<v1>,<eng2>=<v2>,...
> So you can easily copy-paste it to the next invocation.
>
> Switching between calculation modes:
> Run program with -T parameter to calculate calibrations in parallel.
> The calculations are performed sequentially by default.
Put "gem_wsim:" prefix in patch title and fix this please:
$ git am ...
Applying: Distinguish particular engines during calculating nop calibration.
.git/rebase-apply/patch:61: trailing whitespace.
if (class == map[i].class && instance == map[i].instance)
.git/rebase-apply/patch:67: trailing whitespace.
static void
.git/rebase-apply/patch:70: trailing whitespace.
for (int i = 0; i < NUM_ENGINES; i++)
.git/rebase-apply/patch:76: trailing whitespace.
static void
.git/rebase-apply/patch:77: trailing whitespace.
print_engines_calibrations(void)
warning: squelched 12 whitespace errors
warning: 17 lines add whitespace errors.
Then when printing out calibrations:
Nop calibrations for 1000us delay is:
DEFAULT=0,RCS=813060,BCS=381189,VCS=0,VCS1=846222,VCS2=846409,VECS=890365
Please skip DEFAULT and VCS since those have special meaning in
gem_wsim. It will be bit ugly but it will do for now. At some later
point I need to refactor how tool think about engines in a bit smarter way.
Also please reject DEFAULT and VCS in the string passed to -n.
>
> Signed-off-by: Anna Karas <anna.karas at intel.com>
> ---
> benchmarks/gem_wsim.c | 251 ++++++++++++++++++++++++++++++++++--------
> 1 file changed, 207 insertions(+), 44 deletions(-)
>
> diff --git a/benchmarks/gem_wsim.c b/benchmarks/gem_wsim.c
> index 9156fdc9..faac01b0 100644
> --- a/benchmarks/gem_wsim.c
> +++ b/benchmarks/gem_wsim.c
> @@ -23,6 +23,7 @@
> */
>
> #include <unistd.h>
> +#include <stdbool.h>
> #include <stdlib.h>
> #include <stdint.h>
> #include <stdio.h>
> @@ -55,6 +56,7 @@
> #include "i915/gem_mman.h"
>
> #include "ewma.h"
> +#include "i915/gem_engine_topology.h"
>
> enum intel_engine_id {
> DEFAULT,
> @@ -240,7 +242,8 @@ struct workload
>
> struct intel_mmio_data mmio_data;
> static const unsigned int nop_calibration_us = 1000;
> -static unsigned long nop_calibration;
> +static bool has_nop_calibration = false;
> +static bool sequential = true;
>
> static unsigned int master_prng;
>
> @@ -281,6 +284,54 @@ static const char *ring_str_map[NUM_ENGINES] = {
> [VECS] = "VECS",
> };
>
> +/* stores calibrations for particular engines */
> +static unsigned long engine_calib_map[NUM_ENGINES];
> +
> +static enum intel_engine_id
> +ci_to_engine_id(int class, int instance)
> +{
> + static const struct {
> + int class;
> + int instance;
> + unsigned int id;
> + } map[] = {
> + { I915_ENGINE_CLASS_RENDER, 0, RCS },
> + { I915_ENGINE_CLASS_COPY, 0, BCS },
> + { I915_ENGINE_CLASS_VIDEO, 0, VCS1 },
> + { I915_ENGINE_CLASS_VIDEO, 1, VCS2 },
> + { I915_ENGINE_CLASS_VIDEO, 2, VCS2 }, /* FIXME/ICL */
> + { I915_ENGINE_CLASS_VIDEO_ENHANCE, 0, VECS },
> + };
> +
> + unsigned int i;
> +
> + for (i = 0; i < ARRAY_SIZE(map); i++) {
> + if (class == map[i].class && instance == map[i].instance)
> + return map[i].id;
> + }
> + return -1;
> +}
> +
> +static void
> +apply_unset_calibrations(unsigned long raw_number)
> +{
> + for (int i = 0; i < NUM_ENGINES; i++)
> + engine_calib_map[i] += engine_calib_map[i] ? 0 : raw_number;
> +
> + has_nop_calibration = true;
> +}
> +
> +static void
> +print_engines_calibrations(void)
> +{
> + printf("Nop calibrations for %uus delay is: ", nop_calibration_us);
> +
> + for (int i = 0; i < NUM_ENGINES; i++) {
> + printf("%s%s=%lu", i > 0 ? "," : "", ring_str_map[i], engine_calib_map[i]);
> + }
> + printf("\n");
> +}
> +
> static int
> parse_dependencies(unsigned int nr_steps, struct w_step *w, char *_desc)
> {
> @@ -1082,17 +1133,17 @@ static unsigned int get_duration(struct workload *wrk, struct w_step *w)
> (dur->max + 1 - dur->min);
> }
>
> -static unsigned long get_bb_sz(unsigned int duration)
> +static unsigned long get_bb_sz(unsigned int duration, enum intel_engine_id engine)
> {
> - return ALIGN(duration * nop_calibration * sizeof(uint32_t) /
> - nop_calibration_us, sizeof(uint32_t));
> + return ALIGN(duration * engine_calib_map[engine] * sizeof(uint32_t) /
> + nop_calibration_us, sizeof(uint32_t));
> }
>
> static void
> init_bb(struct w_step *w, unsigned int flags)
> {
> const unsigned int arb_period =
> - get_bb_sz(w->preempt_us) / sizeof(uint32_t);
> + get_bb_sz(w->preempt_us, w->engine) / sizeof(uint32_t);
> const unsigned int mmap_len = ALIGN(w->bb_sz, 4096);
> unsigned int i;
> uint32_t *ptr;
> @@ -1319,10 +1370,11 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
>
> if (w->unbound_duration)
> /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
> - w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) +
> + w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us, w->engine)) +
> (1 + 3) * sizeof(uint32_t);
> else
> - w->bb_sz = get_bb_sz(w->duration.max);
> + w->bb_sz = get_bb_sz(w->duration.max, w->engine);
> +
> w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
> init_bb(w, flags);
> w->obj[j].relocation_count = terminate_bb(w, flags);
> @@ -2622,7 +2674,7 @@ do_eb(struct workload *wrk, struct w_step *w, enum intel_engine_id engine,
> w->eb.batch_start_offset =
> w->unbound_duration ?
> 0 :
> - ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w)),
> + ALIGN(w->bb_sz - get_bb_sz(get_duration(wrk, w), w->engine),
> 2 * sizeof(uint32_t));
>
> for (i = 0; i < w->fence_deps.nr; i++) {
> @@ -2899,15 +2951,18 @@ static void fini_workload(struct workload *wrk)
> free(wrk);
> }
>
> -static unsigned long calibrate_nop(unsigned int tolerance_pct)
> +static unsigned long calibrate_nop(unsigned int tolerance_pct, struct intel_execution_engine2 *engine)
> {
> const uint32_t bbe = 0xa << 23;
> unsigned int loops = 17;
> unsigned int usecs = nop_calibration_us;
> struct drm_i915_gem_exec_object2 obj = {};
> - struct drm_i915_gem_execbuffer2 eb =
> - { .buffer_count = 1, .buffers_ptr = (uintptr_t)&obj};
> - long size, last_size;
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffer_count = 1,
> + .buffers_ptr = (uintptr_t)&obj,
> + .flags = engine->flags
> + };
> + unsigned long size, last_size;
> struct timespec t_0, t_end;
>
> clock_gettime(CLOCK_MONOTONIC, &t_0);
> @@ -2939,6 +2994,77 @@ static unsigned long calibrate_nop(unsigned int tolerance_pct)
> return size / sizeof(uint32_t);
> }
>
> +static void
> +calibrate_sequentially(void)
> +{
> + struct intel_execution_engine2 *engine;
> + enum intel_engine_id eng_id;
> +
> + __for_each_physical_engine(fd, engine) {
> + eng_id = ci_to_engine_id(engine->class, engine->instance);
> + igt_assert(eng_id >= 0);
> + engine_calib_map[eng_id] = calibrate_nop(fd, engine);
> + }
> +}
> +
> +struct thread_data {
> + struct intel_execution_engine2 *eng;
> + pthread_t thr;
> + unsigned long calib;
> +};
> +
> +static void *
> +engine_calibration_thread(void *data)
> +{
> + struct thread_data *thr_d = (struct thread_data *) data;
> +
> + thr_d->calib = calibrate_nop(fd, thr_d->eng);
> + return NULL;
> +}
> +
> +static void
> +calibrate_in_parallel(void)
> +{
> + struct thread_data *thr_d = malloc(NUM_ENGINES * sizeof(*thr_d));
> + struct intel_execution_engine2 *engine;
> + enum intel_engine_id id;
> + int ret;
> +
> + __for_each_physical_engine(fd, engine) {
> + id = ci_to_engine_id(engine->class, engine->instance);
> + thr_d[id].eng = engine;
> + ret = pthread_create(&thr_d[id].thr, NULL, engine_calibration_thread, &thr_d[id]);
> + igt_assert_eq(ret, 0);
> + }
> +
> + __for_each_physical_engine(fd, engine) {
> + id = ci_to_engine_id(engine->class, engine->instance);
> + igt_assert(id >= 0);
> +
> + ret = pthread_join(thr_d[id].thr, NULL);
> + igt_assert_eq(ret, 0);
> + engine_calib_map[id] = thr_d[id].calib;
> + }
> +
> + free(thr_d);
> +}
> +
> +static void
> +calibrate_engines(void)
> +{
> + if (sequential)
> + calibrate_sequentially();
> + else
> + calibrate_in_parallel();
> +
> + has_nop_calibration = true;
> +
> + if (verbose > 1)
> + print_engines_calibrations();
> +
> +}
> +
> +
> static void print_help(void)
> {
> unsigned int i;
> @@ -2951,27 +3077,30 @@ static void print_help(void)
> "be provided when running the simulation in subsequent invocations.\n"
> "\n"
> "Options:\n"
> -" -h This text.\n"
> -" -q Be quiet - do not output anything to stdout.\n"
> -" -n <n> Nop calibration value.\n"
> -" -t <n> Nop calibration tolerance percentage.\n"
> -" Use when there is a difficulty obtaining calibration with the\n"
> -" default settings.\n"
> -" -I <n> Initial randomness seed.\n"
> -" -p <n> Context priority to use for the following workload on the\n"
> -" command line.\n"
> -" -w <desc|path> Filename or a workload descriptor.\n"
> -" Can be given multiple times.\n"
> -" -W <desc|path> Filename or a master workload descriptor.\n"
> -" Only one master workload can be optinally specified in which\n"
> -" case all other workloads become background ones and run as\n"
> -" long as the master.\n"
> -" -a <desc|path> Append a workload to all other workloads.\n"
> -" -r <n> How many times to emit the workload.\n"
> -" -c <n> Fork N clients emitting the workload simultaneously.\n"
> -" -x Swap VCS1 and VCS2 engines in every other client.\n"
> -" -b <n> Load balancing to use.\n"
> -" Available load balancers are:"
> +" -h This text.\n"
> +" -q Be quiet - do not output anything to stdout.\n"
> +" -n <n | Nop calibration value - single value is set to all engines\n"
> +" e1=v1,e2=v2,n...> without specified value; you can also specify calibrations for\n"
> +" particular engines.\n"
> +" -t <n> Nop calibration tolerance percentage.\n"
> +" -T Disable sequential calibration and perform calibration in parallel.\n"
> +" Use when there is a difficulty obtaining calibration with the\n"
> +" default settings.\n"
> +" -I <n> Initial randomness seed.\n"
> +" -p <n> Context priority to use for the following workload on the\n"
> +" command line.\n"
> +" -w <desc|path> Filename or a workload descriptor.\n"
> +" Can be given multiple times.\n"
> +" -W <desc|path> Filename or a master workload descriptor.\n"
> +" Only one master workload can be optinally specified in which\n"
> +" case all other workloads become background ones and run as\n"
> +" long as the master.\n"
> +" -a <desc|path> Append a workload to all other workloads.\n"
> +" -r <n> How many times to emit the workload.\n"
> +" -c <n> Fork N clients emitting the workload simultaneously.\n"
> +" -x Swap VCS1 and VCS2 engines in every other client.\n"
> +" -b <n> Load balancing to use.\n"
> +" Available load balancers are:"
You need to re-align the second part of the help text as well. Run the
tool with -h to check.
> );
>
> for (i = 0; i < ARRAY_SIZE(all_balancers); i++) {
> @@ -3117,6 +3246,9 @@ int main(int argc, char **argv)
> int prio = 0;
> double t;
> int i, c;
> + char *subopts, *value;
> + enum intel_engine_id res;
> + int raw_number = 0;
>
> /*
> * Open the device via the low-level API so we can do the GPU quiesce
> @@ -3134,7 +3266,7 @@ int main(int argc, char **argv)
> master_prng = time(NULL);
>
> while ((c = getopt(argc, argv,
> - "hqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
> + "Thqv2RsSHxGdc:n:r:w:W:a:t:b:p:I:")) != -1) {
> switch (c) {
> case 'W':
> if (master_workload >= 0) {
> @@ -3163,8 +3295,41 @@ int main(int argc, char **argv)
> case 't':
> tolerance_pct = strtol(optarg, NULL, 0);
> break;
> + case 'T':
> + sequential = false;
> + break;
> +
> case 'n':
> - nop_calibration = strtol(optarg, NULL, 0);
> + subopts = optarg;
> + while (*subopts != '\0') {
> + res = getsubopt(&subopts, (char **)ring_str_map, &value);
> + switch (res) {
> + case RCS ... VECS:
> + engine_calib_map[res] = atol(value);
> + has_nop_calibration = true;
> + break;
> + default:
> + /* raw number was given - two cases: */
> +
> + /* raw number is already set */
> + if (raw_number) {
> + wsim_err("Default engine calibration provided more than once.\n");
> + goto err;
> + }
> +
> + /* raw number has been set for the first time */
> + raw_number = atol(value);
> +
> + if (value) {
> + apply_unset_calibrations(raw_number);
> + } else {
> + /* not engine=value pair, not raw number, so it's just an error */
> + wsim_err("Unknown engine name: %s/\n", value);
> + goto err;
> + }
I was able to pass "-n 10,XYZ=10" and did not get the "Unknown engine
name" error. Please check what's happening with that.
> + break;
> + }
> + }
> break;
> case 'r':
> repeat = strtol(optarg, NULL, 0);
> @@ -3242,14 +3407,13 @@ int main(int argc, char **argv)
> goto err;
> }
>
> - if (!nop_calibration) {
> - if (verbose > 1)
> - printf("Calibrating nop delay with %u%% tolerance...\n",
> + if (!has_nop_calibration) {
> + if (verbose > 1) {
> + printf("Calibrating nop delays with %u%% tolerance...\n",
> tolerance_pct);
> - nop_calibration = calibrate_nop(tolerance_pct);
> - if (verbose)
> - printf("Nop calibration for %uus delay is %lu.\n",
> - nop_calibration_us, nop_calibration);
> + }
> +
> + calibrate_engines();
>
> goto out;
> }
> @@ -3309,8 +3473,7 @@ int main(int argc, char **argv)
>
> if (verbose > 1) {
> printf("Random seed is %u.\n", master_prng);
> - printf("Using %lu nop calibration for %uus delay.\n",
> - nop_calibration, nop_calibration_us);
> + print_engines_calibrations();
> printf("%u client%s.\n", clients, clients > 1 ? "s" : "");
> if (flags & SWAPVCS)
> printf("Swapping VCS rings between clients.\n");
>
Almost there. :)
Regards,
Tvrtko
More information about the igt-dev
mailing list