[Intel-gfx] [PATCH i-g-t 15/17] benchmarks/wsim: Simulate and interpret .wsim

Chris Wilson chris at chris-wilson.co.uk
Mon Jul 2 09:07:25 UTC 2018


A little tool I've been meaning to write for a while... Convert a
.wsim into its DAG, find the longest chains and evaluate them on a
simulated machine.

v2: Implement barriers to handle sync commands

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 benchmarks/Makefile.sources |   1 +
 benchmarks/sim_wsim.c       | 419 ++++++++++++++++++++++++++++++++++++
 2 files changed, 420 insertions(+)
 create mode 100644 benchmarks/sim_wsim.c

diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
index 86928df5c..633623527 100644
--- a/benchmarks/Makefile.sources
+++ b/benchmarks/Makefile.sources
@@ -17,6 +17,7 @@ benchmarks_prog_list =			\
 	gem_wsim			\
 	kms_vblank			\
 	prime_lookup			\
+	sim_wsim			\
 	vgem_mmap			\
 	$(NULL)
 
diff --git a/benchmarks/sim_wsim.c b/benchmarks/sim_wsim.c
new file mode 100644
index 000000000..5f3d56045
--- /dev/null
+++ b/benchmarks/sim_wsim.c
@@ -0,0 +1,419 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "igt_aux.h"
+
+/*
+ * Debug tracing: change the "#if 0" below to "#if 1" to route DBG() to
+ * printf(); otherwise DBG() expands to an empty statement.
+ */
+#if 0
+#define DBG(...) printf(__VA_ARGS__)
+#else
+#define DBG(...) do { } while(0)
+#endif
+
+/* One edge of the task graph, kept on the waiting task's signals list. */
+struct dependency {
+	/* the task whose completion is being waited upon */
+	struct task *signal;
+	/* entry in the waiter's task->signals list */
+	struct igt_list link;
+};
+
+/* A single step parsed from the .wsim description. */
+struct task {
+	/* index of this task in work->tasks[] at the time it was added */
+	int step;
+	/* context id, the first field of the step */
+	int ctx;
+	/* index into engines[] in add_task(), or -1 if the name was not matched */
+	int class;
+	/* number following the engine name, or -1 if none was given */
+	int instance;
+
+	/* bounds of the delay field; duration is their midpoint */
+	unsigned long min, max;
+	unsigned long duration;
+	/* completion time assigned by simulate_client() */
+	unsigned long deadline;
+
+	/* entry in work->ends until some other task depends on this one */
+	struct igt_list link;
+	/* entry in the scheduling list built by simulate_client() */
+	struct igt_list sched;
+	/* list of struct dependency: the tasks this one waits for */
+	struct igt_list signals;
+
+	/* scratch flag, only touched by the (disabled) graphviz() dump */
+	bool visited;
+};
+
+/* A parsed workload: every task plus the bookkeeping used while parsing. */
+struct work {
+	/* tasks in the order they were parsed */
+	struct task **tasks;
+	/* count: slots of tasks[] in use; size: slots allocated */
+	unsigned int count, size;
+
+	/* tasks that no other task depends upon (linked via task->link) */
+	struct igt_list ends;
+	/* current sync point that every later task depends on, or NULL */
+	struct task *barrier;
+};
+
+/*
+ * Record that @task has to wait for @signal.
+ *
+ * A new dependency edge is added to @task's signals list. If @signal's
+ * link is currently on a list (add_task() puts every new task on
+ * work->ends) it is removed and re-initialised, as a task that is waited
+ * upon is no longer the end of a chain.
+ *
+ * Running out of memory is fatal: the error is reported and the program
+ * exits.
+ */
+static void add_dependency(struct task *task, struct task *signal)
+{
+	struct dependency *dep;
+
+	DBG("%s %d (context %d, engine %d) -> %d (context %d, engine %d)\n",
+	    __func__,
+	    signal->step, signal->ctx, signal->class,
+	    task->step, task->ctx, task->class);
+
+	dep = malloc(sizeof(*dep));
+	if (!dep) {
+		perror("malloc");
+		exit(1);
+	}
+
+	dep->signal = signal;
+	igt_list_add(&dep->link, &task->signals);
+
+	/* Re-initialise after removal so that the next emptiness test holds */
+	if (!igt_list_empty(&signal->link)) {
+		igt_list_del(&signal->link);
+		igt_list_init(&signal->link);
+	}
+}
+
+/*
+ * Engine classes. The values are used as indices into engines[] in
+ * add_task() and into sim[] in simulate_client().
+ */
+enum class {
+	RCS,
+	BCS,
+	VCS,
+	VECS,
+};
+
+/*
+ * Parse one line of a .wsim description and append the step to @work.
+ *
+ * @line is split in place on '.' (strtok), so it is modified. Two kinds of
+ * line are understood:
+ *
+ *   s.<-N>                           make the task N steps back the barrier
+ *   <ctx>.<engine>.<delay>.<deps>.<sync>   a task, e.g. 1.RCS.2800-3100.-1.0
+ *
+ * Anything else, as well as a malformed line (missing engine or delay,
+ * unknown engine, a relative step that does not name an earlier task), is
+ * reported on stderr and skipped rather than being allowed to index outside
+ * of work->tasks[] or dereference a missing token.
+ *
+ * The caller must guarantee that work->tasks[] has room for one more entry.
+ */
+static void add_task(struct work *work, char *line)
+{
+#define TSEP "."
+
+	static const char *engines[] = {
+		[RCS]  = "rcs",
+		[BCS]  = "bcs",
+		[VCS]  = "vcs",
+		[VECS] = "vecs",
+	};
+	char *ctx, *engine, *delay, *deps, *sync, *end;
+	struct task *task;
+	int class, instance;
+	long rel;
+	int i;
+
+	DBG("line='%s'\n", line);
+
+	ctx = strtok(line, TSEP);
+	if (!ctx) /* nothing but separators */
+		return;
+
+	if (!strcasecmp(ctx, "s")) { /* sync point */
+		sync = strtok(NULL, TSEP);
+		rel = sync ? strtol(sync, NULL, 10) : 0;
+
+		/* Only a task that has already been parsed can be a barrier */
+		if (rel >= 0 || rel < -(long)work->count) {
+			fprintf(stderr,
+				"Ignoring sync on invalid step %ld\n", rel);
+			return;
+		}
+
+		DBG("syncpt %ld\n", rel);
+
+		work->barrier = work->tasks[work->count + rel];
+		return;
+	}
+
+	if (!isdigit((unsigned char)*ctx)) {
+		fprintf(stderr, "Ignoring step '%s' at your peril!\n", ctx);
+		return;
+	}
+
+	/*
+	 * 1.RCS.2800-3100.-1.0
+	 * - context
+	 * - engine
+	 * - delay
+	 * - dependencies
+	 * - sync
+	 */
+	engine = strtok(NULL, TSEP);
+	delay = strtok(NULL, TSEP);
+	deps = strtok(NULL, TSEP);
+	sync = strtok(NULL, TSEP);
+	if (!engine || !delay) {
+		fprintf(stderr, "Ignoring truncated step '%s'\n", ctx);
+		return;
+	}
+
+	/* engine: class name optionally followed by an instance number */
+	DBG("engine='%s'\n", engine);
+	class = -1;
+	instance = -1;
+	for (i = 0; i < (int)(sizeof(engines)/sizeof(*engines)); i++) {
+		const int len = strlen(engines[i]);
+		if (!strncasecmp(engine, engines[i], len)) {
+			class = i;
+			if (engine[len])
+				instance = atoi(engine + len);
+			break;
+		}
+	}
+	if (class < 0) {
+		/* class is later used as an array index, so it must be valid */
+		fprintf(stderr, "Ignoring step on unknown engine '%s'\n",
+			engine);
+		return;
+	}
+
+	task = malloc(sizeof(*task));
+	if (!task) {
+		perror("malloc");
+		exit(1);
+	}
+
+	igt_list_init(&task->signals);
+	task->step = work->count;
+	task->visited = false;
+	task->deadline = 0;
+	task->class = class;
+	task->instance = instance;
+
+	/* context */
+	DBG("context='%s'\n", ctx);
+	task->ctx = atoi(ctx);
+
+	/* delay: either a single value or a min-max range */
+	DBG("delay='%s'\n", delay);
+	task->min = strtol(delay, &end, 0);
+	if (*end)
+		task->max = strtol(end + 1, NULL, 0);
+	else
+		task->max = task->min;
+	task->duration = (task->min + task->max) / 2;
+	DBG("min=%lu, max=%lu; duration=%lu\n", task->min, task->max, task->duration);
+
+	/*
+	 * dependencies: relative (negative) step numbers, each followed by a
+	 * single separator character; parsing stops at the first 0 or
+	 * non-number.
+	 */
+	DBG("deps='%s'\n", deps ? deps : "");
+	while (deps && (rel = strtol(deps, &deps, 0))) {
+		if (rel > 0 || rel < -(long)work->count)
+			fprintf(stderr,
+				"Ignoring dependency on invalid step %ld\n",
+				rel);
+		else
+			add_dependency(task, work->tasks[work->count + rel]);
+		if (*deps)
+			deps++;
+	}
+
+	/* add a dependency for the context+engine timeline */
+	for (i = work->count; --i >= 0; ) {
+		if (work->tasks[i]->ctx == task->ctx &&
+		    work->tasks[i]->class == task->class) {
+			add_dependency(task, work->tasks[i]);
+			break;
+		}
+	}
+
+	if (work->barrier)
+		add_dependency(task, work->barrier);
+
+	/* sync -- we become the barrier */
+	if (sync && atoi(sync)) {
+		DBG("marking as a sync point\n");
+		work->barrier = task;
+	}
+
+	/* Nothing depends on a new task yet, so it starts out as an end */
+	igt_list_add(&task->link, &work->ends);
+	work->tasks[work->count++] = task;
+
+#undef TSEP
+}
+
+/*
+ * Read a .wsim description from @file, one step per line, and build the
+ * task graph for it.
+ *
+ * Returns a newly allocated struct work; nothing in this file frees it.
+ * Running out of memory is fatal: the error is reported and the program
+ * exits.
+ */
+static struct work *parse_work(FILE *file)
+{
+	struct work *work;
+	char *line = NULL;
+	size_t len = 0;
+
+	work = malloc(sizeof(*work));
+	if (!work) {
+		perror("malloc");
+		exit(1);
+	}
+	igt_list_init(&work->ends);
+	work->barrier = NULL;
+
+	work->size = 64;
+	work->count = 0;
+	work->tasks = malloc(sizeof(*work->tasks) * work->size);
+	if (!work->tasks) {
+		perror("malloc");
+		exit(1);
+	}
+
+	while (getline(&line, &len, file) != -1) {
+		/* add_task() needs a free slot: grow to twice the capacity */
+		if (work->count == work->size) {
+			struct task **tasks;
+
+			tasks = realloc(work->tasks,
+					sizeof(*work->tasks) * 2 * work->size);
+			if (!tasks) {
+				perror("realloc");
+				exit(1);
+			}
+
+			work->tasks = tasks;
+			work->size *= 2;
+		}
+		add_task(work, line);
+	}
+
+	free(line);
+
+	DBG("%u tasks\n", work->count);
+	return work;
+}
+
+/*
+ * Add up the durations along one chain of dependencies starting at @task.
+ *
+ * At every step the chain continues through the signal with the largest
+ * (non-zero) duration of its own; on a tie the first one on the list wins.
+ * The walk stops at a task that has no such signal.
+ *
+ * NOTE(review): only the immediate duration of each signal is compared, so
+ * the chain followed is not necessarily the longest path through the
+ * graph -- confirm whether that is intended.
+ */
+static unsigned long sum_durations(struct task *task)
+{
+	unsigned long total = 0;
+
+	for (;;) {
+		struct task *next = NULL;
+		unsigned long longest = 0;
+		struct dependency *edge;
+
+		total += task->duration;
+
+		igt_list_for_each(edge, &task->signals, link) {
+			if (edge->signal->duration <= longest)
+				continue;
+
+			next = edge->signal;
+			longest = next->duration;
+		}
+
+		if (!next)
+			break;
+
+		task = next;
+	}
+
+	return total;
+}
+
+/*
+ * Print two estimates for a single client running @work:
+ *  - total: every task executed back to back;
+ *  - ideal: the largest sum_durations() over the tasks on work->ends.
+ */
+static void ideal_depth(struct work *work)
+{
+	unsigned long serial = 0;
+	unsigned long longest = 0;
+	struct task *end;
+	unsigned int n;
+
+	/* Fully serialised: simply the sum of every task */
+	for (n = 0; n < work->count; n++)
+		serial += work->tasks[n]->duration;
+
+	/*
+	 * A chain of dependencies has to execute sequentially, whereas
+	 * separate chains are assumed to run in parallel on an unlimited
+	 * number of engines. The longest chain is therefore the limit.
+	 */
+	igt_list_for_each(end, &work->ends, link) {
+		unsigned long chain = sum_durations(end);
+
+		longest = chain > longest ? chain : longest;
+	}
+
+	printf("Single client\n");
+	printf("   total duration %luus; %.2f wps\n", serial, 1e6/serial);
+	printf("   ideal duration %luus; %.2f wps\n", longest, 1e6/longest);
+}
+
+/* Simulation state for one engine class; arrays are indexed by instance. */
+struct sim_class {
+	/* number of instances considered when picking one or reporting */
+	int ninstance;
+	/* completion time of the last task placed on each instance */
+	unsigned long deadline[4];
+	/* sum of the durations of the tasks placed on each instance */
+	unsigned long busy[4];
+};
+
+/*
+ * Place every task of @work onto a small simulated machine (1 RCS, 1 BCS,
+ * 2 VCS, 1 VECS) and print two figures:
+ *  - single: the latest completion time on any engine instance;
+ *  - packed: the largest accumulated busy time on any engine instance.
+ *
+ * NOTE(review): task->class and task->instance are used as array indices
+ * without any range check here -- confirm the parser guarantees them.
+ */
+static void simulate_client(struct work *work)
+{
+	/* Only ninstance is given; deadline[] and busy[] start out as zero */
+	struct sim_class sim[] = {
+		[RCS]  = { 1 },
+		[BCS]  = { 1 },
+		[VCS]  = { 2 },
+		[VECS] = { 1 },
+	}, *class;
+	IGT_LIST(sched);
+	struct task *task;
+	unsigned long max;
+	int i, j;
+
+	printf("Simulated clients:\n");
+
+	for (i = 0; i < work->count; i++)
+		igt_list_init(&work->tasks[i]->sched);
+
+	/* Seed the scheduling list with the tasks nothing depends upon */
+	igt_list_for_each(task, &work->ends, link)
+		igt_list_add_tail(&task->sched, &sched);
+
+	/*
+	 * Walk the list while it grows: the signals of each task are moved
+	 * to the tail, i.e. behind the task that waits for them.
+	 */
+	igt_list_for_each(task, &sched, sched) {
+		struct dependency *dep;
+
+		igt_list_for_each(dep, &task->signals, link)
+			igt_list_move_tail(&dep->signal->sched, &sched);
+	}
+
+	/* Process from the tail, so signals are handled before their waiters */
+	igt_list_for_each_reverse(task, &sched, sched) {
+		struct dependency *dep;
+		int instance;
+
+		class = &sim[task->class];
+		/* here max is the earliest time the task may start */
+		max = class->deadline[0];
+
+		instance = task->instance;
+		if (instance < 0) {
+			/* no instance requested: take the one that frees up first */
+			instance = 0;
+			for (i = 1; i < class->ninstance; i++) {
+				if (class->deadline[i] < max) {
+					max = class->deadline[i];
+					instance = i;
+				}
+			}
+		}
+
+		/*
+		 * Greedy (first available), not true optimal scheduling.
+		 *
+		 * For optimal, we do have to compute the global optimal
+		 * ordering by checking every permutation...
+		 */
+		/* the task cannot start before all of its signals have finished */
+		igt_list_for_each(dep, &task->signals, link) {
+			if (dep->signal->deadline > max)
+				max = dep->signal->deadline;
+		}
+
+		DBG("task %d: engine %d, instance %d; finish %lu\n",
+		    task->step, task->class, instance, max);
+
+		task->deadline = max + task->duration;
+		class->deadline[instance] = task->deadline;
+		class->busy[instance] += task->duration;
+	}
+
+	/* The latest completion over the first ninstance instances of each class */
+	max = 0;
+	for (i = 0; i < sizeof(sim)/sizeof(sim[0]); i++) {
+		class = &sim[i];
+		for (j = 0; j < class->ninstance; j++) {
+			if (class->deadline[j] > max)
+				max = class->deadline[j];
+		}
+	}
+	printf("   single duration %luus; %.2f wps\n", max, 1e6/max);
+
+	/*
+	 * Compute the maximum duration required on any engine.
+	 *
+	 * With sufficient clients forcing maximum occupancy under their weight,
+	 * the ratelimiting step becomes a single engine and how many clients
+	 * it takes to fill.
+	 */
+	max = 0;
+	for (i = 0; i < sizeof(sim)/sizeof(sim[0]); i++) {
+		class = &sim[i];
+		for (j = 0; j < class->ninstance; j++) {
+			if (class->busy[j] > max)
+				max = class->busy[j];
+		}
+	}
+	printf("   packed duration %luus; %.2f wps\n", max, 1e6/max);
+}
+
+/*
+ * Dump @work as a graphviz digraph on stdout: one cluster per context and
+ * one edge per dependency.
+ *
+ * The whole body is compiled out with "#if 0", so as it stands this
+ * function does nothing.
+ */
+static void graphviz(struct work *work)
+{
+#if 0
+	int i, j;
+
+	printf("digraph {\n");
+	printf("  rankdir=LR;\n");
+	printf("  splines=line;\n");
+	printf("\n");
+
+	/* Emit each context once, listing all of the tasks that belong to it */
+	for (i = 0; i < work->count; i++) {
+		struct task *task = work->tasks[i];
+
+		/* already listed as part of an earlier task's context */
+		if (task->visited)
+			goto skip;
+
+		printf("  subgraph cluster_%d {\n", task->ctx);
+		printf("    label=\"Context %d\"\n", task->ctx);
+		for (j = i; j < work->count; j++) {
+			if (work->tasks[j]->ctx == task->ctx) {
+				printf("    task_%03d;\n", j);
+				work->tasks[j]->visited = true;
+			}
+		}
+		printf("  }\n\n");
+
+skip:
+		/* clear the flag again once this task has been passed */
+		task->visited = false;
+	}
+
+	/* Edges point from the signalling task to the task waiting on it */
+	for (i = 0; i < work->count; i++) {
+		struct task *task = work->tasks[i];
+		struct dependency *dep;
+
+		igt_list_for_each(dep, &task->signals, link) {
+			printf("  task_%03d -> task_%03d;\n",
+			       dep->signal->step, task->step);
+		}
+	}
+
+	printf("}\n");
+#endif
+}
+
+/*
+ * Treat every command line argument as the path of a .wsim file: parse it,
+ * then print the ideal and the simulated figures for it.
+ *
+ * Returns 1 as soon as a file cannot be opened, 0 otherwise.
+ */
+int main(int argc, char **argv)
+{
+	int arg = 1;
+
+	while (arg < argc) {
+		const char *path = argv[arg++];
+		struct work *work;
+		FILE *file;
+
+		file = fopen(path, "r");
+		if (file == NULL) {
+			perror(path);
+			return 1;
+		}
+
+		/* The file is only needed while the work is being parsed */
+		work = parse_work(file);
+		fclose(file);
+
+		graphviz(work);
+
+		ideal_depth(work);
+		simulate_client(work);
+	}
+
+	return 0;
+}
-- 
2.18.0



More information about the Intel-gfx mailing list