[Intel-gfx] [PATCH i-g-t 15/17] benchmarks/wsim: Simulate and interpret .wsim
Chris Wilson
chris at chris-wilson.co.uk
Mon Jul 2 09:07:25 UTC 2018
A little tool I've been meaning to write for a while... Convert the
.wsim into their dag and find the longest chains and evaluate them on an
simulated machine.
v2: Implement barriers to handle sync commands
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
benchmarks/Makefile.sources | 1 +
benchmarks/sim_wsim.c | 419 ++++++++++++++++++++++++++++++++++++
2 files changed, 420 insertions(+)
create mode 100644 benchmarks/sim_wsim.c
diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
index 86928df5c..633623527 100644
--- a/benchmarks/Makefile.sources
+++ b/benchmarks/Makefile.sources
@@ -17,6 +17,7 @@ benchmarks_prog_list = \
gem_wsim \
kms_vblank \
prime_lookup \
+ sim_wsim \
vgem_mmap \
$(NULL)
diff --git a/benchmarks/sim_wsim.c b/benchmarks/sim_wsim.c
new file mode 100644
index 000000000..5f3d56045
--- /dev/null
+++ b/benchmarks/sim_wsim.c
@@ -0,0 +1,419 @@
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "igt_aux.h"
+
+#if 0
+#define DBG(...) printf(__VA_ARGS__)
+#else
+#define DBG(...) do { } while(0)
+#endif
+
+struct dependency {
+ struct task *signal;
+ struct igt_list link;
+};
+
+struct task {
+ int step;
+ int ctx;
+ int class;
+ int instance;
+
+ unsigned long min, max;
+ unsigned long duration;
+ unsigned long deadline;
+
+ struct igt_list link;
+ struct igt_list sched;
+ struct igt_list signals;
+
+ bool visited;
+};
+
+struct work {
+ struct task **tasks;
+ unsigned int count, size;
+
+ struct igt_list ends;
+ struct task *barrier;
+};
+
+static void add_dependency(struct task *task, struct task *signal)
+{
+ struct dependency *dep;
+
+ DBG("%s %d (context %d, engine %d) -> %d (context %d, engine %d)\n",
+ __func__,
+ signal->step, signal->ctx, signal->class,
+ task->step, task->ctx, task->class);
+
+ dep = malloc(sizeof(*dep));
+ dep->signal = signal;
+ igt_list_add(&dep->link, &task->signals);
+ if (!igt_list_empty(&signal->link)) {
+ igt_list_del(&signal->link);
+ igt_list_init(&signal->link);
+ }
+}
+
+enum class {
+ RCS,
+ BCS,
+ VCS,
+ VECS,
+};
+
+static void add_task(struct work *work, char *line)
+{
+#define TSEP "."
+
+ static const char *engines[] = {
+ [RCS] = "rcs",
+ [BCS] = "bcs",
+ [VCS] = "vcs",
+ [VECS] = "vecs",
+ };
+ struct task *task;
+ char *token;
+ int i;
+
+ DBG("line='%s'\n", line);
+
+ token = strtok(line, TSEP);
+
+ if (!strcasecmp(token, "s")) { /* sync point */
+ int sync = atoi(strtok(NULL, TSEP));
+
+ DBG("syncpt %d\n", sync);
+
+ work->barrier = work->tasks[work->count + sync];
+ return;
+ }
+
+ if (!isdigit(*token)) {
+ fprintf(stderr, "Ignoring step '%s' at your peril!\n", token);
+ return;
+ }
+
+ /*
+ * 1.RCS.2800-3100.-1.0
+ * - context
+ * - engine
+ * - delay
+ * - dependencies
+ * - sync
+ */
+ task = malloc(sizeof(*task));
+
+ igt_list_init(&task->signals);
+ task->step = work->count;
+ task->visited = false;
+
+ /* context */
+ DBG("context='%s'\n", token);
+ task->ctx = atoi(token);
+
+ /* engine */
+ token = strtok(NULL, TSEP);
+ DBG("engine='%s'\n", token);
+ task->class = -1;
+ for (i = 0; i < sizeof(engines)/sizeof(*engines); i++) {
+ const int len = strlen(engines[i]);
+ if (!strncasecmp(token, engines[i], len)) {
+ task->class = i;
+ if (token[len])
+ task->instance = atoi(token + len);
+ else
+ task->instance = -1;
+ break;
+ }
+ }
+
+ /* delay */
+ token = strtok(NULL, TSEP);
+ DBG("delay='%s'\n", token);
+ task->min = strtol(token, &token, 0);
+ if (*token)
+ task->max = strtol(token + 1, NULL, 0);
+ else
+ task->max = task->min;
+ task->duration = (task->min + task->max) / 2;
+ DBG("min=%lu, max=%lu; duration=%lu\n", task->min, task->max, task->duration);
+
+ /* dependencies */
+ token = strtok(NULL, TSEP);
+ DBG("deps='%s'\n", token);
+ while ((i = strtol(token, &token, 0))) {
+ add_dependency(task, work->tasks[work->count + i]);
+ if (*token)
+ token++;
+ }
+
+ /* add a dependency for the context+engine timeline */
+ for (i = work->count; --i >= 0; ) {
+ if (work->tasks[i]->ctx == task->ctx &&
+ work->tasks[i]->class == task->class) {
+ add_dependency(task, work->tasks[i]);
+ break;
+ }
+ }
+
+ if (work->barrier)
+ add_dependency(task, work->barrier);
+
+ /* sync -- we become the barrier */
+ if (atoi(strtok(NULL, TSEP))) {
+ DBG("marking as a sync point\n");
+ work->barrier = task;
+ }
+
+ igt_list_add(&task->link, &work->ends);
+ work->tasks[work->count++] = task;
+
+#undef TSEP
+}
+
+static struct work *parse_work(FILE *file)
+{
+ struct work *work;
+ char *line = NULL;
+ size_t len = 0;
+
+ work = malloc(sizeof(*work));
+ igt_list_init(&work->ends);
+ work->barrier = NULL;
+
+ work->size = 64;
+ work->count = 0;
+ work->tasks = malloc(sizeof(*work->tasks) * work->size);
+
+ while (getline(&line, &len, file) != -1) {
+ if (work->count == work->size) {
+ work->tasks = realloc(work->tasks,
+ sizeof(*work->tasks) * work->size);
+ work->size *= 2;
+ }
+ add_task(work, line);
+ }
+
+ free(line);
+
+ DBG("%d tasks\n", work->count);
+ return work;
+}
+
+static unsigned long sum_durations(struct task *task)
+{
+ unsigned long max_duration = 0;
+ struct task *signal = NULL;
+ struct dependency *dep;
+
+ igt_list_for_each(dep, &task->signals, link) {
+ if (dep->signal->duration > max_duration) {
+ signal = dep->signal;
+ max_duration = signal->duration;
+ }
+ }
+
+ return task->duration + (signal ? sum_durations(signal) : 0);
+}
+
+static void ideal_depth(struct work *work)
+{
+ unsigned long total_duration;
+ unsigned long max_duration;
+ struct task *task;
+ int i;
+
+ /*
+ * The ideal depth is the longest chain of dependencies as the
+ * dependency chain requires sequential task execution. Each
+ * chain is assumed to be run in parallel on an infinite set of
+ * engines, so the ratelimiting step is its longest path.
+ */
+ max_duration = 0;
+ igt_list_for_each(task, &work->ends, link) {
+ unsigned long duration = sum_durations(task);
+ if (duration > max_duration)
+ max_duration = duration;
+ }
+
+ total_duration = 0;
+ for (i = 0; i < work->count; i++)
+ total_duration += work->tasks[i]->duration;
+
+ printf("Single client\n");
+ printf(" total duration %luus; %.2f wps\n", total_duration, 1e6/total_duration);
+ printf(" ideal duration %luus; %.2f wps\n", max_duration, 1e6/max_duration);
+}
+
+struct sim_class {
+ int ninstance;
+ unsigned long deadline[4];
+ unsigned long busy[4];
+};
+
+static void simulate_client(struct work *work)
+{
+ struct sim_class sim[] = {
+ [RCS] = { 1 },
+ [BCS] = { 1 },
+ [VCS] = { 2 },
+ [VECS] = { 1 },
+ }, *class;
+ IGT_LIST(sched);
+ struct task *task;
+ unsigned long max;
+ int i, j;
+
+ printf("Simulated clients:\n");
+
+ for (i = 0; i < work->count; i++)
+ igt_list_init(&work->tasks[i]->sched);
+
+ igt_list_for_each(task, &work->ends, link)
+ igt_list_add_tail(&task->sched, &sched);
+
+ igt_list_for_each(task, &sched, sched) {
+ struct dependency *dep;
+
+ igt_list_for_each(dep, &task->signals, link)
+ igt_list_move_tail(&dep->signal->sched, &sched);
+ }
+
+ igt_list_for_each_reverse(task, &sched, sched) {
+ struct dependency *dep;
+ int instance;
+
+ class = &sim[task->class];
+ max = class->deadline[0];
+
+ instance = task->instance;
+ if (instance < 0) {
+ instance = 0;
+ for (i = 1; i < class->ninstance; i++) {
+ if (class->deadline[i] < max) {
+ max = class->deadline[i];
+ instance = i;
+ }
+ }
+ }
+
+ /*
+ * Greedy (first available), not true optimal scheduling.
+ *
+ * For optimal, we do have to compute the global optimal
+ * ordering by checking every permutation...
+ */
+ igt_list_for_each(dep, &task->signals, link) {
+ if (dep->signal->deadline > max)
+ max = dep->signal->deadline;
+ }
+
+ DBG("task %d: engine %d, instance %d; finish %lu\n",
+ task->step, task->class, instance, max);
+
+ task->deadline = max + task->duration;
+ class->deadline[instance] = task->deadline;
+ class->busy[instance] += task->duration;
+ }
+
+ max = 0;
+ for (i = 0; i < sizeof(sim)/sizeof(sim[0]); i++) {
+ class = &sim[i];
+ for (j = 0; j < class->ninstance; j++) {
+ if (class->deadline[j] > max)
+ max = class->deadline[j];
+ }
+ }
+ printf(" single duration %luus; %.2f wps\n", max, 1e6/max);
+
+ /*
+ * Compute the maximum duration required on any engine.
+ *
+ * With sufficient clients forcing maximum occupancy under their weight,
+ * the ratelimiting step becomes a single engine and how many clients
+ * it takes to fill.
+ */
+ max = 0;
+ for (i = 0; i < sizeof(sim)/sizeof(sim[0]); i++) {
+ class = &sim[i];
+ for (j = 0; j < class->ninstance; j++) {
+ if (class->busy[j] > max)
+ max = class->busy[j];
+ }
+ }
+ printf(" packed duration %luus; %.2f wps\n", max, 1e6/max);
+}
+
+static void graphviz(struct work *work)
+{
+#if 0
+ int i, j;
+
+ printf("digraph {\n");
+ printf(" rankdir=LR;\n");
+ printf(" splines=line;\n");
+ printf("\n");
+
+ for (i = 0; i < work->count; i++) {
+ struct task *task = work->tasks[i];
+
+ if (task->visited)
+ goto skip;
+
+ printf(" subgraph cluster_%d {\n", task->ctx);
+ printf(" label=\"Context %d\"\n", task->ctx);
+ for (j = i; j < work->count; j++) {
+ if (work->tasks[j]->ctx == task->ctx) {
+ printf(" task_%03d;\n", j);
+ work->tasks[j]->visited = true;
+ }
+ }
+ printf(" }\n\n");
+
+skip:
+ task->visited = false;
+ }
+
+ for (i = 0; i < work->count; i++) {
+ struct task *task = work->tasks[i];
+ struct dependency *dep;
+
+ igt_list_for_each(dep, &task->signals, link) {
+ printf(" task_%03d -> task_%03d;\n",
+ dep->signal->step, task->step);
+ }
+ }
+
+ printf("}\n");
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ FILE *file = fopen(argv[i], "r");
+ struct work *work;
+
+ if (!file) {
+ perror(argv[i]);
+ return 1;
+ }
+
+ work = parse_work(file);
+ fclose(file);
+
+ graphviz(work);
+
+ ideal_depth(work);
+ simulate_client(work);
+ }
+
+ return 0;
+}
--
2.18.0
More information about the Intel-gfx
mailing list