[igt-dev] [PATCH i-g-t] igt: Add gem_ctx_freq to exercise requesting freq on a ctx

Tue Apr 10 16:17:13 UTC 2018

On 16/03/2018 11:06, Arkadiusz Hiler wrote:
> From: Chris Wilson <chris at chris-wilson.co.uk>
> 
> Exercise some new API that allows applications to request that
> individual contexts are executed within a desired frequency range.
> 
> v2: Split single/continuous set_freq subtests
> v3: Do an up/down ramp for individual freq request, check nothing
> changes after each invalid request
> v4: Check the frequencies reported by the kernel across the entire
> range.
> v5: Rewrite sandwich to create a sandwich between multiple concurrent
> engines.
> v6: Exercise sysfs overrides.
> v7: Reset min/max of default context after independent(); don't ask
> about failure
> v8: Check transition beyond randomly chosen frequencies as well as
> up/down ramps.
> v9: Fix meson build. (A. Hiler)
> 
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Praveen Paneri <praveen.paneri at intel.com>
> Cc: Sagar A Kamble <sagar.a.kamble at intel.com>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
> Reviewed-by: Antonio Argenziano <antonio.argenziano at intel.com> #v5
> Reviewed-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
> Signed-off-by: Arkadiusz Hiler <arkadiusz.hiler at intel.com>
> ---
>   tests/Makefile.am      |   1 +
>   tests/Makefile.sources |   1 +
>   tests/gem_ctx_freq.c   | 837 +++++++++++++++++++++++++++++++++++++++++++++++++
>   tests/meson.build      |  20 +-
>   4 files changed, 853 insertions(+), 6 deletions(-)
>   create mode 100644 tests/gem_ctx_freq.c
> 
> diff --git a/tests/Makefile.am b/tests/Makefile.am
> index dbc7be72..389f7fc7 100644
> --- a/tests/Makefile.am
> +++ b/tests/Makefile.am
> @@ -104,6 +104,7 @@ drm_import_export_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   drm_import_export_LDADD = $(LDADD) -lpthread
>   gem_close_race_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   gem_close_race_LDADD = $(LDADD) -lpthread
> +gem_ctx_freq_LDADD = $(LDADD) $(top_builddir)/lib/libigt_perf.la
>   gem_ctx_thrash_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
>   gem_ctx_thrash_LDADD = $(LDADD) -lpthread
>   gem_exec_parallel_CFLAGS = $(AM_CFLAGS) $(THREAD_CFLAGS)
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 4e6f5319..a4ca85bc 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -58,6 +58,7 @@ TESTS_progs = \
>   	gem_ctx_bad_exec \
>   	gem_ctx_create \
>   	gem_ctx_exec \
> +	gem_ctx_freq \
>   	gem_ctx_isolation \
>   	gem_ctx_param \
>   	gem_ctx_switch \
> diff --git a/tests/gem_ctx_freq.c b/tests/gem_ctx_freq.c
> new file mode 100644
> index 00000000..83509ea6
> --- /dev/null
> +++ b/tests/gem_ctx_freq.c
> @@ -0,0 +1,837 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + */
> +
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <sys/time.h>
> +#include <time.h>
> +
> +#include "igt.h"
> +#include "igt_perf.h"
> +#include "igt_sysfs.h"
> +
> +#define LOCAL_CONTEXT_PARAM_FREQUENCY 7
> +
> +#define SAMPLE_PERIOD (USEC_PER_SEC / 10)
> +#define PMU_TOLERANCE 100
> +
> +static int sysfs = -1;
> +
> +static int __set_freq(int fd, uint32_t ctx, uint32_t min, uint32_t max)
> +{
> +	struct drm_i915_gem_context_param param = {
> +		.ctx_id = ctx,
> +		.param = LOCAL_CONTEXT_PARAM_FREQUENCY,
> +		.value = (uint64_t)max << 32 | min,
> +	};
> +
> +	return __gem_context_set_param(fd, &param);
> +}
> +
> +static void set_freq(int fd, uint32_t ctx, uint32_t min, uint32_t max)
> +{
> +	igt_assert_eq(__set_freq(fd, ctx, min, max), 0);
> +}
> +
> +static void get_freq(int fd, uint32_t ctx, uint32_t *min, uint32_t *max)
> +{
> +	struct drm_i915_gem_context_param param = {
> +		.ctx_id = ctx,
> +		.param = LOCAL_CONTEXT_PARAM_FREQUENCY,
> +	};
> +
> +	gem_context_get_param(fd, &param);
> +
> +	*min = param.value & 0xffffffff;
> +	*max = param.value >> 32;
> +}
> +
> +static double measure_frequency(int pmu, int period_us)
> +{
> +	uint64_t data[2];
> +	uint64_t d_t, d_v;
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	d_v = -data[0];
> +	d_t = -data[1];
> +
> +	usleep(period_us);
> +
> +	igt_assert_eq(read(pmu, data, sizeof(data)), sizeof(data));
> +	d_v += data[0];
> +	d_t += data[1];
> +
> +	return d_v * 1e9 / d_t;
> +}
> +
> +static bool __pmu_within_tolerance(double actual, double target)
> +{
> +	return (actual > target - PMU_TOLERANCE &&
> +	       	actual < target + PMU_TOLERANCE);
> +}

Hopefully +/- 100 MHz survives on thermally challenged devices.

> +
> +static void pmu_assert(double actual, double target)
> +{
> +	igt_assert_f(__pmu_within_tolerance(actual, target),
> +		     "Measured frequency %.2fMHz, is beyond target %.2f+-%dMhz",
> +		     actual, target, PMU_TOLERANCE);
> +}
> +
> +static void single(int fd, const struct intel_execution_engine *e)
> +{
> +#define N_STEPS 10
> +	const unsigned int engine = e->exec_id | e->flags;
> +	uint32_t ctx = gem_context_create(fd);
> +	uint32_t frequencies[2*N_STEPS + 1];
> +	uint32_t min, max;
> +	double measured;
> +	igt_spin_t *spin;
> +	int pmu;
> +
> +	get_freq(fd, ctx, &min, &max);
> +	igt_info("Min freq: %dMHz; Max freq: %dMHz\n", min, max);
> +
> +	pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> +	igt_require(pmu >= 0);

For BYT I guess.

> +
> +	for (int step = 0; step <= 2*N_STEPS; step++) {
> +		int frac = step > N_STEPS ? 2*N_STEPS - step : step;
> +		frequencies[step] = min + (max - min) * frac / N_STEPS;
> +	}

Wouldn't harm moving to helper whose name could avoid writing a comment, 
like make_triangle_wave(&frequencies, N_STEPS, min, max)?

> +
> +	for (int pass = 0; pass < 3; pass++) {
> +		for (int i = 0; i < ARRAY_SIZE(frequencies); i++) {
> +			uint32_t freq = frequencies[i];
> +			uint32_t cur, discard;
> +
> +			set_freq(fd, ctx, freq, freq);
> +			get_freq(fd, ctx, &cur, &discard);
> +
> +			gem_quiescent_gpu(fd);
> +			spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> +			usleep(10000);

Use polling spinner?

> +
> +			measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +			igt_debugfs_dump(fd, "i915_rps_boost_info");
> +
> +			igt_spin_batch_free(fd, spin);
> +			igt_info("%s(%s): Measured %.1fMHz, expected %dMhz\n",
> +				 e->name, __func__,  measured, cur);
> +			pmu_assert(measured, cur);
> +		}
> +
> +		igt_permute_array(frequencies,
> +				  ARRAY_SIZE(frequencies),
> +				  igt_exchange_int);

Sad for the little triangle. :) Do you even need it (triangle) in the 
first place then?

> +	}
> +	gem_quiescent_gpu(fd);
> +
> +	close(pmu);
> +	gem_context_destroy(fd, ctx);
> +
> +#undef N_STEPS
> +}
> +
> +static void continuous(int fd, const struct intel_execution_engine *e)
> +{
> +#define N_STEPS 10
> +	const unsigned int engine = e->exec_id | e->flags;
> +	uint32_t ctx = gem_context_create(fd);
> +	uint32_t frequencies[2*N_STEPS + 1];
> +	uint32_t min, max;
> +	double measured;
> +	igt_spin_t *spin;
> +	int pmu;
> +
> +	get_freq(fd, ctx, &min, &max);
> +	igt_info("Min freq: %dMHz; Max freq: %dMHz\n", min, max);
> +
> +	pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> +	igt_require(pmu >= 0);
> +
> +	for (int step = 0; step <= 2*N_STEPS; step++) {
> +		int frac = step > N_STEPS ? 2*N_STEPS - step : step;
> +		frequencies[step] = min + (max - min) * frac / N_STEPS;
> +	}
> +
> +	gem_quiescent_gpu(fd);
> +	spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> +	for (int pass = 0; pass < 3; pass++) {
> +		for (int i = 0; i < ARRAY_SIZE(frequencies); i++) {
> +			uint32_t freq = frequencies[i];
> +			uint32_t cur, discard;
> +			igt_spin_t *kick;
> +
> +			set_freq(fd, ctx, freq, freq);
> +			get_freq(fd, ctx, &cur, &discard);
> +
> +			/*
> +			 * When requesting a new frequency on the currently
> +			 * executing context, it does not take effect until the
> +			 * next context switch. In this case, we trigger a lite
> +			 * restore.
> +			 */
> +			kick = __igt_spin_batch_new(fd, ctx, engine, 0);
> +			igt_spin_batch_free(fd, spin);
> +			spin = kick;
> +
> +			usleep(10000);
> +
> +			measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +			igt_debugfs_dump(fd, "i915_rps_boost_info");
> +
> +			igt_info("%s(continuous): Measured %.1fMHz, expected %dMhz\n",

You used __func__ above, but I am not sure why even put the test name 
here since it will be obvious from the overall log?

> +				 e->name, measured, cur);
> +			pmu_assert(measured, cur);
> +		}
> +
> +		igt_permute_array(frequencies,
> +				  ARRAY_SIZE(frequencies),
> +				  igt_exchange_int);
> +	}
> +	igt_spin_batch_free(fd, spin);
> +	gem_quiescent_gpu(fd);
> +
> +	close(pmu);
> +	gem_context_destroy(fd, ctx);
> +#undef N_STEPS
> +}
> +
> +static void inflight(int fd, const struct intel_execution_engine *e)
> +{
> +	const unsigned int engine = e->exec_id | e->flags;
> +	uint32_t ctx, min, max, freq, discard;
> +	double measured;
> +	igt_spin_t *plug, *work[2];
> +	int pmu;
> +
> +	pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> +	igt_require(pmu >= 0);
> +
> +	ctx = gem_context_create(fd);
> +	get_freq(fd, ctx, &min, &max);
> +	set_freq(fd, ctx, min, min);
> +
> +	igt_info("Min freq: %dMHz; Max freq: %dMHz\n", min, max);
> +
> +	gem_quiescent_gpu(fd);
> +	plug = igt_spin_batch_new(fd, ctx, engine, 0);
> +	gem_context_destroy(fd, ctx);
> +	for (int n = 0; n < 16; n++) {
> +		struct drm_i915_gem_exec_object2 obj = {
> +			.handle = plug->handle,
> +		};
> +		struct drm_i915_gem_execbuffer2 eb = {
> +			.buffer_count = 1,
> +			.buffers_ptr = to_user_pointer(&obj),
> +			.flags = engine,
> +			.rsvd1 = gem_context_create(fd),
> +		};
> +		set_freq(fd, eb.rsvd1, min, min);
> +		gem_execbuf(fd, &eb);
> +		gem_context_destroy(fd, eb.rsvd1);
> +	}
> +	measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +	igt_debugfs_dump(fd, "i915_rps_boost_info");
> +	igt_info("%s(plug): Measured %.1fMHz, expected %dMhz\n",
> +		 e->name, measured, min);
> +	pmu_assert(measured, min);

I would expect queued contexts would have different limits so here you 
can check that the limit from the currently executing one remains in 
force? Like it is I am not sure what it's testing?

> +
> +	ctx = gem_context_create(fd);
> +	set_freq(fd, ctx, max, max);

This line looks important for the test so needs a comment since it is 
not obvious.

> +	work[0] = __igt_spin_batch_new(fd, ctx, engine, 0);
> +
> +	/* work is now queued but not executing */
> +	freq = (max + min) / 2;
> +	set_freq(fd, ctx, freq, freq);
> +	get_freq(fd, ctx, &freq, &discard);
> +	gem_context_destroy(fd, ctx);
> +
> +	ctx = gem_context_create(fd);
> +	set_freq(fd, ctx, max, max);
> +	work[1] = __igt_spin_batch_new(fd, ctx, engine, 0);
> +	gem_context_destroy(fd, ctx);
> +
> +	igt_spin_batch_end(plug);
> +	do
> +		usleep(10000);
> +	while (gem_bo_busy(fd, plug->handle));

gem_sync wouldn't cut it? Something internal could fool us here as the 
queued up batches proceed to start and complete?

Also braces please or it looks to weird.

> +	igt_spin_batch_free(fd, plug);
> +
> +	/* Now work will execute */
> +	measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +	igt_debugfs_dump(fd, "i915_engine_info");

Need this?

> +	igt_debugfs_dump(fd, "i915_rps_boost_info");
> +	igt_info("%s(work0): Measured %.1fMHz, expected %dMhz\n",
> +		 e->name, measured, freq);
> +	pmu_assert(measured, freq);
> +
> +	igt_spin_batch_end(work[0]);
> +	do
> +		usleep(10000);
> +	while (gem_bo_busy(fd, work[0]->handle));
> +	igt_spin_batch_free(fd, work[0]);
> +
> +	measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +	igt_debugfs_dump(fd, "i915_engine_info");
> +	igt_debugfs_dump(fd, "i915_rps_boost_info");
> +	igt_info("%s(work1): Measured %.1fMHz, expected %dMhz\n",
> +		 e->name, measured, max);
> +	pmu_assert(measured, max);
> +
> +	igt_spin_batch_free(fd, work[1]);
> +	close(pmu);
> +	gem_quiescent_gpu(fd);
> +}
> +
> +static void set_sysfs_freq(uint32_t min, uint32_t max)
> +{
> +	igt_sysfs_printf(sysfs, "gt_min_freq_mhz", "%u", min);
> +	igt_sysfs_printf(sysfs, "gt_max_freq_mhz", "%u", max);
> +}
> +
> +static void get_sysfs_freq(uint32_t *min, uint32_t *max)
> +{
> +	igt_sysfs_scanf(sysfs, "gt_min_freq_mhz", "%u", min);
> +	igt_sysfs_scanf(sysfs, "gt_max_freq_mhz", "%u", max);
> +}
> +
> +static void sysfs_clamp(int fd, const struct intel_execution_engine *e)
> +{
> +#define N_STEPS 10
> +	const unsigned int engine = e->exec_id | e->flags;
> +	uint32_t ctx = gem_context_create(fd);
> +	uint32_t sys_min, sys_max;
> +	uint32_t min, max;
> +	double measured;
> +	igt_spin_t *spin;
> +	int pmu;
> +
> +	get_sysfs_freq(&sys_min, &sys_max);
> +	igt_info("System min freq: %dMHz; max freq: %dMHz\n", sys_min, sys_max);
> +
> +	get_freq(fd, ctx, &min, &max);
> +	igt_info("Context min freq: %dMHz; max freq: %dMHz\n", min, max);

Assert it matches sysfs values?

> +
> +	pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> +	igt_require(pmu >= 0);
> +
> +	for (int outer = 0; outer <= 2*N_STEPS; outer++) {
> +		int ofrac = outer > N_STEPS ? 2*N_STEPS - outer : outer;
> +		uint32_t ofreq = min + (max - min) * ofrac / N_STEPS;

Move to helper? But if my eyes are not fooling me looks the same 
triangle wave so for simplicity sake use local array and the helper I 
suggested earlier to fill it?

> +		uint32_t cur, discard;
> +
> +		for (int inner = 0; inner <= 2*N_STEPS; inner++) {

Rename inner and outer to global_freq and ctx_freq, or something?

> +			int ifrac = inner > N_STEPS ? 2*N_STEPS - inner : inner;
> +			uint32_t ifreq = min + (max - min) * ifrac / N_STEPS;
> +
> +			set_freq(fd, ctx, ifreq, ifreq);
> +
> +			gem_quiescent_gpu(fd);
> +			spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> +			usleep(10000);
> +
> +			set_sysfs_freq(ofreq, ofreq);
> +			get_sysfs_freq(&cur, &discard);

Read-back ctx freq and check is correct?

> +
> +			measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +			igt_debugfs_dump(fd, "i915_rps_boost_info");
> +
> +			set_sysfs_freq(sys_min, sys_max);

Just so we do not exit with random min/max set?

> +
> +			igt_spin_batch_free(fd, spin);
> +			igt_info("%s(sysfs): Measured %.1fMHz, context %dMhz, expected %dMhz\n",
> +					e->name, measured, ifreq, cur);
> +			pmu_assert(measured, cur);
> +		}
> +	}
> +	gem_quiescent_gpu(fd);
> +
> +	close(pmu);
> +	gem_context_destroy(fd, ctx);
> +
> +#undef N_STEPS
> +}
> +
> +static void sandwich_engine(int fd, unsigned int engine, int timeout)
> +{
> +	uint32_t ctx = gem_context_create(fd);
> +	uint32_t min, max;
> +	int pmu;
> +
> +	pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> +	igt_require(pmu >= 0);
> +
> +	get_freq(fd, ctx, &min, &max);
> +
> +	igt_until_timeout(timeout) {
> +		uint32_t range[2];
> +		igt_spin_t *spin;
> +		double measured;
> +
> +		/* make sure we keep an overlap between all engines */

What kind of overlap do you mean? Deliberate absence of gem_sync 
probably not since that woldn't be completely making sure, and the 
commend is also associated with min/max calc.

> +		range[0] = min + (rand() % (max - min) / 2);
> +		range[1] = max - (rand() % (max - min) / 2);
> +
> +		set_freq(fd, ctx, range[0], range[1]);
> +		get_freq(fd, ctx, &range[0], &range[1]);
> +
> +		spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> +
> +		usleep(10000);
> +		measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +		igt_spin_batch_free(fd, spin);
> +
> +		igt_assert(measured >= range[0] - PMU_TOLERANCE &&
> +			   measured <= range[1] + PMU_TOLERANCE);
> +	}
> +
> +	gem_context_destroy(fd, ctx);
> +	close(pmu);
> +}
> +
> +static void sandwich(int fd, int timeout)
> +{
> +	unsigned int engine;
> +
> +	for_each_physical_engine(fd, engine) {
> +		igt_fork(child, 1)
> +			sandwich_engine(fd, engine, timeout);
> +	}

Hmm.. so engines run in parallel - but how come all can assert they got 
exactly the limits they wanted? Surely someone must get a different 
range depending on what's running on another engine?

If the given range is reflected in the get_freq query after setting, but 
then why can't it race (be changed) while the child is executing? Or you 
are counting on the sampling period to be short enough the effect of 
that is lost in tolerance?

> +
> +	igt_waitchildren();
> +	gem_quiescent_gpu(fd);
> +}
> +
> +static void pwm(int fd, unsigned int *engines, unsigned int nengine, int link)
> +{
> +	uint32_t ctx[nengine];
> +
> +	fcntl(link, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);

Needs a comment - it's very unobvious what is the connection between drm 
fd and a communication pipe to be used like this.

> +
> +	for (unsigned int n = 0; n < nengine; n++)
> +		ctx[n] = gem_context_create(fd);
> +
> +	do {
> +		igt_spin_t *spin;
> +		struct {
> +			uint32_t engine;
> +			uint32_t min;
> +			uint32_t max;
> +		} req;
> +
> +		while (read(link, &req, sizeof(req)) > 0) {
> +			if ((req.engine | req.min | req.max) == 0)
> +				goto out;
> +
> +			igt_assert(req.engine < nengine);
> +			set_freq(fd, ctx[req.engine], req.min, req.max);
> +		}
> +
> +		/* Create a 20% load using busy spinners */
> +		spin = __igt_spin_batch_new(fd, ctx[0], engines[0], 0);
> +		for (unsigned int n = 1; n < nengine; n++) {
> +			struct drm_i915_gem_exec_object2 obj = {
> +				.handle = spin->handle,
> +			};
> +			struct drm_i915_gem_execbuffer2 eb = {
> +				.buffer_count = 1,
> +				.buffers_ptr = to_user_pointer(&obj),
> +				.flags = engines[n],
> +				.rsvd1 = ctx[n],
> +			};
> +			gem_execbuf(fd, &eb);
> +		}
> +		usleep(100);
> +		igt_spin_batch_end(spin);
> +
> +		do
> +			usleep(10);
> +		while (gem_bo_busy(fd, spin->handle));

This loop should be moved to a helper since it is repeated a few times. 
(If it is required in this form.)

> +		igt_spin_batch_free(fd, spin);
> +		usleep(400);

100us/400us is I think too short if you want to get close to target 
ratio. Will see later whether you actualy depend on it.

> +	} while (1);
> +
> +out:
> +	for (unsigned int n = 0; n < nengine; n++)
> +		gem_context_destroy(fd, ctx[n]);
> +}
> +
> +static void smoketest(int fd, int timeout)
> +{
> +	unsigned int engines[16];
> +	unsigned int nengine;
> +	unsigned int engine;
> +	uint32_t min[16], max[16];
> +	int pmu, link[2];
> +
> +	get_freq(fd, 0, &min[0], &max[0]);
> +
> +	nengine = 0;
> +	for_each_physical_engine(fd, engine) {
> +		if (nengine == ARRAY_SIZE(engines) - 1)
> +			break;
> +
> +		min[nengine] = min[0];
> +		max[nengine] = max[0];
> +		engines[nengine] = engine;
> +		nengine++;
> +	}
> +	igt_require(nengine);
> +
> +	igt_assert(pipe(link) == 0);
> +	igt_fork(child, 1)
> +		pwm(fd, engines, nengine, link[0]);
> +	close(link[0]);
> +
> +	pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> +	igt_require(pmu >= 0);
> +
> +	igt_until_timeout(timeout) {
> +		struct {
> +			uint32_t engine;
> +			uint32_t min;
> +			uint32_t max;
> +		} req;
> +		double measured;
> +		uint32_t ctx;
> +
> +		req.engine = rand() % nengine;
> +
> +		ctx = gem_context_create(fd);
> +		get_freq(fd, ctx, &req.min, &req.max);
> +		req.min = rand() % (req.max - req.min) + req.min;
> +		req.max = rand() % (req.max - req.min) + req.min;
> +		set_freq(fd, ctx, req.min, req.max);
> +		get_freq(fd, ctx, &req.min, &req.max);
> +
> +		igt_debug("Replacing (%d, %d) on engine %x with (%d, %d)\n",
> +			  min[req.engine], max[req.engine], req.engine,
> +			  req.min, req.max);
> +		igt_assert(write(link[1], &req, sizeof(req)) == sizeof(req));
> +		gem_context_destroy(fd, ctx);
> +
> +		min[req.engine] = req.min;
> +		max[req.engine] = req.max;
> +
> +		for (unsigned int n = 0; n < nengine; n++) {
> +			igt_debug("[%d]: [%d, %d]\n", n, min[n], max[n]);
> +			if (min[n] < req.min)
> +				req.min = min[n];
> +			if (max[n] > req.max)
> +				req.max = max[n];
> +		}
> +		igt_assert(req.max >= req.min);
> +
> +		usleep(50000);

Have bi-directional comms so you know when child has processed the 
updated request?

> +		measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +
> +		if (measured <= req.min - PMU_TOLERANCE ||
> +		    measured >= req.max + PMU_TOLERANCE)

Could move to helper, think I've seen it once before so far.

> +			igt_debugfs_dump(fd, "i915_rps_boost_info");
> +		igt_info("Measured %.1fMHz, expected [%d, %d]Mhz\n",
> +			 measured, req.min, req.max);
> +		igt_assert(measured > req.min - PMU_TOLERANCE &&
> +			   measured < req.max + PMU_TOLERANCE);

Okay so there isn't any purpose in 20% PWM? Apart that frequency should 
travel towards min when test context is in the idle PWM half? So how 
come that does not affect the measured values?

> +	}
> +
> +	do {
> +		struct {
> +			uint32_t engine;
> +			uint32_t min;
> +			uint32_t max;
> +		} req = {};

3x local struct definition.

> +
> +		write(link[1], &req, sizeof(req));
> +		close(link[1]);
> +	} while (0);
> +	igt_waitchildren();
> +	gem_quiescent_gpu(fd);
> +
> +	close(pmu);
> +}
> +
> +static void invalid_context(int fd, uint32_t ctx, uint32_t min, uint32_t max)
> +{
> +	const struct test {
> +		uint32_t min, max;
> +	} tests[] = {
> +		{ min - 50, max - 50 },
> +		{ min - 50, max },
> +		{ min - 50, max + 50 },
> +		{ min, max + 50 },
> +		{ min + 50, max + 50 },
> +
> +		{ min - 50, min - 50 },
> +
> +		{ min - 50, min },
> +		{ min + 50, min },
> +		{ min, min - 50 },
> +
> +		{ max + 50, max },
> +		{ max, max + 50 },
> +		{ max, max - 50 },
> +
> +		{ max + 50, max + 50 },
> +
> +		{}
> +	};
> +
> +	for (const struct test *t = tests; t->min | t->max; t++) {
> +		uint32_t cur_min, cur_max;
> +
> +		igt_assert_f(__set_freq(fd, ctx, t->min, t->max) == -EINVAL,
> +			     "Failed to reject invalid [%d, %d] (valid range [%d, %d]) on context %d\n",
> +			     t->min, t->max, min, max, ctx);
> +
> +		get_freq(fd, 0, &cur_min, &cur_max);
> +		igt_assert_eq(cur_min, min);
> +		igt_assert_eq(cur_max, max);
> +	}
> +}
> +
> +static void invalid(int fd)
> +{
> +	uint32_t min, max, ctx;
> +
> +	get_freq(fd, 0, &min, &max);
> +
> +	invalid_context(fd, 0, min, max);
> +
> +	ctx = gem_context_create(fd);
> +	invalid_context(fd, ctx, min, max);
> +	gem_context_destroy(fd, ctx);
> +}
> +
> +static void idempotent_context(int fd, uint32_t ctx)
> +{
> +	uint32_t min, max;
> +	uint32_t cur_min, cur_max;
> +
> +	get_freq(fd, ctx, &min, &max);
> +
> +	set_freq(fd, ctx, max, max);
> +	get_freq(fd, ctx, &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, max);
> +	igt_assert_eq(cur_max, max);
> +
> +	set_freq(fd, ctx, min, min);
> +	get_freq(fd, ctx, &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, min);
> +
> +	set_freq(fd, ctx, min, max);
> +	get_freq(fd, ctx, &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +}
> +
> +static void idempotent(int fd)

fixed? By idempotent I tought it will be one which cannot be modified.

> +{
> +	uint32_t ctx;
> +
> +	idempotent_context(fd, 0);
> +
> +	ctx = gem_context_create(fd);
> +	idempotent_context(fd, ctx);
> +	gem_context_destroy(fd, ctx);
> +}
> +
> +static void range_context(int fd, uint32_t ctx)
> +{
> +	uint32_t min, max;
> +	uint32_t cur_min, cur_max;
> +
> +	get_freq(fd, ctx, &min, &max);
> +
> +	for (uint32_t freq = min; freq <= max; freq++) {
> +		set_freq(fd, ctx, freq, freq);
> +		get_freq(fd, ctx, &cur_min, &cur_max);
> +
> +		igt_assert(cur_min >= min);
> +		igt_assert(cur_max <= max);
> +	}
> +
> +	set_freq(fd, ctx, min, max);
> +	get_freq(fd, ctx, &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +}
> +
> +static void range(int fd)
> +{
> +	uint32_t ctx;
> +
> +	range_context(fd, 0);
> +
> +	ctx = gem_context_create(fd);
> +	range_context(fd, ctx);
> +	gem_context_destroy(fd, ctx);
> +}
> +
> +static void independent(int fd)
> +{
> +	uint32_t min, max;
> +	uint32_t cur_min, cur_max;
> +	uint32_t ctx[2];
> +
> +	get_freq(fd, 0, &min, &max);
> +
> +	set_freq(fd, 0, max, max);
> +	ctx[0] = gem_context_create(fd);
> +	get_freq(fd, ctx[0], &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +
> +	set_freq(fd, 0, min, min);
> +	get_freq(fd, ctx[0], &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +
> +	ctx[1] = gem_context_create(fd);
> +	get_freq(fd, ctx[1], &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +
> +	set_freq(fd, ctx[1], max, max);
> +	get_freq(fd, ctx[0], &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +
> +	get_freq(fd, 0, &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, min);
> +
> +	get_freq(fd, ctx[1], &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, max);
> +	igt_assert_eq(cur_max, max);
> +	gem_context_destroy(fd, ctx[1]);
> +
> +	get_freq(fd, ctx[0], &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +	gem_context_destroy(fd, ctx[0]);
> +
> +	set_freq(fd, 0, min, max);
> +	get_freq(fd, 0, &cur_min, &cur_max);
> +	igt_assert_eq(cur_min, min);
> +	igt_assert_eq(cur_max, max);
> +}
> +
> +static bool has_ctx_freq(int fd)
> +{
> +	struct drm_i915_gem_context_param param = {
> +		.param = LOCAL_CONTEXT_PARAM_FREQUENCY,
> +	};
> +
> +	return __gem_context_get_param(fd, &param) == 0;
> +}
> +
> +static void restore_sysfs_freq(int sig)
> +{
> +	char buf[256];
> +
> +	if (igt_sysfs_read(sysfs, "gt_RPn_freq_mhz", buf, sizeof(buf)) > 0) {
> +		igt_sysfs_set(sysfs, "gt_idle_freq_mhz", buf);
> +		igt_sysfs_set(sysfs, "gt_min_freq_mhz", buf);
> +	}
> +
> +	if (igt_sysfs_read(sysfs, "gt_RP0_freq_mhz", buf, sizeof(buf)) > 0) {
> +		igt_sysfs_set(sysfs, "gt_max_freq_mhz", buf);
> +		igt_sysfs_set(sysfs, "gt_boost_freq_mhz", buf);
> +	}
> +}
> +
> +static void disable_boost(int fd)
> +{
> +	char *value;
> +
> +	value = igt_sysfs_get(fd, "gt_RPn_freq_mhz");
> +	igt_sysfs_set(fd, "gt_boost_freq_mhz", value);
> +	free(value);
> +}
> +
> +igt_main
> +{
> +	const struct intel_execution_engine *e;
> +	int fd = -1;
> +
> +	igt_fixture {
> +		fd = drm_open_driver(DRIVER_INTEL);
> +		igt_require_gem(fd);
> +
> +		igt_require(has_ctx_freq(fd));
> +
> +		sysfs = igt_sysfs_open(fd, NULL);
> +		igt_assert(sysfs != -1);
> +		igt_install_exit_handler(restore_sysfs_freq);
> +
> +		disable_boost(sysfs);
> +	}
> +
> +	igt_subtest("invalid")
> +		invalid(fd);
> +
> +	igt_subtest("idempotent")
> +		idempotent(fd);
> +
> +	igt_subtest("range")
> +		range(fd);
> +
> +	igt_subtest("independent")
> +		independent(fd);
> +
> +	igt_skip_on_simulation();
> +
> +	for (e = intel_execution_engines; e->name; e++) {
> +		igt_subtest_group {
> +			igt_fixture {
> +				gem_require_ring(fd, e->exec_id | e->flags);
> +			}
> +
> +			igt_subtest_f("%s-single", e->name)
> +				single(fd, e);
> +			igt_subtest_f("%s-continuous", e->name)
> +				continuous(fd, e);
> +			igt_subtest_f("%s-inflight", e->name)
> +				inflight(fd, e);
> +			igt_subtest_f("%s-sysfs", e->name)
> +				sysfs_clamp(fd, e);
> +		}
> +	}
> +
> +	igt_subtest("sandwich")
> +		sandwich(fd, 20);
> +
> +	igt_subtest("smoketest")
> +		smoketest(fd, 20);
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 1176463c..1901cb16 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -1,3 +1,9 @@
> +# those will be linked with lib_igt_perf
> +perf_test_progs = [
> +	'gem_ctx_freq',
> +	'perf_pmu',
> +]
> +
>   test_progs = [
>   	'core_auth',
>   	'core_get_client_auth',
> @@ -288,12 +294,14 @@ foreach prog : test_progs
>   		   install : true)
>   endforeach
>   
> -test_executables += executable('perf_pmu', 'perf_pmu.c',
> -	   dependencies : test_deps + [ lib_igt_perf ],
> -	   install_dir : libexecdir,
> -	   install_rpath : rpathdir,
> -	   install : true)
> -test_progs += 'perf_pmu'
> +foreach prog : perf_test_progs
> +	test_executables += executable(prog, prog + '.c',
> +		   dependencies : test_deps + [ lib_igt_perf ],
> +		   install_dir : libexecdir,
> +		   install_rpath : rpathdir,
> +		   install : true)
> +	test_progs += prog
> +endforeach
>   
>   executable('testdisplay', ['testdisplay.c', 'testdisplay_hotplug.c'],
>   	   dependencies : test_deps,
> 

Regards,

Tvrtko