[Intel-gfx] [PATCH igt] igt: Add gem_ctx_freq to exercise requesting freq on a ctx
Sagar Arun Kamble
sagar.a.kamble at intel.com
Tue Mar 13 12:38:04 UTC 2018
On 3/10/2018 3:05 AM, Chris Wilson wrote:
> Exercise some new API that allows applications to request that
> individual contexts are executed within a desired frequency range.
>
> v2: Split single/continuous set_freq subtests
> v3: Do an up/down ramp for individual freq request, check nothing
> changes after each invalid request
> v4: Check the frequencies reported by the kernel across the entire
> range.
> v5: Rewrite sandwich to create a sandwich between multiple concurrent
> engines.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Praveen Paneri <praveen.paneri at intel.com>
> Cc: Sagar A Kamble <sagar.a.kamble at intel.com>
> Cc: Antonio Argenziano <antonio.argenziano at intel.com>
<snip>
> +static void single(int fd, const struct intel_execution_engine *e)
> +{
> +#define N_STEPS 10
> + const unsigned int engine = e->exec_id | e->flags;
> + uint32_t ctx = gem_context_create(fd);
> + uint32_t min, max;
> + double measured;
> + igt_spin_t *spin;
> + int pmu;
> +
> + get_freq(fd, ctx, &min, &max);
> + igt_info("Min freq: %dMHz; Max freq: %dMHz\n", min, max);
> +
> + pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> + igt_require(pmu >= 0);
This igt_require can go to igt_fixture below.
> +
> + for (int step = 0; step <= 2*N_STEPS; step++) {
> + int frac = step > N_STEPS ? 2*N_STEPS - step : step;
> + uint32_t freq = min + (max - min) * frac / N_STEPS;
> + uint32_t cur, discard;
> +
> + set_freq(fd, ctx, freq, freq);
> + get_freq(fd, ctx, &cur, &discard);
> +
> + gem_quiescent_gpu(fd);
> + spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> + usleep(10000);
> +
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> + igt_debugfs_dump(fd, "i915_rps_boost_info");
> +
> + igt_spin_batch_free(fd, spin);
> + igt_info("%s(single): Measured %.1fMHz, expected %dMhz\n",
> + e->name, measured, cur);
> + igt_assert(measured > cur - 100 && measured < cur + 100);
Is this margin of 100MHz there to account for PMU accuracy?
> + }
> + gem_quiescent_gpu(fd);
> +
> + close(pmu);
> + gem_context_destroy(fd, ctx);
> +
> +#undef N_STEPS
> +}
> +
> +static void continuous(int fd, const struct intel_execution_engine *e)
> +{
> +#define N_STEPS 10
> + const unsigned int engine = e->exec_id | e->flags;
> + uint32_t ctx = gem_context_create(fd);
> + uint32_t min, max;
> + double measured;
> + igt_spin_t *spin;
> + int pmu;
> +
> + get_freq(fd, ctx, &min, &max);
> + igt_info("Min freq: %dMHz; Max freq: %dMHz\n", min, max);
> +
> + pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> + igt_require(pmu >= 0);
> +
> + gem_quiescent_gpu(fd);
> + spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> + for (int step = 0; step <= 2*N_STEPS; step++) {
> + int frac = step > N_STEPS ? 2*N_STEPS - step : step;
> + uint32_t freq = min + (max - min) * frac / N_STEPS;
> + uint32_t cur, discard;
> + igt_spin_t *kick;
> +
> + set_freq(fd, ctx, freq, freq);
> + get_freq(fd, ctx, &cur, &discard);
> +
> + /*
> + * When requesting a new frequency on the currently
> + * executing context, it does not take effect until the
> + * next context switch. In this case, we trigger a lite
> + * restore.
> + */
> + kick = __igt_spin_batch_new(fd, ctx, engine, 0);
> + igt_spin_batch_free(fd, spin);
> + spin = kick;
> +
> + usleep(10000);
> +
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> + igt_debugfs_dump(fd, "i915_rps_boost_info");
> +
> + igt_info("%s(continuous): Measured %.1fMHz, expected %dMhz\n",
> + e->name, measured, cur);
> + igt_assert(measured > cur - 100 && measured < cur + 100);
> + }
> + igt_spin_batch_free(fd, spin);
> + gem_quiescent_gpu(fd);
> +
> + close(pmu);
> + gem_context_destroy(fd, ctx);
> +#undef N_STEPS
> +}
> +
> +static void inflight(int fd, const struct intel_execution_engine *e)
> +{
> + const unsigned int engine = e->exec_id | e->flags;
> + uint32_t ctx, min, max, freq, discard;
> + double measured;
> + igt_spin_t *plug, *work[2];
> + int pmu;
> +
> + pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> + igt_require(pmu >= 0);
> +
> + ctx = gem_context_create(fd);
> + get_freq(fd, ctx, &min, &max);
> + set_freq(fd, ctx, min, min);
> +
> + igt_info("Min freq: %dMHz; Max freq: %dMHz\n", min, max);
> +
> + gem_quiescent_gpu(fd);
> + plug = igt_spin_batch_new(fd, ctx, engine, 0);
> + gem_context_destroy(fd, ctx);
> + for (int n = 0; n < 16; n++) {
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = plug->handle,
> + };
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffer_count = 1,
> + .buffers_ptr = to_user_pointer(&obj),
> + .flags = engine,
> + .rsvd1 = gem_context_create(fd),
> + };
> + set_freq(fd, eb.rsvd1, min, min);
> + gem_execbuf(fd, &eb);
> + gem_context_destroy(fd, eb.rsvd1);
> + }
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> + igt_debugfs_dump(fd, "i915_rps_boost_info");
> + igt_info("%s(plug): Measured %.1fMHz, expected %dMhz\n",
> + e->name, measured, min);
> + igt_assert(measured > min - 100 && measured < min + 100);
> +
> + ctx = gem_context_create(fd);
> + set_freq(fd, ctx, max, max);
This set_freq can be removed.
> + work[0] = __igt_spin_batch_new(fd, ctx, engine, 0);
> +
> + /* work is now queued but not executing */
> + freq = (max + min) / 2;
> + set_freq(fd, ctx, freq, freq);
> + get_freq(fd, ctx, &freq, &discard);
> + gem_context_destroy(fd, ctx);
> +
> + ctx = gem_context_create(fd);
> + set_freq(fd, ctx, max, max);
> + work[1] = __igt_spin_batch_new(fd, ctx, engine, 0);
> + gem_context_destroy(fd, ctx);
> +
> + igt_spin_batch_end(plug);
> + do
> + usleep(10000);
> + while (gem_bo_busy(fd, plug->handle));
> + igt_spin_batch_free(fd, plug);
> +
> + /* Now work will execute */
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> + igt_debugfs_dump(fd, "i915_engine_info");
> + igt_debugfs_dump(fd, "i915_rps_boost_info");
> + igt_info("%s(work0): Measured %.1fMHz, expected %dMhz\n",
> + e->name, measured, freq);
> + igt_assert(measured > freq - 100 && measured < freq + 100);
> +
> + igt_spin_batch_end(work[0]);
> + do
> + usleep(10000);
> + while (gem_bo_busy(fd, work[0]->handle));
> + igt_spin_batch_free(fd, work[0]);
> +
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> + igt_debugfs_dump(fd, "i915_engine_info");
> + igt_debugfs_dump(fd, "i915_rps_boost_info");
> + igt_info("%s(work1): Measured %.1fMHz, expected %dMhz\n",
> + e->name, measured, max);
> + igt_assert(measured > max - 100 && measured < max + 100);
> +
> + igt_spin_batch_free(fd, work[1]);
> + close(pmu);
> + gem_quiescent_gpu(fd);
> +}
> +
> +static void sandwich_engine(int fd, unsigned int engine, int timeout)
> +{
> + uint32_t ctx = gem_context_create(fd);
> + uint32_t min, max;
> + int pmu;
> +
> + pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> + igt_require(pmu >= 0);
> +
> + get_freq(fd, ctx, &min, &max);
> +
> + igt_until_timeout(timeout) {
> + uint32_t range[2];
> + igt_spin_t *spin;
> + double measured;
> +
> + /* make sure we keep an overlap between all engines */
> + range[0] = min + (rand() % (max - min) / 2);
> + range[1] = max - (rand() % (max - min) / 2);
> +
> + set_freq(fd, ctx, range[0], range[1]);
> + get_freq(fd, ctx, &range[0], &range[1]);
> +
> + spin = __igt_spin_batch_new(fd, ctx, engine, 0);
> +
> + usleep(10000);
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> + igt_spin_batch_free(fd, spin);
> +
> + igt_assert(measured >= range[0] - 100 &&
> + measured <= range[1] + 100);
> + }
> +
> + gem_context_destroy(fd, ctx);
> + close(pmu);
> +}
> +
> +static void sandwich(int fd, int timeout)
> +{
> + unsigned int engine;
> +
> + for_each_physical_engine(fd, engine) {
> + igt_fork(child, 1)
> + sandwich_engine(fd, engine, timeout);
> + }
> +
> + igt_waitchildren();
> + gem_quiescent_gpu(fd);
> +}
> +
> +static void pwm(int fd, unsigned int *engines, unsigned int nengine, int link)
> +{
> + uint32_t ctx[nengine];
> +
> + fcntl(link, F_SETFL, fcntl(fd, F_GETFL) | O_NONBLOCK);
> +
> + for (unsigned int n = 0; n < nengine; n++)
> + ctx[n] = gem_context_create(fd);
> +
> + do {
> + igt_spin_t *spin;
> + struct {
> + uint32_t engine;
> + uint32_t min;
> + uint32_t max;
> + } req;
> +
> + while (read(link, &req, sizeof(req)) > 0) {
> + if ((req.engine | req.min | req.max) == 0)
> + goto out;
> +
> + igt_assert(req.engine < nengine);
> + set_freq(fd, ctx[req.engine], req.min, req.max);
> + }
> +
> + /* Create a 20% load using busy spinners */
> + spin = __igt_spin_batch_new(fd, ctx[0], engines[0], 0);
> + for (unsigned int n = 1; n < nengine; n++) {
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = spin->handle,
> + };
> + struct drm_i915_gem_execbuffer2 eb = {
> + .buffer_count = 1,
> + .buffers_ptr = to_user_pointer(&obj),
> + .flags = engines[n],
> + .rsvd1 = ctx[n],
> + };
> + gem_execbuf(fd, &eb);
> + }
> + usleep(100);
> + igt_spin_batch_end(spin);
> +
> + do
> + usleep(10);
> + while (gem_bo_busy(fd, spin->handle));
> + igt_spin_batch_free(fd, spin);
> + usleep(400);
> + } while (1);
> +
> +out:
> + for (unsigned int n = 0; n < nengine; n++)
> + gem_context_destroy(fd, ctx[n]);
> +}
> +
> +static void smoketest(int fd, int timeout)
> +{
> + unsigned int engines[16];
> + unsigned int nengine;
> + unsigned int engine;
> + uint32_t min[16], max[16];
> + int pmu, link[2];
> +
> + get_freq(fd, 0, &min[0], &max[0]);
> +
> + nengine = 0;
> + for_each_physical_engine(fd, engine) {
> + if (nengine == ARRAY_SIZE(engines) - 1)
> + break;
> +
> + min[nengine] = min[0];
> + max[nengine] = max[0];
> + engines[nengine] = engine;
> + nengine++;
> + }
> + igt_require(nengine);
> +
> + igt_assert(pipe(link) == 0);
> + igt_fork(child, 1)
> + pwm(fd, engines, nengine, link[0]);
> + close(link[0]);
> +
> + pmu = perf_i915_open(I915_PMU_REQUESTED_FREQUENCY);
> + igt_require(pmu >= 0);
> +
> + igt_until_timeout(timeout) {
> + struct {
> + uint32_t engine;
> + uint32_t min;
> + uint32_t max;
> + } req;
> + double measured;
> + uint32_t ctx;
> +
> + req.engine = rand() % nengine;
> +
> + ctx = gem_context_create(fd);
> + get_freq(fd, ctx, &req.min, &req.max);
> + req.min = rand() % (req.max - req.min) + req.min;
> + req.max = rand() % (req.max - req.min) + req.min;
> + set_freq(fd, ctx, req.min, req.max);
> + get_freq(fd, ctx, &req.min, &req.max);
> +
> + igt_debug("Replacing (%d, %d) on engine %x with (%d, %d)\n",
> + min[req.engine], max[req.engine], req.engine,
> + req.min, req.max);
> + igt_assert(write(link[1], &req, sizeof(req)) == sizeof(req));
> + gem_context_destroy(fd, ctx);
> +
> + min[req.engine] = req.min;
> + max[req.engine] = req.max;
> +
> + for (unsigned int n = 0; n < nengine; n++) {
> + igt_debug("[%d]: [%d, %d]\n", n, min[n], max[n]);
> + if (min[n] < req.min)
> + req.min = min[n];
> + if (max[n] > req.max)
> + req.max = max[n];
> + }
I thought the policy i915 will be implementing is the max of the mins and the min of the maxes.
> + igt_assert(req.max >= req.min);
> +
> + usleep(50000);
> + measured = measure_frequency(pmu, SAMPLE_PERIOD);
> +
> + if (measured <= req.min - 100 || measured >= req.max + 100)
> + igt_debugfs_dump(fd, "i915_rps_boost_info");
> + igt_info("Measured %.1fMHz, expected [%d, %d]Mhz\n",
> + measured, req.min, req.max);
> + igt_assert(measured > req.min - 100 &&
> + measured < req.max + 100);
> + }
> +
> + do {
> + struct {
> + uint32_t engine;
> + uint32_t min;
> + uint32_t max;
> + } req = {};
> +
> + write(link[1], &req, sizeof(req));
> + close(link[1]);
> + } while (0);
> + igt_waitchildren();
> + gem_quiescent_gpu(fd);
> +
> + close(pmu);
> +}
> +
> +static void invalid_context(int fd, uint32_t ctx, uint32_t min, uint32_t max)
> +{
> + const struct test {
> + uint32_t min, max;
> + } tests[] = {
> + { min - 50, max - 50 },
> + { min - 50, max },
> + { min - 50, max + 50 },
> + { min, max + 50 },
> + { min + 50, max + 50 },
> +
> + { min - 50, min - 50 },
> +
> + { min - 50, min },
This one is similar to { min - 50, max }, where max is in range but min
is outside it.
The same applies on the max side. What is the reasoning for these cases?
> + { min + 50, min },
> + { min, min - 50 },
> +
> + { max + 50, max },
> + { max, max + 50 },
> + { max, max - 50 },
> +
> + { max + 50, max + 50 },
> +
> + {}
> + };
> +
> + for (const struct test *t = tests; t->min | t->max; t++) {
> + uint32_t cur_min, cur_max;
> +
> + igt_assert_f(__set_freq(fd, ctx, t->min, t->max) == -EINVAL,
> + "Failed to reject invalid [%d, %d] (valid range [%d, %d]) on context %d\n",
> + t->min, t->max, min, max, ctx);
> +
> + get_freq(fd, 0, &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> + }
> +}
> +
> +static void invalid(int fd)
> +{
> + uint32_t min, max, ctx;
> +
> + get_freq(fd, 0, &min, &max);
> +
> + invalid_context(fd, 0, min, max);
> +
> + ctx = gem_context_create(fd);
> + invalid_context(fd, ctx, min, max);
> + gem_context_destroy(fd, ctx);
> +}
> +
> +static void idempotent_context(int fd, uint32_t ctx)
> +{
> + uint32_t min, max;
> + uint32_t cur_min, cur_max;
> +
> + get_freq(fd, ctx, &min, &max);
> +
> + set_freq(fd, ctx, max, max);
> + get_freq(fd, ctx, &cur_min, &cur_max);
> + igt_assert_eq(cur_min, max);
> + igt_assert_eq(cur_max, max);
> +
> + set_freq(fd, ctx, min, min);
> + get_freq(fd, ctx, &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, min);
> +
> + set_freq(fd, ctx, min, max);
> + get_freq(fd, ctx, &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> +}
> +
> +static void idempotent(int fd)
> +{
> + uint32_t ctx;
> +
> + idempotent_context(fd, 0);
> +
> + ctx = gem_context_create(fd);
> + idempotent_context(fd, ctx);
> + gem_context_destroy(fd, ctx);
> +}
> +
> +static void range_context(int fd, uint32_t ctx)
> +{
> + uint32_t min, max;
> + uint32_t cur_min, cur_max;
> +
> + get_freq(fd, ctx, &min, &max);
> +
> + for (uint32_t freq = min; freq <= max; freq++) {
> + set_freq(fd, ctx, freq, freq);
> + get_freq(fd, ctx, &cur_min, &cur_max);
> +
> + igt_assert(cur_min >= min);
> + igt_assert(cur_max <= max);
> + }
> +
> + set_freq(fd, ctx, min, max);
> + get_freq(fd, ctx, &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> +}
> +
> +static void range(int fd)
> +{
> + uint32_t ctx;
> +
> + range_context(fd, 0);
> +
> + ctx = gem_context_create(fd);
> + range_context(fd, ctx);
> + gem_context_destroy(fd, ctx);
> +}
> +
> +static void independent(int fd)
> +{
> + uint32_t min, max;
> + uint32_t cur_min, cur_max;
> + uint32_t ctx[2];
> +
> + get_freq(fd, 0, &min, &max);
> +
> + set_freq(fd, 0, max, max);
> + ctx[0] = gem_context_create(fd);
> + get_freq(fd, ctx[0], &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> +
> + set_freq(fd, 0, min, min);
> + get_freq(fd, ctx[0], &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> +
> + ctx[1] = gem_context_create(fd);
> + get_freq(fd, ctx[1], &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> +
> + set_freq(fd, ctx[1], max, max);
> + get_freq(fd, ctx[0], &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> +
> + get_freq(fd, 0, &cur_min, &cur_max);
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, min);
> +
> + get_freq(fd, ctx[1], &cur_min, &cur_max);
> + igt_assert_eq(cur_min, max);
> + igt_assert_eq(cur_max, max);
> + gem_context_destroy(fd, ctx[1]);
> +
> + get_freq(fd, ctx[0], &cur_min, &cur_max);
There is no set_freq on ctx[0] between the earlier get_freq and this one,
so one of them can be skipped.
> + igt_assert_eq(cur_min, min);
> + igt_assert_eq(cur_max, max);
> + gem_context_destroy(fd, ctx[0]);
Do we need to restore min/max for the default context here?
> +}
> +
> +static bool has_ctx_freq(int fd)
> +{
> + struct drm_i915_gem_context_param param = {
> + .param = LOCAL_CONTEXT_PARAM_FREQUENCY,
> + };
> +
> + return __gem_context_get_param(fd, &param) == 0;
> +}
> +
> +igt_main
> +{
> + const struct intel_execution_engine *e;
> + int fd = -1;
> +
> + igt_fixture {
> + fd = drm_open_driver(DRIVER_INTEL);
> + igt_require_gem(fd);
> +
> + igt_require(has_ctx_freq(fd));
> + }
> +
> + igt_subtest("invalid")
> + invalid(fd);
> +
> + igt_subtest("idempotent")
> + idempotent(fd);
> +
> + igt_subtest("range")
> + range(fd);
> +
> + igt_subtest("independent")
> + independent(fd);
> +
> + igt_skip_on_simulation();
> +
> + for (e = intel_execution_engines; e->name; e++) {
> + igt_subtest_group {
> + igt_fixture {
> + gem_require_ring(fd, e->exec_id | e->flags);
> + }
> +
> + igt_subtest_f("%s-single", e->name)
> + single(fd, e);
> + igt_subtest_f("%s-continuous", e->name)
> + continuous(fd, e);
> + igt_subtest_f("%s-inflight", e->name)
> + inflight(fd, e);
> + }
> + }
> +
> + igt_subtest("sandwich")
> + sandwich(fd, 20);
> +
> + igt_subtest("smoketest")
> + smoketest(fd, 20);
> +}
> diff --git a/tests/meson.build b/tests/meson.build
> index 6e776bb7..fa5e3169 100644
> --- a/tests/meson.build
> +++ b/tests/meson.build
> @@ -36,6 +36,7 @@ test_progs = [
> 'gem_ctx_create',
> 'gem_ctx_exec',
> 'gem_ctx_isolation',
> + 'gem_ctx_freq',
This should be one entry above, to keep the list in alphabetical order.
> 'gem_ctx_param',
> 'gem_ctx_switch',
> 'gem_ctx_thrash',
--
Thanks,
Sagar
More information about the Intel-gfx
mailing list