[igt-dev] [PATCH i-g-t v3] tests/i915/gem_lmem_swapping: Add ccs subtests
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Tue Mar 22 09:51:09 UTC 2022
On Tue, Mar 22, 2022 at 04:30:53AM +0530, Ramalingam C wrote:
> On 2022-03-15 at 10:29:38 +0100, Zbigniew Kempczyński wrote:
> > On Mon, Mar 14, 2022 at 10:44:32AM +0530, Ramalingam C wrote:
> > > Add subtests for covering the compressed object's eviction.
> > >
> > > v2:
> > > gem_sync after the block_copy blit for init
> > > v3:
> > > ahnd is passed in as a param [Zbigniew]
> > > cmd is bb [Zbigniew]
> > > blt src and dst sizes supposed to be same [Zbigniew]
> > >
> > > Signed-off-by: Chris Wilson <chris.p.wilson at intel.com>
> > > Signed-off-by: Ayaz A Siddiqui <ayaz.siddiqui at intel.com>
> > > Signed-off-by: Ramalingam C <ramalingam.c at intel.com>
> > > ---
> > > tests/i915/gem_lmem_swapping.c | 222 +++++++++++++++++++++++++++++++--
> > > 1 file changed, 215 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/tests/i915/gem_lmem_swapping.c b/tests/i915/gem_lmem_swapping.c
> > > index 582111dddeb9..97858219aa96 100644
> > > --- a/tests/i915/gem_lmem_swapping.c
> > > +++ b/tests/i915/gem_lmem_swapping.c
> > > @@ -22,6 +22,7 @@
> > > #include <sys/time.h>
> > > #include <sys/wait.h>
> > > #include "drm.h"
> > > +#include "i915/i915_blt.h"
> > >
> > > IGT_TEST_DESCRIPTION("Exercise local memory swapping.");
> > >
> > > @@ -30,6 +31,7 @@ IGT_TEST_DESCRIPTION("Exercise local memory swapping.");
> > >
> > > #define PAGE_SIZE (1ULL << 12)
> > > #define SZ_64K (16 * PAGE_SIZE)
> > > +#define DG2_MOCS (2 << 1)
> > >
> > > static const char *readable_unit(uint64_t size)
> > > {
> > > @@ -60,6 +62,7 @@ struct params {
> > > #define TEST_RANDOM (1 << 3)
> > > #define TEST_ENGINES (1 << 4)
> > > #define TEST_MULTI (1 << 5)
> > > +#define TEST_CCS (1 << 6)
> > > unsigned int flags;
> > > unsigned int seed;
> > > bool oom_test;
> > > @@ -69,8 +72,56 @@ struct object {
> > > uint64_t size;
> > > uint32_t seed;
> > > uint32_t handle;
> > > + struct blt_copy_object *blt_obj;
> > > };
> > >
> > > +static void set_object(struct blt_copy_object *obj,
> > > + uint32_t handle, uint64_t size, uint32_t region,
> > > + uint8_t mocs, enum blt_tiling tiling,
> > > + enum blt_compression compression,
> > > + enum blt_compression_type compression_type)
> > > +{
> > > + obj->handle = handle;
> > > + obj->size = size;
> > > + obj->region = region;
> > > + obj->mocs = mocs;
> > > + obj->tiling = tiling;
> > > + obj->compression = compression;
> > > + obj->compression_type = compression_type;
> > > +}
> > > +
> > > +static void set_geom(struct blt_copy_object *obj, uint32_t pitch,
> > > + int16_t x1, int16_t y1, int16_t x2, int16_t y2,
> > > + uint16_t x_offset, uint16_t y_offset)
> > > +{
> > > + obj->pitch = pitch;
> > > + obj->x1 = x1;
> > > + obj->y1 = y1;
> > > + obj->x2 = x2;
> > > + obj->y2 = y2;
> > > + obj->x_offset = x_offset;
> > > + obj->y_offset = y_offset;
> > > +}
> > > +
> > > +static void set_batch(struct blt_copy_batch *batch,
> > > + uint32_t handle, uint64_t size, uint32_t region)
> > > +{
> > > + batch->handle = handle;
> > > + batch->size = size;
> > > + batch->region = region;
> > > +}
> > > +
> > > +static void set_object_ext(struct blt_block_copy_object_ext *obj,
> > > + uint8_t compression_format,
> > > + uint16_t surface_width, uint16_t surface_height,
> > > + enum blt_surface_type surface_type)
> > > +{
> > > + obj->compression_format = compression_format;
> > > + obj->surface_width = surface_width;
> > > + obj->surface_height = surface_height;
> > > + obj->surface_type = surface_type;
> > > +}
> > > +
> > > static uint32_t create_bo(int i915,
> > > uint64_t size,
> > > struct drm_i915_gem_memory_class_instance *region,
> > > @@ -105,6 +156,46 @@ init_object(int i915, struct object *obj, unsigned long seed, unsigned int flags
> > > munmap(buf, obj->size);
> > > }
> > >
> > > +#define BATCH_SIZE 4096
> > > +static void
> > > +init_object_ccs(int i915, struct object *obj, struct blt_copy_object *tmp,
> > > + struct blt_copy_batch *cmd, unsigned long seed,
> > > + const intel_ctx_t *ctx, uint32_t region, uint64_t ahnd)
> > > +{
> > > + struct blt_block_copy_data_ext ext = {}, *pext = &ext;
> > > + const struct intel_execution_engine2 *e;
> > > + struct blt_copy_data blt = {};
> > > + unsigned long *buf, j;
> > > +
> > > + obj->seed = seed;
> > > + for_each_ctx_engine(i915, ctx, e) {
> > > + igt_assert_f(gem_engine_can_block_copy(i915, e),
> > > + "Ctx dont have Blt engine");
> > > + break;
> > > + }
> > > +
> > > + buf = gem_mmap__device_coherent(i915, tmp->handle, 0, obj->size, PROT_WRITE);
> > > + gem_set_domain(i915, tmp->handle, I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
> > > +
> > > + for (j = 0; j < obj->size / sizeof(*buf); j++)
> > > + buf[j] = seed++;
> > > + munmap(buf, obj->size);
> > > +
> > > + memset(&blt, 0, sizeof(blt));
> > > + blt.color_depth = CD_32bit;
> > > +
> > > + memcpy(&blt.src, tmp, sizeof(blt.src));
> > > + memcpy(&blt.dst, obj->blt_obj, sizeof(blt.dst));
> > > + memcpy(&blt.bb, cmd, sizeof(blt.bb));
> > > +
> > > + set_object_ext(&ext.src, 0, tmp->x2, tmp->y2, SURFACE_TYPE_2D);
> > > + set_object_ext(&ext.dst, 0, obj->blt_obj->x2, obj->blt_obj->y2,
> > > + SURFACE_TYPE_2D);
> > > +
> > > + blt_block_copy(i915, ctx, e, ahnd, &blt, pext);
> > > + gem_sync(i915, obj->blt_obj->handle);
> >
> > You can avoid stalls if you will use gem_create_from_pool().
> Hi Zbigniew! Thanks for the review.
>
> AFAIK, a pool of objects is useful when we want to keep scheduling work with
> different batch buffers: we get a different, free bb each time, so
> we won't write into one that is still executing.
>
> Here the objects shared between init_object_ccs and verify_object_ccs are
> the obj and also cmd. So either way we need to sync here, which we are
> already doing on obj.
>
> Now could you please help me to understand how pool could help us here?
Currently you need to wait until the bb becomes idle (gem_sync()) before
reusing it and feeding it with new commands. This leads to stalls between
executions - after gem_sync() the GPU is doing nothing. These micro-pauses
can hide coherency issues which may occur in pipelined executions.
If you're doing
bb = gem_create_from_pool(i915, 4096);
you'll get the first handle which is 4KiB in size and not busy at the moment
of that call. So you don't need to use gem_sync(), because the library will
return the first unused 4KiB object. By removing gem_sync() you'll verify
how non-stalled execution behaves.
--
Zbigniew
> >
> > > +}
> > > +
> > > static void
> > > verify_object(int i915, const struct object *obj, unsigned int flags)
> > > {
> > > @@ -125,6 +216,53 @@ verify_object(int i915, const struct object *obj, unsigned int flags)
> > > munmap(buf, obj->size);
> > > }
> > >
> > > +static void
> > > +verify_object_ccs(int i915, const struct object *obj,
> > > + struct blt_copy_object *tmp, struct blt_copy_batch *cmd,
> > > + const intel_ctx_t *ctx, uint32_t region, uint64_t ahnd)
> > > +{
> > > + struct blt_block_copy_data_ext ext = {}, *pext = &ext;
> > > + const struct intel_execution_engine2 *e;
> > > + struct blt_copy_data blt = {};
> > > + unsigned long j, val, *buf;
> > > +
> > > + for_each_ctx_engine(i915, ctx, e) {
> > > + igt_assert_f(gem_engine_can_block_copy(i915, e),
> > > + "ctx dont have Blt engine");
> > > + break;
> > > + }
> > > +
> > > + memset(&blt, 0, sizeof(blt));
> > > + blt.color_depth = CD_32bit;
> > > +
> > > + memcpy(&blt.src, obj->blt_obj, sizeof(blt.src));
> > > + memcpy(&blt.dst, tmp, sizeof(blt.dst));
> > > + memcpy(&blt.bb, cmd, sizeof(blt.bb));
> > > +
> > > + blt.dst.x2 = min(obj->blt_obj->x2, tmp->x2);
> > > + blt.dst.y2 = min(obj->blt_obj->y2, tmp->y2);
> > > +
> > > + set_object_ext(&ext.src, 0, obj->blt_obj->x2, obj->blt_obj->y2,
> > > + SURFACE_TYPE_2D);
> > > + set_object_ext(&ext.dst, 0, tmp->x2, tmp->y2, SURFACE_TYPE_2D);
> > > + blt_block_copy(i915, ctx, e, ahnd, &blt, pext);
> > > +
> > > + buf = gem_mmap__device_coherent(i915, tmp->handle, 0,
> > > + obj->size, PROT_READ);
> > > + gem_set_domain(i915, tmp->handle, I915_GEM_DOMAIN_WC, 0);
> > > +
> > > + for (j = 0; j < obj->size / PAGE_SIZE; j++) {
> > > + unsigned long x = (j * PAGE_SIZE + rand() % PAGE_SIZE) / sizeof(*buf);
> > > +
> > > + val = obj->seed + x;
> > > + igt_assert_f(buf[x] == val,
> > > + "Object mismatch at offset %lu - found %lx, expected %lx, difference:%lx!\n",
> > > + x * sizeof(*buf), buf[x], val, buf[x] ^ val);
> > > + }
> > > +
> > > + munmap(buf, obj->size);
> > > +}
> > > +
> > > static void move_to_lmem(int i915,
> > > struct object *list,
> > > unsigned int num,
> > > @@ -160,31 +298,62 @@ static void __do_evict(int i915,
> > > struct params *params,
> > > unsigned int seed)
> > > {
> > > + uint32_t region_id = INTEL_MEMORY_REGION_ID(region->memory_class,
> > > + region->memory_instance);
> > > const unsigned int max_swap_in = params->count / 100 + 1;
> > > const uint32_t bbe = MI_BATCH_BUFFER_END;
> > > struct object *objects, *obj, *list;
> > > - uint32_t batch;
> > > + const uint32_t bpp = 32;
> > > + uint32_t batch, width, height, stride;
> > > + const intel_ctx_t *blt_ctx;
> > > + struct blt_copy_object *tmp;
> > > + struct blt_copy_batch *cmd;
> > > unsigned int engine = 0;
> > > unsigned int i, l;
> > > - uint64_t size;
> > > + uint64_t size, ahnd;
> > > struct timespec t = {};
> > > unsigned int num;
> > >
> > > + width = PAGE_SIZE / (bpp / 8);
> > > + height = params->size.max / (bpp / 8) / width;
> > > + stride = width * 4;
> > > +
> > > + tmp = calloc(1, sizeof(*tmp));
> > > + cmd = calloc(1, sizeof(*cmd));
> > > +
> > > __gem_context_set_persistence(i915, 0, false);
> > > size = 4096;
> > > batch = create_bo(i915, size, region, params->oom_test);
> > > -
> > > gem_write(i915, batch, 0, &bbe, sizeof(bbe));
> > >
> > > + if (params->flags & TEST_CCS)
> > > + blt_ctx = intel_ctx_create_for_engine(i915,
> > > + I915_ENGINE_CLASS_COPY,
> > > + 0);
> > > +
> > > objects = calloc(params->count, sizeof(*objects));
> > > igt_assert(objects);
> > >
> > > list = calloc(max_swap_in, sizeof(*list));
> > > igt_assert(list);
> > >
> > > + ahnd = intel_allocator_open_full(i915, blt_ctx->id, 0, 0,
> > > + INTEL_ALLOCATOR_SIMPLE,
> > > + ALLOC_STRATEGY_LOW_TO_HIGH, 0);
> > > srand(seed);
> > >
> > > /* Create the initial working set of objects. */
> > > + if (params->flags & TEST_CCS) {
> > > + tmp->handle = gem_create_in_memory_regions(i915, params->size.max,
> > > + INTEL_MEMORY_REGION_ID(I915_SYSTEM_MEMORY, 0));
> > > + set_object(tmp, tmp->handle, params->size.max,
> > > + INTEL_MEMORY_REGION_ID(I915_SYSTEM_MEMORY, 0), DG2_MOCS,
> > > + T_LINEAR, COMPRESSION_DISABLED, COMPRESSION_TYPE_3D);
> > > + set_geom(tmp, stride, 0, 0, width, height, 0, 0);
> > > + cmd->handle = gem_create_in_memory_regions(i915, BATCH_SIZE, region_id);
> >
> > I would use bb pool to avoid stalls.
> Maybe I am missing something. Since this is sequential, if we sync
> between init, verify and the subsequent init, we don't need a bb pool.
>
> If we need two different bbs in a loop for different independent tasks,
> then a bb pool might help us go ahead without a sync in between.
>
> Sorry if I missed what you are suggesting. Please help me to understand.
>
> Ram.
> >
> >
> > > + set_batch(cmd, cmd->handle, BATCH_SIZE, region_id);
> > > + }
> > > +
> > > size = 0;
> > > for (i = 0, obj = objects; i < params->count; i++, obj++) {
> > > if (params->flags & TEST_RANDOM)
> > > @@ -194,6 +363,7 @@ static void __do_evict(int i915,
> > > else
> > > obj->size = params->size.min;
> > >
> > > + obj->size = ALIGN(obj->size, 4096);
> > > size += obj->size;
> > > if ((size >> 20) > params->mem_limit) {
> > > params->count = i;
> > > @@ -201,10 +371,26 @@ static void __do_evict(int i915,
> > > }
> > > obj->handle = create_bo(i915, obj->size, region, params->oom_test);
> > >
> > > - move_to_lmem(i915, objects + i, 1, batch, engine,
> > > - params->oom_test);
> > > - if (params->flags & TEST_VERIFY)
> > > + if (params->flags & TEST_CCS) {
> > > + width = PAGE_SIZE / (bpp / 8);
> > > + height = obj->size / (bpp / 8) / width;
> > > + stride = width * 4;
> > > +
> > > + obj->blt_obj = calloc(1, sizeof(*obj->blt_obj));
> > > + set_object(obj->blt_obj, obj->handle, obj->size, region_id,
> > > + DG2_MOCS, T_LINEAR, COMPRESSION_ENABLED,
> > > + COMPRESSION_TYPE_3D);
> > > + set_geom(obj->blt_obj, stride, 0, 0, width, height, 0, 0);
> >
> > Especially here and sync after last blit.
> >
> > > + init_object_ccs(i915, obj, tmp, cmd, rand(), blt_ctx,
> > > + region_id, ahnd);
> > > + } else if (params->flags & TEST_VERIFY) {
> > > init_object(i915, obj, rand(), params->flags);
> > > + move_to_lmem(i915, objects + i, 1, batch, engine,
> > > + params->oom_test);
> > > + } else {
> > > + move_to_lmem(i915, objects + i, 1, batch, engine,
> > > + params->oom_test);
> > > + }
> > > }
> > >
> > > igt_debug("obj size min/max=%lu %s/%lu %s, count=%u, seed: %u\n",
> > > @@ -231,7 +417,15 @@ static void __do_evict(int i915,
> > > if (params->flags & TEST_ENGINES)
> > > engine = (engine + 1) % __num_engines__;
> > >
> > > - if (params->flags & TEST_VERIFY) {
> > > + if (params->flags & TEST_CCS) {
> > > + for (i = 0; i < num; i++)
> > > + verify_object_ccs(i915, &list[i], tmp, cmd,
> > > + blt_ctx, region_id, ahnd);
> >
> > This one syncs internally in gem_set_domain so single bb is ok.
> >
> > > + /* Update random object - may swap it back in. */
> > > + i = rand() % params->count;
> > > + init_object_ccs(i915, &objects[i], tmp, cmd, rand(),
> > > + blt_ctx, region_id, ahnd);
> >
> > But here also I would use bb pool with sync after last one.
> >
> > Other things looks ok imo.
> >
> > --
> > Zbigniew
> >
> > > + } else if (params->flags & TEST_VERIFY) {
> > > for (i = 0; i < num; i++)
> > > verify_object(i915, &list[i], params->flags);
> > >
> > > @@ -241,11 +435,17 @@ static void __do_evict(int i915,
> > > }
> > > }
> > >
> > > + put_ahnd(ahnd);
> > > for (i = 0; i < params->count; i++)
> > > gem_close(i915, objects[i].handle);
> > >
> > > free(list);
> > > free(objects);
> > > + if (params->flags & TEST_CCS) {
> > > + gem_close(i915, cmd->handle);
> > > + gem_close(i915, tmp->handle);
> > > + gem_context_destroy(i915, blt_ctx->id);
> > > + }
> > >
> > > gem_close(i915, batch);
> > > }
> > > @@ -348,6 +548,9 @@ static void test_evict(int i915,
> > > const unsigned int nproc = sysconf(_SC_NPROCESSORS_ONLN) + 1;
> > > struct params params;
> > >
> > > + if (flags & TEST_CCS)
> > > + igt_require(IS_DG2(intel_get_drm_devid(i915)));
> > > +
> > > fill_params(i915, ¶ms, region, flags, nproc, false);
> > >
> > > if (flags & TEST_PARALLEL) {
> > > @@ -511,6 +714,11 @@ igt_main_args("", long_options, help_str, opt_handler, NULL)
> > > { "parallel-random-engines", TEST_PARALLEL | TEST_RANDOM | TEST_ENGINES },
> > > { "parallel-random-verify", TEST_PARALLEL | TEST_RANDOM | TEST_VERIFY },
> > > { "parallel-multi", TEST_PARALLEL | TEST_RANDOM | TEST_VERIFY | TEST_ENGINES | TEST_MULTI },
> > > + { "verify-ccs", TEST_CCS },
> > > + { "verify-random-ccs", TEST_CCS | TEST_RANDOM },
> > > + { "heavy-verify-random-ccs", TEST_CCS | TEST_RANDOM | TEST_HEAVY },
> > > + { "heavy-verify-multi-ccs", TEST_CCS | TEST_RANDOM | TEST_HEAVY | TEST_ENGINES | TEST_MULTI },
> > > + { "parallel-random-verify-ccs", TEST_PARALLEL | TEST_RANDOM | TEST_CCS },
> > > { }
> > > };
> > > int i915 = -1;
> > > --
> > > 2.20.1
> > >
More information about the igt-dev
mailing list