[Intel-gfx] [PATCH i-g-t 2/3] i915/gem_cpu_reloc: Use a self-modifying chained batch

Mika Kuoppala mika.kuoppala at linux.intel.com
Wed Jan 16 14:49:19 UTC 2019


Chris Wilson <chris at chris-wilson.co.uk> writes:

> Quoting Mika Kuoppala (2019-01-16 14:22:59)
>> Chris Wilson <chris at chris-wilson.co.uk> writes:
>> 
>> > Use another sensitive CPU reloc to emit a chained batch from inside the
>> > updated buffer to reduce the workload on slow machines to fit within the
>> > CI timeout.
>> >
>> > References: https://bugs.freedesktop.org/show_bug.cgi?id=108248
>> > Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>> > ---
>> >  tests/i915/gem_cpu_reloc.c | 347 ++++++++++++++++++++-----------------
>> >  1 file changed, 189 insertions(+), 158 deletions(-)
>> >
>> > diff --git a/tests/i915/gem_cpu_reloc.c b/tests/i915/gem_cpu_reloc.c
>> > index 882c312d4..33c4e4e0f 100644
>> > --- a/tests/i915/gem_cpu_reloc.c
>> > +++ b/tests/i915/gem_cpu_reloc.c
>> > @@ -59,214 +59,245 @@
>> >  
>> >  #include "intel_bufmgr.h"
>> >  
>> > -IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>> > +#define MI_INSTR(opcode, flags) ((opcode) << 23 | (flags))
>> >  
>> > -static uint32_t use_blt;
>> > +IGT_TEST_DESCRIPTION("Test the relocations through the CPU domain.");
>> >  
>> > -static void copy(int fd, uint32_t batch, uint32_t src, uint32_t dst)
>> > +static uint32_t *
>> > +gen2_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>> >  {
>> > -     struct drm_i915_gem_execbuffer2 execbuf;
>> > -     struct drm_i915_gem_relocation_entry gem_reloc[2];
>> > -     struct drm_i915_gem_exec_object2 gem_exec[3];
>> > -
>> > -     gem_reloc[0].offset = 4 * sizeof(uint32_t);
>> > -     gem_reloc[0].delta = 0;
>> > -     gem_reloc[0].target_handle = dst;
>> > -     gem_reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>> > -     gem_reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>> > -     gem_reloc[0].presumed_offset = -1;
>> > -
>> > -     gem_reloc[1].offset = 7 * sizeof(uint32_t);
>> > -     if (intel_gen(intel_get_drm_devid(fd)) >= 8)
>> > -             gem_reloc[1].offset += sizeof(uint32_t);
>> > -     gem_reloc[1].delta = 0;
>> > -     gem_reloc[1].target_handle = src;
>> > -     gem_reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>> > -     gem_reloc[1].write_domain = 0;
>> > -     gem_reloc[1].presumed_offset = -1;
>> > -
>> > -     memset(gem_exec, 0, sizeof(gem_exec));
>> > -     gem_exec[0].handle = src;
>> > -     gem_exec[1].handle = dst;
>> > -     gem_exec[2].handle = batch;
>> > -     gem_exec[2].relocation_count = 2;
>> > -     gem_exec[2].relocs_ptr = to_user_pointer(gem_reloc);
>> > -
>> > -     memset(&execbuf, 0, sizeof(execbuf));
>> > -     execbuf.buffers_ptr = to_user_pointer(gem_exec);
>> > -     execbuf.buffer_count = 3;
>> > -     execbuf.batch_len = 4096;
>> > -     execbuf.flags = use_blt;
>> > -
>> > -     gem_execbuf(fd, &execbuf);
>> > +     *cs++ = MI_STORE_DWORD_IMM - 1;
>> > +     addr->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     cs += 1; /* value: implicit 0xffffffff */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen4_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>> > +{
>> > +     *cs++ = MI_STORE_DWORD_IMM;
>> > +     *cs++ = 0;
>> > +     addr->offset += 2 * sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     cs += 1; /* value: implicit 0xffffffff */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen8_emit_store_addr(uint32_t *cs, struct drm_i915_gem_relocation_entry *addr)
>> > +{
>> > +     *cs++ = (MI_STORE_DWORD_IMM | 1 << 21) + 1;
>> > +     addr->offset += sizeof(*cs);
>> > +     igt_assert((addr->delta & 7) == 0);
>> > +     cs += 2; /* addr */
>> > +     cs += 2; /* value: implicit 0xffffffffffffffff */
>> > +     return cs;
>> >  }
>> >  
>> > -static void exec(int fd, uint32_t handle)
>> > +static uint32_t *
>> > +gen2_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> >  {
>> > -     struct drm_i915_gem_execbuffer2 execbuf;
>> > -     struct drm_i915_gem_exec_object2 gem_exec;
>> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6;
>> > +     reloc->offset += sizeof(*cs);
>> > +     reloc->delta += 1;
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen4_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8;
>> > +     reloc->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen6_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     *cs++ = MI_BATCH_BUFFER_START | 1 << 8;
>> > +     reloc->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +hsw_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     *cs++ = MI_BATCH_BUFFER_START | 2 << 6 | 1 << 8 | 1 << 13;
>> > +     reloc->offset += sizeof(*cs);
>> > +     cs += 1; /* addr */
>> > +     return cs;
>> > +}
>> > +static uint32_t *
>> > +gen8_emit_bb_start(uint32_t *cs, struct drm_i915_gem_relocation_entry *reloc)
>> > +{
>> > +     if (((uintptr_t)cs & 7) == 0) {
>> > +             *cs++ = MI_NOOP; /* align addr for MI_STORE_DWORD_IMM */
>> > +             reloc->offset += sizeof(*cs);
>> > +     }
>> 
>> Align it so that after the bb start emitted we are in right alignment?
>> Otherwise it looks it should have '!' in it.
>
> It's so the batch buffer address is aligned to the qword (since we use a
> qword MI_STORE_DATA_IMM which requires destination alignment)
>
> Imagine s/reloc/addr/ might be a better clue.

Ok. Some undefined bits on bb starts. But sometimes magic is warranted,
as in here distinction between trickery and magic is blurred. Alerting
and warming up the reader to avoid sprains.

Reviewed-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>

> -chris


More information about the Intel-gfx mailing list