[Intel-gfx] [PATCH i-g-t 07/15] gem_wsim: Infinite batch support
Chris Wilson
chris at chris-wilson.co.uk
Thu May 23 14:05:05 UTC 2019
Quoting Tvrtko Ursulin (2019-05-22 16:57:12)
> -static void
> +static unsigned int
> terminate_bb(struct w_step *w, unsigned int flags)
> {
> const uint32_t bbe = 0xa << 23;
> unsigned long mmap_start, mmap_len;
> unsigned long batch_start = w->bb_sz;
> + unsigned int r = 0;
> uint32_t *ptr, *cs;
>
> igt_assert(((flags & RT) && (flags & SEQNO)) || !(flags & RT));
> @@ -838,6 +854,9 @@ terminate_bb(struct w_step *w, unsigned int flags)
> if (flags & RT)
> batch_start -= 12 * sizeof(uint32_t);
>
> + if (w->unbound_duration)
> + batch_start -= 4 * sizeof(uint32_t); /* MI_ARB_CHK + MI_BATCH_BUFFER_START */
> +
> mmap_start = rounddown(batch_start, PAGE_SIZE);
> mmap_len = ALIGN(w->bb_sz - mmap_start, PAGE_SIZE);
>
> @@ -847,8 +866,19 @@ terminate_bb(struct w_step *w, unsigned int flags)
> ptr = gem_mmap__wc(fd, w->bb_handle, mmap_start, mmap_len, PROT_WRITE);
> cs = (uint32_t *)((char *)ptr + batch_start - mmap_start);
>
> + if (w->unbound_duration) {
> + w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
> + batch_start += 4 * sizeof(uint32_t);
> +
> + *cs++ = w->preempt_us ? 0x5 << 23 /* MI_ARB_CHK; */ : MI_NOOP;
> + w->recursive_bb_start = cs;
> + *cs++ = MI_BATCH_BUFFER_START | 1 << 8 | 1;
> + *cs++ = 0;
> + *cs++ = 0;
The delta is zero and mmap_len is consistent, so this indeed gives a page of
nops before looping back to the start of the batch.
> + }
> +
> if (flags & SEQNO) {
> - w->reloc[0].offset = batch_start + sizeof(uint32_t);
> + w->reloc[r++].offset = batch_start + sizeof(uint32_t);
> batch_start += 4 * sizeof(uint32_t);
>
> *cs++ = MI_STORE_DWORD_IMM;
> @@ -860,7 +890,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
> }
>
> if (flags & RT) {
> - w->reloc[1].offset = batch_start + sizeof(uint32_t);
> + w->reloc[r++].offset = batch_start + sizeof(uint32_t);
> batch_start += 4 * sizeof(uint32_t);
>
> *cs++ = MI_STORE_DWORD_IMM;
> @@ -870,7 +900,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
> w->rt0_value = cs;
> *cs++ = 0;
>
> - w->reloc[2].offset = batch_start + 2 * sizeof(uint32_t);
> + w->reloc[r++].offset = batch_start + 2 * sizeof(uint32_t);
> batch_start += 4 * sizeof(uint32_t);
>
> *cs++ = 0x24 << 23 | 2; /* MI_STORE_REG_MEM */
> @@ -879,7 +909,7 @@ terminate_bb(struct w_step *w, unsigned int flags)
> *cs++ = 0;
> *cs++ = 0;
>
> - w->reloc[3].offset = batch_start + sizeof(uint32_t);
> + w->reloc[r++].offset = batch_start + sizeof(uint32_t);
> batch_start += 4 * sizeof(uint32_t);
>
> *cs++ = MI_STORE_DWORD_IMM;
> @@ -891,6 +921,8 @@ terminate_bb(struct w_step *w, unsigned int flags)
> }
>
> *cs = bbe;
> +
> + return r;
> }
>
> static const unsigned int eb_engine_map[NUM_ENGINES] = {
> @@ -1011,19 +1043,22 @@ alloc_step_batch(struct workload *wrk, struct w_step *w, unsigned int flags)
> }
> }
>
> - w->bb_sz = get_bb_sz(w->duration.max);
> - w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz);
> + if (w->unbound_duration)
> + /* nops + MI_ARB_CHK + MI_BATCH_BUFFER_START */
> + w->bb_sz = max(PAGE_SIZE, get_bb_sz(w->preempt_us)) +
> + (1 + 3) * sizeof(uint32_t);
> + else
> + w->bb_sz = get_bb_sz(w->duration.max);
> + w->bb_handle = w->obj[j].handle = gem_create(fd, w->bb_sz + (w->unbound_duration ? 4096 : 0));
> init_bb(w, flags);
> - terminate_bb(w, flags);
> + w->obj[j].relocation_count = terminate_bb(w, flags);
>
> - if (flags & SEQNO) {
> + if (w->obj[j].relocation_count) {
> w->obj[j].relocs_ptr = to_user_pointer(&w->reloc);
> - if (flags & RT)
> - w->obj[j].relocation_count = 4;
> - else
> - w->obj[j].relocation_count = 1;
> for (i = 0; i < w->obj[j].relocation_count; i++)
> w->reloc[i].target_handle = 1;
> + if (w->unbound_duration)
> + w->reloc[0].target_handle = j;
> }
That flows much better.
Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris
More information about the Intel-gfx
mailing list