[igt-dev] [PATCH i-g-t 2/6] lib/intel_batchbuffer: Introduce intel_bb
Chris Wilson
chris at chris-wilson.co.uk
Tue May 12 09:39:03 UTC 2020
Quoting Zbigniew Kempczyński (2020-05-12 09:23:58)
> Simple batchbuffer facility which gathers and outputs relocations.
>
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> lib/intel_batchbuffer.c | 202 ++++++++++++++++++++++++++++++++++++++++
> lib/intel_batchbuffer.h | 78 ++++++++++++++++
> 2 files changed, 280 insertions(+)
>
> diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
> index f1a45b47..c7f52f01 100644
> --- a/lib/intel_batchbuffer.c
> +++ b/lib/intel_batchbuffer.c
> @@ -41,6 +41,7 @@
> #include "rendercopy.h"
> #include "media_fill.h"
> #include "ioctl_wrappers.h"
> +#include "i915/gem_mman.h"
> #include "media_spin.h"
> #include "gpgpu_fill.h"
> #include "igt_aux.h"
> @@ -1171,3 +1172,204 @@ igt_media_spinfunc_t igt_get_media_spinfunc(int devid)
>
> return spin;
> }
> +
> +/* Intel batchbuffer v2 */
> +struct intel_bb *intel_bb_create(int i915, uint32_t size)
> +{
> + struct intel_bb *ibb = calloc(1, sizeof(*ibb));
> +
> + igt_assert(ibb);
> +
> + ibb->i915 = i915;
> + ibb->devid = intel_get_drm_devid(i915);
> + ibb->gen = intel_gen(ibb->devid);
> + ibb->handle = gem_create(i915, size);
> + ibb->size = size;
> + ibb->batch = gem_mmap__cpu_coherent(i915, ibb->handle, 0, size,
> + PROT_READ | PROT_WRITE);
Does this need to be CPU-coherent? There should be no read-backs, and
emission should be linear.
> + memset(ibb->batch, 0, size);
No need; a newly created object is already zero-filled by ABI.
> + ibb->ptr = ibb->batch;
> + ibb->objects = NULL;
> +
> + return ibb;
> +}
> +
> +void intel_bb_destroy(struct intel_bb *ibb)
> +{
> + uint32_t i;
> +
> + igt_assert(ibb);
> +
> + /* Free relocations */
> + for (i = 0; i < ibb->num_objects; i++) {
> + if ((void *) ibb->objects->relocs_ptr)
void *ptr = from_user_pointer(ibb->objects->relocs_ptr);
> + free((void *) ibb->objects->relocs_ptr);
> + }
> +
> + free(ibb->objects);
> +
> + munmap(ibb->batch, ibb->size);
> + gem_close(ibb->i915, ibb->handle);
> +
> + free(ibb);
> +}
> +
> +void intel_bb_set_debug(struct intel_bb *ibb, bool debug)
> +{
> + ibb->debug = debug;
> +}
> +
> +static void intel_bb_add_handle(struct intel_bb *ibb, uint32_t handle)
> +{
> + uint32_t i;
> +
> + igt_assert(ibb);
> +
> + /* Skip bb as object, it will be added before exec */
> + if (ibb->handle == handle)
> + return;
> +
> + for (i = 0; i < ibb->num_objects; i++)
> + if (ibb->objects[i].handle == handle)
> + return;
> +
> + i = ibb->num_objects;
> + ibb->objects = realloc(ibb->objects,
> + sizeof(*ibb->objects) * (i + 1));
> + igt_assert(ibb->objects);
> +
> + memset(&ibb->objects[i], 0, sizeof(*ibb->objects));
> + ibb->objects[i].handle = handle;
> +
> + ibb->num_objects++;
> +}
> +
> +static void intel_bb_emit_reloc(struct intel_bb *ibb,
> + uint32_t handle,
> + uint32_t read_domains,
> + uint32_t write_domain,
> + uint64_t delta,
> + uint64_t offset,
> + bool use_offset,
> + bool out)
> +
> +{
> + struct drm_i915_gem_relocation_entry *relocs;
> + uint32_t i;
> +
> + intel_bb_add_handle(ibb, handle);
For bonus points, you can use I915_EXEC_HANDLE_LUT.
> + relocs = ibb->relocs;
> + if (ibb->num_relocs == ibb->allocated_relocs) {
> + ibb->allocated_relocs += 4096 / sizeof(*relocs);
> + relocs = realloc(relocs, sizeof(*relocs) * ibb->allocated_relocs);
> + igt_assert(relocs);
> + ibb->relocs = relocs;
> + }
> +
> + i = ibb->num_relocs++;
> + memset(&relocs[i], 0, sizeof(*relocs));
> + relocs[i].target_handle = handle;
> + relocs[i].read_domains = read_domains;
> + relocs[i].write_domain = write_domain;
> + relocs[i].delta = delta;
> + if (use_offset)
> + relocs[i].offset = offset;
> + else
> + relocs[i].offset = intel_bb_offset(ibb);
> +
> + if (out) {
> + intel_bb_out(ibb, delta);
> + if (ibb->gen >= 8)
> + intel_bb_out(ibb, delta >> 32);
> + }
> +}
> +
> +void intel_bb_out_reloc(struct intel_bb *ibb,
> + uint32_t read_domains,
> + uint32_t write_domain,
> + uint64_t delta)
> +
> +{
> + igt_assert(ibb);
> +
> + intel_bb_emit_reloc(ibb, ibb->handle, read_domains, write_domain,
> + delta, intel_bb_offset(ibb),
> + false, true);
> +}
> +
> +void intel_bb_offset_reloc(struct intel_bb *ibb, uint32_t handle,
> + uint32_t offset,
> + uint32_t read_domains,
> + uint32_t write_domain)
> +{
> + igt_assert(ibb);
> +
> + intel_bb_emit_reloc(ibb, handle, read_domains, write_domain,
> + 0, offset, true, false);
> +}
Experience says this is inverted, and what we want is:
intel_bb_emit_reloc() {
address = intel_bb_add_reloc(...);
EMIT(lower_32_bits(address));
if (bb->reloc_64b)
EMIT(upper_32_bits(address));
}
> +
> +static void intel_bb_dump_execbuf(struct drm_i915_gem_execbuffer2 *execbuf)
> +{
> + struct drm_i915_gem_exec_object2 *objects;
> + struct drm_i915_gem_relocation_entry *relocs, *reloc;
> + int i, j;
> +
> + igt_info("execbuf batch len: %u, start offset: 0x%x, "
> + "DR1: 0x%x, DR4: 0x%x, "
> + "num clip: %u, clipptr: 0x%llx, "
> + "flags: 0x%llx, rsvd1: 0x%llx, rsvd2: 0x%llx\n",
> + execbuf->batch_len, execbuf->batch_start_offset,
> + execbuf->DR1, execbuf->DR4,
> + execbuf->num_cliprects, execbuf->cliprects_ptr,
> + execbuf->flags, execbuf->rsvd1, execbuf->rsvd2);
> +
> + igt_info("execbuf buffer_count: %d\n", execbuf->buffer_count);
> + for (i = 0; i < execbuf->buffer_count; i++) {
> + objects = &((struct drm_i915_gem_exec_object2 *) execbuf->buffers_ptr)[i];
> + relocs = (struct drm_i915_gem_relocation_entry *) objects->relocs_ptr;
> + igt_info(" [%d] <handle: %u, reloc_count: %d, reloc_ptr: %p, "
> + "align: %llx, offset: %llx, flags: %llx, rsvd1: %llx, rsvd2: %llx\n",
> + i, objects->handle, objects->relocation_count,
> + (void *) objects->relocs_ptr, objects->alignment, objects->offset,
> + objects->flags, objects->rsvd1, objects->rsvd2);
> + if (objects->relocation_count) {
> + igt_info("execbuf relocs:\n");
> + for (j = 0; j < objects->relocation_count; j++) {
> + reloc = &relocs[j];
> + igt_info(" [%d] <target handle: %u, delta: %x, offset: %llx, "
> + "presumed_offset: %llx, read_domains: %x, write_domain: %x\n",
> + j, reloc->target_handle, reloc->delta, reloc->offset,
> + reloc->presumed_offset, reloc->read_domains, reloc->write_domain);
> + }
> + }
> + }
> +}
> +
> +void intel_bb_exec(struct intel_bb *ibb, uint32_t end_offset)
> +{
> + struct drm_i915_gem_execbuffer2 execbuf;
> + uint32_t i;
> + int64_t timeout = NSEC_PER_SEC / 2;
> +
> + i = ibb->num_objects++;
> + ibb->objects = realloc(ibb->objects, sizeof(*ibb->objects) * (i + 1));
> + igt_assert(ibb->objects);
> +
> + memset(&ibb->objects[i], 0, sizeof(*ibb->objects));
> + ibb->objects[i].relocs_ptr = to_user_pointer(ibb->relocs);
> + ibb->objects[i].relocation_count = ibb->num_relocs;
> + ibb->objects[i].handle = ibb->handle;
> +
> + memset(&execbuf, 0, sizeof(execbuf));
> + execbuf.buffers_ptr = (uintptr_t) ibb->objects;
> + execbuf.buffer_count = ibb->num_objects;
> + execbuf.batch_len = end_offset;
> + execbuf.flags = I915_EXEC_DEFAULT;
You should at least be fulfilling the contract for I915_EXEC_NO_RELOC, or else
something is very wrong with the framework.
> +
> + gem_execbuf(ibb->i915, &execbuf);
> + gem_wait(ibb->i915, ibb->handle, &timeout);
NO!!!!!
-Chris
More information about the igt-dev mailing list