[Intel-gfx] [PATCH] libdrm/intel: execbuf2 support
Eric Anholt
eric at anholt.net
Sun Aug 9 09:14:44 CEST 2009
On Tue, 2009-07-14 at 13:51 -0700, Jesse Barnes wrote:
> This patch to libdrm adds support for the new execbuf2 ioctl. If the
> kernel supports it, it will be used instead of the old ioctl. To make
> using the new code easier, this patch also adds a new tiled allocation
> function, drm_intel_bo_alloc_tiled, which hides the stride and size
> restrictions open-coded in current tiling-aware code and so should
> make tiling easier to use.
>
> Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>
>
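To make the intent concrete, a caller would use the new entry point
roughly like this (a sketch only; width, height and bufmgr stand in for
whatever the caller has, with cpp of 4 for a 32bpp surface):

	uint32_t tiling = I915_TILING_X;
	unsigned long pitch;
	drm_intel_bo *bo;

	bo = drm_intel_bo_alloc_tiled(bufmgr, "scanout", width, height, 4,
				      &tiling, &pitch, BO_ALLOC_FOR_RENDER);
	if (bo != NULL && tiling != I915_TILING_X) {
		/* the tiling request was demoted; tiling and pitch
		 * reflect what was actually allocated */
	}

Both tiling and pitch have to be checked on return, since either may be
adjusted to satisfy hardware restrictions.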
> diff --git a/libdrm/intel/intel_bufmgr.c b/libdrm/intel/intel_bufmgr.c
> index f170e7f..ec32993 100644
> --- a/libdrm/intel/intel_bufmgr.c
> +++ b/libdrm/intel/intel_bufmgr.c
> @@ -45,6 +45,17 @@
> */
>
> drm_intel_bo *
> +drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> + int x, int y, int cpp, uint32_t *tiling_mode,
> + unsigned long *pitch, unsigned long flags)
> +{
> + if (!bufmgr->bo_alloc_tiled)
> + return bufmgr->bo_alloc(bufmgr, name, x * y * cpp, 0);
> + return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, tiling_mode, pitch,
> + flags);
> +}
> +
> +drm_intel_bo *
> drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
> unsigned long size, unsigned int alignment)
> {
> @@ -174,6 +185,17 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> read_domains, write_domain);
> }
>
> +/* For fence registers, not GL fences */
> +int
> +drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo, uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain)
> +{
> + return bo->bufmgr->bo_emit_reloc_fence(bo, offset,
> + target_bo, target_offset,
> + read_domains, write_domain);
> +}
> +
> int
> drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment)
> {
> diff --git a/libdrm/intel/intel_bufmgr.h b/libdrm/intel/intel_bufmgr.h
> index 758558d..5459d13 100644
> --- a/libdrm/intel/intel_bufmgr.h
> +++ b/libdrm/intel/intel_bufmgr.h
> @@ -73,6 +73,14 @@ struct _drm_intel_bo {
> int handle;
> };
>
> +#define BO_ALLOC_FOR_RENDER (1<<0)
> +
> +drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
> + const char *name,
> + int x, int y, int cpp,
> + uint32_t *tiling_mode,
> + unsigned long *pitch,
> + unsigned long flags);
> drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
> unsigned long size, unsigned int alignment);
> drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
> @@ -100,6 +108,10 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo **bo_array, int count);
> int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> drm_intel_bo *target_bo, uint32_t target_offset,
> uint32_t read_domains, uint32_t write_domain);
> +int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo,
> + uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain);
> int drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment);
> int drm_intel_bo_unpin(drm_intel_bo *bo);
> int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
> diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
> index 737ceae..65c84bc 100644
> --- a/libdrm/intel/intel_bufmgr_gem.c
> +++ b/libdrm/intel/intel_bufmgr_gem.c
> @@ -95,6 +95,7 @@ typedef struct _drm_intel_bufmgr_gem {
> pthread_mutex_t lock;
>
> struct drm_i915_gem_exec_object *exec_objects;
> + struct drm_i915_gem_exec_object2 *exec2_objects;
> drm_intel_bo **exec_bos;
> int exec_size;
> int exec_count;
> @@ -107,6 +108,13 @@ typedef struct _drm_intel_bufmgr_gem {
> int pci_device;
> } drm_intel_bufmgr_gem;
>
> +#define DRM_INTEL_RELOC_FENCE (1<<0)
> +
> +typedef struct _drm_intel_reloc_target_info {
> + drm_intel_bo *bo;
> + int flags;
> +} drm_intel_reloc_target;
> +
> struct _drm_intel_bo_gem {
> drm_intel_bo bo;
>
> @@ -143,8 +151,8 @@ struct _drm_intel_bo_gem {
>
> /** Array passed to the DRM containing relocation information. */
> struct drm_i915_gem_relocation_entry *relocs;
> - /** Array of bos corresponding to relocs[i].target_handle */
> - drm_intel_bo **reloc_target_bo;
> +	/** Array of info structs corresponding to relocs[i].target_handle, etc. */
> + drm_intel_reloc_target *reloc_target_info;
> /** Number of entries in relocs */
> int reloc_count;
> /** Mapped address for the buffer, saved across map/unmap cycles */
> @@ -206,6 +214,71 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
> static void
> drm_intel_gem_bo_unreference(drm_intel_bo *bo);
>
> +#define ROUND_UP_TO(x, y) (((x) + (y) - 1) / (y) * (y))
> +#define ROUND_UP_TO_MB(x) ROUND_UP_TO((x), 1024*1024)
> +
> +/* Round a given size up to the nearest tileable size for the object, taking
> + * fence register mapping into account */
> +static unsigned long
> +drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
> + uint32_t *tiling_mode)
> +{
> + unsigned long min_size, max_size;
> + unsigned long i;
> +
> + if (*tiling_mode == I915_TILING_NONE)
> + return size;
> +
> + /* 965+ just need multiples of page size for tiling */
> + if (IS_I965G(bufmgr_gem))
> + return ROUND_UP_TO(size, 4096);
> +
> +	/* Older chips need power-of-two fence sizes: at least 1MB
> +	 * on 9xx, 512KB before that */
> + if (IS_I9XX(bufmgr_gem)) {
> + min_size = 1024*1024;
> + max_size = 128*1024*1024;
> + } else {
> + min_size = 512*1024;
> + max_size = 64*1024*1024;
> + }
> +
> + if (size > max_size) {
> + *tiling_mode = I915_TILING_NONE;
> + return size;
> + }
> +
> + for (i = min_size; i < size; i <<= 1)
> + ;
> +
> + return i;
> +}
> +
> +/*
> + * Round a given pitch up to the minimum required for X tiling on a
> + * given chip. We use 512 as the minimum to allow for a later tiling
> + * change.
> + */
> +static unsigned long
> +drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
> + unsigned long pitch, uint32_t tiling_mode)
> +{
> + unsigned long tile_width = 512;
> + unsigned long i;
> +
> + if (tiling_mode == I915_TILING_NONE)
> + return ROUND_UP_TO(pitch, tile_width);
> +
> + /* 965 is flexible */
> + if (IS_I965G(bufmgr_gem))
> + return ROUND_UP_TO(pitch, tile_width);
> +
> + /* Pre-965 needs power of two tile width */
> + for (i = tile_width; i < pitch; i <<= 1)
> + ;
> +
> + return i;
> +}
> +
> static struct drm_intel_gem_bo_bucket *
> drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
> unsigned long size)
> @@ -236,7 +309,7 @@ static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
> }
>
> for (j = 0; j < bo_gem->reloc_count; j++) {
> - drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
> + drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
> drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;
>
> DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
> @@ -296,6 +369,49 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
> bufmgr_gem->exec_count++;
> }
>
> +static void
> +drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> + int index;
> +
> + if (bo_gem->validate_index != -1)
> + return;
> +
> + /* Extend the array of validation entries as necessary. */
> + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
> + int new_size = bufmgr_gem->exec_size * 2;
> +
> + if (new_size == 0)
> + new_size = 5;
> +
> + bufmgr_gem->exec2_objects =
> + realloc(bufmgr_gem->exec2_objects,
> + sizeof(*bufmgr_gem->exec2_objects) * new_size);
> + bufmgr_gem->exec_bos =
> + realloc(bufmgr_gem->exec_bos,
> + sizeof(*bufmgr_gem->exec_bos) * new_size);
> + bufmgr_gem->exec_size = new_size;
> + }
> +
> + index = bufmgr_gem->exec_count;
> + bo_gem->validate_index = index;
> + /* Fill in array entry */
> + bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
> + bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
> + bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
> + bufmgr_gem->exec2_objects[index].alignment = 0;
> + bufmgr_gem->exec2_objects[index].offset = 0;
> + bufmgr_gem->exec_bos[index] = bo;
> + bufmgr_gem->exec2_objects[index].flags = 0;
> + bufmgr_gem->exec2_objects[index].rsvd1 = 0;
> + bufmgr_gem->exec2_objects[index].rsvd2 = 0;
> + if (need_fence)
> + bufmgr_gem->exec2_objects[index].flags |= EXEC_OBJECT_NEEDS_FENCE;
> + drm_intel_gem_bo_reference_locked(bo);
> + bufmgr_gem->exec_count++;
> +}
>
> #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
> sizeof(uint32_t))
> @@ -308,25 +424,27 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo)
>
> bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
> sizeof(struct drm_i915_gem_relocation_entry));
> - bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
> - sizeof(drm_intel_bo *));
> +	bo_gem->reloc_target_info = malloc(bufmgr_gem->max_relocs *
> +					   sizeof(drm_intel_reloc_target));
>
> return 0;
> }
>
> static drm_intel_bo *
> drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
> - unsigned long size, unsigned int alignment,
> - int for_render)
> + unsigned long size, unsigned long flags)
> {
> drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> drm_intel_bo_gem *bo_gem;
> unsigned int page_size = getpagesize();
> - int ret;
> + int ret, for_render = 0;
> struct drm_intel_gem_bo_bucket *bucket;
> int alloc_from_cache = 0;
> unsigned long bo_size;
>
> + if (flags & BO_ALLOC_FOR_RENDER)
> + for_render = 1;
> +
> /* Round the allocated size up to a power of two number of pages. */
> bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
>
> @@ -405,9 +523,9 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
> bo_gem->reloc_tree_size = bo_gem->bo.size;
> bo_gem->reloc_tree_fences = 0;
> bo_gem->used_as_reloc_target = 0;
> + bo_gem->reusable = 1;
> bo_gem->tiling_mode = I915_TILING_NONE;
> bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
> - bo_gem->reusable = 1;
>
> DBG("bo_create: buf %d (%s) %ldb\n",
> bo_gem->gem_handle, bo_gem->name, size);
> @@ -419,14 +537,45 @@ static drm_intel_bo *
> drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
> unsigned long size, unsigned int alignment)
> {
> - return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
> + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
> + BO_ALLOC_FOR_RENDER);
> }
>
> static drm_intel_bo *
> drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
> unsigned long size, unsigned int alignment)
> {
> - return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
> + return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
> +}
> +
> +static drm_intel_bo *
> +drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
> + int x, int y, int cpp, uint32_t *tiling_mode,
> + unsigned long *pitch, unsigned long flags)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
> + drm_intel_bo *bo;
> + unsigned long size, stride;
> + int ret;
> +
> + stride = x * cpp;
> + stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode);
> + size = stride * y;
This needs the same y-alignment treatment as my commits on Friday: the
height should be rounded up to the tile height before multiplying by
the stride, roughly as in the sketch below.
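Something along these lines, assuming the usual tile heights (2 rows
for untiled, 8 for X tiling, 32 for Y; 965 only needs the page-size
rounding that drm_intel_gem_bo_tile_size already does):

	unsigned long height_align = 2;

	if (*tiling_mode == I915_TILING_X)
		height_align = 8;
	else if (*tiling_mode == I915_TILING_Y)
		height_align = 32;
	size = stride * ROUND_UP_TO((unsigned long)y, height_align);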
> + size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
> +
> + bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
> + if (!bo)
> + return NULL;
> +
> + ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
> + if (ret != 0) {
> + drm_intel_gem_bo_unreference(bo);
> + return NULL;
> + }
> +
> + *pitch = stride;
> +
> + return bo;
> }
>
> /**
> @@ -478,10 +627,6 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
> }
> bo_gem->tiling_mode = get_tiling.tiling_mode;
> bo_gem->swizzle_mode = get_tiling.swizzle_mode;
> - if (bo_gem->tiling_mode == I915_TILING_NONE)
> - bo_gem->reloc_tree_fences = 0;
> - else
> - bo_gem->reloc_tree_fences = 1;
>
> DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
>
> @@ -574,8 +719,8 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
>
> /* Unreference all the target buffers */
> for (i = 0; i < bo_gem->reloc_count; i++)
> - drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
> - free(bo_gem->reloc_target_bo);
> + drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_info[i].bo);
> + free(bo_gem->reloc_target_info);
> free(bo_gem->relocs);
> }
>
> @@ -600,7 +745,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
> bo_gem->name = NULL;
> bo_gem->validate_index = -1;
> bo_gem->relocs = NULL;
> - bo_gem->reloc_target_bo = NULL;
> + bo_gem->reloc_target_info = NULL;
> bo_gem->reloc_count = 0;
>
> DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
> @@ -919,6 +1064,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
> int i;
>
> free(bufmgr_gem->exec_objects);
> + free(bufmgr_gem->exec2_objects);
> free(bufmgr_gem->exec_bos);
>
> pthread_mutex_destroy(&bufmgr_gem->lock);
> @@ -950,9 +1096,9 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
> * last known offset in target_bo.
> */
> static int
> -drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> - drm_intel_bo *target_bo, uint32_t target_offset,
> - uint32_t read_domains, uint32_t write_domain)
> +do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo, uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain, int need_fence)
> {
> drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> @@ -976,7 +1122,8 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> */
> assert(!bo_gem->used_as_reloc_target);
> bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
> - bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
> + if (need_fence)
> + bo_gem->reloc_tree_fences++;
With this and other changes in the patch, it looks like
reloc_tree_fences stops actually counting the number of fences required
in the reloc tree rooted at bo_gem.
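If the per-tree bound is still wanted, one way to keep the invariant
(just a sketch; whether to keep both terms is your call) would be:

	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
	if (need_fence)
		bo_gem->reloc_tree_fences++;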
> /* Flag the target to disallow further relocations in it. */
> target_bo_gem->used_as_reloc_target = 1;
> @@ -989,7 +1136,12 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
> bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
>
> - bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
> + bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
> + if (need_fence)
> + bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
> + DRM_INTEL_RELOC_FENCE;
> + else
> + bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
> drm_intel_gem_bo_reference_locked(target_bo);
>
> bo_gem->reloc_count++;
> @@ -999,6 +1151,24 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> return 0;
> }
>
> +static int
> +drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo, uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain)
> +{
> + return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
> + read_domains, write_domain, 0);
> +}
So, a new drm plus old userspace code assumes that the old code doesn't
want fences on its objects? That seems broken -- reloc_tree_fences is
then never counted for old code. A simple boolean flag the user sets,
saying "I understand the requirement to use emit_reloc_fence", to
enable the new behavior seems like it would work fine, though.
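The opt-in could be as small as this (the function name and the
fenced_relocs field are hypothetical here):

	void
	drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
	{
		drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;

		/* Fenced relocations only mean anything on the exec2 path */
		if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
			bufmgr_gem->fenced_relocs = 1;
	}

with do_bo_emit_reloc treating any tiled target as needing a fence
whenever the flag is unset, so old callers keep the old accounting.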
> +
> +static int
> +drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo,
> + uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain)
> +{
> + return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
> + read_domains, write_domain, 1);
> +}
> /**
> * Walk the tree of relocations rooted at BO and accumulate the list of
> * validations to be performed and update the relocation buffers with
> @@ -1014,7 +1184,7 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
> return;
>
> for (i = 0; i < bo_gem->reloc_count; i++) {
> - drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];
> + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
>
> /* Continue walking the tree depth-first. */
> drm_intel_gem_bo_process_reloc(target_bo);
> @@ -1025,6 +1195,29 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
> }
>
> static void
> +drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
> +{
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> + int i;
> +
> + if (bo_gem->relocs == NULL)
> + return;
> +
> + for (i = 0; i < bo_gem->reloc_count; i++) {
> + drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
> + int need_fence;
> +
> + /* Continue walking the tree depth-first. */
> +		drm_intel_gem_bo_process_reloc2(target_bo);
> +
> + need_fence = bo_gem->reloc_target_info[i].flags & DRM_INTEL_RELOC_FENCE;
> +
> + /* Add the target to the validate list */
> + drm_intel_add_validate_buffer2(target_bo, need_fence);
> + }
> +}
> +
> +static void
> drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
> {
> int i;
> @@ -1043,6 +1236,25 @@ drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
> }
> }
>
> +static void
> +drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
> +{
> + int i;
> +
> + for (i = 0; i < bufmgr_gem->exec_count; i++) {
> + drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +
> + /* Update the buffer offset */
> + if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
> + DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
> + bo_gem->gem_handle, bo_gem->name, bo->offset,
> + (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
> + bo->offset = bufmgr_gem->exec2_objects[i].offset;
> + }
> + }
> +}
> +
> static int
> drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
> drm_clip_rect_t *cliprects, int num_cliprects,
> @@ -1106,6 +1318,71 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
> }
>
> static int
> +drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
> + drm_clip_rect_t *cliprects, int num_cliprects,
> + int DR4)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> + struct drm_i915_gem_execbuffer2 execbuf;
> + int ret, i;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> + /* Update indices and set up the validate list. */
> + drm_intel_gem_bo_process_reloc2(bo);
> +
> + /* Add the batch buffer to the validation list. There are no relocations
> + * pointing to it.
> + */
> + drm_intel_add_validate_buffer2(bo, 0);
> +
> + execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
> + execbuf.buffer_count = bufmgr_gem->exec_count;
> + execbuf.batch_start_offset = 0;
> + execbuf.batch_len = used;
> + execbuf.cliprects_ptr = (uintptr_t)cliprects;
> + execbuf.num_cliprects = num_cliprects;
> + execbuf.DR1 = 0;
> + execbuf.DR4 = DR4;
> + execbuf.flags = 0;
> + execbuf.rsvd1 = 0;
> + execbuf.rsvd2 = 0;
> +
> + do {
> + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
> + } while (ret != 0 && errno == EAGAIN);
> +
> + if (ret != 0 && errno == ENOMEM) {
> + fprintf(stderr, "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
> + drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
> + bufmgr_gem->exec_count),
> + drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
> + bufmgr_gem->exec_count),
> + (unsigned int) bufmgr_gem->gtt_size);
> + }
> + drm_intel_update_buffer_offsets2 (bufmgr_gem);
> +
> + if (bufmgr_gem->bufmgr.debug)
> + drm_intel_gem_dump_validation_list(bufmgr_gem);
> +
> + for (i = 0; i < bufmgr_gem->exec_count; i++) {
> + drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
> +
> + /* Need to call swrast on next bo_map */
> + bo_gem->swrast = 0;
> +
> + /* Disconnect the buffer from the validate list */
> + bo_gem->validate_index = -1;
> + drm_intel_gem_bo_unreference_locked(bo);
> + bufmgr_gem->exec_bos[i] = NULL;
> + }
> + bufmgr_gem->exec_count = 0;
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return 0;
> +}
> +
> +static int
> drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
> {
> drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
> @@ -1158,10 +1435,6 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
> if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
> return 0;
>
> - /* If we're going from non-tiling to tiling, bump fence count */
> - if (bo_gem->tiling_mode == I915_TILING_NONE)
> - bo_gem->reloc_tree_fences++;
> -
> memset(&set_tiling, 0, sizeof(set_tiling));
> set_tiling.handle = bo_gem->gem_handle;
> set_tiling.tiling_mode = *tiling_mode;
> @@ -1175,10 +1448,6 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
> bo_gem->tiling_mode = set_tiling.tiling_mode;
> bo_gem->swizzle_mode = set_tiling.swizzle_mode;
>
> - /* If we're going from tiling to non-tiling, drop fence count */
> - if (bo_gem->tiling_mode == I915_TILING_NONE)
> - bo_gem->reloc_tree_fences--;
> -
> *tiling_mode = bo_gem->tiling_mode;
> return 0;
> }
> @@ -1253,7 +1522,7 @@ drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
> bo_gem->included_in_check_aperture = 1;
>
> for (i = 0; i < bo_gem->reloc_count; i++)
> - total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);
> + total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_info[i].bo);
>
> return total;
> }
> @@ -1299,7 +1568,7 @@ drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
> bo_gem->included_in_check_aperture = 0;
>
> for (i = 0; i < bo_gem->reloc_count; i++)
> - drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
> + drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_info[i].bo);
> }
>
> /**
> @@ -1426,6 +1695,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> drm_i915_getparam_t gp;
> int ret, i;
> unsigned long size;
> + int exec2 = 0;
>
> bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
> bufmgr_gem->fd = fd;
> @@ -1467,6 +1737,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> }
> }
>
> + gp.param = I915_PARAM_HAS_EXECBUF2;
> + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> + if (!ret)
> + exec2 = 1;
> +
> /* Let's go with one relocation per every 2 dwords (but round down a bit
> * since a power of two will mean an extra page allocation for the reloc
> * buffer).
> @@ -1475,6 +1750,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> */
> bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
>
> + bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
> bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
> bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
> bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
> @@ -1485,12 +1761,17 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
> bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
> bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
> + bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
> bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
> bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
> bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
> bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
> bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
> bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
> + /* Use the new one if available */
> + if (exec2)
> + bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
> +
> bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
> bufmgr_gem->bufmgr.debug = 0;
> bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
> diff --git a/libdrm/intel/intel_bufmgr_priv.h b/libdrm/intel/intel_bufmgr_priv.h
> index 0098076..d832882 100644
> --- a/libdrm/intel/intel_bufmgr_priv.h
> +++ b/libdrm/intel/intel_bufmgr_priv.h
> @@ -41,6 +41,25 @@
> */
> struct _drm_intel_bufmgr {
> /**
> + * Allocate a tiled buffer object.
> + *
> + * Alignment for tiled objects is set automatically; the 'flags'
> + * argument provides a hint about how the object will be used initially.
> + *
> + * Valid tiling formats are:
> + * I915_TILING_NONE
> + * I915_TILING_X
> + * I915_TILING_Y
> + *
> + * Note the tiling format may be rejected; callers should check the
> + * 'tiling_mode' field on return, as well as the pitch value, which
> + * may have been rounded up to accommodate tiling restrictions.
> + */
> + drm_intel_bo *(*bo_alloc_tiled)(drm_intel_bufmgr *bufmgr, const char *name,
> + int x, int y, int cpp, uint32_t *tiling_mode,
> + unsigned long *pitch, unsigned long flags);
> +
> + /**
> * Allocate a buffer object.
> *
> * Buffer objects are not necessarily initially mapped into CPU virtual
> @@ -133,6 +152,9 @@ struct _drm_intel_bufmgr {
> int (*bo_emit_reloc)(drm_intel_bo *bo, uint32_t offset,
> drm_intel_bo *target_bo, uint32_t target_offset,
> uint32_t read_domains, uint32_t write_domain);
> + int (*bo_emit_reloc_fence)(drm_intel_bo *bo, uint32_t offset,
> + drm_intel_bo *target_bo, uint32_t target_offset,
> + uint32_t read_domains, uint32_t write_domain);
>
> /** Executes the command buffer pointed to by bo. */
> int (*bo_exec)(drm_intel_bo *bo, int used,
> diff --git a/shared-core/i915_drm.h b/shared-core/i915_drm.h
> index 2539966..3ee768c 100644
> --- a/shared-core/i915_drm.h
> +++ b/shared-core/i915_drm.h
> @@ -206,6 +206,7 @@ typedef struct drm_i915_sarea {
> #define DRM_I915_GEM_GET_APERTURE 0x23
> #define DRM_I915_GEM_MMAP_GTT 0x24
> #define DRM_I915_GET_PIPE_FROM_CRTC_ID 0x25
> +#define DRM_I915_GEM_EXECBUFFER2 0x26
>
> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -227,6 +228,7 @@ typedef struct drm_i915_sarea {
> #define DRM_IOCTL_I915_EXECBUFFER DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_EXECBUFFER, struct drm_i915_execbuffer)
> #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
> #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
> +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
> #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
> #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
> #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
> @@ -299,6 +301,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_CHIPSET_ID 4
> #define I915_PARAM_HAS_GEM 5
> #define I915_PARAM_NUM_FENCES_AVAIL 6
> +#define I915_PARAM_HAS_EXECBUF2 7
>
> typedef struct drm_i915_getparam {
> int param;
> @@ -622,6 +625,57 @@ struct drm_i915_gem_execbuffer {
> uint64_t cliprects_ptr;
> };
>
> +struct drm_i915_gem_exec_object2 {
> + /**
> + * User's handle for a buffer to be bound into the GTT for this
> + * operation.
> + */
> + uint32_t handle;
> +
> + /** Number of relocations to be performed on this buffer */
> + uint32_t relocation_count;
> + /**
> + * Pointer to array of struct drm_i915_gem_relocation_entry containing
> + * the relocations to be performed in this buffer.
> + */
> + uint64_t relocs_ptr;
> +
> + /** Required alignment in graphics aperture */
> + uint64_t alignment;
> +
> + /**
> + * Returned value of the updated offset of the object, for future
> + * presumed_offset writes.
> + */
> + uint64_t offset;
> +
> +#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
> + uint64_t flags;
> + uint64_t rsvd1;
> + uint64_t rsvd2;
> +};
> +
> +struct drm_i915_gem_execbuffer2 {
> + /**
> + * List of gem_exec_object2 structs
> + */
> + uint64_t buffers_ptr;
> + uint32_t buffer_count;
> +
> + /** Offset in the batchbuffer to start execution from. */
> + uint32_t batch_start_offset;
> + /** Bytes used in batchbuffer from batch_start_offset */
> + uint32_t batch_len;
> + uint32_t DR1;
> + uint32_t DR4;
> + uint32_t num_cliprects;
> + /** This is a struct drm_clip_rect *cliprects */
> + uint64_t cliprects_ptr;
> + uint64_t flags; /* currently unused */
> + uint64_t rsvd1;
> + uint64_t rsvd2;
> +};
> +
> struct drm_i915_gem_pin {
> /** Handle of the buffer to be pinned. */
> uint32_t handle;
--
Eric Anholt
eric at anholt.net eric.anholt at intel.com