[Intel-gfx] [PATCH] libdrm/intel: execbuf2 support

Jesse Barnes jbarnes at virtuousgeek.org
Tue Jul 14 22:51:53 CEST 2009


This patch to libdrm adds support for the new execbuf2 ioctl.  If the
kernel advertises it (via the new I915_PARAM_HAS_EXECBUF2 getparam), it
will be used instead of the old ioctl.  To make using the new code
easier, this patch also adds a new tiled allocation function.
drm_intel_bo_alloc_tiled hides the stride and size restrictions that
are currently open-coded in tiling-aware callers, and so should make
tiling easier to use.
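
As an example, allocating a tiled buffer with the new entry point might
look like this (just a sketch; 'bufmgr', 'width' and 'height' are
whatever the caller already has on hand, and cpp is 4 for an ARGB
surface):

    uint32_t tiling_mode = I915_TILING_X;
    unsigned long pitch;
    drm_intel_bo *bo;

    bo = drm_intel_bo_alloc_tiled(bufmgr, "scanout", width, height, 4,
				  &tiling_mode, &pitch, 0);
    if (bo == NULL)
	return;

    /* The request may be rejected or adjusted, so use the returned
     * tiling_mode and pitch rather than the values asked for. */
    if (tiling_mode != I915_TILING_X)
	fprintf(stderr, "tiling not available, pitch is %lu\n", pitch);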

Signed-off-by: Jesse Barnes <jbarnes at virtuousgeek.org>

diff --git a/libdrm/intel/intel_bufmgr.c b/libdrm/intel/intel_bufmgr.c
index f170e7f..ec32993 100644
--- a/libdrm/intel/intel_bufmgr.c
+++ b/libdrm/intel/intel_bufmgr.c
@@ -45,6 +45,22 @@
  */
 
 drm_intel_bo *
+drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
+			 int x, int y, int cpp, uint32_t *tiling_mode,
+			 unsigned long *pitch, unsigned long flags)
+{
+   if (!bufmgr->bo_alloc_tiled) {
+      /* No tiled allocator; fall back to a linear buffer and report
+       * that back through tiling_mode and pitch. */
+      *tiling_mode = I915_TILING_NONE;
+      *pitch = x * cpp;
+      return bufmgr->bo_alloc(bufmgr, name, x * y * cpp, 0);
+   }
+   return bufmgr->bo_alloc_tiled(bufmgr, name, x, y, cpp, tiling_mode, pitch,
+				 flags);
+}
+
+drm_intel_bo *
 drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
 		   unsigned long size, unsigned int alignment)
 {
@@ -174,6 +185,17 @@ drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 					 read_domains, write_domain);
 }
 
+/* For fence registers, not GL fences */
+int
+drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
+			      drm_intel_bo *target_bo, uint32_t target_offset,
+			      uint32_t read_domains, uint32_t write_domain)
+{
+	return bo->bufmgr->bo_emit_reloc_fence(bo, offset,
+					       target_bo, target_offset,
+					       read_domains, write_domain);
+}
+
 int
 drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment)
 {
diff --git a/libdrm/intel/intel_bufmgr.h b/libdrm/intel/intel_bufmgr.h
index 758558d..5459d13 100644
--- a/libdrm/intel/intel_bufmgr.h
+++ b/libdrm/intel/intel_bufmgr.h
@@ -73,6 +73,14 @@ struct _drm_intel_bo {
     int handle;
 };
 
+#define BO_ALLOC_FOR_RENDER (1<<0)
+
+drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
+				       const char *name,
+				       int x, int y, int cpp,
+				       uint32_t *tiling_mode,
+				       unsigned long *pitch,
+				       unsigned long flags);
 drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
 				 unsigned long size, unsigned int alignment);
 drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
@@ -100,6 +108,10 @@ int drm_intel_bufmgr_check_aperture_space(drm_intel_bo **bo_array, int count);
 int drm_intel_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 			    drm_intel_bo *target_bo, uint32_t target_offset,
 			    uint32_t read_domains, uint32_t write_domain);
+int drm_intel_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
+				  drm_intel_bo *target_bo,
+				  uint32_t target_offset,
+				  uint32_t read_domains, uint32_t write_domain);
 int drm_intel_bo_pin(drm_intel_bo *bo, uint32_t alignment);
 int drm_intel_bo_unpin(drm_intel_bo *bo);
 int drm_intel_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
index 737ceae..65c84bc 100644
--- a/libdrm/intel/intel_bufmgr_gem.c
+++ b/libdrm/intel/intel_bufmgr_gem.c
@@ -95,6 +95,7 @@ typedef struct _drm_intel_bufmgr_gem {
     pthread_mutex_t lock;
 
     struct drm_i915_gem_exec_object *exec_objects;
+    struct drm_i915_gem_exec_object2 *exec2_objects;
     drm_intel_bo **exec_bos;
     int exec_size;
     int exec_count;
@@ -107,6 +108,13 @@ typedef struct _drm_intel_bufmgr_gem {
     int pci_device;
 } drm_intel_bufmgr_gem;
 
+#define DRM_INTEL_RELOC_FENCE (1<<0)
+
+typedef struct _drm_intel_reloc_target_info {
+	drm_intel_bo *bo;
+	int flags;
+} drm_intel_reloc_target;
+
 struct _drm_intel_bo_gem {
     drm_intel_bo bo;
 
@@ -143,8 +151,8 @@ struct _drm_intel_bo_gem {
 
     /** Array passed to the DRM containing relocation information. */
     struct drm_i915_gem_relocation_entry *relocs;
-    /** Array of bos corresponding to relocs[i].target_handle */
-    drm_intel_bo **reloc_target_bo;
+    /** Array of info structs corresponding to relocs[i].target_handle etc */
+    drm_intel_reloc_target *reloc_target_info;
     /** Number of entries in relocs */
     int reloc_count;
     /** Mapped address for the buffer, saved across map/unmap cycles */
@@ -206,6 +214,71 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
 static void
 drm_intel_gem_bo_unreference(drm_intel_bo *bo);
 
+#define ROUND_UP_TO(x, y)			(((x) + (y) - 1) / (y) * (y))
+#define ROUND_UP_TO_MB(x)			ROUND_UP_TO((x), 1024*1024)
+
+/* Round a given size up to the nearest tileable size for the object, taking
+ * fence register mapping into account */
+static unsigned long
+drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
+			   uint32_t *tiling_mode)
+{
+    unsigned long min_size, max_size;
+    unsigned long i;
+
+    if (*tiling_mode == I915_TILING_NONE)
+	return size;
+
+    /* 965+ just need multiples of page size for tiling */
+    if (IS_I965G(bufmgr_gem))
+	return ROUND_UP_TO(size, 4096);
+
+    /* Older chips need powers of two, of at least 512k or 1M */
+    if (IS_I9XX(bufmgr_gem)) {
+	min_size = 1024*1024;
+	max_size = 128*1024*1024;
+    } else {
+	min_size = 512*1024;
+	max_size = 64*1024*1024;
+    }
+
+    if (size > max_size) {
+	*tiling_mode = I915_TILING_NONE;
+	return size;
+    }
+
+    for (i = min_size; i < size; i <<= 1)
+	;
+
+    return i;
+}
+
+/*
+ * Round a given pitch up to the minimum required for X tiling on a
+ * given chip.  We use 512 as the minimum to allow for a later tiling
+ * change.
+ */
+static unsigned long
+drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
+			    unsigned long pitch, uint32_t tiling_mode)
+{
+    unsigned long tile_width = 512;
+    unsigned long i;
+
+    if (tiling_mode == I915_TILING_NONE)
+	return ROUND_UP_TO(pitch, tile_width);
+
+    /* 965 is flexible */
+    if (IS_I965G(bufmgr_gem))
+	return ROUND_UP_TO(pitch, tile_width);
+
+    /* Pre-965 needs power of two tile width */
+    for (i = tile_width; i < pitch; i <<= 1)
+	;
+
+    return i;
+}
+
 static struct drm_intel_gem_bo_bucket *
 drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
 				 unsigned long size)
@@ -236,7 +309,7 @@ static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
 	}
 
 	for (j = 0; j < bo_gem->reloc_count; j++) {
-	    drm_intel_bo *target_bo = bo_gem->reloc_target_bo[j];
+	    drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
 	    drm_intel_bo_gem *target_gem = (drm_intel_bo_gem *)target_bo;
 
 	    DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n",
@@ -296,6 +369,54 @@ drm_intel_add_validate_buffer(drm_intel_bo *bo)
     bufmgr_gem->exec_count++;
 }
 
+static void
+drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+    int index;
+
+    if (bo_gem->validate_index != -1) {
+	/* Already on the list; we may still need to flag it for a fence */
+	if (need_fence)
+	    bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
+		    EXEC_OBJECT_NEEDS_FENCE;
+	return;
+    }
+
+    /* Extend the array of validation entries as necessary. */
+    if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
+	int new_size = bufmgr_gem->exec_size * 2;
+
+	if (new_size == 0)
+	    new_size = 5;
+
+	bufmgr_gem->exec2_objects =
+	    realloc(bufmgr_gem->exec2_objects,
+		    sizeof(*bufmgr_gem->exec2_objects) * new_size);
+	bufmgr_gem->exec_bos =
+	    realloc(bufmgr_gem->exec_bos,
+		    sizeof(*bufmgr_gem->exec_bos) * new_size);
+	bufmgr_gem->exec_size = new_size;
+    }
+
+    index = bufmgr_gem->exec_count;
+    bo_gem->validate_index = index;
+    /* Fill in array entry */
+    bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
+    bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
+    bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
+    bufmgr_gem->exec2_objects[index].alignment = 0;
+    bufmgr_gem->exec2_objects[index].offset = 0;
+    bufmgr_gem->exec_bos[index] = bo;
+    bufmgr_gem->exec2_objects[index].flags = 0;
+    bufmgr_gem->exec2_objects[index].rsvd1 = 0;
+    bufmgr_gem->exec2_objects[index].rsvd2 = 0;
+    if (need_fence)
+	    bufmgr_gem->exec2_objects[index].flags |= EXEC_OBJECT_NEEDS_FENCE;
+    drm_intel_gem_bo_reference_locked(bo);
+    bufmgr_gem->exec_count++;
+}
 
 #define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
 	sizeof(uint32_t))
@@ -308,25 +424,27 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo)
 
     bo_gem->relocs = malloc(bufmgr_gem->max_relocs *
 			    sizeof(struct drm_i915_gem_relocation_entry));
-    bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs *
-				     sizeof(drm_intel_bo *));
+    bo_gem->reloc_target_info = malloc(bufmgr_gem->max_relocs *
+				       sizeof(drm_intel_reloc_target));
 
     return 0;
 }
 
 static drm_intel_bo *
 drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
-				unsigned long size, unsigned int alignment,
-				int for_render)
+				unsigned long size, unsigned long flags)
 {
     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
     drm_intel_bo_gem *bo_gem;
     unsigned int page_size = getpagesize();
-    int ret;
+    int ret, for_render = 0;
     struct drm_intel_gem_bo_bucket *bucket;
     int alloc_from_cache = 0;
     unsigned long bo_size;
 
+    if (flags & BO_ALLOC_FOR_RENDER)
+	for_render = 1;
+
     /* Round the allocated size up to a power of two number of pages. */
     bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
 
@@ -405,9 +523,9 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
     bo_gem->reloc_tree_size = bo_gem->bo.size;
     bo_gem->reloc_tree_fences = 0;
     bo_gem->used_as_reloc_target = 0;
+    bo_gem->reusable = 1;
     bo_gem->tiling_mode = I915_TILING_NONE;
     bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
-    bo_gem->reusable = 1;
 
     DBG("bo_create: buf %d (%s) %ldb\n",
 	bo_gem->gem_handle, bo_gem->name, size);
@@ -419,14 +537,45 @@ static drm_intel_bo *
 drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
 				  unsigned long size, unsigned int alignment)
 {
-    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
+    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
+					   BO_ALLOC_FOR_RENDER);
 }
 
 static drm_intel_bo *
 drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
 		       unsigned long size, unsigned int alignment)
 {
-    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
+    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
+}
+
+static drm_intel_bo *
+drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
+			     int x, int y, int cpp, uint32_t *tiling_mode,
+			     unsigned long *pitch, unsigned long flags)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
+    drm_intel_bo *bo;
+    unsigned long size, stride;
+    int ret;
+
+    stride = x * cpp;
+    stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, *tiling_mode);
+    size = stride * y;
+    size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
+
+    bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
+    if (!bo)
+	return NULL;
+
+    ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
+    if (ret != 0) {
+	drm_intel_gem_bo_unreference(bo);
+	return NULL;
+    }
+
+    *pitch = stride;
+
+    return bo;
 }
 
 /**
@@ -478,10 +627,6 @@ drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, const char *name,
     }
     bo_gem->tiling_mode = get_tiling.tiling_mode;
     bo_gem->swizzle_mode = get_tiling.swizzle_mode;
-    if (bo_gem->tiling_mode == I915_TILING_NONE)
-	bo_gem->reloc_tree_fences = 0;
-    else
-	bo_gem->reloc_tree_fences = 1;
 
     DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
 
@@ -574,8 +719,8 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
 
 	    /* Unreference all the target buffers */
 	    for (i = 0; i < bo_gem->reloc_count; i++)
-		 drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_bo[i]);
-	    free(bo_gem->reloc_target_bo);
+		 drm_intel_gem_bo_unreference_locked(bo_gem->reloc_target_info[i].bo);
+	    free(bo_gem->reloc_target_info);
 	    free(bo_gem->relocs);
 	}
 
@@ -600,7 +745,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
 	    bo_gem->name = NULL;
 	    bo_gem->validate_index = -1;
 	    bo_gem->relocs = NULL;
-	    bo_gem->reloc_target_bo = NULL;
+	    bo_gem->reloc_target_info = NULL;
 	    bo_gem->reloc_count = 0;
 
 	    DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
@@ -919,6 +1064,7 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
     int i;
 
     free(bufmgr_gem->exec_objects);
+    free(bufmgr_gem->exec2_objects);
     free(bufmgr_gem->exec_bos);
 
     pthread_mutex_destroy(&bufmgr_gem->lock);
@@ -950,9 +1096,9 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
  * last known offset in target_bo.
  */
 static int
-drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
-			    drm_intel_bo *target_bo, uint32_t target_offset,
-			    uint32_t read_domains, uint32_t write_domain)
+do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+		 drm_intel_bo *target_bo, uint32_t target_offset,
+		 uint32_t read_domains, uint32_t write_domain, int need_fence)
 {
     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
     drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
@@ -976,7 +1122,8 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
      */
     assert(!bo_gem->used_as_reloc_target);
     bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
-    bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
+    if (need_fence)
+	    bo_gem->reloc_tree_fences++;
 
     /* Flag the target to disallow further relocations in it. */
     target_bo_gem->used_as_reloc_target = 1;
@@ -989,7 +1136,12 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
     bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
     bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
 
-    bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo;
+    bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
+    if (need_fence)
+	    bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
+		    DRM_INTEL_RELOC_FENCE;
+    else
+	    bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
     drm_intel_gem_bo_reference_locked(target_bo);
 
     bo_gem->reloc_count++;
@@ -999,6 +1151,25 @@ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
     return 0;
 }
 
+static int
+drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+			    drm_intel_bo *target_bo, uint32_t target_offset,
+			    uint32_t read_domains, uint32_t write_domain)
+{
+	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
+				read_domains, write_domain, 0);
+}
+
+static int
+drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
+				  drm_intel_bo *target_bo,
+				  uint32_t target_offset,
+				  uint32_t read_domains, uint32_t write_domain)
+{
+	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
+				read_domains, write_domain, 1);
+}
 /**
  * Walk the tree of relocations rooted at BO and accumulate the list of
  * validations to be performed and update the relocation buffers with
@@ -1014,7 +1184,7 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
 	return;
 
     for (i = 0; i < bo_gem->reloc_count; i++) {
-	drm_intel_bo *target_bo = bo_gem->reloc_target_bo[i];
+	drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
 
 	/* Continue walking the tree depth-first. */
 	drm_intel_gem_bo_process_reloc(target_bo);
@@ -1025,6 +1195,29 @@ drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
 }
 
 static void
+drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
+{
+    drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+    int i;
+
+    if (bo_gem->relocs == NULL)
+	return;
+
+    for (i = 0; i < bo_gem->reloc_count; i++) {
+	drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
+	int need_fence;
+
+	/* Continue walking the tree depth-first. */
+	drm_intel_gem_bo_process_reloc2(target_bo);
+
+	need_fence = bo_gem->reloc_target_info[i].flags & DRM_INTEL_RELOC_FENCE;
+
+	/* Add the target to the validate list */
+	drm_intel_add_validate_buffer2(target_bo, need_fence);
+    }
+}
+
+static void
 drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
 {
     int i;
@@ -1043,6 +1236,25 @@ drm_intel_update_buffer_offsets (drm_intel_bufmgr_gem *bufmgr_gem)
     }
 }
 
+static void
+drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
+{
+    int i;
+
+    for (i = 0; i < bufmgr_gem->exec_count; i++) {
+	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+
+	/* Update the buffer offset */
+	if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
+	    DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
+		bo_gem->gem_handle, bo_gem->name, bo->offset,
+		(unsigned long long)bufmgr_gem->exec2_objects[i].offset);
+	    bo->offset = bufmgr_gem->exec2_objects[i].offset;
+	}
+    }
+}
+
 static int
 drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
 		      drm_clip_rect_t *cliprects, int num_cliprects,
@@ -1106,6 +1318,71 @@ drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
 }
 
 static int
+drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
+		      drm_clip_rect_t *cliprects, int num_cliprects,
+		      int DR4)
+{
+    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
+    struct drm_i915_gem_execbuffer2 execbuf;
+    int ret, i;
+
+    pthread_mutex_lock(&bufmgr_gem->lock);
+    /* Update indices and set up the validate list. */
+    drm_intel_gem_bo_process_reloc2(bo);
+
+    /* Add the batch buffer to the validation list.  There are no relocations
+     * pointing to it.
+     */
+    drm_intel_add_validate_buffer2(bo, 0);
+
+    execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
+    execbuf.buffer_count = bufmgr_gem->exec_count;
+    execbuf.batch_start_offset = 0;
+    execbuf.batch_len = used;
+    execbuf.cliprects_ptr = (uintptr_t)cliprects;
+    execbuf.num_cliprects = num_cliprects;
+    execbuf.DR1 = 0;
+    execbuf.DR4 = DR4;
+    execbuf.flags = 0;
+    execbuf.rsvd1 = 0;
+    execbuf.rsvd2 = 0;
+
+    do {
+	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+    } while (ret != 0 && errno == EAGAIN);
+
+    if (ret != 0 && errno == ENOMEM) {
+	fprintf(stderr, "Execbuffer fails to pin. Estimate: %u. Actual: %u. Available: %u\n",
+		drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
+						   bufmgr_gem->exec_count),
+		drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
+						  bufmgr_gem->exec_count),
+		(unsigned int) bufmgr_gem->gtt_size);
+    }
+    drm_intel_update_buffer_offsets2 (bufmgr_gem);
+
+    if (bufmgr_gem->bufmgr.debug)
+	drm_intel_gem_dump_validation_list(bufmgr_gem);
+
+    for (i = 0; i < bufmgr_gem->exec_count; i++) {
+	drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
+	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+
+	/* Need to call swrast on next bo_map */
+	bo_gem->swrast = 0;
+
+	/* Disconnect the buffer from the validate list */
+	bo_gem->validate_index = -1;
+	drm_intel_gem_bo_unreference_locked(bo);
+	bufmgr_gem->exec_bos[i] = NULL;
+    }
+    bufmgr_gem->exec_count = 0;
+    pthread_mutex_unlock(&bufmgr_gem->lock);
+
+    return 0;
+}
+
+static int
 drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
 {
     drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
@@ -1158,10 +1435,6 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
     if (bo_gem->global_name == 0 && *tiling_mode == bo_gem->tiling_mode)
 	return 0;
 
-    /* If we're going from non-tiling to tiling, bump fence count */
-    if (bo_gem->tiling_mode == I915_TILING_NONE)
-	bo_gem->reloc_tree_fences++;
-
     memset(&set_tiling, 0, sizeof(set_tiling));
     set_tiling.handle = bo_gem->gem_handle;
     set_tiling.tiling_mode = *tiling_mode;
@@ -1175,10 +1448,6 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t *tiling_mode,
     bo_gem->tiling_mode = set_tiling.tiling_mode;
     bo_gem->swizzle_mode = set_tiling.swizzle_mode;
 
-    /* If we're going from tiling to non-tiling, drop fence count */
-    if (bo_gem->tiling_mode == I915_TILING_NONE)
-	bo_gem->reloc_tree_fences--;
-
     *tiling_mode = bo_gem->tiling_mode;
     return 0;
 }
@@ -1253,7 +1522,7 @@ drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
     bo_gem->included_in_check_aperture = 1;
 
     for (i = 0; i < bo_gem->reloc_count; i++)
-	total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_bo[i]);
+	total += drm_intel_gem_bo_get_aperture_space(bo_gem->reloc_target_info[i].bo);
 
     return total;
 }
@@ -1299,7 +1568,7 @@ drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
     bo_gem->included_in_check_aperture = 0;
 
     for (i = 0; i < bo_gem->reloc_count; i++)
-	drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_bo[i]);
+	drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->reloc_target_info[i].bo);
 }
 
 /**
@@ -1426,6 +1695,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
     drm_i915_getparam_t gp;
     int ret, i;
     unsigned long size;
+    int exec2 = 0;
 
     bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
     bufmgr_gem->fd = fd;
@@ -1467,6 +1737,12 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
 	}
     }
 
+    gp.param = I915_PARAM_HAS_EXECBUF2;
+    gp.value = &exec2; /* don't clobber the target of the previous check */
+    ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+    if (ret)
+	    exec2 = 0;
+
     /* Let's go with one relocation per every 2 dwords (but round down a bit
      * since a power of two will mean an extra page allocation for the reloc
      * buffer).
@@ -1475,6 +1750,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
      */
     bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
 
+    bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
     bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
     bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
     bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
@@ -1485,12 +1761,17 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
     bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
     bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
     bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
+    bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
     bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
     bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
     bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
     bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
     bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
     bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
+    /* Use the new one if available */
+    if (exec2)
+	    bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
+
     bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
     bufmgr_gem->bufmgr.debug = 0;
     bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
diff --git a/libdrm/intel/intel_bufmgr_priv.h b/libdrm/intel/intel_bufmgr_priv.h
index 0098076..d832882 100644
--- a/libdrm/intel/intel_bufmgr_priv.h
+++ b/libdrm/intel/intel_bufmgr_priv.h
@@ -41,6 +41,25 @@
  */
 struct _drm_intel_bufmgr {
    /**
+    * Allocate a tiled buffer object.
+    *
+    * Alignment for tiled objects is set automatically; the 'flags'
+    * argument provides a hint about how the object will be used initially.
+    *
+    * Valid tiling formats are:
+    *  I915_TILING_NONE
+    *  I915_TILING_X
+    *  I915_TILING_Y
+    *
+    * Note that the tiling format may be rejected; callers should check
+    * the 'tiling_mode' value on return, as well as the pitch, which may
+    * have been rounded up to accommodate tiling restrictions.
+    */
+   drm_intel_bo *(*bo_alloc_tiled)(drm_intel_bufmgr *bufmgr, const char *name,
+				   int x, int y, int cpp, uint32_t *tiling_mode,
+				   unsigned long *pitch, unsigned long flags);
+
+   /**
     * Allocate a buffer object.
     *
     * Buffer objects are not necessarily initially mapped into CPU virtual
@@ -133,6 +152,9 @@ struct _drm_intel_bufmgr {
     int (*bo_emit_reloc)(drm_intel_bo *bo, uint32_t offset,
 			 drm_intel_bo *target_bo, uint32_t target_offset,
 			 uint32_t read_domains, uint32_t write_domain);
+    int (*bo_emit_reloc_fence)(drm_intel_bo *bo, uint32_t offset,
+			       drm_intel_bo *target_bo, uint32_t target_offset,
+			       uint32_t read_domains, uint32_t write_domain);
 
     /** Executes the command buffer pointed to by bo. */
     int (*bo_exec)(drm_intel_bo *bo, int used,
diff --git a/shared-core/i915_drm.h b/shared-core/i915_drm.h
index 2539966..3ee768c 100644
--- a/shared-core/i915_drm.h
+++ b/shared-core/i915_drm.h
@@ -206,6 +206,7 @@ typedef struct drm_i915_sarea {
 #define DRM_I915_GEM_GET_APERTURE 0x23
 #define DRM_I915_GEM_MMAP_GTT	0x24
 #define DRM_I915_GET_PIPE_FROM_CRTC_ID	0x25
+#define DRM_I915_GEM_EXECBUFFER2	0x26
 
 #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
 #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
@@ -227,6 +228,7 @@ typedef struct drm_i915_sarea {
 #define DRM_IOCTL_I915_EXECBUFFER	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_EXECBUFFER, struct drm_i915_execbuffer)
 #define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init)
 #define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer)
+#define DRM_IOCTL_I915_GEM_EXECBUFFER2	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2)
 #define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin)
 #define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin)
 #define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)
@@ -299,6 +301,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_CHIPSET_ID            4
 #define I915_PARAM_HAS_GEM               5
 #define I915_PARAM_NUM_FENCES_AVAIL      6
+#define I915_PARAM_HAS_EXECBUF2          7
 
 typedef struct drm_i915_getparam {
 	int param;
@@ -622,6 +625,57 @@ struct drm_i915_gem_execbuffer {
 	uint64_t cliprects_ptr;
 };
 
+struct drm_i915_gem_exec_object2 {
+	/**
+	 * User's handle for a buffer to be bound into the GTT for this
+	 * operation.
+	 */
+	uint32_t handle;
+
+	/** Number of relocations to be performed on this buffer */
+	uint32_t relocation_count;
+	/**
+	 * Pointer to array of struct drm_i915_gem_relocation_entry containing
+	 * the relocations to be performed in this buffer.
+	 */
+	uint64_t relocs_ptr;
+
+	/** Required alignment in graphics aperture */
+	uint64_t alignment;
+
+	/**
+	 * Returned value of the updated offset of the object, for future
+	 * presumed_offset writes.
+	 */
+	uint64_t offset;
+
+#define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+	uint64_t flags;
+	uint64_t rsvd1;
+	uint64_t rsvd2;
+};
+
+struct drm_i915_gem_execbuffer2 {
+	/**
+	 * List of gem_exec_object2 structs
+	 */
+	uint64_t buffers_ptr;
+	uint32_t buffer_count;
+
+	/** Offset in the batchbuffer to start execution from. */
+	uint32_t batch_start_offset;
+	/** Bytes used in batchbuffer from batch_start_offset */
+	uint32_t batch_len;
+	uint32_t DR1;
+	uint32_t DR4;
+	uint32_t num_cliprects;
+	/** This is a struct drm_clip_rect *cliprects */
+	uint64_t cliprects_ptr;
+	uint64_t flags; /* currently unused */
+	uint64_t rsvd1;
+	uint64_t rsvd2;
+};
+
 struct drm_i915_gem_pin {
 	/** Handle of the buffer to be pinned. */
 	uint32_t handle;
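
A note on detection: userspace can probe for the new ioctl with the same
GETPARAM check bufmgr_gem_init uses above; a minimal sketch, assuming
'fd' is an open DRM file descriptor:

    drm_i915_getparam_t gp;
    int has_execbuf2 = 0;

    gp.param = I915_PARAM_HAS_EXECBUF2;
    gp.value = &has_execbuf2;
    if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && has_execbuf2) {
	/* execbuf2 is available: buffers that need fence registers can
	 * be flagged with drm_intel_bo_emit_reloc_fence(), and the
	 * kernel will honor EXEC_OBJECT_NEEDS_FENCE at exec time. */
    }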


