[Intel-gfx] [PATCH 2/2] intel: Add prelocation support

Ben Widawsky benjamin.widawsky at intel.com
Fri Aug 22 05:12:33 CEST 2014


Add support for "prelocated" buffer objects: userspace reserves the CPU
address space for the object (via mmap, optionally with MAP_32BIT, or an
aligned allocation) and passes the chosen virtual address to the kernel
through the repurposed GEM create arguments. Relocations targeting a
prelocated object are skipped, since userspace already knows and fills
in offset64 itself; the execbuffer offset is seeded from offset64 and
prelocated objects are excluded from post-execbuffer offset updates.

Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
 intel/intel_bufmgr.h     |   8 ++++
 intel/intel_bufmgr_gem.c | 102 +++++++++++++++++++++++++++++++++++++++++++----
 2 files changed, 102 insertions(+), 8 deletions(-)

diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9383c72..e4ecc44 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -88,6 +88,8 @@ struct _drm_intel_bo {
 	 * Last seen card virtual address (offset from the beginning of the
 	 * aperture) for the object.  This should be used to fill relocation
 	 * entries when calling drm_intel_bo_emit_reloc()
+	 *
+	 * This is also useful when prelocating an object.
 	 */
 	uint64_t offset64;
 };
@@ -106,6 +108,8 @@ typedef struct _drm_intel_aub_annotation {
 } drm_intel_aub_annotation;
 
 #define BO_ALLOC_FOR_RENDER (1<<0)
+#define BO_ALLOC_PRELOCATE  (1<<1)
+#define BO_ALLOC_PRELOCATE_32  (1<<2)
 
 drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
 				 unsigned long size, unsigned int alignment);
@@ -119,6 +123,10 @@ drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
 				       uint32_t *tiling_mode,
 				       unsigned long *pitch,
 				       unsigned long flags);
+drm_intel_bo *drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+					    const char *name,
+					    unsigned long size,
+					    int low);
 void drm_intel_bo_reference(drm_intel_bo *bo);
 void drm_intel_bo_unreference(drm_intel_bo *bo);
 int drm_intel_bo_map(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index d7d3769..5a2a9bd 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -221,6 +221,11 @@ struct _drm_intel_bo_gem {
 	 */
 	bool idle;
 
 	/** How the prelocated CPU address range was reserved (0 = not prelocated) */
+	#define PRELOCATE_MMAP 1
+	#define PRELOCATE_MALLOC 2
+	int prelocated;
+
 	/**
 	 * Size in bytes of this buffer and its relocation descendents.
 	 *
@@ -489,7 +494,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
 	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
 	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
 	bufmgr_gem->exec2_objects[index].alignment = 0;
-	bufmgr_gem->exec2_objects[index].offset = 0;
+	if (bo_gem->prelocated)
+		bufmgr_gem->exec2_objects[index].offset = bo->offset64;
+	else
+		bufmgr_gem->exec2_objects[index].offset = 0;
 	bufmgr_gem->exec_bos[index] = bo;
 	bufmgr_gem->exec2_objects[index].flags = 0;
 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
@@ -637,9 +645,10 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
 }
 
 static drm_intel_bo_gem *
-__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
+__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, bool prelocate, bool low32)
 {
 	struct drm_i915_gem_create create;
+	drm_intel_bo *bo;
 	drm_intel_bo_gem *bo_gem;
 	int ret;
 
@@ -647,10 +656,35 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
 	if (!bo_gem)
 		return NULL;
 
+	bo = (drm_intel_bo *)bo_gem;
+
 	bo_gem->bo.size = size;
 
 	VG_CLEAR(create);
 	create.size = size;
+	/* FIXME: This is a gross hack to repurpose the create args */
+	if (prelocate) {
+		create.size |= (1ULL << 63);
+		if (low32) {
+			bo->offset64 = (uint64_t)mmap(NULL, size,
+					PROT_READ | PROT_WRITE,
+					MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT,
+					-1, 0);
+			bo_gem->prelocated = PRELOCATE_MALLOC;
+		} else {
+			bo->offset64 = (uint64_t)aligned_alloc(getpagesize(), size);
+			bo_gem->prelocated = PRELOCATE_MMAP;
+		}
+		if (!bo->offset64) {
+			DBG("Couldn't allocate %ld address space for object. %s\n",
+			    size, strerror(errno));
+			free(bo_gem);
+			return NULL;
+		}
+		create.handle = bo->offset64 >> 32;
+		create.pad = bo->offset64;
+	} else
+		bo->offset64 = 0x1;
 
 	ret = drmIoctl(bufmgr_gem->fd,
 		       DRM_IOCTL_I915_GEM_CREATE,
@@ -658,6 +692,10 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
 	bo_gem->gem_handle = create.handle;
 	bo_gem->bo.handle = bo_gem->gem_handle;
 	if (ret != 0) {
+		if (prelocate && low32)
+			munmap((void *)bo->offset64, size);
+		else if (prelocate)
+			free((void *)bo->offset64);
 		free(bo_gem);
 		return NULL;
 	}
@@ -687,10 +725,17 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
 	struct drm_intel_gem_bo_bucket *bucket;
 	bool alloc_from_cache;
 	unsigned long bo_size;
-	bool for_render = false;
+	bool for_render = false, prelocate = false, low = false;
 
 	if (flags & BO_ALLOC_FOR_RENDER)
 		for_render = true;
+	if (flags & BO_ALLOC_PRELOCATE) {
+		if (flags & BO_ALLOC_PRELOCATE_32)
+			low = true;
+		prelocate = true;
+		bo_size = size;
+		goto skip_cache;
+	}
 
 	/* Round the allocated size up to a power of two number of pages. */
 	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
@@ -756,7 +801,8 @@ retry:
 	pthread_mutex_unlock(&bufmgr_gem->lock);
 
 	if (!alloc_from_cache) {
-		bo_gem = __bo_alloc(bufmgr_gem, bo_size);
+skip_cache:
+		bo_gem = __bo_alloc(bufmgr_gem, bo_size, prelocate, low);
 		if (!bo_gem)
 			return NULL;
 
@@ -774,7 +820,7 @@ retry:
 	bo_gem->reloc_tree_fences = 0;
 	bo_gem->used_as_reloc_target = false;
 	bo_gem->has_error = false;
-	bo_gem->reusable = true;
+	bo_gem->reusable = !prelocate;
 	bo_gem->aub_annotations = NULL;
 	bo_gem->aub_annotation_count = 0;
 
@@ -859,6 +905,25 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
 					       tiling, stride);
 }
 
+drm_public drm_intel_bo *
+drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+			      const char *name,
+			      unsigned long size,
+			      int low)
+{
+	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+	int flag = BO_ALLOC_PRELOCATE;
+	/* FIXME: Need to replace this with a paramcheck */
+	if (bufmgr_gem->gen < 8 || !bufmgr_gem->has_llc)
+		return NULL;
+
+	if (low)
+		flag |= BO_ALLOC_PRELOCATE_32;
+
+	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
+					       flag, I915_TILING_NONE, 0);
+}
+
 /**
  * Returns a drm_intel_bo wrapping the given buffer object handle.
  *
@@ -964,7 +1029,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
 	int ret;
 
 	DRMLISTDEL(&bo_gem->vma_list);
-	if (bo_gem->mem_virtual) {
+	if (bo_gem->mem_virtual && !bo_gem->prelocated) {
 		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
 		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
 		bufmgr_gem->vma_count--;
@@ -982,6 +1047,12 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
 		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
 		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
 	}
+
+	if (bo_gem->prelocated == PRELOCATE_MMAP)
+		munmap((void *)bo->offset64, bo->size);
+	else if (bo_gem->prelocated == PRELOCATE_MALLOC)
+		free((void *)bo->offset64);
+
 	free(bo_gem->aub_annotations);
 	free(bo);
 }
@@ -1190,7 +1261,9 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
 	if (bo_gem->map_count++ == 0)
 		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
 
-	if (!bo_gem->mem_virtual) {
+	if (bo_gem->prelocated) {
+		bo_gem->mem_virtual = (void *)bo->offset64;
+	} else if (!bo_gem->mem_virtual) {
 		struct drm_i915_gem_mmap mmap_arg;
 
 		DBG("bo_map: %d (%s), map_count=%d\n",
@@ -1683,6 +1756,17 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
 		return -ENOMEM;
 	}
 
+	/* If the target we're trying to point to was a prelocated target, then we
+	 * can skip actually telling the kernel about the relocation. Userspace
+	 * is expected to use offset64. */
+	if (target_bo_gem->prelocated) {
+		assert(target_bo->offset64 != 0x1);
+		assert(target_bo->offset64 != 0); // temp hack
+		if (bo_gem->validate_index == -1)
+			drm_intel_add_validate_buffer2(target_bo, false);
+		return 0;
+	}
+
 	/* We never use HW fences for rendering on 965+ */
 	if (bufmgr_gem->gen >= 4)
 		need_fence = false;
@@ -1863,7 +1947,6 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
 	}
 }
 
-
 static void
 drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
 {
@@ -1894,6 +1977,9 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
 
+		if (bo_gem->prelocated)
+			continue;
+
 		/* Update the buffer offset */
 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
 			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
-- 
2.0.4




More information about the Intel-gfx mailing list