[Beignet] [Patch V2] OCL20: Add drm and kernel softpin patches temp for SVM.

Yang Rong rong.r.yang at intel.com
Mon Dec 7 19:12:43 PST 2015


softpin_drm.patch is from Michał Winiarski (http://patchwork.freedesktop.org/patch/59022/)
And softpin_kernel.patch is from Thomas Daniel (http://patchwork.freedesktop.org/patch/62008/); I
rebased it onto 83266b6b60b672.
Once these patches land upstream, they will be removed.

V2: remove unrelated changes in softpin_kernel.patch.
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
 patch/softpin_drm.patch    | 408 ++++++++++++++++++++++++++++++++++++++++++
 patch/softpin_kernel.patch | 436 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 844 insertions(+)
 create mode 100644 patch/softpin_drm.patch
 create mode 100644 patch/softpin_kernel.patch

diff --git a/patch/softpin_drm.patch b/patch/softpin_drm.patch
new file mode 100644
index 0000000..53fb767
--- /dev/null
+++ b/patch/softpin_drm.patch
@@ -0,0 +1,408 @@
+diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
+index ded43b1..27f4f0d 100644
+--- a/include/drm/i915_drm.h
++++ b/include/drm/i915_drm.h
+@@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait {
+ #define I915_PARAM_REVISION              32
+ #define I915_PARAM_SUBSLICE_TOTAL	 33
+ #define I915_PARAM_EU_TOTAL		 34
++#define I915_PARAM_HAS_EXEC_SOFTPIN	 37
+ 
+ typedef struct drm_i915_getparam {
+ 	int param;
+@@ -680,7 +681,8 @@ struct drm_i915_gem_exec_object2 {
+ #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+ #define EXEC_OBJECT_NEEDS_GTT	(1<<1)
+ #define EXEC_OBJECT_WRITE	(1<<2)
+-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
++#define EXEC_OBJECT_PINNED	(1<<5)
++#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
+ 	__u64 flags;
+ 
+ 	__u64 rsvd1;
+diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
+index 14ea9f9..bd92335 100644
+--- a/intel/intel_bufmgr.c
++++ b/intel/intel_bufmgr.c
+@@ -261,6 +261,15 @@ drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ }
+ 
+ int
++drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
++{
++	if (bo->bufmgr->bo_set_softpin_offset)
++		return bo->bufmgr->bo_set_softpin_offset(bo, offset);
++
++	return -ENODEV;
++}
++
++int
+ drm_intel_bo_disable_reuse(drm_intel_bo *bo)
+ {
+ 	if (bo->bufmgr->bo_disable_reuse)
+diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
+index 95eecb8..62ea4a0 100644
+--- a/intel/intel_bufmgr.h
++++ b/intel/intel_bufmgr.h
+@@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
+ int drm_intel_bo_busy(drm_intel_bo *bo);
+ int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
++int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset);
+ 
+ int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
+ int drm_intel_bo_is_reusable(drm_intel_bo *bo);
+diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
+index 63122d0..991e0fa 100644
+--- a/intel/intel_bufmgr_gem.c
++++ b/intel/intel_bufmgr_gem.c
+@@ -184,6 +184,13 @@ struct _drm_intel_bo_gem {
+ 	drm_intel_reloc_target *reloc_target_info;
+ 	/** Number of entries in relocs */
+ 	int reloc_count;
++	/** Array of BOs that are referenced by this buffer and will be softpinned */
++	drm_intel_bo **softpin_target;
++	/** Number softpinned BOs that are referenced by this buffer */
++	int softpin_target_count;
++	/** Maximum amount of softpinned BOs that are referenced by this buffer */
++	int softpin_target_size;
++
+ 	/** Mapped address for the buffer, saved across map/unmap cycles */
+ 	void *mem_virtual;
+ 	/** GTT virtual address for the buffer, saved across map/unmap cycles */
+@@ -237,6 +244,11 @@ struct _drm_intel_bo_gem {
+ 	bool is_userptr;
+ 
+ 	/**
++	 * Whether this buffer is softpinned at offset specified by the user
++	 */
++	bool is_softpin;
++
++	/**
+ 	 * Size in bytes of this buffer and its relocation descendents.
+ 	 *
+ 	 * Used to avoid costly tree walking in
+@@ -389,8 +401,9 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
+ 		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
+ 		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+ 
+-		if (bo_gem->relocs == NULL) {
+-			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
++		if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
++			DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
++			    bo_gem->is_softpin ? "*" : "",
+ 			    bo_gem->name);
+ 			continue;
+ 		}
+@@ -400,16 +413,33 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
+ 			drm_intel_bo_gem *target_gem =
+ 			    (drm_intel_bo_gem *) target_bo;
+ 
+-			DBG("%2d: %d (%s)@0x%08llx -> "
++			DBG("%2d: %d %s(%s)@0x%016llx -> "
+ 			    "%d (%s)@0x%08lx + 0x%08x\n",
+ 			    i,
+-			    bo_gem->gem_handle, bo_gem->name,
++			    bo_gem->gem_handle,
++			    bo_gem->is_softpin ? "*" : "",
++			    bo_gem->name,
+ 			    (unsigned long long)bo_gem->relocs[j].offset,
+ 			    target_gem->gem_handle,
+ 			    target_gem->name,
+ 			    target_bo->offset64,
+ 			    bo_gem->relocs[j].delta);
+ 		}
++
++		for (j = 0; j < bo_gem->softpin_target_count; j++) {
++			drm_intel_bo *target_bo = bo_gem->softpin_target[j];
++			drm_intel_bo_gem *target_gem =
++			    (drm_intel_bo_gem *) target_bo;
++			DBG("%2d: %d %s(%s) -> "
++			    "%d *(%s)@0x%016lx\n",
++			    i,
++			    bo_gem->gem_handle,
++			    bo_gem->is_softpin ? "*" : "",
++			    bo_gem->name,
++			    target_gem->gem_handle,
++			    target_gem->name,
++			    target_bo->offset64);
++		}
+ 	}
+ }
+ 
+@@ -473,11 +503,18 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+ 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
+ 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+ 	int index;
++	int flags = 0;
++
++	if (need_fence)
++		flags |= EXEC_OBJECT_NEEDS_FENCE;
++	if (bo_gem->is_softpin)
++		flags |= EXEC_OBJECT_PINNED;
+ 
+ 	if (bo_gem->validate_index != -1) {
+ 		if (need_fence)
+ 			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
+ 				EXEC_OBJECT_NEEDS_FENCE;
++		bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
+ 		return;
+ 	}
+ 
+@@ -504,15 +541,12 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+ 	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
+ 	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
+ 	bufmgr_gem->exec2_objects[index].alignment = bo->align;
+-	bufmgr_gem->exec2_objects[index].offset = 0;
++	bufmgr_gem->exec2_objects[index].offset = bo_gem->is_softpin ?
++		bo->offset64 : 0;
+ 	bufmgr_gem->exec_bos[index] = bo;
+-	bufmgr_gem->exec2_objects[index].flags = 0;
++	bufmgr_gem->exec2_objects[index].flags = flags;
+ 	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
+ 	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
+-	if (need_fence) {
+-		bufmgr_gem->exec2_objects[index].flags |=
+-			EXEC_OBJECT_NEEDS_FENCE;
+-	}
+ 	bufmgr_gem->exec_count++;
+ }
+ 
+@@ -1261,8 +1295,12 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
+ 								  time);
+ 		}
+ 	}
++	for (i = 0; i < bo_gem->softpin_target_count; i++)
++		drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
++								  time);
+ 	bo_gem->reloc_count = 0;
+ 	bo_gem->used_as_reloc_target = false;
++	bo_gem->softpin_target_count = 0;
+ 
+ 	DBG("bo_unreference final: %d (%s)\n",
+ 	    bo_gem->gem_handle, bo_gem->name);
+@@ -1276,6 +1314,11 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
+ 		free(bo_gem->relocs);
+ 		bo_gem->relocs = NULL;
+ 	}
++	if (bo_gem->softpin_target) {
++		free(bo_gem->softpin_target);
++		bo_gem->softpin_target = NULL;
++		bo_gem->softpin_target_size = 0;
++	}
+ 
+ 	/* Clear any left-over mappings */
+ 	if (bo_gem->map_count) {
+@@ -1913,14 +1956,6 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+ 		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
+ 	}
+ 
+-	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
+-	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
+-	bo_gem->relocs[bo_gem->reloc_count].target_handle =
+-	    target_bo_gem->gem_handle;
+-	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
+-	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
+-	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
+-
+ 	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
+ 	if (target_bo != bo)
+ 		drm_intel_gem_bo_reference(target_bo);
+@@ -1930,21 +1965,70 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+ 	else
+ 		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
+ 
++	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
++	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
++	bo_gem->relocs[bo_gem->reloc_count].target_handle =
++	    target_bo_gem->gem_handle;
++	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
++	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
++	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
+ 	bo_gem->reloc_count++;
+ 
+ 	return 0;
+ }
+ 
+ static int
++drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
++{
++	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
++	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
++	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
++	if (bo_gem->has_error)
++		return -ENOMEM;
++
++	if (target_bo_gem->has_error) {
++		bo_gem->has_error = true;
++		return -ENOMEM;
++	}
++
++	if (!target_bo_gem->is_softpin)
++		return -EINVAL;
++	if (target_bo_gem == bo_gem)
++		return -EINVAL;
++
++	if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
++		int new_size = bo_gem->softpin_target_size * 2;
++		if (new_size == 0)
++			new_size = bufmgr_gem->max_relocs;
++
++		bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
++				sizeof(drm_intel_bo *));
++		if (!bo_gem->softpin_target)
++			return -ENOMEM;
++
++		bo_gem->softpin_target_size = new_size;
++	}
++	bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
++	drm_intel_gem_bo_reference(target_bo);
++	bo_gem->softpin_target_count++;
++
++	return 0;
++}
++
++static int
+ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+ 			    drm_intel_bo *target_bo, uint32_t target_offset,
+ 			    uint32_t read_domains, uint32_t write_domain)
+ {
+ 	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
++	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
+ 
+-	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
+-				read_domains, write_domain,
+-				!bufmgr_gem->fenced_relocs);
++	if (target_bo_gem->is_softpin)
++		return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
++	else
++		return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
++					read_domains, write_domain,
++					!bufmgr_gem->fenced_relocs);
+ }
+ 
+ static int
+@@ -1977,6 +2061,8 @@ drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
+  *
+  * Any further drm_intel_bufmgr_check_aperture_space() queries
+  * involving this buffer in the tree are undefined after this call.
++ *
++ * This also removes all softpinned targets being referenced by the BO.
+  */
+ void
+ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
+@@ -2003,6 +2089,12 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
+ 	}
+ 	bo_gem->reloc_count = start;
+ 
++	for (i = 0; i < bo_gem->softpin_target_count; i++) {
++		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
++		drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
++	}
++	bo_gem->softpin_target_count = 0;
++
+ 	pthread_mutex_unlock(&bufmgr_gem->lock);
+ 
+ }
+@@ -2043,7 +2135,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
+ 	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+ 	int i;
+ 
+-	if (bo_gem->relocs == NULL)
++	if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
+ 		return;
+ 
+ 	for (i = 0; i < bo_gem->reloc_count; i++) {
+@@ -2064,6 +2156,17 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
+ 		/* Add the target to the validate list */
+ 		drm_intel_add_validate_buffer2(target_bo, need_fence);
+ 	}
++
++	for (i = 0; i < bo_gem->softpin_target_count; i++) {
++		drm_intel_bo *target_bo = bo_gem->softpin_target[i];
++
++		if (target_bo == bo)
++			continue;
++
++		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
++		drm_intel_gem_bo_process_reloc2(target_bo);
++		drm_intel_add_validate_buffer2(target_bo, false);
++	}
+ }
+ 
+ 
+@@ -2099,7 +2202,11 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
+ 
+ 		/* Update the buffer offset */
+ 		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
+-			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
++			/* If we're seeing softpinned object here it means that the kernel
++			 * has relocated our object... Indicating a programming error
++			 */
++			assert(!bo_gem->is_softpin);
++			DBG("BO %d (%s) migrated: 0x%016lx -> 0x%016llx\n",
+ 			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
+ 			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
+ 			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
+@@ -2423,6 +2530,17 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ 	return 0;
+ }
+ 
++static int
++drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
++{
++	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
++
++	bo_gem->is_softpin = true;
++	bo->offset64 = offset;
++	bo->offset = offset;
++	return 0;
++}
++
+ drm_intel_bo *
+ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
+ {
+@@ -2801,6 +2919,13 @@ _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
+ 			return 1;
+ 	}
+ 
++	for (i = 0; i< bo_gem->softpin_target_count; i++) {
++		if (bo_gem->softpin_target[i] == target_bo)
++			return 1;
++		if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
++			return 1;
++	}
++
+ 	return 0;
+ }
+ 
+@@ -3257,6 +3382,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
+ 	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ 	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
+ 
++	gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
++	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
++	if (ret == 0 && *gp.value > 0)
++		bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
++
+ 	if (bufmgr_gem->gen < 4) {
+ 		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
+ 		gp.value = &bufmgr_gem->available_fences;
+diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
+index 59ebd18..86991c9 100644
+--- a/intel/intel_bufmgr_priv.h
++++ b/intel/intel_bufmgr_priv.h
+@@ -227,6 +227,13 @@ struct _drm_intel_bufmgr {
+ 			      uint32_t * swizzle_mode);
+ 
+ 	/**
++	 * Set the offset at which this buffer will be softpinned
++	 * \param bo Buffer to set the softpin offset for
++	 * \param offset Softpin offset
++	 */
++	int (*bo_set_softpin_offset) (drm_intel_bo *bo, uint64_t offset);
++
++	/**
+ 	 * Create a visible name for a buffer which can be used by other apps
+ 	 *
+ 	 * \param buf Buffer to create a name for
diff --git a/patch/softpin_kernel.patch b/patch/softpin_kernel.patch
new file mode 100644
index 0000000..d66a110
--- /dev/null
+++ b/patch/softpin_kernel.patch
@@ -0,0 +1,436 @@
+diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
+index c4bf9a1..d770267 100644
+--- a/drivers/gpu/drm/Kconfig
++++ b/drivers/gpu/drm/Kconfig
+@@ -12,6 +12,7 @@ menuconfig DRM
+ 	select I2C
+ 	select I2C_ALGOBIT
+ 	select DMA_SHARED_BUFFER
++	select INTERVAL_TREE
+ 	help
+ 	  Kernel-level support for the Direct Rendering Infrastructure (DRI)
+ 	  introduced in XFree86 4.0. If you say Y here, you need to select
+diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
+index 04de6fd..e3acd86 100644
+--- a/drivers/gpu/drm/drm_mm.c
++++ b/drivers/gpu/drm/drm_mm.c
+@@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
+ 	INIT_LIST_HEAD(&node->hole_stack);
+ 	list_add(&node->node_list, &hole_node->node_list);
+ 
++	node->it.start = node->start;
++	node->it.last = node->start + size - 1;
++	interval_tree_insert(&node->it, &mm->interval_tree);
++
+ 	BUG_ON(node->start + node->size > adj_end);
+ 
+ 	node->hole_follows = 0;
+@@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
+  */
+ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
+ {
+-	struct drm_mm_node *hole;
+ 	u64 end = node->start + node->size;
+-	u64 hole_start;
+-	u64 hole_end;
+-
+-	BUG_ON(node == NULL);
++	struct interval_tree_node *it;
++	struct drm_mm_node *hole;
++	u64 hole_start, hole_end;
+ 
+ 	/* Find the relevant hole to add our node to */
+-	drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+-		if (hole_start > node->start || hole_end < end)
+-			continue;
++	it = interval_tree_iter_first(&mm->interval_tree,
++				      node->start, (u64)-1);
++	if (it == NULL) {
++		hole = list_last_entry(&mm->head_node.node_list,
++				       struct drm_mm_node, node_list);
++	} else {
++		hole = container_of(it, typeof(*hole), it);
++		if (hole->start <= node->start)
++			return -ENOSPC;
++
++		hole = list_last_entry(&hole->node_list,
++				       struct drm_mm_node, node_list);
++	}
+ 
+-		node->mm = mm;
+-		node->allocated = 1;
++	hole_start = drm_mm_hole_node_start(hole);
++	hole_end = drm_mm_hole_node_end(hole);
++	if (hole_start > node->start || hole_end < end)
++		return -ENOSPC;
+ 
+-		INIT_LIST_HEAD(&node->hole_stack);
+-		list_add(&node->node_list, &hole->node_list);
++	node->mm = mm;
++	node->allocated = 1;
+ 
+-		if (node->start == hole_start) {
+-			hole->hole_follows = 0;
+-			list_del_init(&hole->hole_stack);
+-		}
++	INIT_LIST_HEAD(&node->hole_stack);
++	list_add(&node->node_list, &hole->node_list);
+ 
+-		node->hole_follows = 0;
+-		if (end != hole_end) {
+-			list_add(&node->hole_stack, &mm->hole_stack);
+-			node->hole_follows = 1;
+-		}
++	node->it.start = node->start;
++	node->it.last = node->start + node->size - 1;
++	interval_tree_insert(&node->it, &mm->interval_tree);
+ 
+-		return 0;
++	if (node->start == hole_start) {
++		hole->hole_follows = 0;
++		list_del_init(&hole->hole_stack);
+ 	}
+ 
+-	return -ENOSPC;
++	node->hole_follows = 0;
++	if (end != hole_end) {
++		list_add(&node->hole_stack, &mm->hole_stack);
++		node->hole_follows = 1;
++	}
++
++	return 0;
+ }
+ EXPORT_SYMBOL(drm_mm_reserve_node);
+ 
+@@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
+ 	INIT_LIST_HEAD(&node->hole_stack);
+ 	list_add(&node->node_list, &hole_node->node_list);
+ 
++	node->it.start = node->start;
++	node->it.last = node->start + node->size - 1;
++	interval_tree_insert(&node->it, &mm->interval_tree);
++
+ 	BUG_ON(node->start < start);
+ 	BUG_ON(node->start < adj_start);
+ 	BUG_ON(node->start + node->size > adj_end);
+@@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
+ 	} else
+ 		list_move(&prev_node->hole_stack, &mm->hole_stack);
+ 
++	interval_tree_remove(&node->it, &mm->interval_tree);
+ 	list_del(&node->node_list);
+ 	node->allocated = 0;
+ }
+@@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
+ 	mm->head_node.size = start - mm->head_node.start;
+ 	list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
+ 
++	mm->interval_tree = RB_ROOT;
++
+ 	mm->color_adjust = NULL;
+ }
+ EXPORT_SYMBOL(drm_mm_init);
+diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
+index a81c766..52b8289 100644
+--- a/drivers/gpu/drm/i915/i915_dma.c
++++ b/drivers/gpu/drm/i915/i915_dma.c
+@@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
+ 	case I915_PARAM_HAS_RESOURCE_STREAMER:
+ 		value = HAS_RESOURCE_STREAMER(dev);
+ 		break;
++	case I915_PARAM_HAS_EXEC_SOFTPIN:
++		value = 1;
++		break;
+ 	default:
+ 		DRM_DEBUG("Unknown parameter %d\n", param->param);
+ 		return -EINVAL;
+diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
+index 4c03449..244d6eb 100644
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -2857,10 +2857,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
+ #define PIN_NONBLOCK	(1<<1)
+ #define PIN_GLOBAL	(1<<2)
+ #define PIN_OFFSET_BIAS	(1<<3)
+-#define PIN_USER	(1<<4)
+-#define PIN_UPDATE	(1<<5)
+-#define PIN_ZONE_4G	(1<<6)
+-#define PIN_HIGH	(1<<7)
++#define PIN_OFFSET_FIXED (1<<4)
++#define PIN_USER	(1<<5)
++#define PIN_UPDATE	(1<<6)
++#define PIN_ZONE_4G	(1<<7)
++#define PIN_HIGH	(1<<8)
+ #define PIN_OFFSET_MASK (~4095)
+ int __must_check
+ i915_gem_object_pin(struct drm_i915_gem_object *obj,
+@@ -3206,6 +3207,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
+ 					  unsigned long start,
+ 					  unsigned long end,
+ 					  unsigned flags);
++int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
+ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+ 
+ /* belongs in i915_gem_gtt.h */
+diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
+index cd27ecc..47c8524 100644
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -3391,7 +3391,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
+ 	struct drm_device *dev = obj->base.dev;
+ 	struct drm_i915_private *dev_priv = dev->dev_private;
+ 	u32 fence_alignment, unfenced_alignment;
+-	u32 search_flag, alloc_flag;
+ 	u64 start, end;
+ 	u64 size, fence_size;
+ 	struct i915_vma *vma;
+@@ -3474,32 +3473,54 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
+ 
+ 	if (IS_ERR(vma))
+ 		goto err_unpin;
++	if (flags & PIN_OFFSET_FIXED) {
++		uint64_t offset = flags & PIN_OFFSET_MASK;
++		if (offset & (alignment - 1) || offset + size > end) {
++			vma = ERR_PTR(-EINVAL);
++			goto err_free_vma;
++		}
++		vma->node.start = offset;
++		vma->node.size = size;
++		vma->node.color = obj->cache_level;
++		ret = drm_mm_reserve_node(&vm->mm, &vma->node);
++		if (ret) {
++			ret = i915_gem_evict_for_vma(vma, flags);
++			if (ret == 0)
++				ret = drm_mm_reserve_node(&vm->mm, &vma->node);
++		}
++		if (ret) {
++			vma = ERR_PTR(ret);
++			goto err_free_vma;
++		}
++        } else {
++		u32 search_flag, alloc_flag;
+ 
+-	if (flags & PIN_HIGH) {
+-		search_flag = DRM_MM_SEARCH_BELOW;
+-		alloc_flag = DRM_MM_CREATE_TOP;
+-	} else {
+-		search_flag = DRM_MM_SEARCH_DEFAULT;
+-		alloc_flag = DRM_MM_CREATE_DEFAULT;
+-	}
++		if (flags & PIN_HIGH) {
++			search_flag = DRM_MM_SEARCH_BELOW;
++			alloc_flag = DRM_MM_CREATE_TOP;
++		} else {
++			search_flag = DRM_MM_SEARCH_DEFAULT;
++			alloc_flag = DRM_MM_CREATE_DEFAULT;
++		}
+ 
+ search_free:
+-	ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+-						  size, alignment,
+-						  obj->cache_level,
+-						  start, end,
+-						  search_flag,
+-						  alloc_flag);
+-	if (ret) {
+-		ret = i915_gem_evict_something(dev, vm, size, alignment,
+-					       obj->cache_level,
+-					       start, end,
+-					       flags);
+-		if (ret == 0)
+-			goto search_free;
+-
+-		goto err_free_vma;
+-	}
++		ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
++							  size, alignment,
++							  obj->cache_level,
++							  start, end,
++							  search_flag,
++							  alloc_flag);
++		if (ret) {
++			ret = i915_gem_evict_something(dev, vm, size, alignment,
++						       obj->cache_level,
++						       start, end,
++						       flags);
++			if (ret == 0)
++				goto search_free;
++ 
++			goto err_free_vma;
++		}
++ 	}
+ 	if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
+ 		ret = -EINVAL;
+ 		goto err_remove_node;
+@@ -4089,6 +4110,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+ 	    vma->node.start < (flags & PIN_OFFSET_MASK))
+ 		return true;
+ 
++	if (flags & PIN_OFFSET_FIXED &&
++	    vma->node.start != (flags & PIN_OFFSET_MASK))
++		return true;
++
+ 	return false;
+ }
+ 
+diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
+index d71a133..60450a9 100644
+--- a/drivers/gpu/drm/i915/i915_gem_evict.c
++++ b/drivers/gpu/drm/i915/i915_gem_evict.c
+@@ -199,6 +199,67 @@ found:
+ 	return ret;
+ }
+ 
++int
++i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
++{
++	struct list_head eviction_list;
++	struct interval_tree_node *it;
++	u64 end = target->node.start + target->node.size;
++	struct drm_mm_node *node;
++	struct i915_vma *vma, *next;
++	int ret;
++
++	trace_i915_gem_evict_vma(target, flags);
++
++	it = interval_tree_iter_first(&target->vm->mm.interval_tree,
++				      target->node.start, end -1);
++	if (it == NULL)
++		return 0;
++
++	INIT_LIST_HEAD(&eviction_list);
++	node = container_of(it, typeof(*node), it);
++	list_for_each_entry_from(node,
++				 &target->vm->mm.head_node.node_list,
++				 node_list) {
++		if (node->start >= end)
++			break;
++
++		vma = container_of(node, typeof(*vma), node);
++		if (flags & PIN_NONBLOCK &&
++		    (vma->pin_count || vma->obj->active)) {
++			ret = -ENOSPC;
++			break;
++		}
++
++		if (vma->exec_entry &&
++		    vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
++			/* Overlapping pinned objects in the same batch */
++			ret = -EINVAL;
++			break;
++		}
++
++		if (vma->pin_count) {
++			/* We may need to evict an buffer in the same batch */
++			ret = vma->exec_entry ? -ENOSPC : -EBUSY;
++			break;
++		}
++
++		list_add(&vma->exec_list, &eviction_list);
++		drm_gem_object_reference(&vma->obj->base);
++	}
++
++	ret = 0;
++	list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
++		struct drm_i915_gem_object *obj = vma->obj;
++		list_del_init(&vma->exec_list);
++		if (ret == 0)
++			ret = i915_vma_unbind(vma);
++		drm_gem_object_unreference(&obj->base);
++	}
++
++	return ret;
++}
++
+ /**
+  * i915_gem_evict_vm - Evict all idle vmas from a vm
+  * @vm: Address space to cleanse
+diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+index a4c243c..aae68cc 100644
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -601,6 +601,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
+ 			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+ 		if ((flags & PIN_MAPPABLE) == 0)
+ 			flags |= PIN_HIGH;
++		if (entry->flags & EXEC_OBJECT_PINNED)
++			flags |= entry->offset | PIN_OFFSET_FIXED;
+ 	}
+ 
+ 	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+@@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
+ 	    vma->node.start & (entry->alignment - 1))
+ 		return true;
+ 
++	if (entry->flags & EXEC_OBJECT_PINNED &&
++	    vma->node.start != entry->offset)
++		return true;
++
+ 	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+ 	    vma->node.start < BATCH_OFFSET_BIAS)
+ 		return true;
+@@ -1017,18 +1023,13 @@ static struct intel_context *
+ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
+ 			  struct intel_engine_cs *ring, const u32 ctx_id)
+ {
+-	struct intel_context *ctx = NULL;
+-	struct i915_ctx_hang_stats *hs;
+-
+-	if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
+-		return ERR_PTR(-EINVAL);
++	struct intel_context *ctx;
+ 
+ 	ctx = i915_gem_context_get(file->driver_priv, ctx_id);
+ 	if (IS_ERR(ctx))
+ 		return ctx;
+ 
+-	hs = &ctx->hang_stats;
+-	if (hs->banned) {
++	if (ctx->hang_stats.banned) {
+ 		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
+ 		return ERR_PTR(-EIO);
+ 	}
+@@ -1317,7 +1318,8 @@ eb_get_batch(struct eb_vmas *eb)
+ 	 * Note that actual hangs have only been observed on gen7, but for
+ 	 * paranoia do it everywhere.
+ 	 */
+-	vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
++	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
++		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+ 
+ 	return vma->obj;
+ }
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index 52b2d40..912943a 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
+ 	    TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
+ );
+ 
++TRACE_EVENT(i915_gem_evict_vma,
++	    TP_PROTO(struct i915_vma *vma, unsigned flags),
++	    TP_ARGS(vma, flags),
++
++	    TP_STRUCT__entry(
++			     __field(u32, dev)
++			     __field(struct i915_address_space *, vm)
++			     __field(u64, start)
++			     __field(u64, size)
++			     __field(unsigned, flags)
++			    ),
++
++	    TP_fast_assign(
++			   __entry->dev = vma->vm->dev->primary->index;
++			   __entry->vm = vma->vm;
++			   __entry->start = vma->node.start;
++			   __entry->size = vma->node.size;
++			   __entry->flags = flags;
++			  ),
++
++	    TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags)
++);
++
+ TRACE_EVENT(i915_gem_ring_sync_to,
+ 	    TP_PROTO(struct drm_i915_gem_request *to_req,
+ 		     struct intel_engine_cs *from,
-- 
1.9.1



More information about the Beignet mailing list