[Beignet] [Patch V2] OCL20: Add temporary drm and kernel softpin patches for SVM.
Yang Rong
rong.r.yang at intel.com
Mon Dec 7 19:12:43 PST 2015
softpin_drm.patch is from Michał Winiarski (http://patchwork.freedesktop.org/patch/59022/),
and softpin_kernel.patch is from Thomas Daniel (http://patchwork.freedesktop.org/patch/62008/),
which I rebased onto 83266b6b60b672.
Once these patches have landed upstream, they will be removed from Beignet.
V2: remove unrelated changes in softpin_kernel.patch.
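
For context, here is a minimal, hypothetical sketch (not part of this patch) of how a
client such as Beignet could use the softpin interface that softpin_drm.patch adds to
libdrm. The render-node path, the buffer size, and the choice of the buffer's CPU
address as the pinned GPU offset are illustrative assumptions only:

/* Hypothetical usage sketch, not part of this patch: it assumes the patched
 * libdrm below and a kernel that advertises I915_PARAM_HAS_EXEC_SOFTPIN. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <intel_bufmgr.h>

int main(void)
{
	int fd = open("/dev/dri/renderD128", O_RDWR);  /* render node path is an assumption */
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "svm buffer", 4096, 4096);

	/* Map the BO and reuse its CPU address as its GPU virtual address,
	 * which is the basic idea behind SVM: pointers stored in the buffer
	 * stay valid on both sides. */
	drm_intel_bo_map(bo, 1);
	if (drm_intel_bo_set_softpin_offset(bo, (uintptr_t)bo->virtual) != 0)
		fprintf(stderr, "softpin not available in this libdrm/kernel\n");

	/* The BO is then submitted through the usual execbuffer path; the
	 * patched libdrm marks it EXEC_OBJECT_PINNED instead of emitting
	 * relocations against it. */
	drm_intel_bo_unmap(bo);
	drm_intel_bo_unreference(bo);
	drm_intel_bufmgr_destroy(bufmgr);
	return 0;
}

Any BO that references a softpinned BO is then tracked through the new
drm_intel_gem_bo_add_softpin_target() path inside libdrm instead of the relocation path.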
Signed-off-by: Yang Rong <rong.r.yang at intel.com>
---
patch/softpin_drm.patch | 408 ++++++++++++++++++++++++++++++++++++++++++
patch/softpin_kernel.patch | 436 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 844 insertions(+)
create mode 100644 patch/softpin_drm.patch
create mode 100644 patch/softpin_kernel.patch
diff --git a/patch/softpin_drm.patch b/patch/softpin_drm.patch
new file mode 100644
index 0000000..53fb767
--- /dev/null
+++ b/patch/softpin_drm.patch
@@ -0,0 +1,408 @@
+diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
+index ded43b1..27f4f0d 100644
+--- a/include/drm/i915_drm.h
++++ b/include/drm/i915_drm.h
+@@ -350,6 +350,7 @@ typedef struct drm_i915_irq_wait {
+ #define I915_PARAM_REVISION 32
+ #define I915_PARAM_SUBSLICE_TOTAL 33
+ #define I915_PARAM_EU_TOTAL 34
++#define I915_PARAM_HAS_EXEC_SOFTPIN 37
+
+ typedef struct drm_i915_getparam {
+ int param;
+@@ -680,7 +681,8 @@ struct drm_i915_gem_exec_object2 {
+ #define EXEC_OBJECT_NEEDS_FENCE (1<<0)
+ #define EXEC_OBJECT_NEEDS_GTT (1<<1)
+ #define EXEC_OBJECT_WRITE (1<<2)
+-#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_WRITE<<1)
++#define EXEC_OBJECT_PINNED (1<<5)
++#define __EXEC_OBJECT_UNKNOWN_FLAGS -(EXEC_OBJECT_PINNED<<1)
+ __u64 flags;
+
+ __u64 rsvd1;
+diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
+index 14ea9f9..bd92335 100644
+--- a/intel/intel_bufmgr.c
++++ b/intel/intel_bufmgr.c
+@@ -261,6 +261,15 @@ drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ }
+
+ int
++drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
++{
++ if (bo->bufmgr->bo_set_softpin_offset)
++ return bo->bufmgr->bo_set_softpin_offset(bo, offset);
++
++ return -ENODEV;
++}
++
++int
+ drm_intel_bo_disable_reuse(drm_intel_bo *bo)
+ {
+ if (bo->bufmgr->bo_disable_reuse)
+diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
+index 95eecb8..62ea4a0 100644
+--- a/intel/intel_bufmgr.h
++++ b/intel/intel_bufmgr.h
+@@ -164,6 +164,7 @@ int drm_intel_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ int drm_intel_bo_flink(drm_intel_bo *bo, uint32_t * name);
+ int drm_intel_bo_busy(drm_intel_bo *bo);
+ int drm_intel_bo_madvise(drm_intel_bo *bo, int madv);
++int drm_intel_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset);
+
+ int drm_intel_bo_disable_reuse(drm_intel_bo *bo);
+ int drm_intel_bo_is_reusable(drm_intel_bo *bo);
+diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
+index 63122d0..991e0fa 100644
+--- a/intel/intel_bufmgr_gem.c
++++ b/intel/intel_bufmgr_gem.c
+@@ -184,6 +184,13 @@ struct _drm_intel_bo_gem {
+ drm_intel_reloc_target *reloc_target_info;
+ /** Number of entries in relocs */
+ int reloc_count;
++ /** Array of BOs that are referenced by this buffer and will be softpinned */
++ drm_intel_bo **softpin_target;
++ /** Number of softpinned BOs that are referenced by this buffer */
++ int softpin_target_count;
++ /** Maximum amount of softpinned BOs that are referenced by this buffer */
++ int softpin_target_size;
++
+ /** Mapped address for the buffer, saved across map/unmap cycles */
+ void *mem_virtual;
+ /** GTT virtual address for the buffer, saved across map/unmap cycles */
+@@ -237,6 +244,11 @@ struct _drm_intel_bo_gem {
+ bool is_userptr;
+
+ /**
++ * Whether this buffer is softpinned at offset specified by the user
++ */
++ bool is_softpin;
++
++ /**
+ * Size in bytes of this buffer and its relocation descendents.
+ *
+ * Used to avoid costly tree walking in
+@@ -389,8 +401,9 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
+ drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
+ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+
+- if (bo_gem->relocs == NULL) {
+- DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
++ if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) {
++ DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle,
++ bo_gem->is_softpin ? "*" : "",
+ bo_gem->name);
+ continue;
+ }
+@@ -400,16 +413,33 @@ drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
+ drm_intel_bo_gem *target_gem =
+ (drm_intel_bo_gem *) target_bo;
+
+- DBG("%2d: %d (%s)@0x%08llx -> "
++ DBG("%2d: %d %s(%s)@0x%016llx -> "
+ "%d (%s)@0x%08lx + 0x%08x\n",
+ i,
+- bo_gem->gem_handle, bo_gem->name,
++ bo_gem->gem_handle,
++ bo_gem->is_softpin ? "*" : "",
++ bo_gem->name,
+ (unsigned long long)bo_gem->relocs[j].offset,
+ target_gem->gem_handle,
+ target_gem->name,
+ target_bo->offset64,
+ bo_gem->relocs[j].delta);
+ }
++
++ for (j = 0; j < bo_gem->softpin_target_count; j++) {
++ drm_intel_bo *target_bo = bo_gem->softpin_target[j];
++ drm_intel_bo_gem *target_gem =
++ (drm_intel_bo_gem *) target_bo;
++ DBG("%2d: %d %s(%s) -> "
++ "%d *(%s)@0x%016lx\n",
++ i,
++ bo_gem->gem_handle,
++ bo_gem->is_softpin ? "*" : "",
++ bo_gem->name,
++ target_gem->gem_handle,
++ target_gem->name,
++ target_bo->offset64);
++ }
+ }
+ }
+
+@@ -473,11 +503,18 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
+ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+ int index;
++ int flags = 0;
++
++ if (need_fence)
++ flags |= EXEC_OBJECT_NEEDS_FENCE;
++ if (bo_gem->is_softpin)
++ flags |= EXEC_OBJECT_PINNED;
+
+ if (bo_gem->validate_index != -1) {
+ if (need_fence)
+ bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
+ EXEC_OBJECT_NEEDS_FENCE;
++ bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags;
+ return;
+ }
+
+@@ -504,15 +541,12 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
+ bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
+ bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
+ bufmgr_gem->exec2_objects[index].alignment = bo->align;
+- bufmgr_gem->exec2_objects[index].offset = 0;
++ bufmgr_gem->exec2_objects[index].offset = bo_gem->is_softpin ?
++ bo->offset64 : 0;
+ bufmgr_gem->exec_bos[index] = bo;
+- bufmgr_gem->exec2_objects[index].flags = 0;
++ bufmgr_gem->exec2_objects[index].flags = flags;
+ bufmgr_gem->exec2_objects[index].rsvd1 = 0;
+ bufmgr_gem->exec2_objects[index].rsvd2 = 0;
+- if (need_fence) {
+- bufmgr_gem->exec2_objects[index].flags |=
+- EXEC_OBJECT_NEEDS_FENCE;
+- }
+ bufmgr_gem->exec_count++;
+ }
+
+@@ -1261,8 +1295,12 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
+ time);
+ }
+ }
++ for (i = 0; i < bo_gem->softpin_target_count; i++)
++ drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i],
++ time);
+ bo_gem->reloc_count = 0;
+ bo_gem->used_as_reloc_target = false;
++ bo_gem->softpin_target_count = 0;
+
+ DBG("bo_unreference final: %d (%s)\n",
+ bo_gem->gem_handle, bo_gem->name);
+@@ -1276,6 +1314,11 @@ drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
+ free(bo_gem->relocs);
+ bo_gem->relocs = NULL;
+ }
++ if (bo_gem->softpin_target) {
++ free(bo_gem->softpin_target);
++ bo_gem->softpin_target = NULL;
++ bo_gem->softpin_target_size = 0;
++ }
+
+ /* Clear any left-over mappings */
+ if (bo_gem->map_count) {
+@@ -1913,14 +1956,6 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+ bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
+ }
+
+- bo_gem->relocs[bo_gem->reloc_count].offset = offset;
+- bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
+- bo_gem->relocs[bo_gem->reloc_count].target_handle =
+- target_bo_gem->gem_handle;
+- bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
+- bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
+- bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
+-
+ bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
+ if (target_bo != bo)
+ drm_intel_gem_bo_reference(target_bo);
+@@ -1930,21 +1965,70 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+ else
+ bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
+
++ bo_gem->relocs[bo_gem->reloc_count].offset = offset;
++ bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
++ bo_gem->relocs[bo_gem->reloc_count].target_handle =
++ target_bo_gem->gem_handle;
++ bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
++ bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
++ bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
+ bo_gem->reloc_count++;
+
+ return 0;
+ }
+
+ static int
++drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo)
++{
++ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
++ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
++ drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
++ if (bo_gem->has_error)
++ return -ENOMEM;
++
++ if (target_bo_gem->has_error) {
++ bo_gem->has_error = true;
++ return -ENOMEM;
++ }
++
++ if (!target_bo_gem->is_softpin)
++ return -EINVAL;
++ if (target_bo_gem == bo_gem)
++ return -EINVAL;
++
++ if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) {
++ int new_size = bo_gem->softpin_target_size * 2;
++ if (new_size == 0)
++ new_size = bufmgr_gem->max_relocs;
++
++ bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size *
++ sizeof(drm_intel_bo *));
++ if (!bo_gem->softpin_target)
++ return -ENOMEM;
++
++ bo_gem->softpin_target_size = new_size;
++ }
++ bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo;
++ drm_intel_gem_bo_reference(target_bo);
++ bo_gem->softpin_target_count++;
++
++ return 0;
++}
++
++static int
+ drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
+ drm_intel_bo *target_bo, uint32_t target_offset,
+ uint32_t read_domains, uint32_t write_domain)
+ {
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
++ drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo;
+
+- return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
+- read_domains, write_domain,
+- !bufmgr_gem->fenced_relocs);
++ if (target_bo_gem->is_softpin)
++ return drm_intel_gem_bo_add_softpin_target(bo, target_bo);
++ else
++ return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
++ read_domains, write_domain,
++ !bufmgr_gem->fenced_relocs);
+ }
+
+ static int
+@@ -1977,6 +2061,8 @@ drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
+ *
+ * Any further drm_intel_bufmgr_check_aperture_space() queries
+ * involving this buffer in the tree are undefined after this call.
++ *
++ * This also removes all softpinned targets being referenced by the BO.
+ */
+ void
+ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
+@@ -2003,6 +2089,12 @@ drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
+ }
+ bo_gem->reloc_count = start;
+
++ for (i = 0; i < bo_gem->softpin_target_count; i++) {
++ drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i];
++ drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec);
++ }
++ bo_gem->softpin_target_count = 0;
++
+ pthread_mutex_unlock(&bufmgr_gem->lock);
+
+ }
+@@ -2043,7 +2135,7 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
+ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+ int i;
+
+- if (bo_gem->relocs == NULL)
++ if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL)
+ return;
+
+ for (i = 0; i < bo_gem->reloc_count; i++) {
+@@ -2064,6 +2156,17 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
+ /* Add the target to the validate list */
+ drm_intel_add_validate_buffer2(target_bo, need_fence);
+ }
++
++ for (i = 0; i < bo_gem->softpin_target_count; i++) {
++ drm_intel_bo *target_bo = bo_gem->softpin_target[i];
++
++ if (target_bo == bo)
++ continue;
++
++ drm_intel_gem_bo_mark_mmaps_incoherent(bo);
++ drm_intel_gem_bo_process_reloc2(target_bo);
++ drm_intel_add_validate_buffer2(target_bo, false);
++ }
+ }
+
+
+@@ -2099,7 +2202,11 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
+
+ /* Update the buffer offset */
+ if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
+- DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
++ /* If we're seeing a softpinned object here it means that the kernel
++ * has relocated our object... Indicating a programming error
++ */
++ assert(!bo_gem->is_softpin);
++ DBG("BO %d (%s) migrated: 0x%016lx -> 0x%016llx\n",
+ bo_gem->gem_handle, bo_gem->name, bo->offset64,
+ (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
+ bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
+@@ -2423,6 +2530,17 @@ drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
+ return 0;
+ }
+
++static int
++drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset)
++{
++ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
++
++ bo_gem->is_softpin = true;
++ bo->offset64 = offset;
++ bo->offset = offset;
++ return 0;
++}
++
+ drm_intel_bo *
+ drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
+ {
+@@ -2801,6 +2919,13 @@ _drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
+ return 1;
+ }
+
++ for (i = 0; i< bo_gem->softpin_target_count; i++) {
++ if (bo_gem->softpin_target[i] == target_bo)
++ return 1;
++ if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo))
++ return 1;
++ }
++
+ return 0;
+ }
+
+@@ -3257,6 +3382,11 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
+ ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
+ bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
+
++ gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
++ ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
++ if (ret == 0 && *gp.value > 0)
++ bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
++
+ if (bufmgr_gem->gen < 4) {
+ gp.param = I915_PARAM_NUM_FENCES_AVAIL;
+ gp.value = &bufmgr_gem->available_fences;
+diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
+index 59ebd18..86991c9 100644
+--- a/intel/intel_bufmgr_priv.h
++++ b/intel/intel_bufmgr_priv.h
+@@ -227,6 +227,13 @@ struct _drm_intel_bufmgr {
+ uint32_t * swizzle_mode);
+
+ /**
++ * Set the offset at which this buffer will be softpinned
++ * \param bo Buffer to set the softpin offset for
++ * \param offset Softpin offset
++ */
++ int (*bo_set_softpin_offset) (drm_intel_bo *bo, uint64_t offset);
++
++ /**
+ * Create a visible name for a buffer which can be used by other apps
+ *
+ * \param buf Buffer to create a name for
diff --git a/patch/softpin_kernel.patch b/patch/softpin_kernel.patch
new file mode 100644
index 0000000..d66a110
--- /dev/null
+++ b/patch/softpin_kernel.patch
@@ -0,0 +1,436 @@
+diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
+index c4bf9a1..d770267 100644
+--- a/drivers/gpu/drm/Kconfig
++++ b/drivers/gpu/drm/Kconfig
+@@ -12,6 +12,7 @@ menuconfig DRM
+ select I2C
+ select I2C_ALGOBIT
+ select DMA_SHARED_BUFFER
++ select INTERVAL_TREE
+ help
+ Kernel-level support for the Direct Rendering Infrastructure (DRI)
+ introduced in XFree86 4.0. If you say Y here, you need to select
+diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
+index 04de6fd..e3acd86 100644
+--- a/drivers/gpu/drm/drm_mm.c
++++ b/drivers/gpu/drm/drm_mm.c
+@@ -153,6 +153,10 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
+ INIT_LIST_HEAD(&node->hole_stack);
+ list_add(&node->node_list, &hole_node->node_list);
+
++ node->it.start = node->start;
++ node->it.last = node->start + size - 1;
++ interval_tree_insert(&node->it, &mm->interval_tree);
++
+ BUG_ON(node->start + node->size > adj_end);
+
+ node->hole_follows = 0;
+@@ -178,39 +182,53 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
+ */
+ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
+ {
+- struct drm_mm_node *hole;
+ u64 end = node->start + node->size;
+- u64 hole_start;
+- u64 hole_end;
+-
+- BUG_ON(node == NULL);
++ struct interval_tree_node *it;
++ struct drm_mm_node *hole;
++ u64 hole_start, hole_end;
+
+ /* Find the relevant hole to add our node to */
+- drm_mm_for_each_hole(hole, mm, hole_start, hole_end) {
+- if (hole_start > node->start || hole_end < end)
+- continue;
++ it = interval_tree_iter_first(&mm->interval_tree,
++ node->start, (u64)-1);
++ if (it == NULL) {
++ hole = list_last_entry(&mm->head_node.node_list,
++ struct drm_mm_node, node_list);
++ } else {
++ hole = container_of(it, typeof(*hole), it);
++ if (hole->start <= node->start)
++ return -ENOSPC;
++
++ hole = list_last_entry(&hole->node_list,
++ struct drm_mm_node, node_list);
++ }
+
+- node->mm = mm;
+- node->allocated = 1;
++ hole_start = drm_mm_hole_node_start(hole);
++ hole_end = drm_mm_hole_node_end(hole);
++ if (hole_start > node->start || hole_end < end)
++ return -ENOSPC;
+
+- INIT_LIST_HEAD(&node->hole_stack);
+- list_add(&node->node_list, &hole->node_list);
++ node->mm = mm;
++ node->allocated = 1;
+
+- if (node->start == hole_start) {
+- hole->hole_follows = 0;
+- list_del_init(&hole->hole_stack);
+- }
++ INIT_LIST_HEAD(&node->hole_stack);
++ list_add(&node->node_list, &hole->node_list);
+
+- node->hole_follows = 0;
+- if (end != hole_end) {
+- list_add(&node->hole_stack, &mm->hole_stack);
+- node->hole_follows = 1;
+- }
++ node->it.start = node->start;
++ node->it.last = node->start + node->size - 1;
++ interval_tree_insert(&node->it, &mm->interval_tree);
+
+- return 0;
++ if (node->start == hole_start) {
++ hole->hole_follows = 0;
++ list_del_init(&hole->hole_stack);
+ }
+
+- return -ENOSPC;
++ node->hole_follows = 0;
++ if (end != hole_end) {
++ list_add(&node->hole_stack, &mm->hole_stack);
++ node->hole_follows = 1;
++ }
++
++ return 0;
+ }
+ EXPORT_SYMBOL(drm_mm_reserve_node);
+
+@@ -300,6 +318,10 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
+ INIT_LIST_HEAD(&node->hole_stack);
+ list_add(&node->node_list, &hole_node->node_list);
+
++ node->it.start = node->start;
++ node->it.last = node->start + node->size - 1;
++ interval_tree_insert(&node->it, &mm->interval_tree);
++
+ BUG_ON(node->start < start);
+ BUG_ON(node->start < adj_start);
+ BUG_ON(node->start + node->size > adj_end);
+@@ -388,6 +410,7 @@ void drm_mm_remove_node(struct drm_mm_node *node)
+ } else
+ list_move(&prev_node->hole_stack, &mm->hole_stack);
+
++ interval_tree_remove(&node->it, &mm->interval_tree);
+ list_del(&node->node_list);
+ node->allocated = 0;
+ }
+@@ -756,6 +779,8 @@ void drm_mm_init(struct drm_mm * mm, u64 start, u64 size)
+ mm->head_node.size = start - mm->head_node.start;
+ list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
+
++ mm->interval_tree = RB_ROOT;
++
+ mm->color_adjust = NULL;
+ }
+ EXPORT_SYMBOL(drm_mm_init);
+diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
+index a81c766..52b8289 100644
+--- a/drivers/gpu/drm/i915/i915_dma.c
++++ b/drivers/gpu/drm/i915/i915_dma.c
+@@ -169,6 +169,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
+ case I915_PARAM_HAS_RESOURCE_STREAMER:
+ value = HAS_RESOURCE_STREAMER(dev);
+ break;
++ case I915_PARAM_HAS_EXEC_SOFTPIN:
++ value = 1;
++ break;
+ default:
+ DRM_DEBUG("Unknown parameter %d\n", param->param);
+ return -EINVAL;
+diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
+index 4c03449..244d6eb 100644
+--- a/drivers/gpu/drm/i915/i915_drv.h
++++ b/drivers/gpu/drm/i915/i915_drv.h
+@@ -2857,10 +2857,11 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
+ #define PIN_NONBLOCK (1<<1)
+ #define PIN_GLOBAL (1<<2)
+ #define PIN_OFFSET_BIAS (1<<3)
+-#define PIN_USER (1<<4)
+-#define PIN_UPDATE (1<<5)
+-#define PIN_ZONE_4G (1<<6)
+-#define PIN_HIGH (1<<7)
++#define PIN_OFFSET_FIXED (1<<4)
++#define PIN_USER (1<<5)
++#define PIN_UPDATE (1<<6)
++#define PIN_ZONE_4G (1<<7)
++#define PIN_HIGH (1<<8)
+ #define PIN_OFFSET_MASK (~4095)
+ int __must_check
+ i915_gem_object_pin(struct drm_i915_gem_object *obj,
+@@ -3206,6 +3207,7 @@ int __must_check i915_gem_evict_something(struct drm_device *dev,
+ unsigned long start,
+ unsigned long end,
+ unsigned flags);
++int __must_check i915_gem_evict_for_vma(struct i915_vma *vma, unsigned flags);
+ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle);
+
+ /* belongs in i915_gem_gtt.h */
+diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
+index cd27ecc..47c8524 100644
+--- a/drivers/gpu/drm/i915/i915_gem.c
++++ b/drivers/gpu/drm/i915/i915_gem.c
+@@ -3391,7 +3391,6 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
+ struct drm_device *dev = obj->base.dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ u32 fence_alignment, unfenced_alignment;
+- u32 search_flag, alloc_flag;
+ u64 start, end;
+ u64 size, fence_size;
+ struct i915_vma *vma;
+@@ -3474,32 +3473,54 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
+
+ if (IS_ERR(vma))
+ goto err_unpin;
++ if (flags & PIN_OFFSET_FIXED) {
++ uint64_t offset = flags & PIN_OFFSET_MASK;
++ if (offset & (alignment - 1) || offset + size > end) {
++ vma = ERR_PTR(-EINVAL);
++ goto err_free_vma;
++ }
++ vma->node.start = offset;
++ vma->node.size = size;
++ vma->node.color = obj->cache_level;
++ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
++ if (ret) {
++ ret = i915_gem_evict_for_vma(vma, flags);
++ if (ret == 0)
++ ret = drm_mm_reserve_node(&vm->mm, &vma->node);
++ }
++ if (ret) {
++ vma = ERR_PTR(ret);
++ goto err_free_vma;
++ }
++ } else {
++ u32 search_flag, alloc_flag;
+
+- if (flags & PIN_HIGH) {
+- search_flag = DRM_MM_SEARCH_BELOW;
+- alloc_flag = DRM_MM_CREATE_TOP;
+- } else {
+- search_flag = DRM_MM_SEARCH_DEFAULT;
+- alloc_flag = DRM_MM_CREATE_DEFAULT;
+- }
++ if (flags & PIN_HIGH) {
++ search_flag = DRM_MM_SEARCH_BELOW;
++ alloc_flag = DRM_MM_CREATE_TOP;
++ } else {
++ search_flag = DRM_MM_SEARCH_DEFAULT;
++ alloc_flag = DRM_MM_CREATE_DEFAULT;
++ }
+
+ search_free:
+- ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
+- size, alignment,
+- obj->cache_level,
+- start, end,
+- search_flag,
+- alloc_flag);
+- if (ret) {
+- ret = i915_gem_evict_something(dev, vm, size, alignment,
+- obj->cache_level,
+- start, end,
+- flags);
+- if (ret == 0)
+- goto search_free;
+-
+- goto err_free_vma;
+- }
++ ret = drm_mm_insert_node_in_range_generic(&vm->mm, &vma->node,
++ size, alignment,
++ obj->cache_level,
++ start, end,
++ search_flag,
++ alloc_flag);
++ if (ret) {
++ ret = i915_gem_evict_something(dev, vm, size, alignment,
++ obj->cache_level,
++ start, end,
++ flags);
++ if (ret == 0)
++ goto search_free;
++
++ goto err_free_vma;
++ }
++ }
+ if (WARN_ON(!i915_gem_valid_gtt_space(vma, obj->cache_level))) {
+ ret = -EINVAL;
+ goto err_remove_node;
+@@ -4089,6 +4110,10 @@ i915_vma_misplaced(struct i915_vma *vma, uint32_t alignment, uint64_t flags)
+ vma->node.start < (flags & PIN_OFFSET_MASK))
+ return true;
+
++ if (flags & PIN_OFFSET_FIXED &&
++ vma->node.start != (flags & PIN_OFFSET_MASK))
++ return true;
++
+ return false;
+ }
+
+diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
+index d71a133..60450a9 100644
+--- a/drivers/gpu/drm/i915/i915_gem_evict.c
++++ b/drivers/gpu/drm/i915/i915_gem_evict.c
+@@ -199,6 +199,67 @@ found:
+ return ret;
+ }
+
++int
++i915_gem_evict_for_vma(struct i915_vma *target, unsigned flags)
++{
++ struct list_head eviction_list;
++ struct interval_tree_node *it;
++ u64 end = target->node.start + target->node.size;
++ struct drm_mm_node *node;
++ struct i915_vma *vma, *next;
++ int ret;
++
++ trace_i915_gem_evict_vma(target, flags);
++
++ it = interval_tree_iter_first(&target->vm->mm.interval_tree,
++ target->node.start, end -1);
++ if (it == NULL)
++ return 0;
++
++ INIT_LIST_HEAD(&eviction_list);
++ node = container_of(it, typeof(*node), it);
++ list_for_each_entry_from(node,
++ &target->vm->mm.head_node.node_list,
++ node_list) {
++ if (node->start >= end)
++ break;
++
++ vma = container_of(node, typeof(*vma), node);
++ if (flags & PIN_NONBLOCK &&
++ (vma->pin_count || vma->obj->active)) {
++ ret = -ENOSPC;
++ break;
++ }
++
++ if (vma->exec_entry &&
++ vma->exec_entry->flags & EXEC_OBJECT_PINNED) {
++ /* Overlapping pinned objects in the same batch */
++ ret = -EINVAL;
++ break;
++ }
++
++ if (vma->pin_count) {
++ /* We may need to evict a buffer in the same batch */
++ ret = vma->exec_entry ? -ENOSPC : -EBUSY;
++ break;
++ }
++
++ list_add(&vma->exec_list, &eviction_list);
++ drm_gem_object_reference(&vma->obj->base);
++ }
++
++ ret = 0;
++ list_for_each_entry_safe(vma, next, &eviction_list, exec_list) {
++ struct drm_i915_gem_object *obj = vma->obj;
++ list_del_init(&vma->exec_list);
++ if (ret == 0)
++ ret = i915_vma_unbind(vma);
++ drm_gem_object_unreference(&obj->base);
++ }
++
++ return ret;
++}
++
+ /**
+ * i915_gem_evict_vm - Evict all idle vmas from a vm
+ * @vm: Address space to cleanse
+diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+index a4c243c..aae68cc 100644
+--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+@@ -601,6 +601,8 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
+ flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
+ if ((flags & PIN_MAPPABLE) == 0)
+ flags |= PIN_HIGH;
++ if (entry->flags & EXEC_OBJECT_PINNED)
++ flags |= entry->offset | PIN_OFFSET_FIXED;
+ }
+
+ ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
+@@ -670,6 +672,10 @@ eb_vma_misplaced(struct i915_vma *vma)
+ vma->node.start & (entry->alignment - 1))
+ return true;
+
++ if (entry->flags & EXEC_OBJECT_PINNED &&
++ vma->node.start != entry->offset)
++ return true;
++
+ if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
+ vma->node.start < BATCH_OFFSET_BIAS)
+ return true;
+@@ -1017,18 +1023,13 @@ static struct intel_context *
+ i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
+ struct intel_engine_cs *ring, const u32 ctx_id)
+ {
+- struct intel_context *ctx = NULL;
+- struct i915_ctx_hang_stats *hs;
+-
+- if (ring->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
+- return ERR_PTR(-EINVAL);
++ struct intel_context *ctx;
+
+ ctx = i915_gem_context_get(file->driver_priv, ctx_id);
+ if (IS_ERR(ctx))
+ return ctx;
+
+- hs = &ctx->hang_stats;
+- if (hs->banned) {
++ if (ctx->hang_stats.banned) {
+ DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
+ return ERR_PTR(-EIO);
+ }
+@@ -1317,7 +1318,8 @@ eb_get_batch(struct eb_vmas *eb)
+ * Note that actual hangs have only been observed on gen7, but for
+ * paranoia do it everywhere.
+ */
+- vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
++ if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
++ vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;
+
+ return vma->obj;
+ }
+diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h
+index 52b2d40..912943a 100644
+--- a/drivers/gpu/drm/i915/i915_trace.h
++++ b/drivers/gpu/drm/i915/i915_trace.h
+@@ -459,6 +459,29 @@ TRACE_EVENT(i915_gem_evict_vm,
+ TP_printk("dev=%d, vm=%p", __entry->dev, __entry->vm)
+ );
+
++TRACE_EVENT(i915_gem_evict_vma,
++ TP_PROTO(struct i915_vma *vma, unsigned flags),
++ TP_ARGS(vma, flags),
++
++ TP_STRUCT__entry(
++ __field(u32, dev)
++ __field(struct i915_address_space *, vm)
++ __field(u64, start)
++ __field(u64, size)
++ __field(unsigned, flags)
++ ),
++
++ TP_fast_assign(
++ __entry->dev = vma->vm->dev->primary->index;
++ __entry->vm = vma->vm;
++ __entry->start = vma->node.start;
++ __entry->size = vma->node.size;
++ __entry->flags = flags;
++ ),
++
++ TP_printk("dev=%d, vm=%p, start=%llx size=%llx, flags=%x", __entry->dev, __entry->vm, (long long)__entry->start, (long long)__entry->size, __entry->flags)
++);
++
+ TRACE_EVENT(i915_gem_ring_sync_to,
+ TP_PROTO(struct drm_i915_gem_request *to_req,
+ struct intel_engine_cs *from,
--
1.9.1