[PATCH 09/14] drm/i915/cmdparser: Improve hash function

Chris Wilson chris at chris-wilson.co.uk
Sat Aug 6 13:34:03 UTC 2016


The existing code's hashfunction is very suboptimal (most 3D commands
use the same bucket degrading the hash to a long list). The code even
acknowledge that the issue was known and the fix simple:

/*
 * If we attempt to generate a perfect hash, we should be able to look at bits
 * 31:29 of a command from a batch buffer and use the full mask for that
 * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
 */

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/drm_irq.c                  |  64 +++++++++++---
 drivers/gpu/drm/drm_mm.c                   |  37 ++++-----
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 116 +++++++++++++++-----------
 drivers/gpu/drm/i915/i915_debugfs.c        |   2 +-
 drivers/gpu/drm/i915/i915_drv.c            |   3 +
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   5 +-
 drivers/gpu/drm/i915/i915_gem_gtt.c        |   4 +
 drivers/gpu/drm/i915/intel_ringbuffer.c    | 129 +++++++++++++----------------
 drivers/gpu/drm/i915/intel_ringbuffer.h    |   2 +-
 include/drm/drm_mm.h                       |  16 ++--
 include/uapi/drm/i915_drm.h                |   4 +-
 11 files changed, 215 insertions(+), 167 deletions(-)

diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 77f357b2c386..0206d63ba472 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1172,9 +1172,9 @@ static void drm_vblank_put(struct drm_device *dev, unsigned int pipe)
 	if (atomic_dec_and_test(&vblank->refcount)) {
 		if (drm_vblank_offdelay == 0)
 			return;
-		else if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0)
+		else if (drm_vblank_offdelay < 0)
 			vblank_disable_fn((unsigned long)vblank);
-		else
+		else if (!dev->vblank_disable_immediate)
 			mod_timer(&vblank->disable_timer,
 				  jiffies + ((drm_vblank_offdelay * HZ)/1000));
 	}
@@ -1614,6 +1614,17 @@ err_put:
 	return ret;
 }
 
+static bool drm_wait_vblank_is_query(union drm_wait_vblank *vblwait)
+{
+	if (vblwait->request.sequence)
+		return false;
+
+	return _DRM_VBLANK_RELATIVE ==
+		(vblwait->request.type & (_DRM_VBLANK_TYPES_MASK |
+					  _DRM_VBLANK_EVENT |
+					  _DRM_VBLANK_NEXTONMISS));
+}
+
 /*
  * Wait for VBLANK.
  *
@@ -1663,9 +1674,24 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 
 	vblank = &dev->vblank[pipe];
 
+	/* If the counter is currently enabled and accurate, short-circuit queries
+	 * to return the cached timestamp of the last vblank.
+	 */
+	if (dev->vblank_disable_immediate &&
+	    drm_wait_vblank_is_query(vblwait) &&
+	    vblank->enabled) {
+		struct timeval now;
+
+		vblwait->reply.sequence =
+			drm_vblank_count_and_time(dev, pipe, &now);
+		vblwait->reply.tval_sec = now.tv_sec;
+		vblwait->reply.tval_usec = now.tv_usec;
+		return 0;
+	}
+
 	ret = drm_vblank_get(dev, pipe);
 	if (ret) {
-		DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
+		DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret);
 		return ret;
 	}
 	seq = drm_vblank_count(dev, pipe);
@@ -1693,13 +1719,15 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		return drm_queue_vblank_event(dev, pipe, vblwait, file_priv);
 	}
 
-	DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
-		  vblwait->request.sequence, pipe);
-	DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
-		    (((drm_vblank_count(dev, pipe) -
-		       vblwait->request.sequence) <= (1 << 23)) ||
-		     !vblank->enabled ||
-		     !dev->irq_enabled));
+	if (vblwait->request.sequence != seq) {
+		DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
+			  vblwait->request.sequence, pipe);
+		DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
+			    (((drm_vblank_count(dev, pipe) -
+			       vblwait->request.sequence) <= (1 << 23)) ||
+			     !vblank->enabled ||
+			     !dev->irq_enabled));
+	}
 
 	if (ret != -EINTR) {
 		struct timeval now;
@@ -1708,10 +1736,10 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
 		vblwait->reply.tval_sec = now.tv_sec;
 		vblwait->reply.tval_usec = now.tv_usec;
 
-		DRM_DEBUG("returning %d to client\n",
-			  vblwait->reply.sequence);
+		DRM_DEBUG("crtc %d returning %d to client\n",
+			  pipe, vblwait->reply.sequence);
 	} else {
-		DRM_DEBUG("vblank wait interrupted by signal\n");
+		DRM_DEBUG("crtc %d vblank wait interrupted by signal\n", pipe);
 	}
 
 done:
@@ -1789,6 +1817,16 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe)
 	wake_up(&vblank->queue);
 	drm_handle_vblank_events(dev, pipe);
 
+	/* With instant-off, we defer disabling the interrupt until after
+	 * we finish processing the following vblank. The disable has to
+	 * be last (after drm_handle_vblank_events) so that the timestamp
+	 * is always accurate.
+	 */
+	if (dev->vblank_disable_immediate &&
+	    drm_vblank_offdelay > 0 &&
+	    !atomic_read(&vblank->refcount))
+		vblank_disable_fn((unsigned long)vblank);
+
 	spin_unlock_irqrestore(&dev->event_lock, irqflags);
 
 	return true;
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a13215a525e6..9f2c86cc662e 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -93,12 +93,12 @@
 
 static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 						u64 size,
-						unsigned alignment,
+						u64 alignment,
 						unsigned long color,
 						enum drm_mm_search_flags flags);
 static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 						u64 size,
-						unsigned alignment,
+						u64 alignment,
 						unsigned long color,
 						u64 start,
 						u64 end,
@@ -172,7 +172,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 
 static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 				 struct drm_mm_node *node,
-				 u64 size, unsigned alignment,
+				 u64 size, u64 alignment,
 				 unsigned long color,
 				 enum drm_mm_allocator_flags flags)
 {
@@ -191,10 +191,9 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 		adj_start = adj_end - size;
 
 	if (alignment) {
-		u64 tmp = adj_start;
-		unsigned rem;
+		u64 rem;
 
-		rem = do_div(tmp, alignment);
+		div64_u64_rem(adj_start, alignment, &rem);
 		if (rem) {
 			if (flags & DRM_MM_CREATE_TOP)
 				adj_start -= rem;
@@ -322,7 +321,7 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
-			       u64 size, unsigned alignment,
+			       u64 size, u64 alignment,
 			       unsigned long color,
 			       enum drm_mm_search_flags sflags,
 			       enum drm_mm_allocator_flags aflags)
@@ -344,7 +343,7 @@ EXPORT_SYMBOL(drm_mm_insert_node_generic);
 
 static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 				       struct drm_mm_node *node,
-				       u64 size, unsigned alignment,
+				       u64 size, u64 alignment,
 				       unsigned long color,
 				       u64 start, u64 end,
 				       enum drm_mm_allocator_flags flags)
@@ -369,10 +368,9 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 		adj_start = adj_end - size;
 
 	if (alignment) {
-		u64 tmp = adj_start;
-		unsigned rem;
+		u64 rem;
 
-		rem = do_div(tmp, alignment);
+		div64_u64_rem(adj_start, alignment, &rem);
 		if (rem) {
 			if (flags & DRM_MM_CREATE_TOP)
 				adj_start -= rem;
@@ -426,7 +424,7 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
-					u64 size, unsigned alignment,
+					u64 size, u64 alignment,
 					unsigned long color,
 					u64 start, u64 end,
 					enum drm_mm_search_flags sflags,
@@ -493,16 +491,15 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 }
 EXPORT_SYMBOL(drm_mm_remove_node);
 
-static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
+static int check_free_hole(u64 start, u64 end, u64 size, u64 alignment)
 {
 	if (end - start < size)
 		return 0;
 
 	if (alignment) {
-		u64 tmp = start;
-		unsigned rem;
+		u64 rem;
 
-		rem = do_div(tmp, alignment);
+		div64_u64_rem(start, alignment, &rem);
 		if (rem)
 			start += alignment - rem;
 	}
@@ -512,7 +509,7 @@ static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
 
 static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 						      u64 size,
-						      unsigned alignment,
+						      u64 alignment,
 						      unsigned long color,
 						      enum drm_mm_search_flags flags)
 {
@@ -554,7 +551,7 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 
 static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
 							u64 size,
-							unsigned alignment,
+							u64 alignment,
 							unsigned long color,
 							u64 start,
 							u64 end,
@@ -672,7 +669,7 @@ EXPORT_SYMBOL(drm_mm_replace_node);
  */
 void drm_mm_init_scan(struct drm_mm *mm,
 		      u64 size,
-		      unsigned alignment,
+		      u64 alignment,
 		      unsigned long color)
 {
 	mm->scan_color = color;
@@ -705,7 +702,7 @@ EXPORT_SYMBOL(drm_mm_init_scan);
  */
 void drm_mm_init_scan_with_range(struct drm_mm *mm,
 				 u64 size,
-				 unsigned alignment,
+				 u64 alignment,
 				 unsigned long color,
 				 u64 start,
 				 u64 end)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b35f31483887..40b1887d8962 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -86,24 +86,24 @@
  * general bitmasking mechanism.
  */
 
-#define STD_MI_OPCODE_MASK  0xFF800000
-#define STD_3D_OPCODE_MASK  0xFFFF0000
-#define STD_2D_OPCODE_MASK  0xFFC00000
-#define STD_MFX_OPCODE_MASK 0xFFFF0000
+#define STD_MI_OPCODE_SHIFT  (32 - 9)
+#define STD_3D_OPCODE_SHIFT  (32 - 16)
+#define STD_2D_OPCODE_SHIFT  (32 - 10)
+#define STD_MFX_OPCODE_SHIFT (32 - 16)
 
 #define CMD(op, opm, f, lm, fl, ...)				\
 	{							\
 		.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0),	\
-		.cmd = { (op), (opm) },				\
+		.cmd = { (op), ~0u << (opm) },			\
 		.length = { (lm) },				\
 		__VA_ARGS__					\
 	}
 
 /* Convenience macros to compress the tables */
-#define SMI STD_MI_OPCODE_MASK
-#define S3D STD_3D_OPCODE_MASK
-#define S2D STD_2D_OPCODE_MASK
-#define SMFX STD_MFX_OPCODE_MASK
+#define SMI STD_MI_OPCODE_SHIFT
+#define S3D STD_3D_OPCODE_SHIFT
+#define S2D STD_2D_OPCODE_SHIFT
+#define SMFX STD_MFX_OPCODE_SHIFT
 #define F true
 #define S CMD_DESC_SKIP
 #define R CMD_DESC_REJECT
@@ -350,6 +350,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
 	CMD(  MI_LOAD_SCAN_LINES_EXCL,          SMI,   !F,  0x3F,   R  ),
 };
 
+static const struct drm_i915_cmd_descriptor noop_desc =
+	CMD(MI_NOOP, SMI, F, 1, S);
+
 #undef CMD
 #undef SMI
 #undef S3D
@@ -696,12 +699,26 @@ struct cmd_node {
  * non-opcode bits being set. But if we don't include those bits, some 3D
  * commands may hash to the same bucket due to not including opcode bits that
  * make the command unique. For now, we will risk hashing to the same bucket.
- *
- * If we attempt to generate a perfect hash, we should be able to look at bits
- * 31:29 of a command from a batch buffer and use the full mask for that
- * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
  */
-#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+static inline u32 cmd_header_key(u32 x)
+{
+	u32 shift;
+
+	switch (x >> INSTR_CLIENT_SHIFT) {
+	default:
+	case INSTR_MI_CLIENT:
+		shift = STD_MI_OPCODE_SHIFT;
+		break;
+	case INSTR_RC_CLIENT:
+		shift = STD_3D_OPCODE_SHIFT;
+		break;
+	case INSTR_BC_CLIENT:
+		shift = STD_2D_OPCODE_SHIFT;
+		break;
+	}
+
+	return x >> shift;
+}
 
 static int init_hash_table(struct intel_engine_cs *engine,
 			   const struct drm_i915_cmd_table *cmd_tables,
@@ -725,7 +742,7 @@ static int init_hash_table(struct intel_engine_cs *engine,
 
 			desc_node->desc = desc;
 			hash_add(engine->cmd_hash, &desc_node->node,
-				 desc->cmd.value & CMD_HASH_MASK);
+				 cmd_header_key(desc->cmd.value));
 		}
 	}
 
@@ -864,12 +881,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
 	struct cmd_node *desc_node;
 
 	hash_for_each_possible(engine->cmd_hash, desc_node, node,
-			       cmd_header & CMD_HASH_MASK) {
+			       cmd_header_key(cmd_header)) {
 		const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
-		u32 masked_cmd = desc->cmd.mask & cmd_header;
-		u32 masked_value = desc->cmd.value & desc->cmd.mask;
-
-		if (masked_cmd == masked_value)
+		if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
 			return desc;
 	}
 
@@ -887,11 +901,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
 static const struct drm_i915_cmd_descriptor*
 find_cmd(struct intel_engine_cs *engine,
 	 u32 cmd_header,
+	 const struct drm_i915_cmd_descriptor *desc,
 	 struct drm_i915_cmd_descriptor *default_desc)
 {
-	const struct drm_i915_cmd_descriptor *desc;
 	u32 mask;
 
+	if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
+		return desc;
+
 	desc = find_cmd_in_table(engine, cmd_header);
 	if (desc)
 		return desc;
@@ -900,44 +917,45 @@ find_cmd(struct intel_engine_cs *engine,
 	if (!mask)
 		return NULL;
 
-	BUG_ON(!default_desc);
-	default_desc->flags = CMD_DESC_SKIP;
+	default_desc->cmd.value = cmd_header;
+	default_desc->cmd.mask = 0xffff0000;
 	default_desc->length.mask = mask;
-
+	default_desc->flags = CMD_DESC_SKIP;
 	return default_desc;
 }
 
 static const struct drm_i915_reg_descriptor *
-find_reg(const struct drm_i915_reg_descriptor *table,
-	 int count, u32 addr)
+__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
 {
-	int i;
-
-	for (i = 0; i < count; i++) {
-		if (i915_mmio_reg_offset(table[i].addr) == addr)
-			return &table[i];
+	int start = 0, end = count;
+	while (start < end) {
+		int mid = start + (end - start) / 2;
+		int ret = addr - i915_mmio_reg_offset(table[mid].addr);
+		if (ret < 0)
+			end = mid;
+		else if (ret > 0)
+			start = mid + 1;
+		else
+			return &table[mid];
 	}
-
 	return NULL;
 }
 
 static const struct drm_i915_reg_descriptor *
-find_reg_in_tables(const struct drm_i915_reg_table *tables,
-		   int count, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
 {
-	int i;
-	const struct drm_i915_reg_table *table;
-	const struct drm_i915_reg_descriptor *reg;
+	const struct drm_i915_reg_table *table = engine->reg_tables;
+	int count = engine->reg_table_count;
 
-	for (i = 0; i < count; i++) {
-		table = &tables[i];
+	do {
 		if (!table->master || is_master) {
-			reg = find_reg(table->regs, table->num_regs,
-				       addr);
+			const struct drm_i915_reg_descriptor *reg;
+
+			reg = __find_reg(table->regs, table->num_regs, addr);
 			if (reg != NULL)
 				return reg;
 		}
-	}
+	} while (table++, --count);
 
 	return NULL;
 }
@@ -1005,6 +1023,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 		      const bool is_master,
 		      bool *oacontrol_set)
 {
+	if (desc->flags & CMD_DESC_SKIP)
+		return true;
+
 	if (desc->flags & CMD_DESC_REJECT) {
 		DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
 		return false;
@@ -1029,10 +1050,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
 		     offset += step) {
 			const u32 reg_addr = cmd[offset] & desc->reg.mask;
 			const struct drm_i915_reg_descriptor *reg =
-				find_reg_in_tables(engine->reg_tables,
-						   engine->reg_table_count,
-						   is_master,
-						   reg_addr);
+				find_reg(engine, is_master, reg_addr);
 
 			if (!reg) {
 				DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
@@ -1154,7 +1172,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 			    bool is_master)
 {
 	u32 *cmd, *batch_end;
-	struct drm_i915_cmd_descriptor default_desc = { 0 };
+	struct drm_i915_cmd_descriptor default_desc = noop_desc;
+	const struct drm_i915_cmd_descriptor *desc = &default_desc;
 	bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
 	bool needs_clflush_after = false;
 	int ret = 0;
@@ -1174,13 +1193,12 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 	 */
 	batch_end = cmd + (batch_len / sizeof(*batch_end));
 	while (cmd < batch_end) {
-		const struct drm_i915_cmd_descriptor *desc;
 		u32 length;
 
 		if (*cmd == MI_BATCH_BUFFER_END)
 			break;
 
-		desc = find_cmd(engine, *cmd, &default_desc);
+		desc = find_cmd(engine, *cmd, desc, &default_desc);
 		if (!desc) {
 			DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
 					 *cmd);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 498cdd2605fc..59955c6150f6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -395,7 +395,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
 		if (ctx->engine[n].state)
 			per_file_stats(0, ctx->engine[n].state->obj, data);
 		if (ctx->engine[n].ring)
-			per_file_stats(0, ctx->engine[n].ring->obj, data);
+			per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1c97904eff00..042ad94d2ec7 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -354,6 +354,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
 	case I915_PARAM_MIN_EU_IN_POOL:
 		value = INTEL_INFO(dev)->min_eu_in_pool;
 		break;
+	case I915_PARAM_HAS_EXEC_BATCH_FIRST:
+		value = 1;
+		break;
 	default:
 		DRM_DEBUG("Unknown parameter %d\n", param->param);
 		return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d03a2853a29..733f8697df19 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -427,7 +427,10 @@ ht_head(const struct i915_gem_context *ctx, u32 handle)
 
 static int eb_batch_index(const struct i915_execbuffer *eb)
 {
-	return eb->args->buffer_count - 1;
+	if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+		return 0;
+	else
+		return eb->args->buffer_count - 1;
 }
 
 static int eb_select_context(struct i915_execbuffer *eb)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ceb25d34e6f9..ff5f5287cf23 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3725,6 +3725,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
 	void __iomem *ptr;
 
 	lockdep_assert_held(&vma->vm->dev->struct_mutex);
+
+	/* Access through the GTT requires the device to be awake. */
+	assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
 	if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
 		return IO_ERR_PTR(-ENODEV);
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index bbbf128701ff..fce860edcec8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1941,55 +1941,31 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
 
 int intel_ring_pin(struct intel_ring *ring)
 {
-	struct drm_i915_private *dev_priv = ring->engine->i915;
-	struct drm_i915_gem_object *obj = ring->obj;
-	struct i915_vma *vma;
 	/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
-	unsigned flags = PIN_OFFSET_BIAS | 4096;
+	unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
 	void *addr;
 	int ret;
 
-	if (HAS_LLC(dev_priv) && !obj->stolen) {
-		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
-
-		ret = i915_gem_object_set_to_cpu_domain(obj, true);
-		if (ret)
-			goto err_unpin;
-
-		addr = i915_gem_object_pin_map(obj);
-		if (IS_ERR(addr)) {
-			ret = PTR_ERR(addr);
-			goto err_unpin;
-		}
-	} else {
-		vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
-					       flags | PIN_MAPPABLE);
-		if (IS_ERR(vma))
-			return PTR_ERR(vma);
+	GEM_BUG_ON(ring->vaddr);
 
-		ret = i915_gem_object_set_to_gtt_domain(obj, true);
-		if (ret)
-			goto err_unpin;
+	if (ring->vmap)
+		flags |= PIN_MAPPABLE;
 
-		/* Access through the GTT requires the device to be awake. */
-		assert_rpm_wakelock_held(dev_priv);
+	ret = i915_vma_pin(ring->vma, 0, PAGE_SIZE, flags);
+	if (unlikely(ret))
+		return ret;
 
-		addr = (void __force *)i915_vma_pin_iomap(vma);
-		if (IS_ERR(addr)) {
-			ret = PTR_ERR(addr);
-			goto err_unpin;
-		}
+	if (ring->vmap)
+		addr = i915_gem_object_pin_map(ring->vma->obj);
+	else
+		addr = (void __force *)i915_vma_pin_iomap(ring->vma);
+	if (IS_ERR(addr)) {
+		i915_vma_unpin(ring->vma);
+		return PTR_ERR(addr);
 	}
 
 	ring->vaddr = addr;
-	ring->vma = vma;
 	return 0;
-
-err_unpin:
-	i915_vma_unpin(vma);
-	return ret;
 }
 
 void intel_ring_unpin(struct intel_ring *ring)
@@ -1997,60 +1973,68 @@ void intel_ring_unpin(struct intel_ring *ring)
 	GEM_BUG_ON(!ring->vma);
 	GEM_BUG_ON(!ring->vaddr);
 
-	if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
-		i915_gem_object_unpin_map(ring->obj);
+	if (ring->vmap)
+		i915_gem_object_unpin_map(ring->vma->obj);
 	else
 		i915_vma_unpin_iomap(ring->vma);
 	ring->vaddr = NULL;
 
 	i915_vma_unpin(ring->vma);
-	ring->vma = NULL;
 }
 
-static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
-{
-	__i915_gem_object_release_unless_active(ring->obj);
-	ring->obj = NULL;
-}
-
-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
-				      struct intel_ring *ring)
+static struct i915_vma *
+intel_ring_create_vma(struct drm_device *dev, int size)
 {
 	struct drm_i915_gem_object *obj;
+	struct i915_vma *vma;
+	int ret;
 
-	obj = NULL;
+	obj = ERR_PTR(-ENODEV);
 	if (!HAS_LLC(dev))
-		obj = i915_gem_object_create_stolen(dev, ring->size);
-	if (obj == NULL)
-		obj = i915_gem_object_create(dev, ring->size);
+		obj = i915_gem_object_create_stolen(dev, size);
 	if (IS_ERR(obj))
-		return PTR_ERR(obj);
+		obj = i915_gem_object_create(dev, size);
+	if (IS_ERR(obj))
+		return ERR_CAST(obj);
 
 	/* mark ring buffers as read-only from GPU side by default */
 	obj->gt_ro = 1;
 
-	ring->obj = obj;
+	if (HAS_LLC(dev) && !obj->stolen)
+		ret = i915_gem_object_set_to_cpu_domain(obj, true);
+	else
+		ret = i915_gem_object_set_to_gtt_domain(obj, true);
+	if (ret) {
+		vma = ERR_PTR(ret);
+		goto err;
+	}
+
+	vma = i915_gem_obj_lookup_or_create_vma(obj,
+						&to_i915(dev)->ggtt.base,
+						NULL);
+	if (IS_ERR(vma))
+		goto err;
+
+	return vma;
 
-	return 0;
+err:
+	i915_gem_object_put(obj);
+	return vma;
 }
 
 struct intel_ring *
 intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 {
 	struct intel_ring *ring;
-	int ret;
+	struct i915_vma *vma;
 
 	GEM_BUG_ON(!is_power_of_2(size));
 
 	ring = kzalloc(sizeof(*ring), GFP_KERNEL);
-	if (ring == NULL) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
-				 engine->name);
+	if (!ring)
 		return ERR_PTR(-ENOMEM);
-	}
 
 	ring->engine = engine;
-	list_add(&ring->link, &engine->buffers);
 
 	INIT_LIST_HEAD(&ring->request_list);
 
@@ -2066,22 +2050,23 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	ring->last_retired_head = -1;
 	intel_ring_update_space(ring);
 
-	ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring);
-	if (ret) {
-		DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
-				 engine->name, ret);
-		list_del(&ring->link);
+	vma = intel_ring_create_vma(&engine->i915->drm, size);
+	if (IS_ERR(vma)) {
 		kfree(ring);
-		return ERR_PTR(ret);
+		return ERR_CAST(vma);
 	}
+	ring->vma = vma;
+	if (HAS_LLC(engine->i915) && !vma->obj->stolen)
+		ring->vmap = true;
 
+	list_add(&ring->link, &engine->buffers);
 	return ring;
 }
 
 void
 intel_ring_free(struct intel_ring *ring)
 {
-	intel_destroy_ringbuffer_obj(ring);
+	__i915_gem_object_release_unless_active(ring->vma->obj);
 	list_del(&ring->link);
 	kfree(ring);
 }
@@ -2169,7 +2154,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 		ret = PTR_ERR(ring);
 		goto error;
 	}
-	engine->buffer = ring;
 
 	if (I915_NEED_GFX_HWS(dev_priv)) {
 		ret = init_status_page(engine);
@@ -2184,11 +2168,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
 
 	ret = intel_ring_pin(ring);
 	if (ret) {
-		DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
-				engine->name, ret);
-		intel_destroy_ringbuffer_obj(ring);
+		intel_ring_free(ring);
 		goto error;
 	}
+	engine->buffer = ring;
 
 	return 0;
 
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7e7cdc302b22..270c8459b73a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -83,7 +83,6 @@ struct intel_engine_hangcheck {
 };
 
 struct intel_ring {
-	struct drm_i915_gem_object *obj;
 	struct i915_vma *vma;
 	void *vaddr;
 
@@ -97,6 +96,7 @@ struct intel_ring {
 	int space;
 	int size;
 	int effective_size;
+	bool vmap;
 
 	/** We track the position of the requests in the ring buffer, and
 	 * when each is retired we increment last_retired_head as the GPU
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 205ddcf6d55d..ee092061b404 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -86,12 +86,12 @@ struct drm_mm {
 	struct rb_root interval_tree;
 
 	unsigned int scan_check_range : 1;
-	unsigned scan_alignment;
+	unsigned int scanned_blocks;
 	unsigned long scan_color;
+	u64 scan_alignment;
 	u64 scan_size;
 	u64 scan_hit_start;
 	u64 scan_hit_end;
-	unsigned scanned_blocks;
 	u64 scan_start;
 	u64 scan_end;
 	struct drm_mm_node *prev_scanned_node;
@@ -223,7 +223,7 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node);
 int drm_mm_insert_node_generic(struct drm_mm *mm,
 			       struct drm_mm_node *node,
 			       u64 size,
-			       unsigned alignment,
+			       u64 alignment,
 			       unsigned long color,
 			       enum drm_mm_search_flags sflags,
 			       enum drm_mm_allocator_flags aflags);
@@ -246,7 +246,7 @@ int drm_mm_insert_node_generic(struct drm_mm *mm,
 static inline int drm_mm_insert_node(struct drm_mm *mm,
 				     struct drm_mm_node *node,
 				     u64 size,
-				     unsigned alignment,
+				     u64 alignment,
 				     enum drm_mm_search_flags flags)
 {
 	return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags,
@@ -256,7 +256,7 @@ static inline int drm_mm_insert_node(struct drm_mm *mm,
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
 					struct drm_mm_node *node,
 					u64 size,
-					unsigned alignment,
+					u64 alignment,
 					unsigned long color,
 					u64 start,
 					u64 end,
@@ -283,7 +283,7 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
 static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
 					      struct drm_mm_node *node,
 					      u64 size,
-					      unsigned alignment,
+					      u64 alignment,
 					      u64 start,
 					      u64 end,
 					      enum drm_mm_search_flags flags)
@@ -309,11 +309,11 @@ drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last);
 
 void drm_mm_init_scan(struct drm_mm *mm,
 		      u64 size,
-		      unsigned alignment,
+		      u64 alignment,
 		      unsigned long color);
 void drm_mm_init_scan_with_range(struct drm_mm *mm,
 				 u64 size,
-				 unsigned alignment,
+				 u64 alignment,
 				 unsigned long color,
 				 u64 start,
 				 u64 end);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 452629de7a57..25d5bdb5af5f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -387,6 +387,7 @@ typedef struct drm_i915_irq_wait {
 #define I915_PARAM_HAS_EXEC_SOFTPIN	 37
 #define I915_PARAM_HAS_POOLED_EU	 38
 #define I915_PARAM_MIN_EU_IN_POOL	 39
+#define I915_PARAM_HAS_EXEC_BATCH_FIRST	 40
 
 typedef struct drm_i915_getparam {
 	__s32 param;
@@ -819,7 +820,8 @@ struct drm_i915_gem_execbuffer2 {
  */
 #define I915_EXEC_RESOURCE_STREAMER     (1<<15)
 
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+#define I915_EXEC_BATCH_FIRST		(1<<16)
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
 
 #define I915_EXEC_CONTEXT_ID_MASK	(0xffffffff)
 #define i915_execbuffer2_set_context_id(eb2, context) \
-- 
2.8.1



More information about the Intel-gfx-trybot mailing list