[PATCH 09/14] drm/i915/cmdparser: Improve hash function
Chris Wilson
chris at chris-wilson.co.uk
Sat Aug 6 13:34:03 UTC 2016
The existing code's hash function is very suboptimal (most 3D commands
use the same bucket, degrading the hash to a long list). The code even
acknowledges that the issue was known and the fix simple:
/*
* If we attempt to generate a perfect hash, we should be able to look at bits
* 31:29 of a command from a batch buffer and use the full mask for that
* client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
*/
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/drm_irq.c | 64 +++++++++++---
drivers/gpu/drm/drm_mm.c | 37 ++++-----
drivers/gpu/drm/i915/i915_cmd_parser.c | 116 +++++++++++++++-----------
drivers/gpu/drm/i915/i915_debugfs.c | 2 +-
drivers/gpu/drm/i915/i915_drv.c | 3 +
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 5 +-
drivers/gpu/drm/i915/i915_gem_gtt.c | 4 +
drivers/gpu/drm/i915/intel_ringbuffer.c | 129 +++++++++++++----------------
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
include/drm/drm_mm.h | 16 ++--
include/uapi/drm/i915_drm.h | 4 +-
11 files changed, 215 insertions(+), 167 deletions(-)
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index 77f357b2c386..0206d63ba472 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1172,9 +1172,9 @@ static void drm_vblank_put(struct drm_device *dev, unsigned int pipe)
if (atomic_dec_and_test(&vblank->refcount)) {
if (drm_vblank_offdelay == 0)
return;
- else if (dev->vblank_disable_immediate || drm_vblank_offdelay < 0)
+ else if (drm_vblank_offdelay < 0)
vblank_disable_fn((unsigned long)vblank);
- else
+ else if (!dev->vblank_disable_immediate)
mod_timer(&vblank->disable_timer,
jiffies + ((drm_vblank_offdelay * HZ)/1000));
}
@@ -1614,6 +1614,17 @@ err_put:
return ret;
}
+static bool drm_wait_vblank_is_query(union drm_wait_vblank *vblwait)
+{
+ if (vblwait->request.sequence)
+ return false;
+
+ return _DRM_VBLANK_RELATIVE ==
+ (vblwait->request.type & (_DRM_VBLANK_TYPES_MASK |
+ _DRM_VBLANK_EVENT |
+ _DRM_VBLANK_NEXTONMISS));
+}
+
/*
* Wait for VBLANK.
*
@@ -1663,9 +1674,24 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
vblank = &dev->vblank[pipe];
+ /* If the counter is currently enabled and accurate, short-circuit queries
+ * to return the cached timestamp of the last vblank.
+ */
+ if (dev->vblank_disable_immediate &&
+ drm_wait_vblank_is_query(vblwait) &&
+ vblank->enabled) {
+ struct timeval now;
+
+ vblwait->reply.sequence =
+ drm_vblank_count_and_time(dev, pipe, &now);
+ vblwait->reply.tval_sec = now.tv_sec;
+ vblwait->reply.tval_usec = now.tv_usec;
+ return 0;
+ }
+
ret = drm_vblank_get(dev, pipe);
if (ret) {
- DRM_DEBUG("failed to acquire vblank counter, %d\n", ret);
+ DRM_DEBUG("crtc %d failed to acquire vblank counter, %d\n", pipe, ret);
return ret;
}
seq = drm_vblank_count(dev, pipe);
@@ -1693,13 +1719,15 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
return drm_queue_vblank_event(dev, pipe, vblwait, file_priv);
}
- DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
- vblwait->request.sequence, pipe);
- DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
- (((drm_vblank_count(dev, pipe) -
- vblwait->request.sequence) <= (1 << 23)) ||
- !vblank->enabled ||
- !dev->irq_enabled));
+ if (vblwait->request.sequence != seq) {
+ DRM_DEBUG("waiting on vblank count %d, crtc %u\n",
+ vblwait->request.sequence, pipe);
+ DRM_WAIT_ON(ret, vblank->queue, 3 * HZ,
+ (((drm_vblank_count(dev, pipe) -
+ vblwait->request.sequence) <= (1 << 23)) ||
+ !vblank->enabled ||
+ !dev->irq_enabled));
+ }
if (ret != -EINTR) {
struct timeval now;
@@ -1708,10 +1736,10 @@ int drm_wait_vblank(struct drm_device *dev, void *data,
vblwait->reply.tval_sec = now.tv_sec;
vblwait->reply.tval_usec = now.tv_usec;
- DRM_DEBUG("returning %d to client\n",
- vblwait->reply.sequence);
+ DRM_DEBUG("crtc %d returning %d to client\n",
+ pipe, vblwait->reply.sequence);
} else {
- DRM_DEBUG("vblank wait interrupted by signal\n");
+ DRM_DEBUG("crtc %d vblank wait interrupted by signal\n", pipe);
}
done:
@@ -1789,6 +1817,16 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe)
wake_up(&vblank->queue);
drm_handle_vblank_events(dev, pipe);
+ /* With instant-off, we defer disabling the interrupt until after
+ * we finish processing the following vblank. The disable has to
+ * be last (after drm_handle_vblank_events) so that the timestamp
+ * is always accurate.
+ */
+ if (dev->vblank_disable_immediate &&
+ drm_vblank_offdelay > 0 &&
+ !atomic_read(&vblank->refcount))
+ vblank_disable_fn((unsigned long)vblank);
+
spin_unlock_irqrestore(&dev->event_lock, irqflags);
return true;
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index a13215a525e6..9f2c86cc662e 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -93,12 +93,12 @@
static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
enum drm_mm_search_flags flags);
static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
u64 start,
u64 end,
@@ -172,7 +172,7 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
struct drm_mm_node *node,
- u64 size, unsigned alignment,
+ u64 size, u64 alignment,
unsigned long color,
enum drm_mm_allocator_flags flags)
{
@@ -191,10 +191,9 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
adj_start = adj_end - size;
if (alignment) {
- u64 tmp = adj_start;
- unsigned rem;
+ u64 rem;
- rem = do_div(tmp, alignment);
+ div64_u64_rem(adj_start, alignment, &rem);
if (rem) {
if (flags & DRM_MM_CREATE_TOP)
adj_start -= rem;
@@ -322,7 +321,7 @@ EXPORT_SYMBOL(drm_mm_reserve_node);
* 0 on success, -ENOSPC if there's no suitable hole.
*/
int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
- u64 size, unsigned alignment,
+ u64 size, u64 alignment,
unsigned long color,
enum drm_mm_search_flags sflags,
enum drm_mm_allocator_flags aflags)
@@ -344,7 +343,7 @@ EXPORT_SYMBOL(drm_mm_insert_node_generic);
static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
struct drm_mm_node *node,
- u64 size, unsigned alignment,
+ u64 size, u64 alignment,
unsigned long color,
u64 start, u64 end,
enum drm_mm_allocator_flags flags)
@@ -369,10 +368,9 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
adj_start = adj_end - size;
if (alignment) {
- u64 tmp = adj_start;
- unsigned rem;
+ u64 rem;
- rem = do_div(tmp, alignment);
+ div64_u64_rem(adj_start, alignment, &rem);
if (rem) {
if (flags & DRM_MM_CREATE_TOP)
adj_start -= rem;
@@ -426,7 +424,7 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
* 0 on success, -ENOSPC if there's no suitable hole.
*/
int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
- u64 size, unsigned alignment,
+ u64 size, u64 alignment,
unsigned long color,
u64 start, u64 end,
enum drm_mm_search_flags sflags,
@@ -493,16 +491,15 @@ void drm_mm_remove_node(struct drm_mm_node *node)
}
EXPORT_SYMBOL(drm_mm_remove_node);
-static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
+static int check_free_hole(u64 start, u64 end, u64 size, u64 alignment)
{
if (end - start < size)
return 0;
if (alignment) {
- u64 tmp = start;
- unsigned rem;
+ u64 rem;
- rem = do_div(tmp, alignment);
+ div64_u64_rem(start, alignment, &rem);
if (rem)
start += alignment - rem;
}
@@ -512,7 +509,7 @@ static int check_free_hole(u64 start, u64 end, u64 size, unsigned alignment)
static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
enum drm_mm_search_flags flags)
{
@@ -554,7 +551,7 @@ static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
u64 start,
u64 end,
@@ -672,7 +669,7 @@ EXPORT_SYMBOL(drm_mm_replace_node);
*/
void drm_mm_init_scan(struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color)
{
mm->scan_color = color;
@@ -705,7 +702,7 @@ EXPORT_SYMBOL(drm_mm_init_scan);
*/
void drm_mm_init_scan_with_range(struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
u64 start,
u64 end)
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index b35f31483887..40b1887d8962 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -86,24 +86,24 @@
* general bitmasking mechanism.
*/
-#define STD_MI_OPCODE_MASK 0xFF800000
-#define STD_3D_OPCODE_MASK 0xFFFF0000
-#define STD_2D_OPCODE_MASK 0xFFC00000
-#define STD_MFX_OPCODE_MASK 0xFFFF0000
+#define STD_MI_OPCODE_SHIFT (32 - 9)
+#define STD_3D_OPCODE_SHIFT (32 - 16)
+#define STD_2D_OPCODE_SHIFT (32 - 10)
+#define STD_MFX_OPCODE_SHIFT (32 - 16)
#define CMD(op, opm, f, lm, fl, ...) \
{ \
.flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \
- .cmd = { (op), (opm) }, \
+ .cmd = { (op), ~0u << (opm) }, \
.length = { (lm) }, \
__VA_ARGS__ \
}
/* Convenience macros to compress the tables */
-#define SMI STD_MI_OPCODE_MASK
-#define S3D STD_3D_OPCODE_MASK
-#define S2D STD_2D_OPCODE_MASK
-#define SMFX STD_MFX_OPCODE_MASK
+#define SMI STD_MI_OPCODE_SHIFT
+#define S3D STD_3D_OPCODE_SHIFT
+#define S2D STD_2D_OPCODE_SHIFT
+#define SMFX STD_MFX_OPCODE_SHIFT
#define F true
#define S CMD_DESC_SKIP
#define R CMD_DESC_REJECT
@@ -350,6 +350,9 @@ static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = {
CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ),
};
+static const struct drm_i915_cmd_descriptor noop_desc =
+ CMD(MI_NOOP, SMI, F, 1, S);
+
#undef CMD
#undef SMI
#undef S3D
@@ -696,12 +699,26 @@ struct cmd_node {
* non-opcode bits being set. But if we don't include those bits, some 3D
* commands may hash to the same bucket due to not including opcode bits that
* make the command unique. For now, we will risk hashing to the same bucket.
- *
- * If we attempt to generate a perfect hash, we should be able to look at bits
- * 31:29 of a command from a batch buffer and use the full mask for that
- * client. The existing INSTR_CLIENT_MASK/SHIFT defines can be used for this.
*/
-#define CMD_HASH_MASK STD_MI_OPCODE_MASK
+static inline u32 cmd_header_key(u32 x)
+{
+ u32 shift;
+
+ switch (x >> INSTR_CLIENT_SHIFT) {
+ default:
+ case INSTR_MI_CLIENT:
+ shift = STD_MI_OPCODE_SHIFT;
+ break;
+ case INSTR_RC_CLIENT:
+ shift = STD_3D_OPCODE_SHIFT;
+ break;
+ case INSTR_BC_CLIENT:
+ shift = STD_2D_OPCODE_SHIFT;
+ break;
+ }
+
+ return x >> shift;
+}
static int init_hash_table(struct intel_engine_cs *engine,
const struct drm_i915_cmd_table *cmd_tables,
@@ -725,7 +742,7 @@ static int init_hash_table(struct intel_engine_cs *engine,
desc_node->desc = desc;
hash_add(engine->cmd_hash, &desc_node->node,
- desc->cmd.value & CMD_HASH_MASK);
+ cmd_header_key(desc->cmd.value));
}
}
@@ -864,12 +881,9 @@ find_cmd_in_table(struct intel_engine_cs *engine,
struct cmd_node *desc_node;
hash_for_each_possible(engine->cmd_hash, desc_node, node,
- cmd_header & CMD_HASH_MASK) {
+ cmd_header_key(cmd_header)) {
const struct drm_i915_cmd_descriptor *desc = desc_node->desc;
- u32 masked_cmd = desc->cmd.mask & cmd_header;
- u32 masked_value = desc->cmd.value & desc->cmd.mask;
-
- if (masked_cmd == masked_value)
+ if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
return desc;
}
@@ -887,11 +901,14 @@ find_cmd_in_table(struct intel_engine_cs *engine,
static const struct drm_i915_cmd_descriptor*
find_cmd(struct intel_engine_cs *engine,
u32 cmd_header,
+ const struct drm_i915_cmd_descriptor *desc,
struct drm_i915_cmd_descriptor *default_desc)
{
- const struct drm_i915_cmd_descriptor *desc;
u32 mask;
+ if (((cmd_header ^ desc->cmd.value) & desc->cmd.mask) == 0)
+ return desc;
+
desc = find_cmd_in_table(engine, cmd_header);
if (desc)
return desc;
@@ -900,44 +917,45 @@ find_cmd(struct intel_engine_cs *engine,
if (!mask)
return NULL;
- BUG_ON(!default_desc);
- default_desc->flags = CMD_DESC_SKIP;
+ default_desc->cmd.value = cmd_header;
+ default_desc->cmd.mask = 0xffff0000;
default_desc->length.mask = mask;
-
+ default_desc->flags = CMD_DESC_SKIP;
return default_desc;
}
static const struct drm_i915_reg_descriptor *
-find_reg(const struct drm_i915_reg_descriptor *table,
- int count, u32 addr)
+__find_reg(const struct drm_i915_reg_descriptor *table, int count, u32 addr)
{
- int i;
-
- for (i = 0; i < count; i++) {
- if (i915_mmio_reg_offset(table[i].addr) == addr)
- return &table[i];
+ int start = 0, end = count;
+ while (start < end) {
+ int mid = start + (end - start) / 2;
+ int ret = addr - i915_mmio_reg_offset(table[mid].addr);
+ if (ret < 0)
+ end = mid;
+ else if (ret > 0)
+ start = mid + 1;
+ else
+ return &table[mid];
}
-
return NULL;
}
static const struct drm_i915_reg_descriptor *
-find_reg_in_tables(const struct drm_i915_reg_table *tables,
- int count, bool is_master, u32 addr)
+find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr)
{
- int i;
- const struct drm_i915_reg_table *table;
- const struct drm_i915_reg_descriptor *reg;
+ const struct drm_i915_reg_table *table = engine->reg_tables;
+ int count = engine->reg_table_count;
- for (i = 0; i < count; i++) {
- table = &tables[i];
+ do {
if (!table->master || is_master) {
- reg = find_reg(table->regs, table->num_regs,
- addr);
+ const struct drm_i915_reg_descriptor *reg;
+
+ reg = __find_reg(table->regs, table->num_regs, addr);
if (reg != NULL)
return reg;
}
- }
+ } while (table++, --count);
return NULL;
}
@@ -1005,6 +1023,9 @@ static bool check_cmd(const struct intel_engine_cs *engine,
const bool is_master,
bool *oacontrol_set)
{
+ if (desc->flags & CMD_DESC_SKIP)
+ return true;
+
if (desc->flags & CMD_DESC_REJECT) {
DRM_DEBUG_DRIVER("CMD: Rejected command: 0x%08X\n", *cmd);
return false;
@@ -1029,10 +1050,7 @@ static bool check_cmd(const struct intel_engine_cs *engine,
offset += step) {
const u32 reg_addr = cmd[offset] & desc->reg.mask;
const struct drm_i915_reg_descriptor *reg =
- find_reg_in_tables(engine->reg_tables,
- engine->reg_table_count,
- is_master,
- reg_addr);
+ find_reg(engine, is_master, reg_addr);
if (!reg) {
DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (exec_id=%d)\n",
@@ -1154,7 +1172,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
bool is_master)
{
u32 *cmd, *batch_end;
- struct drm_i915_cmd_descriptor default_desc = { 0 };
+ struct drm_i915_cmd_descriptor default_desc = noop_desc;
+ const struct drm_i915_cmd_descriptor *desc = &default_desc;
bool oacontrol_set = false; /* OACONTROL tracking. See check_cmd() */
bool needs_clflush_after = false;
int ret = 0;
@@ -1174,13 +1193,12 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
*/
batch_end = cmd + (batch_len / sizeof(*batch_end));
while (cmd < batch_end) {
- const struct drm_i915_cmd_descriptor *desc;
u32 length;
if (*cmd == MI_BATCH_BUFFER_END)
break;
- desc = find_cmd(engine, *cmd, &default_desc);
+ desc = find_cmd(engine, *cmd, desc, &default_desc);
if (!desc) {
DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n",
*cmd);
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 498cdd2605fc..59955c6150f6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -395,7 +395,7 @@ static int per_file_ctx_stats(int id, void *ptr, void *data)
if (ctx->engine[n].state)
per_file_stats(0, ctx->engine[n].state->obj, data);
if (ctx->engine[n].ring)
- per_file_stats(0, ctx->engine[n].ring->obj, data);
+ per_file_stats(0, ctx->engine[n].ring->vma->obj, data);
}
return 0;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1c97904eff00..042ad94d2ec7 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -354,6 +354,9 @@ static int i915_getparam(struct drm_device *dev, void *data,
case I915_PARAM_MIN_EU_IN_POOL:
value = INTEL_INFO(dev)->min_eu_in_pool;
break;
+ case I915_PARAM_HAS_EXEC_BATCH_FIRST:
+ value = 1;
+ break;
default:
DRM_DEBUG("Unknown parameter %d\n", param->param);
return -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 3d03a2853a29..733f8697df19 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -427,7 +427,10 @@ ht_head(const struct i915_gem_context *ctx, u32 handle)
static int eb_batch_index(const struct i915_execbuffer *eb)
{
- return eb->args->buffer_count - 1;
+ if (eb->args->flags & I915_EXEC_BATCH_FIRST)
+ return 0;
+ else
+ return eb->args->buffer_count - 1;
}
static int eb_select_context(struct i915_execbuffer *eb)
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index ceb25d34e6f9..ff5f5287cf23 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -3725,6 +3725,10 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma)
void __iomem *ptr;
lockdep_assert_held(&vma->vm->dev->struct_mutex);
+
+ /* Access through the GTT requires the device to be awake. */
+ assert_rpm_wakelock_held(to_i915(vma->vm->dev));
+
if (WARN_ON(!i915_vma_is_map_and_fenceable(vma)))
return IO_ERR_PTR(-ENODEV);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index bbbf128701ff..fce860edcec8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -1941,55 +1941,31 @@ static int init_phys_status_page(struct intel_engine_cs *engine)
int intel_ring_pin(struct intel_ring *ring)
{
- struct drm_i915_private *dev_priv = ring->engine->i915;
- struct drm_i915_gem_object *obj = ring->obj;
- struct i915_vma *vma;
/* Ring wraparound at offset 0 sometimes hangs. No idea why. */
- unsigned flags = PIN_OFFSET_BIAS | 4096;
+ unsigned int flags = PIN_GLOBAL | PIN_OFFSET_BIAS | 4096;
void *addr;
int ret;
- if (HAS_LLC(dev_priv) && !obj->stolen) {
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE, flags);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
-
- ret = i915_gem_object_set_to_cpu_domain(obj, true);
- if (ret)
- goto err_unpin;
-
- addr = i915_gem_object_pin_map(obj);
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_unpin;
- }
- } else {
- vma = i915_gem_object_ggtt_pin(obj, NULL, 0, PAGE_SIZE,
- flags | PIN_MAPPABLE);
- if (IS_ERR(vma))
- return PTR_ERR(vma);
+ GEM_BUG_ON(ring->vaddr);
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- goto err_unpin;
+ if (ring->vmap)
+ flags |= PIN_MAPPABLE;
- /* Access through the GTT requires the device to be awake. */
- assert_rpm_wakelock_held(dev_priv);
+ ret = i915_vma_pin(ring->vma, 0, PAGE_SIZE, flags);
+ if (unlikely(ret))
+ return ret;
- addr = (void __force *)i915_vma_pin_iomap(vma);
- if (IS_ERR(addr)) {
- ret = PTR_ERR(addr);
- goto err_unpin;
- }
+ if (ring->vmap)
+ addr = i915_gem_object_pin_map(ring->vma->obj);
+ else
+ addr = (void __force *)i915_vma_pin_iomap(ring->vma);
+ if (IS_ERR(addr)) {
+ i915_vma_unpin(ring->vma);
+ return PTR_ERR(addr);
}
ring->vaddr = addr;
- ring->vma = vma;
return 0;
-
-err_unpin:
- i915_vma_unpin(vma);
- return ret;
}
void intel_ring_unpin(struct intel_ring *ring)
@@ -1997,60 +1973,68 @@ void intel_ring_unpin(struct intel_ring *ring)
GEM_BUG_ON(!ring->vma);
GEM_BUG_ON(!ring->vaddr);
- if (HAS_LLC(ring->engine->i915) && !ring->obj->stolen)
- i915_gem_object_unpin_map(ring->obj);
+ if (ring->vmap)
+ i915_gem_object_unpin_map(ring->vma->obj);
else
i915_vma_unpin_iomap(ring->vma);
ring->vaddr = NULL;
i915_vma_unpin(ring->vma);
- ring->vma = NULL;
}
-static void intel_destroy_ringbuffer_obj(struct intel_ring *ring)
-{
- __i915_gem_object_release_unless_active(ring->obj);
- ring->obj = NULL;
-}
-
-static int intel_alloc_ringbuffer_obj(struct drm_device *dev,
- struct intel_ring *ring)
+static struct i915_vma *
+intel_ring_create_vma(struct drm_device *dev, int size)
{
struct drm_i915_gem_object *obj;
+ struct i915_vma *vma;
+ int ret;
- obj = NULL;
+ obj = ERR_PTR(-ENODEV);
if (!HAS_LLC(dev))
- obj = i915_gem_object_create_stolen(dev, ring->size);
- if (obj == NULL)
- obj = i915_gem_object_create(dev, ring->size);
+ obj = i915_gem_object_create_stolen(dev, size);
if (IS_ERR(obj))
- return PTR_ERR(obj);
+ obj = i915_gem_object_create(dev, size);
+ if (IS_ERR(obj))
+ return ERR_CAST(obj);
/* mark ring buffers as read-only from GPU side by default */
obj->gt_ro = 1;
- ring->obj = obj;
+ if (HAS_LLC(dev) && !obj->stolen)
+ ret = i915_gem_object_set_to_cpu_domain(obj, true);
+ else
+ ret = i915_gem_object_set_to_gtt_domain(obj, true);
+ if (ret) {
+ vma = ERR_PTR(ret);
+ goto err;
+ }
+
+ vma = i915_gem_obj_lookup_or_create_vma(obj,
+ &to_i915(dev)->ggtt.base,
+ NULL);
+ if (IS_ERR(vma))
+ goto err;
+
+ return vma;
- return 0;
+err:
+ i915_gem_object_put(obj);
+ return vma;
}
struct intel_ring *
intel_engine_create_ring(struct intel_engine_cs *engine, int size)
{
struct intel_ring *ring;
- int ret;
+ struct i915_vma *vma;
GEM_BUG_ON(!is_power_of_2(size));
ring = kzalloc(sizeof(*ring), GFP_KERNEL);
- if (ring == NULL) {
- DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s\n",
- engine->name);
+ if (!ring)
return ERR_PTR(-ENOMEM);
- }
ring->engine = engine;
- list_add(&ring->link, &engine->buffers);
INIT_LIST_HEAD(&ring->request_list);
@@ -2066,22 +2050,23 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
ring->last_retired_head = -1;
intel_ring_update_space(ring);
- ret = intel_alloc_ringbuffer_obj(&engine->i915->drm, ring);
- if (ret) {
- DRM_DEBUG_DRIVER("Failed to allocate ringbuffer %s: %d\n",
- engine->name, ret);
- list_del(&ring->link);
+ vma = intel_ring_create_vma(&engine->i915->drm, size);
+ if (IS_ERR(vma)) {
kfree(ring);
- return ERR_PTR(ret);
+ return ERR_CAST(vma);
}
+ ring->vma = vma;
+ if (HAS_LLC(engine->i915) && !vma->obj->stolen)
+ ring->vmap = true;
+ list_add(&ring->link, &engine->buffers);
return ring;
}
void
intel_ring_free(struct intel_ring *ring)
{
- intel_destroy_ringbuffer_obj(ring);
+ __i915_gem_object_release_unless_active(ring->vma->obj);
list_del(&ring->link);
kfree(ring);
}
@@ -2169,7 +2154,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
ret = PTR_ERR(ring);
goto error;
}
- engine->buffer = ring;
if (I915_NEED_GFX_HWS(dev_priv)) {
ret = init_status_page(engine);
@@ -2184,11 +2168,10 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
ret = intel_ring_pin(ring);
if (ret) {
- DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
- engine->name, ret);
- intel_destroy_ringbuffer_obj(ring);
+ intel_ring_free(ring);
goto error;
}
+ engine->buffer = ring;
return 0;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 7e7cdc302b22..270c8459b73a 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -83,7 +83,6 @@ struct intel_engine_hangcheck {
};
struct intel_ring {
- struct drm_i915_gem_object *obj;
struct i915_vma *vma;
void *vaddr;
@@ -97,6 +96,7 @@ struct intel_ring {
int space;
int size;
int effective_size;
+ bool vmap;
/** We track the position of the requests in the ring buffer, and
* when each is retired we increment last_retired_head as the GPU
diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h
index 205ddcf6d55d..ee092061b404 100644
--- a/include/drm/drm_mm.h
+++ b/include/drm/drm_mm.h
@@ -86,12 +86,12 @@ struct drm_mm {
struct rb_root interval_tree;
unsigned int scan_check_range : 1;
- unsigned scan_alignment;
+ unsigned int scanned_blocks;
unsigned long scan_color;
+ u64 scan_alignment;
u64 scan_size;
u64 scan_hit_start;
u64 scan_hit_end;
- unsigned scanned_blocks;
u64 scan_start;
u64 scan_end;
struct drm_mm_node *prev_scanned_node;
@@ -223,7 +223,7 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node);
int drm_mm_insert_node_generic(struct drm_mm *mm,
struct drm_mm_node *node,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
enum drm_mm_search_flags sflags,
enum drm_mm_allocator_flags aflags);
@@ -246,7 +246,7 @@ int drm_mm_insert_node_generic(struct drm_mm *mm,
static inline int drm_mm_insert_node(struct drm_mm *mm,
struct drm_mm_node *node,
u64 size,
- unsigned alignment,
+ u64 alignment,
enum drm_mm_search_flags flags)
{
return drm_mm_insert_node_generic(mm, node, size, alignment, 0, flags,
@@ -256,7 +256,7 @@ static inline int drm_mm_insert_node(struct drm_mm *mm,
int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
struct drm_mm_node *node,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
u64 start,
u64 end,
@@ -283,7 +283,7 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm,
static inline int drm_mm_insert_node_in_range(struct drm_mm *mm,
struct drm_mm_node *node,
u64 size,
- unsigned alignment,
+ u64 alignment,
u64 start,
u64 end,
enum drm_mm_search_flags flags)
@@ -309,11 +309,11 @@ drm_mm_interval_next(struct drm_mm_node *node, u64 start, u64 last);
void drm_mm_init_scan(struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color);
void drm_mm_init_scan_with_range(struct drm_mm *mm,
u64 size,
- unsigned alignment,
+ u64 alignment,
unsigned long color,
u64 start,
u64 end);
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 452629de7a57..25d5bdb5af5f 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -387,6 +387,7 @@ typedef struct drm_i915_irq_wait {
#define I915_PARAM_HAS_EXEC_SOFTPIN 37
#define I915_PARAM_HAS_POOLED_EU 38
#define I915_PARAM_MIN_EU_IN_POOL 39
+#define I915_PARAM_HAS_EXEC_BATCH_FIRST 40
typedef struct drm_i915_getparam {
__s32 param;
@@ -819,7 +820,8 @@ struct drm_i915_gem_execbuffer2 {
*/
#define I915_EXEC_RESOURCE_STREAMER (1<<15)
-#define __I915_EXEC_UNKNOWN_FLAGS -(I915_EXEC_RESOURCE_STREAMER<<1)
+#define I915_EXEC_BATCH_FIRST (1<<16)
+#define __I915_EXEC_UNKNOWN_FLAGS (-(I915_EXEC_BATCH_FIRST<<1))
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
--
2.8.1
More information about the Intel-gfx-trybot
mailing list