[PATCH 34/36] drm/i915: Move cmd parser pinning to execbuffer
Maarten Lankhorst
maarten.lankhorst at linux.intel.com
Fri Sep 18 10:34:03 UTC 2020
We need to get rid of allocations in the cmd parser because it needs
to be called from a signaling context. As a first step, move all
pinning to execbuf, where we already hold all the locks.
Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
---
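A condensed sketch of the lifetime split, for review convenience. It
paraphrases the hunks below (error unwinding and the trampoline
release are omitted), so it is not meant to compile as-is:

	/* 1) execbuf, process context, all object locks held: pin up front. */
	pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_FORCE_WB);

	pw->batch_map = ERR_PTR(-ENODEV);
	if (needs_clflush && i915_has_memcpy_from_wc())
		pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
	if (IS_ERR(pw->batch_map)) {
		/* No WC mapping: the parser falls back to per-page kmap_atomic(). */
		err = i915_gem_object_pin_pages(batch);
		pw->batch_map = NULL;
	}

	/* 2) dma_fence_work callback (signaling context): only consume the
	 * pre-pinned pointers; no locks taken, no allocations made. */
	err = intel_engine_cmd_parser(pw->engine, pw->batch,
				      pw->batch_offset, pw->batch_length,
				      pw->shadow, pw->trampoline,
				      pw->shadow_map, pw->batch_map);

	/* 3) work release: drop whichever pin was taken in step 1. */
	if (pw->batch_map)
		i915_gem_object_unpin_map(pw->batch->obj);
	else
		i915_gem_object_unpin_pages(pw->batch->obj);
	i915_gem_object_unpin_map(pw->shadow->obj);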
 .../gpu/drm/i915/gem/i915_gem_execbuffer.c | 41 ++++++++++++++-
 drivers/gpu/drm/i915/i915_cmd_parser.c     | 51 ++++++-------------
 drivers/gpu/drm/i915/i915_drv.h            |  4 +-
 drivers/gpu/drm/i915/i915_memcpy.c         |  2 +-
 drivers/gpu/drm/i915/i915_memcpy.h         |  2 +-
5 files changed, 60 insertions(+), 40 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 37c692beacac..fd1f0da9862b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -27,6 +27,7 @@
#include "i915_sw_fence_work.h"
#include "i915_trace.h"
#include "i915_user_extensions.h"
+#include "i915_memcpy.h"
struct eb_vma {
struct i915_vma *vma;
@@ -2361,6 +2362,8 @@ struct eb_parse_work {
struct i915_vma *trampoline;
unsigned int batch_offset;
unsigned int batch_length;
+ const void *batch_map;
+ void *shadow_map;
};
static int __eb_parse(struct dma_fence_work *work)
@@ -2372,13 +2375,22 @@ static int __eb_parse(struct dma_fence_work *work)
pw->batch_offset,
pw->batch_length,
pw->shadow,
- pw->trampoline);
+ pw->trampoline,
+ pw->shadow_map,
+ pw->batch_map);
}
static void __eb_parse_release(struct dma_fence_work *work)
{
struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+ if (pw->batch_map)
+ i915_gem_object_unpin_map(pw->batch->obj);
+ else
+ i915_gem_object_unpin_pages(pw->batch->obj);
+
+ i915_gem_object_unpin_map(pw->shadow->obj);
+
if (pw->trampoline)
i915_active_release(&pw->trampoline->active);
i915_active_release(&pw->shadow->active);
@@ -2428,6 +2440,8 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
struct i915_vma *trampoline)
{
struct eb_parse_work *pw;
+ struct drm_i915_gem_object *batch = eb->batch->vma->obj;
+ bool needs_clflush;
int err;
pw = kzalloc(sizeof(*pw), GFP_KERNEL);
@@ -2448,6 +2462,26 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
goto err_shadow;
}
+ pw->shadow_map = i915_gem_object_pin_map(shadow->obj, I915_MAP_FORCE_WB);
+ if (IS_ERR(pw->shadow_map)) {
+ err = PTR_ERR(pw->shadow_map);
+ goto err_trampoline;
+ }
+
+ needs_clflush =
+ !(batch->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
+
+ pw->batch_map = ERR_PTR(-ENODEV);
+ if (needs_clflush && i915_has_memcpy_from_wc())
+ pw->batch_map = i915_gem_object_pin_map(batch, I915_MAP_WC);
+
+ if (IS_ERR(pw->batch_map)) {
+ err = i915_gem_object_pin_pages(batch);
+ if (err)
+ goto err_unmap_shadow;
+ pw->batch_map = NULL;
+ }
+
dma_fence_work_init(&pw->base, &eb_parse_ops);
pw->engine = eb->engine;
@@ -2491,6 +2525,11 @@ static int eb_parse_pipeline(struct i915_execbuffer *eb,
dma_fence_work_commit_imm(&pw->base);
return err;
+err_unmap_shadow:
+ i915_gem_object_unpin_map(shadow->obj);
+err_trampoline:
+ if (trampoline)
+ i915_active_release(&trampoline->active);
err_shadow:
i915_active_release(&shadow->active);
err_batch:
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5ac4a999f05a..e48c8b631abe 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1136,36 +1136,15 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
- u32 offset, u32 length)
+ u32 offset, u32 length, void *dst, const void *src)
{
- bool needs_clflush;
- void *dst, *src;
- int ret;
-
- dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
- if (IS_ERR(dst))
- return dst;
-
- ret = i915_gem_object_pin_pages(src_obj);
- if (ret) {
- i915_gem_object_unpin_map(dst_obj);
- return ERR_PTR(ret);
- }
-
- needs_clflush =
+ bool needs_clflush =
!(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
- src = ERR_PTR(-ENODEV);
- if (needs_clflush && i915_has_memcpy_from_wc()) {
- src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
- if (!IS_ERR(src)) {
- i915_unaligned_memcpy_from_wc(dst,
- src + offset,
- length);
- i915_gem_object_unpin_map(src_obj);
- }
- }
- if (IS_ERR(src)) {
+ if (src) {
+ GEM_BUG_ON(!needs_clflush);
+ i915_unaligned_memcpy_from_wc(dst, src + offset, length);
+ } else {
void *ptr;
int x, n;
@@ -1185,12 +1164,12 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
x = offset_in_page(offset);
for (n = offset >> PAGE_SHIFT; length; n++) {
int len = min_t(int, length, PAGE_SIZE - x);
+ void *map = kmap_atomic(i915_gem_object_get_page(src_obj, n));
- src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
if (needs_clflush)
- drm_clflush_virt_range(src + x, len);
- memcpy(ptr, src + x, len);
- kunmap_atomic(src);
+ drm_clflush_virt_range(map + x, len);
+ memcpy(ptr, map + x, len);
+ kunmap_atomic(map);
ptr += len;
length -= len;
@@ -1198,8 +1177,6 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
}
}
- i915_gem_object_unpin_pages(src_obj);
-
/* dst_obj is returned with vmap pinned */
return dst;
}
@@ -1417,7 +1394,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 batch_offset,
u32 batch_length,
struct i915_vma *shadow,
- bool trampoline)
+ bool trampoline,
+ void *shadow_map,
+ const void *batch_map)
{
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
@@ -1432,7 +1411,8 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
batch->size));
GEM_BUG_ON(!batch_length);
- cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
+ cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length,
+ shadow_map, batch_map);
if (IS_ERR(cmd)) {
DRM_DEBUG("CMD: Failed to copy batch\n");
return PTR_ERR(cmd);
@@ -1552,7 +1532,6 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
if (!IS_ERR_OR_NULL(jump_whitelist))
kfree(jump_whitelist);
- i915_gem_object_unpin_map(shadow->obj);
return ret;
}
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 3ab22bdc88ca..b4a80fa4f8e2 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1951,7 +1951,9 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 batch_offset,
u32 batch_length,
struct i915_vma *shadow,
- bool trampoline);
+ bool trampoline,
+ void *shadow_map,
+ const void *batch_map);
#define I915_CMD_PARSER_TRAMPOLINE_SIZE 8
/* intel_device_info.c */
diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c
index 7b3b83bd5ab8..1b021a4902de 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.c
+++ b/drivers/gpu/drm/i915/i915_memcpy.c
@@ -135,7 +135,7 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len)
* accepts that its arguments may not be aligned, but are valid for the
* potential 16-byte read past the end.
*/
-void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len)
+void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
unsigned long addr;
diff --git a/drivers/gpu/drm/i915/i915_memcpy.h b/drivers/gpu/drm/i915/i915_memcpy.h
index e36d30edd987..3df063a3293b 100644
--- a/drivers/gpu/drm/i915/i915_memcpy.h
+++ b/drivers/gpu/drm/i915/i915_memcpy.h
@@ -13,7 +13,7 @@ struct drm_i915_private;
void i915_memcpy_init_early(struct drm_i915_private *i915);
bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len);
-void i915_unaligned_memcpy_from_wc(void *dst, void *src, unsigned long len);
+void i915_unaligned_memcpy_from_wc(void *dst, const void *src, unsigned long len);
/* The movntdqa instructions used for memcpy-from-wc require 16-byte alignment,
* as well as SSE4.1 support. i915_memcpy_from_wc() will report if it cannot
--
2.28.0