[PATCH 7/7] pipe-parse
Chris Wilson
chris at chris-wilson.co.uk
Sat Dec 7 10:37:17 UTC 2019
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 82 +++++++++++++++++--
drivers/gpu/drm/i915/i915_cmd_parser.c | 33 +++-----
2 files changed, 85 insertions(+), 30 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 5f4e460701ca..6efca2bcf46a 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -25,6 +25,7 @@
#include "i915_gem_clflush.h"
#include "i915_gem_context.h"
#include "i915_gem_ioctls.h"
+#include "i915_sw_fence_work.h"
#include "i915_trace.h"
enum {
@@ -1225,10 +1226,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
if (unlikely(!cache->rq)) {
int err;
- /* If we need to copy for the cmdparser, we will stall anyway */
- if (eb_use_cmdparser(eb))
- return ERR_PTR(-EWOULDBLOCK);
-
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
@@ -1967,6 +1964,77 @@ shadow_batch_pin(struct drm_i915_gem_object *obj,
return vma;
}
+struct eb_parse_work {
+ struct dma_fence_work base;
+ struct intel_engine_cs *engine;
+ struct i915_vma *batch;
+ struct i915_vma *shadow;
+ unsigned int batch_offset;
+ unsigned int batch_length;
+};
+
+static int __eb_parse(struct dma_fence_work *work)
+{
+ struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+
+ return intel_engine_cmd_parser(pw->engine,
+ pw->batch,
+ pw->batch_offset,
+ pw->batch_length,
+ pw->shadow);
+}
+
+static const struct dma_fence_work_ops eb_parse_ops = {
+ .name = "parse",
+ .work = __eb_parse,
+};
+
+static int eb_parse_pipeline(struct i915_execbuffer *eb,
+ struct i915_vma *shadow)
+{
+ struct eb_parse_work *pw;
+ int err;
+
+ pw = kzalloc(sizeof(*pw), GFP_KERNEL);
+ if (!pw)
+ return -ENOMEM;
+
+ dma_fence_work_init(&pw->base, &eb_parse_ops);
+
+ pw->engine = eb->engine;
+ pw->batch = eb->batch;
+ pw->batch_offset = eb->batch_start_offset;
+ pw->batch_length = eb->batch_len;
+ pw->shadow = shadow;
+
+ dma_resv_lock(pw->batch->resv, NULL);
+ err = dma_resv_reserve_shared(pw->batch->resv, 1);
+ if (err) {
+ dma_resv_unlock(pw->batch->resv);
+ kfree(pw);
+ return err;
+ }
+
+ err = i915_sw_fence_await_reservation(&pw->base.chain,
+ pw->batch->resv, NULL, false,
+ 0, I915_FENCE_GFP);
+ if (err < 0) {
+ dma_resv_unlock(pw->batch->resv);
+ kfree(pw);
+ return err;
+ }
+
+ dma_resv_add_shared_fence(pw->batch->resv, &pw->base.dma);
+ dma_resv_unlock(pw->batch->resv);
+
+ dma_resv_lock(shadow->resv, NULL);
+ dma_resv_add_excl_fence(shadow->resv, &pw->base.dma);
+ dma_resv_unlock(shadow->resv);
+
+ dma_fence_work_commit(&pw->base);
+ return 0;
+}
+
static int eb_parse(struct i915_execbuffer *eb)
{
struct intel_engine_pool_node *pool;
@@ -2018,11 +2086,7 @@ static int eb_parse(struct i915_execbuffer *eb)
eb->batch_flags |= I915_DISPATCH_SECURE;
}
- err = intel_engine_cmd_parser(eb->engine,
- eb->batch,
- eb->batch_start_offset,
- eb->batch_len,
- shadow);
+ err = eb_parse_pipeline(eb, shadow);
if (err)
goto err_trampoline;
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 5c942a582b06..9845182ce587 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1127,32 +1127,28 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
/* Returns a vmap'd pointer to dst_obj, which the caller must unmap */
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
- u32 offset, u32 length,
- bool *needs_clflush_after)
+ u32 offset, u32 length)
{
- unsigned int src_needs_clflush;
- unsigned int dst_needs_clflush;
void *dst, *src, *ptr;
+ bool needs_clflush;
int ret, len;
- ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
- if (ret)
- return ERR_PTR(ret);
-
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
- i915_gem_object_finish_access(dst_obj);
if (IS_ERR(dst))
return dst;
- ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+ ret = i915_gem_object_pin_pages(src_obj);
if (ret) {
i915_gem_object_unpin_map(dst_obj);
return ERR_PTR(ret);
}
+ needs_clflush =
+ !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
+
ptr = dst;
src = ERR_PTR(-ENODEV);
- if (src_needs_clflush && i915_has_memcpy_from_wc()) {
+ if (needs_clflush && i915_has_memcpy_from_wc()) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
src += offset;
@@ -1184,7 +1180,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
* We don't care about copying too much here as we only
* validate up to the end of the batch.
*/
- if (dst_needs_clflush & CLFLUSH_BEFORE)
+ if (!(dst_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ))
length = round_up(length,
boot_cpu_data.x86_clflush_size);
@@ -1193,7 +1189,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
len = min_t(int, length, PAGE_SIZE - x);
src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
- if (src_needs_clflush)
+ if (needs_clflush)
drm_clflush_virt_range(src + x, len);
memcpy(ptr, src + x, len);
kunmap_atomic(src);
@@ -1204,11 +1200,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
}
}
- i915_gem_object_finish_access(src_obj);
+ i915_gem_object_unpin_pages(src_obj);
/* dst_obj is returned with vmap pinned */
- *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-
return dst;
}
@@ -1422,7 +1416,6 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
- bool needs_clflush_after = false;
unsigned long *jump_whitelist;
u64 batch_addr, shadow_addr;
int ret = 0;
@@ -1433,9 +1426,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
batch->size));
GEM_BUG_ON(!batch_length);
- cmd = copy_batch(shadow->obj, batch->obj,
- batch_offset, batch_length,
- &needs_clflush_after);
+ cmd = copy_batch(shadow->obj, batch->obj, batch_offset, batch_length);
if (IS_ERR(cmd)) {
DRM_DEBUG("CMD: Failed to copy batch\n");
return PTR_ERR(cmd);
@@ -1531,7 +1522,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
}
}
- if (needs_clflush_after) {
+ if (!(shadow->obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) {
void *ptr = page_mask_bits(shadow->obj->mm.mapping);
drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
--
2.24.0
More information about the Intel-gfx-trybot
mailing list