[PATCH] pipe-parse
Chris Wilson
chris at chris-wilson.co.uk
Fri Dec 6 11:34:30 UTC 2019
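
Move the cmdparser out of the middle of execbuf and into an
asynchronous dma_fence_work. eb_parse() now queues an eb_parse_work
that calls intel_engine_cmd_parser() once any outstanding writes to
the user batch have signaled; the work is published as a shared fence
on the batch and as the exclusive fence on the shadow, so later users
wait upon the parse. The -EWOULDBLOCK escape in reloc_gpu() is no
longer required.

As copy_batch() now runs outside of execbuf, it can no longer use the
prepare_read()/prepare_write() domain helpers; instead pin the source
pages directly and derive the clflush requirements from the objects'
read domains and cache coherency. With the parse asynchronous, the
GGTT fallback to executing an unsafe batch as non-secure on -EACCES
is dropped.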
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 110 +++++++++++++-----
drivers/gpu/drm/i915/i915_cmd_parser.c | 33 +++---
2 files changed, 92 insertions(+), 51 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 34044c6203a5..d0c4d6302375 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -1241,10 +1241,6 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
if (unlikely(!cache->rq)) {
int err;
- /* If we need to copy for the cmdparser, we will stall anyway */
- if (eb_use_cmdparser(eb))
- return ERR_PTR(-EWOULDBLOCK);
-
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
@@ -2001,12 +1997,87 @@ shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj)
return vma;
}
+#include "i915_sw_fence_work.h"
+
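+/*
+ * Deferred state for running intel_engine_cmd_parser() from a
+ * dma_fence_work, so that execbuf no longer stalls on the batch copy.
+ */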
+struct eb_parse_work {
+ struct dma_fence_work base;
+ struct intel_engine_cs *engine;
+ struct i915_vma *batch;
+ struct i915_vma *vma;
+ unsigned int batch_offset;
+ unsigned int batch_length;
+};
+
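+/* Work callback: copy and validate the user batch into the shadow. */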
+static int __eb_parse(struct dma_fence_work *work)
+{
+ struct eb_parse_work *pw = container_of(work, typeof(*pw), base);
+ struct i915_vma *batch = pw->batch;
+ struct i915_vma *vma = pw->vma;
+
+ return intel_engine_cmd_parser(pw->engine,
+ batch->obj,
+ gen8_canonical_addr(batch->node.start) +
+ pw->batch_offset,
+ pw->batch_offset,
+ pw->batch_length,
+ vma->obj,
+ gen8_canonical_addr(vma->node.start));
+}
+
+static const struct dma_fence_work_ops eb_parse_ops = {
+ .name = "parse",
+ .work = __eb_parse,
+};
+
+static int eb_parse_pipeline(struct i915_execbuffer *eb, struct i915_vma *vma)
+{
+ struct eb_parse_work *pw;
+ int err;
+
+ pw = kzalloc(sizeof(*pw), GFP_KERNEL);
+ if (!pw)
+ return -ENOMEM;
+
+ dma_fence_work_init(&pw->base, &eb_parse_ops);
+
+ pw->engine = eb->engine;
+ pw->batch = eb->batch;
+ pw->batch_offset = eb->batch_start_offset;
+ pw->batch_length = eb->batch_len;
+ pw->vma = vma;
+
+ dma_resv_lock(eb->batch->resv, NULL);
+ err = dma_resv_reserve_shared(eb->batch->resv, 1);
+ if (err) {
+ dma_resv_unlock(eb->batch->resv);
+ kfree(pw);
+ return err;
+ }
+
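+	/* Defer parsing until existing writes to the user batch complete. */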
+ err = i915_sw_fence_await_reservation(&pw->base.chain,
+ eb->batch->resv, NULL, false,
+ 0, I915_FENCE_GFP);
+ if (err < 0) {
+ dma_resv_unlock(eb->batch->resv);
+ kfree(pw);
+ return err;
+ }
+
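+	/* Publish the parser as a reader of the user batch. */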
+ dma_resv_add_shared_fence(eb->batch->resv, &pw->base.dma);
+ dma_resv_unlock(eb->batch->resv);
+
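+	/* The parser writes into the shadow: install an exclusive fence. */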
+ dma_resv_lock(vma->resv, NULL);
+ dma_resv_add_excl_fence(vma->resv, &pw->base.dma);
+ dma_resv_unlock(vma->resv);
+
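+	/* Queue the work; it runs once all awaited fences have signaled. */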
+ dma_fence_work_commit(&pw->base);
+ return 0;
+}
+
static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
{
struct intel_engine_pool_node *pool;
struct i915_vma *vma;
- u64 batch_start;
- u64 shadow_batch_start;
int err;
pool = intel_engine_get_pool(eb->engine, eb->batch_len);
@@ -2017,33 +2088,10 @@ static struct i915_vma *eb_parse(struct i915_execbuffer *eb)
if (IS_ERR(vma))
goto err;
- batch_start = gen8_canonical_addr(eb->batch->node.start) +
- eb->batch_start_offset;
-
- shadow_batch_start = gen8_canonical_addr(vma->node.start);
-
- err = intel_engine_cmd_parser(eb->engine,
- eb->batch->obj,
- batch_start,
- eb->batch_start_offset,
- eb->batch_len,
- pool->obj,
- shadow_batch_start);
-
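+	/* Queue the parse asynchronously; only setup errors are returned here. */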
+ err = eb_parse_pipeline(eb, vma);
if (err) {
i915_vma_unpin(vma);
-
- /*
- * Unsafe GGTT-backed buffers can still be submitted safely
- * as non-secure.
- * For PPGTT backing however, we have no choice but to forcibly
- * reject unsafe buffers
- */
- if (i915_vma_is_ggtt(vma) && err == -EACCES)
- /* Execute original buffer non-secure */
- vma = NULL;
- else
- vma = ERR_PTR(err);
+ vma = ERR_PTR(err);
goto err;
}
diff --git a/drivers/gpu/drm/i915/i915_cmd_parser.c b/drivers/gpu/drm/i915/i915_cmd_parser.c
index 2ed497e7c9fd..4dc113f0c65a 100644
--- a/drivers/gpu/drm/i915/i915_cmd_parser.c
+++ b/drivers/gpu/drm/i915/i915_cmd_parser.c
@@ -1128,31 +1128,28 @@ find_reg(const struct intel_engine_cs *engine, u32 addr)
static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
struct drm_i915_gem_object *src_obj,
u32 batch_start_offset,
- u32 batch_len,
- bool *needs_clflush_after)
+ u32 batch_len)
{
- unsigned int src_needs_clflush;
- unsigned int dst_needs_clflush;
+ bool needs_clflush;
void *dst, *src;
int ret;
- ret = i915_gem_object_prepare_write(dst_obj, &dst_needs_clflush);
- if (ret)
- return ERR_PTR(ret);
-
dst = i915_gem_object_pin_map(dst_obj, I915_MAP_FORCE_WB);
- i915_gem_object_finish_access(dst_obj);
if (IS_ERR(dst))
return dst;
- ret = i915_gem_object_prepare_read(src_obj, &src_needs_clflush);
+ ret = i915_gem_object_pin_pages(src_obj);
if (ret) {
i915_gem_object_unpin_map(dst_obj);
return ERR_PTR(ret);
}
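+	/* Replaces prepare_read(): flush unless the CPU read view is coherent. */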
+ needs_clflush =
+ !(src_obj->read_domains & I915_GEM_DOMAIN_CPU) &&
+ !(src_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_READ);
+
src = ERR_PTR(-ENODEV);
- if (src_needs_clflush &&
+ if (needs_clflush &&
i915_can_memcpy_from_wc(NULL, batch_start_offset, 0)) {
src = i915_gem_object_pin_map(src_obj, I915_MAP_WC);
if (!IS_ERR(src)) {
@@ -1175,7 +1172,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
* We don't care about copying too much here as we only
* validate up to the end of the batch.
*/
- if (dst_needs_clflush & CLFLUSH_BEFORE)
+ if (!(dst_obj->read_domains & I915_GEM_DOMAIN_CPU))
batch_len = roundup(batch_len,
boot_cpu_data.x86_clflush_size);
@@ -1184,7 +1181,7 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
int len = min_t(int, batch_len, PAGE_SIZE - offset);
src = kmap_atomic(i915_gem_object_get_page(src_obj, n));
- if (src_needs_clflush)
+ if (needs_clflush)
drm_clflush_virt_range(src + offset, len);
memcpy(ptr, src + offset, len);
kunmap_atomic(src);
@@ -1195,11 +1192,9 @@ static u32 *copy_batch(struct drm_i915_gem_object *dst_obj,
}
}
- i915_gem_object_finish_access(src_obj);
+ i915_gem_object_unpin_pages(src_obj);
/* dst_obj is returned with vmap pinned */
- *needs_clflush_after = dst_needs_clflush & CLFLUSH_AFTER;
-
return dst;
}
@@ -1417,13 +1412,11 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
u32 *cmd, *batch_end, offset = 0;
struct drm_i915_cmd_descriptor default_desc = noop_desc;
const struct drm_i915_cmd_descriptor *desc = &default_desc;
- bool needs_clflush_after = false;
unsigned long *jump_whitelist;
int ret = 0;
cmd = copy_batch(shadow_batch_obj, batch_obj,
- batch_start_offset, batch_len,
- &needs_clflush_after);
+ batch_start_offset, batch_len);
if (IS_ERR(cmd)) {
DRM_DEBUG_DRIVER("CMD: Failed to copy batch\n");
return PTR_ERR(cmd);
@@ -1494,7 +1487,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
}
} while (1);
- if (needs_clflush_after) {
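+	/* Flush the shadow out to memory if writes are not cache coherent. */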
+ if (!(shadow_batch_obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE)) {
void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping);
drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr);
--
2.24.0