[Intel-gfx] [PATCH] drm/i915: use a separate context for gpu relocs
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Sat Aug 24 00:20:22 UTC 2019
The CS pre-parser can pre-fetch commands across memory sync points and
starting from gen12 it is able to pre-fetch across BB_START and BB_END
boundaries as well, so when we emit gpu relocs the pre-parser might
fetch the target location of the reloc before the memory write lands.
The parser can't pre-fetch across the ctx switch, so we use a separate
context to guarantee that the memory is syncronized before the parser
can get to it.
Suggested-by: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
.../gpu/drm/i915/gem/i915_gem_execbuffer.c | 27 ++++++++++++++++++-
1 file changed, 26 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index b5f6937369ea..d27201c654e9 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -252,6 +252,7 @@ struct i915_execbuffer {
bool has_fence : 1;
bool needs_unfenced : 1;
+ struct intel_context *ce;
struct i915_request *rq;
u32 *rq_cmd;
unsigned int rq_size;
@@ -903,6 +904,7 @@ static void reloc_cache_init(struct reloc_cache *cache,
cache->has_fence = cache->gen < 4;
cache->needs_unfenced = INTEL_INFO(i915)->unfenced_needs_alignment;
cache->node.allocated = false;
+ cache->ce = NULL;
cache->rq = NULL;
cache->rq_size = 0;
}
@@ -943,6 +945,7 @@ static void reloc_gpu_flush(struct reloc_cache *cache)
static void reloc_cache_reset(struct reloc_cache *cache)
{
void *vaddr;
+ struct intel_context *ce;
if (cache->rq)
reloc_gpu_flush(cache);
@@ -973,6 +976,10 @@ static void reloc_cache_reset(struct reloc_cache *cache)
}
}
+ ce = fetch_and_zero(&cache->ce);
+ if (ce)
+ intel_context_put(ce);
+
cache->vaddr = 0;
cache->page = -1;
}
@@ -1168,7 +1175,7 @@ static int __reloc_gpu_alloc(struct i915_execbuffer *eb,
if (err)
goto err_unmap;
- rq = i915_request_create(eb->context);
+ rq = intel_context_create_request(cache->ce);
if (IS_ERR(rq)) {
err = PTR_ERR(rq);
goto err_unpin;
@@ -1239,6 +1246,24 @@ static u32 *reloc_gpu(struct i915_execbuffer *eb,
if (!intel_engine_can_store_dword(eb->engine))
return ERR_PTR(-ENODEV);
+ /*
+ * The CS pre-parser can pre-fetch commands across memory sync
+ * points and starting gen12 it is able to pre-fetch across
+ * BB_START and BB_END boundaries (within the same context). We
+ * use a separate context to guarantee that the reloc writes
+ * land before the parser gets to the target memory location.
+ */
+ if (!cache->ce) {
+ struct intel_context *ce;
+
+ ce = intel_context_create(eb->context->gem_context,
+ eb->engine);
+ if (IS_ERR(ce))
+ return ERR_CAST(ce);
+
+ cache->ce = ce; /* released in reloc_cache_reset */
+ }
+
err = __reloc_gpu_alloc(eb, vma, len);
if (unlikely(err))
return ERR_PTR(err);
--
2.23.0
More information about the Intel-gfx
mailing list