[Intel-gfx] [PATCH] intel: Add support for (possibly) unsynchronized maps.
Eric Anholt
eric at anholt.net
Sat Feb 25 04:53:22 CET 2012
This improves the performance of Mesa's GL_MAP_UNSYNCHRONIZED_BIT path
in GL_ARB_map_buffer_range. Improves Unigine Tropics performance at
1024x768 by 2.06236% +/- 0.50272% (n=11).
---
intel/intel_bufmgr.h | 2 +
intel/intel_bufmgr_gem.c | 72 +++++++++++++++++++++++++++++++++++++++++----
2 files changed, 67 insertions(+), 7 deletions(-)
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 85da8b9..e852eab 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -148,8 +148,10 @@ void drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr);
void drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr);
void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr,
int limit);
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo);
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
+
int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index 187e8ec..12641e1 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -1150,15 +1150,13 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
return 0;
}
-int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+static int
+map_gtt(drm_intel_bo *bo)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
- struct drm_i915_gem_set_domain set_domain;
int ret;
- pthread_mutex_lock(&bufmgr_gem->lock);
-
if (bo_gem->map_count++ == 0)
drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
@@ -1184,7 +1182,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
strerror(errno));
if (--bo_gem->map_count == 0)
drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
- pthread_mutex_unlock(&bufmgr_gem->lock);
return ret;
}
@@ -1201,7 +1198,6 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
strerror(errno));
if (--bo_gem->map_count == 0)
drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
- pthread_mutex_unlock(&bufmgr_gem->lock);
return ret;
}
}
@@ -1211,7 +1207,33 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
bo_gem->gtt_virtual);
- /* Now move it to the GTT domain so that the CPU caches are flushed */
+ return 0;
+}
+
+int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+ drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
+ struct drm_i915_gem_set_domain set_domain;
+ int ret;
+
+ pthread_mutex_lock(&bufmgr_gem->lock);
+
+ ret = map_gtt(bo);
+ if (ret) {
+ pthread_mutex_unlock(&bufmgr_gem->lock);
+ return ret;
+ }
+
+ /* Now move it to the GTT domain so that the GPU and CPU
+ * caches are flushed and the GPU isn't actively using the
+ * buffer.
+ *
+ * The pagefault handler does this domain change for us when
+ * it has unbound the BO from the GTT, but it's up to us to
+ * tell it when we're about to use things if we had done
+ * rendering and it still happens to be bound to the GTT.
+ */
set_domain.handle = bo_gem->gem_handle;
set_domain.read_domains = I915_GEM_DOMAIN_GTT;
set_domain.write_domain = I915_GEM_DOMAIN_GTT;
@@ -1229,6 +1251,42 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
return 0;
}
+/**
+ * Performs a mapping of the buffer object like the normal GTT
+ * mapping, but avoiding waiting for the GPU to be done reading from
+ * or rendering to the buffer.
+ *
+ * This is used in the implementation of GL_ARB_map_buffer_range: The
+ * user asks to create a buffer, then does a mapping, fills some
+ * space, runs a drawing command, then asks to map it again without
+ * synchronizing because it guarantees that it won't write over the
+ * data that the GPU is busy using (or, more specifically, that if it
+ * does write over the data, it acknowledges that rendering is
+ * undefined).
+ */
+
+int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
+ int ret;
+
+ /* If the CPU cache isn't coherent with the GTT, then use a
+ * regular synchronized mapping. The problem is that we don't
+ * track where the buffer was last used on the CPU side in
+ * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
+ * we would potentially corrupt the buffer even when the user
+ * does reasonable things.
+ */
+ if (!bufmgr_gem->has_llc)
+ return drm_intel_gem_bo_map_gtt(bo);
+
+ pthread_mutex_lock(&bufmgr_gem->lock);
+ ret = map_gtt(bo);
+ pthread_mutex_unlock(&bufmgr_gem->lock);
+
+ return ret;
+}
+
static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
--
1.7.9.1
More information about the Intel-gfx
mailing list