[Intel-gfx] [PATCH 1/2] intel: Allocate MRU inactive buffer instead of LRU from bo_cache.
Eric Anholt
eric at anholt.net
Tue Mar 31 04:04:05 CEST 2009
Similar to the fix for render-target buffers, this tries to hand out the
hottest buffer we have available by moving buffers that are no longer busy
onto a separate inactive list. This should have approximately the same
algorithmic cost as the previous code, but increases the chance that an
application recovers after falling off the
working-set-bigger-than-aperture-size performance cliff.
---
libdrm/intel/intel_bufmgr_gem.c |  100 +++++++++++++++++++++++++++------------
tests/gem_flink.c               |    1 +
2 files changed, 71 insertions(+), 30 deletions(-)
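To make the scheme concrete before the diff: each size bucket keeps two
MRU-at-the-tail lists, one for buffers that may still be busy on the GPU and
one for buffers known to be idle. Render targets queue behind whatever the GPU
is already doing, so handing them a possibly-busy MRU buffer is fine; buffers
we are about to map on the CPU must not stall, so they come only from the
inactive list. Below is a minimal sketch of that policy; the list type and
helpers (list_node, sketch_bo, pop_mru, and friends) are illustrative names,
not libdrm API, and the patch itself uses the drmMMListHead macros
(DRMLISTENTRY, DRMLISTADDTAIL, etc.) instead.

#include <stddef.h>

struct list_node {
	struct list_node *prev, *next;
};

struct sketch_bo {
	struct list_node link;
	unsigned int gem_handle;
};

struct sketch_bucket {
	struct list_node active_head;   /* LRU at head, MRU at tail; may be busy */
	struct list_node inactive_head; /* LRU at head, MRU at tail; known idle */
};

static void list_init(struct list_node *head)
{
	head->prev = head->next = head;
}

static int list_empty(const struct list_node *head)
{
	return head->next == head;
}

/* Unlink and return the MRU (tail) entry; callers check emptiness first. */
static struct sketch_bo *pop_mru(struct list_node *head)
{
	struct list_node *n = head->prev;

	n->prev->next = n->next;
	n->next->prev = n->prev;
	return (struct sketch_bo *)((char *)n - offsetof(struct sketch_bo, link));
}

/* On unreference, cached buffers are appended here (the active tail), so
 * the list stays ordered oldest-first. */
static void push_mru(struct list_node *head, struct sketch_bo *bo)
{
	bo->link.prev = head->prev;
	bo->link.next = head;
	head->prev->next = &bo->link;
	head->prev = &bo->link;
}

/* Render targets take the hottest buffer from either list.  Non-render
 * allocations (after the caller has migrated newly idle buffers over to
 * the inactive list) take only buffers that will not block on the GPU. */
static struct sketch_bo *bucket_alloc(struct sketch_bucket *b, int for_render)
{
	if (for_render && !list_empty(&b->active_head))
		return pop_mru(&b->active_head);
	if (!list_empty(&b->inactive_head))
		return pop_mru(&b->inactive_head);
	return NULL; /* fall back to creating a fresh GEM object */
}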
diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
index e48778c..79f9dea 100644
--- a/libdrm/intel/intel_bufmgr_gem.c
+++ b/libdrm/intel/intel_bufmgr_gem.c
@@ -68,7 +68,8 @@
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
struct drm_intel_gem_bo_bucket {
- drmMMListHead head;
+ drmMMListHead active_head;
+ drmMMListHead inactive_head;
/**
* Limit on the number of entries in this bucket.
@@ -233,6 +234,35 @@ drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
return &bufmgr_gem->cache_bucket[i];
}
+/**
+ * Walk the current set of active buffers in this bucket, from LRU to MRU,
+ * moving buffers that are now unbusy (no longer being rendered with) to the
+ * inactive list.
+ */
+static void
+drm_intel_gem_update_bucket_inactive(drm_intel_bufmgr_gem *bufmgr_gem,
+ struct drm_intel_gem_bo_bucket *bucket)
+{
+ struct drm_i915_gem_busy busy;
+ drm_intel_bo_gem *bo_gem;
+
+ while (!DRMLISTEMPTY(&bucket->active_head)) {
+ int ret;
+
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->active_head.next, head);
+
+ memset(&busy, 0, sizeof(busy));
+ busy.handle = bo_gem->gem_handle;
+
+ ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ if (ret != 0 || busy.busy == 0) {
+ DRMLISTDEL(&bo_gem->head);
+ DRMLISTADDTAIL(&bo_gem->head, &bucket->inactive_head);
+ } else {
+ break;
+ }
+ }
+}
static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
@@ -332,11 +362,10 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
int for_render)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
- drm_intel_bo_gem *bo_gem;
+ drm_intel_bo_gem *bo_gem = NULL;
unsigned int page_size = getpagesize();
int ret;
struct drm_intel_gem_bo_bucket *bucket;
- int alloc_from_cache = 0;
unsigned long bo_size;
/* Round the allocated size up to a power of two number of pages. */
@@ -356,34 +385,35 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
pthread_mutex_lock(&bufmgr_gem->lock);
/* Get a buffer out of the cache if available */
- if (bucket != NULL && bucket->num_entries > 0) {
- struct drm_i915_gem_busy busy;
-
+ if (bucket != NULL) {
if (for_render) {
/* Allocate new render-target BOs from the tail (MRU)
- * of the list, as it will likely be hot in the GPU cache
+ * of the lists, as it will likely be hot in the GPU cache
* and in the aperture for us.
*/
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
- DRMLISTDEL(&bo_gem->head);
- bucket->num_entries--;
- alloc_from_cache = 1;
+ if (!DRMLISTEMPTY(&bucket->active_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->active_head.prev, head);
+ } else if (!DRMLISTEMPTY(&bucket->inactive_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->inactive_head.prev, head);
+ }
+ if (bo_gem != NULL) {
+ DRMLISTDEL(&bo_gem->head);
+ bucket->num_entries--;
+ }
} else {
/* For non-render-target BOs (where we're probably going to map it
- * first thing in order to fill it with data), check if the
- * last BO in the cache is unbusy, and only reuse in that case.
- * Otherwise, allocating a new buffer is probably faster than
- * waiting for the GPU to finish.
+ * first thing in order to fill it with data), choose the most
+ * recently used inactive buffer. We want something that's in
+ * the aperture if possible, but most important is to not block
+ * on the GPU finishing.
*/
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
-
- memset(&busy, 0, sizeof(busy));
- busy.handle = bo_gem->gem_handle;
-
- ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
- alloc_from_cache = (ret == 0 && busy.busy == 0);
+ drm_intel_gem_update_bucket_inactive(bufmgr_gem, bucket);

- if (alloc_from_cache) {
+ if (!DRMLISTEMPTY(&bucket->inactive_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->inactive_head.prev, head);
DRMLISTDEL(&bo_gem->head);
bucket->num_entries--;
}
@@ -391,7 +421,7 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
}
pthread_mutex_unlock(&bufmgr_gem->lock);
- if (!alloc_from_cache) {
+ if (bo_gem == NULL) {
struct drm_i915_gem_create create;
bo_gem = calloc(1, sizeof(*bo_gem));
@@ -582,7 +612,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
bo_gem->reloc_target_bo = NULL;
bo_gem->reloc_count = 0;
- DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
+ DRMLISTADDTAIL(&bo_gem->head, &bucket->active_head);
bucket->num_entries++;
} else {
drm_intel_gem_bo_free(bo);
@@ -883,8 +913,17 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
drm_intel_bo_gem *bo_gem;
- while (!DRMLISTEMPTY(&bucket->head)) {
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
+ while (!DRMLISTEMPTY(&bucket->active_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->active_head.next, head);
+ DRMLISTDEL(&bo_gem->head);
+ bucket->num_entries--;
+
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ }
+ while (!DRMLISTEMPTY(&bucket->inactive_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->inactive_head.next, head);
DRMLISTDEL(&bo_gem->head);
bucket->num_entries--;
@@ -1435,9 +1474,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
bufmgr_gem->bufmgr.debug = 0;
bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
/* Initialize the linked lists for BO reuse cache. */
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++)
- DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
-
+ for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
+ DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].active_head);
+ DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].inactive_head);
+ }
return &bufmgr_gem->bufmgr;
}
diff --git a/tests/gem_flink.c b/tests/gem_flink.c
index d2e062f..0bc16eb 100644
--- a/tests/gem_flink.c
+++ b/tests/gem_flink.c
@@ -53,6 +53,7 @@ test_flink(int fd)
flink.handle = create.handle;
ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+ printf("%d\n", ret);
assert(ret == 0);
open.name = flink.name;
--
1.6.2.1
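For reference, the active-to-inactive migration above hinges on
DRM_IOCTL_I915_GEM_BUSY. Because unreferenced buffers are appended at the
active tail, that list stays ordered oldest-first, which is why
drm_intel_gem_update_bucket_inactive can stop walking at the first buffer
that is still busy: the newer buffers behind it are assumed busy too. A
minimal sketch of the busy test follows; bo_still_busy is an illustrative
name, and the error handling mirrors the patch's
"ret != 0 || busy.busy == 0" test, which treats an ioctl failure as idle.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* via pkg-config --cflags libdrm */

static int bo_still_busy(int fd, uint32_t gem_handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = gem_handle;

	/* On error we cannot tell; treat the buffer as idle so it is
	 * moved to the inactive list and reused, as the patch does. */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) != 0)
		return 0;
	return busy.busy != 0;
}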