[Intel-gfx] [PATCH 1/2] intel: Allocate MRU inactive buffer instead of LRU from bo_cache.
Eric Anholt
eric at anholt.net
Tue Mar 31 04:04:05 CEST 2009
Similar to the fix for render-target buffers, this tries to hand out the
hottest buffer we have available by moving buffers that are no longer busy
onto a separate inactive list. This should have approximately the same
algorithmic cost as the previous code, but increases the chance that an
application recovers after falling off the
working-set-bigger-than-aperture-size performance cliff.
---
libdrm/intel/intel_bufmgr_gem.c |  100 +++++++++++++++++++++++++++------------
tests/gem_flink.c               |    1 +
2 files changed, 71 insertions(+), 30 deletions(-)
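To make the scheme concrete before the diff: each size bucket keeps two
MRU-at-the-tail lists, one for buffers that may still be busy on the GPU and
one for buffers known to be idle. Render targets queue behind whatever the GPU
is already doing, so handing them a possibly-busy MRU buffer is fine; buffers
we are about to map on the CPU must not stall, so they come only from the
inactive list. Below is a minimal sketch of that policy; the list type and
helpers (list_node, sketch_bo, pop_mru, and friends) are illustrative names,
not libdrm API, and the patch itself uses the drmMMListHead macros
(DRMLISTENTRY, DRMLISTADDTAIL, etc.) instead.

#include <stddef.h>

struct list_node {
	struct list_node *prev, *next;
};

struct sketch_bo {
	struct list_node link;
	unsigned int gem_handle;
};

struct sketch_bucket {
	struct list_node active_head;   /* LRU at head, MRU at tail; may be busy */
	struct list_node inactive_head; /* LRU at head, MRU at tail; known idle */
};

static void list_init(struct list_node *head)
{
	head->prev = head->next = head;
}

static int list_empty(const struct list_node *head)
{
	return head->next == head;
}

/* Unlink and return the MRU (tail) entry; callers check emptiness first. */
static struct sketch_bo *pop_mru(struct list_node *head)
{
	struct list_node *n = head->prev;

	n->prev->next = n->next;
	n->next->prev = n->prev;
	return (struct sketch_bo *)((char *)n - offsetof(struct sketch_bo, link));
}

/* On unreference, cached buffers are appended here (the active tail), so
 * the list stays ordered oldest-first. */
static void push_mru(struct list_node *head, struct sketch_bo *bo)
{
	bo->link.prev = head->prev;
	bo->link.next = head;
	head->prev->next = &bo->link;
	head->prev = &bo->link;
}

/* Render targets take the hottest buffer from either list.  Non-render
 * allocations (after the caller has migrated newly idle buffers over to
 * the inactive list) take only buffers that will not block on the GPU. */
static struct sketch_bo *bucket_alloc(struct sketch_bucket *b, int for_render)
{
	if (for_render && !list_empty(&b->active_head))
		return pop_mru(&b->active_head);
	if (!list_empty(&b->inactive_head))
		return pop_mru(&b->inactive_head);
	return NULL; /* fall back to creating a fresh GEM object */
}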
diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
index e48778c..79f9dea 100644
--- a/libdrm/intel/intel_bufmgr_gem.c
+++ b/libdrm/intel/intel_bufmgr_gem.c
@@ -68,7 +68,8 @@
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
struct drm_intel_gem_bo_bucket {
- drmMMListHead head;
+ drmMMListHead active_head;
+ drmMMListHead inactive_head;
/**
* Limit on the number of entries in this bucket.
@@ -233,6 +234,35 @@ drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
return &bufmgr_gem->cache_bucket[i];
}
+/**
+ * Walk the current set of active buffers in this bucket, from LRU to MRU,
+ * moving buffers that are now unbusy (no longer being rendered with) to the
+ * inactive list.
+ */
+static void
+drm_intel_gem_update_bucket_inactive(drm_intel_bufmgr_gem *bufmgr_gem,
+ struct drm_intel_gem_bo_bucket *bucket)
+{
+ struct drm_i915_gem_busy busy;
+ drm_intel_bo_gem *bo_gem;
+
+ while (!DRMLISTEMPTY(&bucket->active_head)) {
+ int ret;
+
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->active_head.next, head);
+
+ memset(&busy, 0, sizeof(busy));
+ busy.handle = bo_gem->gem_handle;
+
+ ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+ if (ret != 0 || busy.busy == 0) {
+ DRMLISTDEL(&bo_gem->head);
+ DRMLISTADDTAIL(&bo_gem->head, &bucket->inactive_head);
+ } else {
+ break;
+ }
+ }
+}
static void drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
{
@@ -332,11 +362,10 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
int for_render)
{
drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
- drm_intel_bo_gem *bo_gem;
+ drm_intel_bo_gem *bo_gem = NULL;
unsigned int page_size = getpagesize();
int ret;
struct drm_intel_gem_bo_bucket *bucket;
- int alloc_from_cache = 0;
unsigned long bo_size;
/* Round the allocated size up to a power of two number of pages. */
@@ -356,34 +385,35 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
pthread_mutex_lock(&bufmgr_gem->lock);
/* Get a buffer out of the cache if available */
- if (bucket != NULL && bucket->num_entries > 0) {
- struct drm_i915_gem_busy busy;
-
+ if (bucket != NULL) {
if (for_render) {
/* Allocate new render-target BOs from the tail (MRU)
- * of the list, as it will likely be hot in the GPU cache
+ * of the lists, as it will likely be hot in the GPU cache
* and in the aperture for us.
*/
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
- DRMLISTDEL(&bo_gem->head);
- bucket->num_entries--;
- alloc_from_cache = 1;
+ if (!DRMLISTEMPTY(&bucket->active_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->active_head.prev, head);
+ } else if (!DRMLISTEMPTY(&bucket->inactive_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->inactive_head.prev, head);
+ }
+ if (bo_gem != NULL) {
+ DRMLISTDEL(&bo_gem->head);
+ bucket->num_entries--;
+ }
} else {
/* For non-render-target BOs (where we're probably going to map it
- * first thing in order to fill it with data), check if the
- * last BO in the cache is unbusy, and only reuse in that case.
- * Otherwise, allocating a new buffer is probably faster than
- * waiting for the GPU to finish.
+ * first thing in order to fill it with data), choose the most
+ * recently used inactive buffer. We want something that's in
+ * the aperture if possible, but most important is to not block
+ * on the GPU finishing.
*/
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
-
- memset(&busy, 0, sizeof(busy));
- busy.handle = bo_gem->gem_handle;
-
- ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
- alloc_from_cache = (ret == 0 && busy.busy == 0);
+ drm_intel_gem_update_bucket_inactive(bufmgr_gem, bucket);

- if (alloc_from_cache) {
+ if (!DRMLISTEMPTY(&bucket->inactive_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->inactive_head.prev, head);
DRMLISTDEL(&bo_gem->head);
bucket->num_entries--;
}
@@ -391,7 +421,7 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
}
pthread_mutex_unlock(&bufmgr_gem->lock);
- if (!alloc_from_cache) {
+ if (bo_gem == NULL) {
struct drm_i915_gem_create create;
bo_gem = calloc(1, sizeof(*bo_gem));
@@ -582,7 +612,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
bo_gem->reloc_target_bo = NULL;
bo_gem->reloc_count = 0;
- DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
+ DRMLISTADDTAIL(&bo_gem->head, &bucket->active_head);
bucket->num_entries++;
} else {
drm_intel_gem_bo_free(bo);
@@ -883,8 +913,17 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
drm_intel_bo_gem *bo_gem;
- while (!DRMLISTEMPTY(&bucket->head)) {
- bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
+ while (!DRMLISTEMPTY(&bucket->active_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->active_head.next, head);
+ DRMLISTDEL(&bo_gem->head);
+ bucket->num_entries--;
+
+ drm_intel_gem_bo_free(&bo_gem->bo);
+ }
+ while (!DRMLISTEMPTY(&bucket->inactive_head)) {
+ bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
+ bucket->inactive_head.next, head);
DRMLISTDEL(&bo_gem->head);
bucket->num_entries--;
@@ -1435,9 +1474,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
bufmgr_gem->bufmgr.debug = 0;
bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
/* Initialize the linked lists for BO reuse cache. */
- for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++)
- DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
-
+ for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++) {
+ DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].active_head);
+ DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].inactive_head);
+ }
return &bufmgr_gem->bufmgr;
}
diff --git a/tests/gem_flink.c b/tests/gem_flink.c
index d2e062f..0bc16eb 100644
--- a/tests/gem_flink.c
+++ b/tests/gem_flink.c
@@ -53,6 +53,7 @@ test_flink(int fd)
flink.handle = create.handle;
ret = ioctl(fd, DRM_IOCTL_GEM_FLINK, &flink);
+ printf("%d\n", ret);
assert(ret == 0);
open.name = flink.name;
--
1.6.2.1
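For reference, the active-to-inactive migration above hinges on
DRM_IOCTL_I915_GEM_BUSY. Because unreferenced buffers are appended at the
active tail, that list stays ordered oldest-first, which is why
drm_intel_gem_update_bucket_inactive can stop walking at the first buffer
that is still busy: the newer buffers behind it are assumed busy too. A
minimal sketch of the busy test follows; bo_still_busy is an illustrative
name, and the error handling mirrors the patch's
"ret != 0 || busy.busy == 0" test, which treats an ioctl failure as idle.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* via pkg-config --cflags libdrm */

static int bo_still_busy(int fd, uint32_t gem_handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = gem_handle;

	/* On error we cannot tell; treat the buffer as idle so it is
	 * moved to the inactive list and reused, as the patch does. */
	if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy) != 0)
		return 0;
	return busy.busy != 0;
}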