Mesa (master): vc4: Add a userspace BO cache.

Eric Anholt anholt at kemper.freedesktop.org
Thu Dec 18 00:09:32 UTC 2014


Module: Mesa
Branch: master
Commit: 06890c444a893a96b6ec1cfb36f77915ec9acda8
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=06890c444a893a96b6ec1cfb36f77915ec9acda8

Author: Eric Anholt <eric at anholt.net>
Date:   Sat Dec 13 15:27:39 2014 -0800

vc4: Add a userspace BO cache.

Since our kernel BOs require CMA allocation, and using them requires new
mmaps, allocating them is pretty expensive and we should avoid it if possible.
Copying my original design for Intel, make a userspace cache that reuses
BOs that haven't been shared with other processes but frees BOs that have
sat in the cache for over a second.

Improves glxgears framerate on RPi by around 30%.

---

 src/gallium/drivers/vc4/vc4_bufmgr.c |  131 +++++++++++++++++++++++++++++++++-
 src/gallium/drivers/vc4/vc4_bufmgr.h |   34 ++++++++-
 src/gallium/drivers/vc4/vc4_screen.c |    2 +
 src/gallium/drivers/vc4/vc4_screen.h |   12 ++++
 4 files changed, 175 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 64fe2e4..34596be 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -29,14 +29,49 @@
 #include <xf86drmMode.h>
 
 #include "util/u_memory.h"
+#include "util/ralloc.h"
 
 #include "vc4_context.h"
 #include "vc4_screen.h"
 
+#define container_of(ptr, type, field) /* member pointer -> enclosing 'type' */ \
+   (type*)((char*)ptr - offsetof(type, field)) /* NOTE: ptr is used unparenthesized */
+
+static struct vc4_bo * /* a cached BO of exactly this size, or NULL */
+vc4_bo_from_cache(struct vc4_screen *screen, uint32_t size, const char *name)
+{
+        struct vc4_bo_cache *cache = &screen->bo_cache;
+        uint32_t page_index = size / 4096 - 1; /* size_list[n] holds (n + 1)-page BOs */
+        /* No list exists for this size yet. Note: checked before taking the lock. */
+        if (cache->size_list_size <= page_index)
+                return NULL;
+
+        struct vc4_bo *bo = NULL;
+        pipe_mutex_lock(cache->lock);
+        if (!is_empty_list(&cache->size_list[page_index])) {
+                struct simple_node *node = last_elem(&cache->size_list[page_index]);
+                bo = container_of(node, struct vc4_bo, size_list);
+                pipe_reference_init(&bo->reference, 1);
+                remove_from_list(&bo->time_list);
+                remove_from_list(&bo->size_list);
+                /* The reused BO takes the name supplied by the new caller. */
+                bo->name = name;
+        }
+        pipe_mutex_unlock(cache->lock);
+        return bo;
+}
+
 struct vc4_bo *
 vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
 {
-        struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
+        struct vc4_bo *bo;
+        size = align(size, 4096);
+
+        bo = vc4_bo_from_cache(screen, size, name);
+        if (bo)
+                return bo;
+
+        bo = CALLOC_STRUCT(vc4_bo);
         if (!bo)
                 return NULL;
 
@@ -44,6 +79,7 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
         bo->screen = screen;
         bo->size = size;
         bo->name = name;
+        bo->private = true;
 
         struct drm_mode_create_dumb create;
         memset(&create, 0, sizeof(create));
@@ -65,6 +101,18 @@ vc4_bo_alloc(struct vc4_screen *screen, uint32_t size, const char *name)
 }
 
 void
+vc4_bo_last_unreference(struct vc4_bo *bo)
+{
+        struct vc4_screen *screen = bo->screen;
+        /* Sample CLOCK_MONOTONIC before locking; only whole seconds are passed on. */
+        struct timespec time;
+        clock_gettime(CLOCK_MONOTONIC, &time);
+        pipe_mutex_lock(screen->bo_cache.lock);
+        vc4_bo_last_unreference_locked_timed(bo, time.tv_sec);
+        pipe_mutex_unlock(screen->bo_cache.lock);
+}
+
+static void
 vc4_bo_free(struct vc4_bo *bo)
 {
         struct vc4_screen *screen = bo->screen;
@@ -89,6 +137,69 @@ vc4_bo_free(struct vc4_bo *bo)
         free(bo);
 }
 
+static void
+free_stale_bos(struct vc4_screen *screen, time_t time)
+{
+        while (!is_empty_list(&screen->bo_cache.time_list)) {
+                struct simple_node *node =
+                        first_elem(&screen->bo_cache.time_list);
+                struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
+
+                /* Free it once it has sat in the cache for more than 2 seconds. */
+                if (time - bo->free_time > 2) {
+                        remove_from_list(&bo->time_list);
+                        remove_from_list(&bo->size_list);
+                        vc4_bo_free(bo);
+                } else {
+                        break; /* time_list is kept by age, so stop at the first fresh BO */
+                }
+        }
+}
+
+void
+vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time)
+{
+        struct vc4_screen *screen = bo->screen;
+        struct vc4_bo_cache *cache = &screen->bo_cache;
+        uint32_t page_index = bo->size / 4096 - 1;
+        /* NOTE(review): "_locked" suggests the caller already holds bo_cache.lock. */
+        if (!bo->private) {
+                vc4_bo_free(bo); /* non-private BOs are never cached */
+                return;
+        }
+        /* Grow the array of per-size list heads so it covers this BO's size. */
+        if (cache->size_list_size <= page_index) {
+                struct simple_node *new_list =
+                        ralloc_array(screen, struct simple_node, page_index + 1);
+
+                /* Move the old list contents over: the array has moved, so
+                 * the pointers to the list heads have to change.
+                 */
+                for (int i = 0; i < cache->size_list_size; i++) {
+                        struct simple_node *old_head = &cache->size_list[i];
+                        if (is_empty_list(old_head))
+                                make_empty_list(&new_list[i]);
+                        else {
+                                new_list[i].next = old_head->next;
+                                new_list[i].prev = old_head->prev;
+                                new_list[i].next->prev = &new_list[i];
+                                new_list[i].prev->next = &new_list[i];
+                        }
+                }
+                for (int i = cache->size_list_size; i < page_index + 1; i++)
+                        make_empty_list(&new_list[i]);
+                /* NOTE(review): the previous size_list array is not freed here. */
+                cache->size_list = new_list;
+                cache->size_list_size = page_index + 1;
+        }
+        /* Record the release time and queue the BO on its size list and the age list. */
+        bo->free_time = time;
+        insert_at_tail(&cache->size_list[page_index], &bo->size_list);
+        insert_at_tail(&cache->time_list, &bo->time_list);
+
+        free_stale_bos(screen, time);
+}
+
 static struct vc4_bo *
 vc4_bo_open_handle(struct vc4_screen *screen,
                    uint32_t winsys_stride,
@@ -103,6 +214,7 @@ vc4_bo_open_handle(struct vc4_screen *screen,
         bo->handle = handle;
         bo->size = size;
         bo->name = "winsys";
+        bo->private = false;
 
 #ifdef USE_VC4_SIMULATOR
         vc4_bo_map(bo);
@@ -194,6 +306,7 @@ vc4_bo_flink(struct vc4_bo *bo, uint32_t *name)
                 return false;
         }
 
+        bo->private = false;
         *name = flink.name;
 
         return true;
@@ -289,3 +402,19 @@ vc4_bo_map(struct vc4_bo *bo)
 
         return map;
 }
+
+void
+vc4_bufmgr_destroy(struct pipe_screen *pscreen)
+{
+        struct vc4_screen *screen = vc4_screen(pscreen);
+        struct vc4_bo_cache *cache = &screen->bo_cache;
+        /* Free every BO still linked on the cache's time_list. */
+        while (!is_empty_list(&cache->time_list)) {
+                struct simple_node *node = first_elem(&cache->time_list);
+                struct vc4_bo *bo = container_of(node, struct vc4_bo, time_list);
+                /* NOTE(review): cache->lock is not taken in this function. */
+                remove_from_list(&bo->time_list);
+                remove_from_list(&bo->size_list);
+                vc4_bo_free(bo);
+        }
+}
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h b/src/gallium/drivers/vc4/vc4_bufmgr.h
index baaecfd..f9559e9 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -26,6 +26,7 @@
 
 #include <stdint.h>
 #include "util/u_inlines.h"
+#include "vc4_qir.h"
 
 struct vc4_context;
 
@@ -41,13 +42,26 @@ struct vc4_bo {
         void *simulator_winsys_map;
         uint32_t simulator_winsys_stride;
 #endif
+
+        /** Entry in the linked list of buffers freed, by age. */
+        struct simple_node time_list;
+        /** Entry in the per-page-count linked list of buffers freed (by age). */
+        struct simple_node size_list;
+        /** Approximate second when the bo was freed. */
+        time_t free_time;
+        /**
+         * Whether only our process has a reference to the BO (meaning that
+         * it's safe to reuse it in the BO cache).
+         */
+        bool private;
 };
 
 struct vc4_bo *vc4_bo_alloc(struct vc4_screen *screen, uint32_t size,
                             const char *name);
 struct vc4_bo *vc4_bo_alloc_mem(struct vc4_screen *screen, const void *data,
                                 uint32_t size, const char *name);
-void vc4_bo_free(struct vc4_bo *bo);
+void vc4_bo_last_unreference(struct vc4_bo *bo);
+void vc4_bo_last_unreference_locked_timed(struct vc4_bo *bo, time_t time);
 struct vc4_bo *vc4_bo_open_name(struct vc4_screen *screen, uint32_t name,
                                 uint32_t winsys_stride);
 struct vc4_bo *vc4_bo_open_dmabuf(struct vc4_screen *screen, int fd,
@@ -59,7 +73,7 @@ static inline void
 vc4_bo_set_reference(struct vc4_bo **old_bo, struct vc4_bo *new_bo)
 {
         if (pipe_reference(&(*old_bo)->reference, &new_bo->reference))
-                vc4_bo_free(*old_bo);
+                vc4_bo_last_unreference(*old_bo);
         *old_bo = new_bo;
 }
 
@@ -77,7 +91,18 @@ vc4_bo_unreference(struct vc4_bo **bo)
                 return;
 
         if (pipe_reference(&(*bo)->reference, NULL))
-                vc4_bo_free(*bo);
+                vc4_bo_last_unreference(*bo);
+        *bo = NULL;
+}
+
+static inline void
+vc4_bo_unreference_locked_timed(struct vc4_bo **bo, time_t time)
+{
+        if (!*bo)
+                return;
+        /* If pipe_reference() returns true, pass the BO on with the caller's time. */
+        if (pipe_reference(&(*bo)->reference, NULL))
+                vc4_bo_last_unreference_locked_timed(*bo, time);
         *bo = NULL;
 }
 
@@ -93,5 +118,8 @@ vc4_bo_wait(struct vc4_bo *bo, uint64_t timeout_ns);
 bool
 vc4_wait_seqno(struct vc4_screen *screen, uint64_t seqno, uint64_t timeout_ns);
 
+void
+vc4_bufmgr_destroy(struct pipe_screen *pscreen);
+
 #endif /* VC4_BUFMGR_H */
 
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index b532cc6..8d21633 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -76,6 +76,7 @@ vc4_screen_get_vendor(struct pipe_screen *pscreen)
 static void
 vc4_screen_destroy(struct pipe_screen *pscreen)
 {
+        vc4_bufmgr_destroy(pscreen); /* empty the BO cache before freeing the screen */
         ralloc_free(pscreen);
 }
 
@@ -449,6 +450,7 @@ vc4_screen_create(int fd)
         pscreen->is_format_supported = vc4_screen_is_format_supported;
 
         screen->fd = fd;
+        make_empty_list(&screen->bo_cache.time_list);
 
         vc4_fence_init(screen);
 
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 4a8b1f4..50a763f 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -25,7 +25,9 @@
 #define VC4_SCREEN_H
 
 #include "pipe/p_screen.h"
+#include "os/os_thread.h"
 #include "state_tracker/drm_driver.h"
+#include "vc4_qir.h"
 
 struct vc4_bo;
 
@@ -55,6 +57,16 @@ struct vc4_screen {
          * if we know the job's already done.
          */
         uint64_t finished_seqno;
+
+        struct vc4_bo_cache {
+                /** List of struct vc4_bo freed, by age. */
+                struct simple_node time_list;
+                /** List of struct vc4_bo freed, per size, by age. */
+                struct simple_node *size_list;
+                uint32_t size_list_size;
+
+                pipe_mutex lock;
+        } bo_cache;
 };
 
 static inline struct vc4_screen *




More information about the mesa-commit mailing list