[Mesa-dev] [PATCH 4/4] i965: Drop non-LLC lunacy in the program cache code.

Kenneth Graunke kenneth at whitecape.org
Wed Jul 12 07:22:25 UTC 2017


The non-LLC story was a horror show.  We uploaded data via pwrite
(drm_intel_bo_subdata), which would stall if the cache BO was in
use (being read) by the GPU.  Obviously, we wanted to avoid that.
So, we tried to detect whether the buffer was busy, and if so, we'd
allocate a new BO, map the old one read-only (hopefully not stalling),
copy all shaders compiled since the dawn of time to the new buffer,
upload our new one, toss the old BO, and let the state upload code
know that our program cache BO changed.  This was a lot of extra data
copying, and flagging BRW_NEW_PROGRAM_CACHE would also cause a new
STATE_BASE_ADDRESS to be emitted, stalling the entire pipeline.

Not only that, but our rudimentary busy tracking consistented of a flag
set at execbuf time, and not cleared until we threw out the program
cache BO.  So, the first shader upload after any drawing would hit this
"abandon the cache and start over" copying path.

None of this is necessary - it's just ancient crufty code.  We can
use the same persistent mapping paths on all platforms.  On non-LLC
platforms, this should use a write combining map, which should be
decently fast.  (On ancient kernels, this will fall through to an
uncached GTT map, which will be less efficient, but...upgrade your
kernel, seriously...)

This is not only better, but the code is significantly simpler.
---
 src/mesa/drivers/dri/i965/brw_program_cache.c | 70 +++++----------------------
 1 file changed, 12 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_program_cache.c b/src/mesa/drivers/dri/i965/brw_program_cache.c
index 2fd989a41c9..3b05334bb4f 100644
--- a/src/mesa/drivers/dri/i965/brw_program_cache.c
+++ b/src/mesa/drivers/dri/i965/brw_program_cache.c
@@ -214,30 +214,22 @@ brw_cache_new_bo(struct brw_cache *cache, uint32_t new_size)
 {
    struct brw_context *brw = cache->brw;
    struct brw_bo *new_bo;
-   void *llc_map;
 
    new_bo = brw_bo_alloc(brw->bufmgr, "program cache", new_size, 64);
    if (can_do_exec_capture(brw->screen))
       new_bo->kflags = EXEC_OBJECT_CAPTURE;
-   if (brw->has_llc)
-      llc_map = brw_bo_map(brw, new_bo, MAP_READ | MAP_ASYNC);
+
+   void *map = brw_bo_map(brw, new_bo, MAP_READ | MAP_ASYNC);
 
    /* Copy any existing data that needs to be saved. */
    if (cache->next_offset != 0) {
-      if (brw->has_llc) {
-         memcpy(llc_map, cache->map, cache->next_offset);
-      } else {
-         void *map = brw_bo_map(brw, cache->bo, MAP_READ);
-         brw_bo_subdata(new_bo, 0, cache->next_offset, map);
-         brw_bo_unmap(cache->bo);
-      }
+      memcpy(map, cache->map, cache->next_offset);
    }
 
-   if (brw->has_llc)
-      brw_bo_unmap(cache->bo);
+   brw_bo_unmap(cache->bo);
    brw_bo_unreference(cache->bo);
    cache->bo = new_bo;
-   cache->map = brw->has_llc ? llc_map : NULL;
+   cache->map = map;
    cache->bo_used_by_gpu = false;
 
    /* Since we have a new BO in place, we need to signal the units
@@ -255,27 +247,13 @@ brw_lookup_prog(const struct brw_cache *cache,
                 enum brw_cache_id cache_id,
                 const void *data, unsigned data_size)
 {
-   struct brw_context *brw = cache->brw;
    unsigned i;
    const struct brw_cache_item *item;
 
    for (i = 0; i < cache->size; i++) {
       for (item = cache->items[i]; item; item = item->next) {
-         int ret;
-
-         if (item->cache_id != cache_id || item->size != data_size)
-            continue;
-
-         void *map;
-         if (!brw->has_llc)
-            map = brw_bo_map(brw, cache->bo, MAP_READ);
-         else
-            map = cache->map;
-
-         ret = memcmp(map + item->offset, data, item->size);
-         if (!brw->has_llc)
-            brw_bo_unmap(cache->bo);
-         if (ret)
+         if (item->cache_id != cache_id || item->size != data_size ||
+             memcmp(cache->map + item->offset, data, item->size) != 0)
             continue;
 
          return item;
@@ -289,7 +267,6 @@ static uint32_t
 brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
 {
    uint32_t offset;
-   struct brw_context *brw = cache->brw;
 
    /* Allocate space in the cache BO for our new program. */
    if (cache->next_offset + size > cache->bo->size) {
@@ -301,14 +278,6 @@ brw_alloc_item_data(struct brw_cache *cache, uint32_t size)
       brw_cache_new_bo(cache, new_size);
    }
 
-   /* If we would block on writing to an in-use program BO, just
-    * recreate it.
-    */
-   if (!brw->has_llc && cache->bo_used_by_gpu) {
-      perf_debug("Copying busy program cache buffer.\n");
-      brw_cache_new_bo(cache, cache->bo->size);
-   }
-
    offset = cache->next_offset;
 
    /* Programs are always 64-byte aligned, so set up the next one now */
@@ -346,7 +315,6 @@ brw_upload_cache(struct brw_cache *cache,
                  uint32_t *out_offset,
                  void *out_aux)
 {
-   struct brw_context *brw = cache->brw;
    struct brw_cache_item *item = CALLOC_STRUCT(brw_cache_item);
    const struct brw_cache_item *matching_data =
       brw_lookup_prog(cache, cache_id, data, data_size);
@@ -373,11 +341,7 @@ brw_upload_cache(struct brw_cache *cache,
       item->offset = brw_alloc_item_data(cache, data_size);
 
       /* Copy data to the buffer */
-      if (brw->has_llc) {
-         memcpy(cache->map + item->offset, data, data_size);
-      } else {
-         brw_bo_subdata(cache->bo, item->offset, data_size, data);
-      }
+      memcpy(cache->map + item->offset, data, data_size);
    }
 
    /* Set up the memory containing the key and aux_data */
@@ -416,8 +380,8 @@ brw_init_caches(struct brw_context *brw)
    cache->bo = brw_bo_alloc(brw->bufmgr, "program cache",  4096, 64);
    if (can_do_exec_capture(brw->screen))
       cache->bo->kflags = EXEC_OBJECT_CAPTURE;
-   if (brw->has_llc)
-      cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
+
+   cache->map = brw_bo_map(brw, cache->bo, MAP_READ | MAP_WRITE | MAP_ASYNC);
 }
 
 static void
@@ -496,8 +460,7 @@ brw_destroy_cache(struct brw_context *brw, struct brw_cache *cache)
 
    /* This can be NULL if context creation failed early on */
    if (cache->bo) {
-      if (brw->has_llc)
-         brw_bo_unmap(cache->bo);
+      brw_bo_unmap(cache->bo);
       brw_bo_unreference(cache->bo);
       cache->bo = NULL;
       cache->map = NULL;
@@ -547,21 +510,12 @@ brw_print_program_cache(struct brw_context *brw)
 {
    const struct brw_cache *cache = &brw->cache;
    struct brw_cache_item *item;
-   void *map;
-
-   if (!brw->has_llc)
-      map = brw_bo_map(brw, cache->bo, MAP_READ);
-   else
-      map = cache->map;
 
    for (unsigned i = 0; i < cache->size; i++) {
       for (item = cache->items[i]; item; item = item->next) {
          fprintf(stderr, "%s:\n", cache_name(i));
-         brw_disassemble(&brw->screen->devinfo, map,
+         brw_disassemble(&brw->screen->devinfo, cache->map,
                          item->offset, item->size, stderr);
       }
    }
-
-   if (!brw->has_llc)
-      brw_bo_unmap(cache->bo);
 }
-- 
2.13.2



More information about the mesa-dev mailing list