[Mesa-dev] [PATCH 2/2] st/mesa: throttle texture uploads if their memory usage goes beyond a limit

Marek Olšák maraeo at gmail.com
Sat Sep 1 01:02:14 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

This prevents radeonsi from running out of memory. It also increases
texture upload performance by being nice to the kernel memory manager.
---
 src/gallium/auxiliary/util/u_helpers.c | 120 +++++++++++++++++++++++++
 src/gallium/auxiliary/util/u_helpers.h |  17 ++++
 src/mesa/state_tracker/st_cb_texture.c |  16 ++++
 src/mesa/state_tracker/st_context.c    |   5 ++
 src/mesa/state_tracker/st_context.h    |   7 ++
 5 files changed, 165 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_helpers.c b/src/gallium/auxiliary/util/u_helpers.c
index 365238631a2..25d8fbce6f7 100644
--- a/src/gallium/auxiliary/util/u_helpers.c
+++ b/src/gallium/auxiliary/util/u_helpers.c
@@ -172,10 +172,130 @@ util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
 
 /* This is a helper for hardware bring-up. Don't remove. */
 void
 util_wait_for_idle(struct pipe_context *ctx)
 {
    struct pipe_fence_handle *fence = NULL;
 
    ctx->flush(ctx, &fence, 0);
    ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
 }
+
+void
+util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
+{
+   t->max_mem_usage = max_mem_usage;
+}
+
+void
+util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
+{
+   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
+      screen->fence_reference(screen, &t->ring[i].fence, NULL);
+}
+
+static uint64_t
+util_get_throttle_total_memory_usage(struct util_throttle *t)
+{
+   uint64_t total_usage = 0;
+
+   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
+      total_usage += t->ring[i].mem_usage;
+   return total_usage;
+}
+
+static void util_dump_throttle_ring(struct util_throttle *t)
+{
+   printf("Throttle:\n");
+   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
+      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
+             i, t->ring[i].fence ? "yes" : " no",
+             t->ring[i].mem_usage,
+             t->flush_index == i ? " [flush]" : "",
+             t->wait_index == i ? " [wait]" : "");
+   }
+}
+
+/**
+ * Notify util_throttle that the next operation allocates memory.
+ * util_throttle tracks memory usage and waits for fences until its tracked
+ * memory usage decreases.
+ *
+ * Example:
+ *   util_throttle_memory_usage(..., w*h*d*Bpp);
+ *   TexSubImage(..., w, h, d, ...);
+ *
+ * This means that TexSubImage can't allocate more memory than the maximum
+ * limit set during initialization.
+ */
+void
+util_throttle_memory_usage(struct pipe_context *pipe,
+                           struct util_throttle *t, uint64_t memory_size)
+{
+   (void)util_dump_throttle_ring; /* silence warning */
+
+   if (!t->max_mem_usage)
+      return;
+
+   struct pipe_screen *screen = pipe->screen;
+   struct pipe_fence_handle **fence = NULL;
+   unsigned ring_size = ARRAY_SIZE(t->ring);
+   uint64_t total = util_get_throttle_total_memory_usage(t);
+
+   /* If there is not enough memory, walk the list of fences and find
+    * the latest one that we need to wait for.
+    */
+   while (t->wait_index != t->flush_index &&
+          total && total + memory_size > t->max_mem_usage) {
+      assert(t->ring[t->wait_index].fence);
+
+      /* Release an older fence if we need to wait for a newer one. */
+      if (fence)
+         screen->fence_reference(screen, fence, NULL);
+
+      fence = &t->ring[t->wait_index].fence;
+      t->ring[t->wait_index].mem_usage = 0;
+      t->wait_index = (t->wait_index + 1) % ring_size;
+
+      total = util_get_throttle_total_memory_usage(t);
+   }
+
+   /* Wait for the fence to decrease memory usage. */
+   if (fence) {
+      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
+      screen->fence_reference(screen, fence, NULL);
+   }
+
+   /* Flush and get a fence if we've exhausted memory usage for the current
+    * slot.
+    */
+   if (t->ring[t->flush_index].mem_usage &&
+       t->ring[t->flush_index].mem_usage + memory_size >
+       t->max_mem_usage / (ring_size / 2)) {
+      struct pipe_fence_handle **fence =
+         &t->ring[t->flush_index].fence;
+
+      /* Expect that the current flush slot doesn't have a fence yet. */
+      assert(!*fence);
+
+      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
+      t->flush_index = (t->flush_index + 1) % ring_size;
+
+      /* Vacate the next slot if it's occupied. This should be rare. */
+      if (t->flush_index == t->wait_index) {
+         struct pipe_fence_handle **fence =
+            &t->ring[t->wait_index].fence;
+
+         t->ring[t->wait_index].mem_usage = 0;
+         t->wait_index = (t->wait_index + 1) % ring_size;
+
+         assert(*fence);
+         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
+         screen->fence_reference(screen, fence, NULL);
+      }
+
+      assert(!t->ring[t->flush_index].mem_usage);
+      assert(!t->ring[t->flush_index].fence);
+   }
+
+   t->ring[t->flush_index].mem_usage += memory_size;
+}
diff --git a/src/gallium/auxiliary/util/u_helpers.h b/src/gallium/auxiliary/util/u_helpers.h
index 1e57e32959b..e65e64d7781 100644
--- a/src/gallium/auxiliary/util/u_helpers.h
+++ b/src/gallium/auxiliary/util/u_helpers.h
@@ -53,15 +53,32 @@ bool util_upload_index_buffer(struct pipe_context *pipe,
 struct pipe_query *
 util_begin_pipestat_query(struct pipe_context *ctx);
 
 void
 util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                         FILE *f);
 
 void
 util_wait_for_idle(struct pipe_context *ctx);
 
+/* A utility for throttling execution based on memory usage. */
+struct util_throttle {
+   struct {
+      struct pipe_fence_handle *fence;
+      uint64_t mem_usage;
+   } ring[10];
+
+   unsigned flush_index;
+   unsigned wait_index;
+   uint64_t max_mem_usage;
+};
+
+void util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage);
+void util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t);
+void util_throttle_memory_usage(struct pipe_context *pipe,
+                                struct util_throttle *t, uint64_t memory_size);
+
 #ifdef __cplusplus
 }
 #endif
 
 #endif
diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c
index 5406d0247c5..e6e27a852f5 100644
--- a/src/mesa/state_tracker/st_cb_texture.c
+++ b/src/mesa/state_tracker/st_cb_texture.c
@@ -1385,20 +1385,21 @@ try_pbo_upload(struct gl_context *ctx, GLuint dims,
          return false;
    }
 
    success = try_pbo_upload_common(ctx, surface, &addr, src_format);
 
    pipe_surface_reference(&surface, NULL);
 
    return success;
 }
 
+
 static void
 st_TexSubImage(struct gl_context *ctx, GLuint dims,
                struct gl_texture_image *texImage,
                GLint xoffset, GLint yoffset, GLint zoffset,
                GLint width, GLint height, GLint depth,
                GLenum format, GLenum type, const void *pixels,
                const struct gl_pixelstore_attrib *unpack)
 {
    struct st_context *st = st_context(ctx);
    struct st_texture_image *stImage = st_texture_image(texImage);
@@ -1410,20 +1411,21 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
    struct pipe_resource src_templ;
    struct pipe_transfer *transfer;
    struct pipe_blit_info blit;
    enum pipe_format src_format, dst_format;
    mesa_format mesa_src_format;
    GLenum gl_target = texImage->TexObject->Target;
    unsigned bind;
    GLubyte *map;
    unsigned dstz = texImage->Face + texImage->TexObject->MinLayer;
    unsigned dst_level = 0;
+   bool throttled = false;
 
    st_flush_bitmap_cache(st);
    st_invalidate_readpix_cache(st);
 
    if (stObj->pt == stImage->pt)
       dst_level = texImage->TexObject->MinLevel + texImage->Level;
 
    assert(!_mesa_is_format_etc2(texImage->TexFormat) &&
           !_mesa_is_format_astc_2d(texImage->TexFormat) &&
           texImage->TexFormat != MESA_FORMAT_ETC1_RGB8);
@@ -1449,20 +1451,24 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
 
       /* Convert to Gallium coordinates. */
       if (gl_target == GL_TEXTURE_1D_ARRAY) {
          zoffset = yoffset;
          yoffset = 0;
          depth = height;
          height = 1;
          layer_stride = stride;
       }
 
+      util_throttle_memory_usage(pipe, &st->throttle,
+                                 width * height * depth *
+                                 util_format_get_blocksize(dst->format));
+
       u_box_3d(xoffset, yoffset, zoffset + dstz, width, height, depth, &box);
       pipe->texture_subdata(pipe, dst, dst_level, 0,
                             &box, data, stride, layer_stride);
       return;
    }
 
    if (!st->prefer_blit_based_texture_transfer) {
       goto fallback;
    }
 
@@ -1554,20 +1560,25 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
                                    &src_templ.depth0, &src_templ.array_size);
 
    /* Check for NPOT texture support. */
    if (!screen->get_param(screen, PIPE_CAP_NPOT_TEXTURES) &&
        (!util_is_power_of_two_or_zero(src_templ.width0) ||
         !util_is_power_of_two_or_zero(src_templ.height0) ||
         !util_is_power_of_two_or_zero(src_templ.depth0))) {
       goto fallback;
    }
 
+   util_throttle_memory_usage(pipe, &st->throttle,
+                              width * height * depth *
+                              util_format_get_blocksize(src_templ.format));
+   throttled = true;
+
    /* Create the source texture. */
    src = screen->resource_create(screen, &src_templ);
    if (!src) {
       goto fallback;
    }
 
    /* Map source pixels. */
    pixels = _mesa_validate_pbo_teximage(ctx, dims, width, height, depth,
                                         format, type, pixels, unpack,
                                         "glTexSubImage");
@@ -1644,20 +1655,25 @@ st_TexSubImage(struct gl_context *ctx, GLuint dims,
    blit.mask = st_get_blit_mask(format, texImage->_BaseFormat);
    blit.filter = PIPE_TEX_FILTER_NEAREST;
    blit.scissor_enable = FALSE;
 
    st->pipe->blit(st->pipe, &blit);
 
    pipe_resource_reference(&src, NULL);
    return;
 
 fallback:
+   if (!throttled) {
+      util_throttle_memory_usage(pipe, &st->throttle,
+                                 width * height * depth *
+                                 _mesa_get_format_bytes(texImage->TexFormat));
+   }
    _mesa_store_texsubimage(ctx, dims, texImage, xoffset, yoffset, zoffset,
                            width, height, depth, format, type, pixels,
                            unpack);
 }
 
 static void
 st_TexImage(struct gl_context * ctx, GLuint dims,
             struct gl_texture_image *texImage,
             GLenum format, GLenum type, const void *pixels,
             const struct gl_pixelstore_attrib *unpack)
diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c
index 34bfb845bb0..edcbd36a1bf 100644
--- a/src/mesa/state_tracker/st_context.c
+++ b/src/mesa/state_tracker/st_context.c
@@ -267,20 +267,21 @@ st_destroy_context_priv(struct st_context *st, bool destroy_pipe)
    st_destroy_bound_texture_handles(st);
    st_destroy_bound_image_handles(st);
 
    for (i = 0; i < ARRAY_SIZE(st->state.frag_sampler_views); i++) {
       pipe_sampler_view_release(st->pipe,
                                 &st->state.frag_sampler_views[i]);
    }
 
    /* free glReadPixels cache data */
    st_invalidate_readpix_cache(st);
+   util_throttle_deinit(st->pipe->screen, &st->throttle);
 
    cso_destroy_context(st->cso_context);
 
    if (st->pipe && destroy_pipe)
       st->pipe->destroy(st->pipe);
 
    free(st);
 }
 
 
@@ -459,20 +460,24 @@ st_create_context_priv(struct gl_context *ctx, struct pipe_context *pipe,
    st->has_half_float_packing =
       screen->get_param(screen, PIPE_CAP_TGSI_PACK_HALF_FLOAT);
    st->has_multi_draw_indirect =
       screen->get_param(screen, PIPE_CAP_MULTI_DRAW_INDIRECT);
 
    st->has_hw_atomics =
       screen->get_shader_param(screen, PIPE_SHADER_FRAGMENT,
                                PIPE_SHADER_CAP_MAX_HW_ATOMIC_COUNTERS)
       ? true : false;
 
+   util_throttle_init(&st->throttle,
+                      screen->get_param(screen,
+                                        PIPE_CAP_MAX_TEXTURE_UPLOAD_MEMORY_BUDGET));
+
    /* GL limits and extensions */
    st_init_limits(pipe->screen, &ctx->Const, &ctx->Extensions, ctx->API);
    st_init_extensions(pipe->screen, &ctx->Const,
                       &ctx->Extensions, &st->options, ctx->API);
 
    if (st_have_perfmon(st)) {
       ctx->Extensions.AMD_performance_monitor = GL_TRUE;
    }
 
    /* Enable shader-based fallbacks for ARB_color_buffer_float if needed. */
diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h
index 6b1b5633ecc..14b9b018809 100644
--- a/src/mesa/state_tracker/st_context.h
+++ b/src/mesa/state_tracker/st_context.h
@@ -25,20 +25,21 @@
  * 
  **************************************************************************/
 
 #ifndef ST_CONTEXT_H
 #define ST_CONTEXT_H
 
 #include "main/mtypes.h"
 #include "state_tracker/st_api.h"
 #include "main/fbobject.h"
 #include "state_tracker/st_atom.h"
+#include "util/u_helpers.h"
 #include "util/u_inlines.h"
 #include "util/list.h"
 #include "vbo/vbo.h"
 
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
@@ -295,20 +296,26 @@ struct st_context
 
    enum pipe_reset_status reset_status;
 
    /* Array of bound texture/image handles which are resident in the context.
     */
    struct st_bound_handles bound_texture_handles[PIPE_SHADER_TYPES];
    struct st_bound_handles bound_image_handles[PIPE_SHADER_TYPES];
 
    /* Winsys buffers */
    struct list_head winsys_buffers;
+
+   /* Throttling for texture uploads and similar operations to limit memory
+    * usage by limiting the number of in-flight operations based on
+    * the estimated allocated size needed to execute those operations.
+    */
+   struct util_throttle throttle;
 };
 
 
 /* Need this so that we can implement Mesa callbacks in this module.
  */
 static inline struct st_context *st_context(struct gl_context *ctx)
 {
    return ctx->st;
 }
 
-- 
2.17.1



More information about the mesa-dev mailing list