[Mesa-dev] [PATCH 12/24] cso: don't track the number of sampler states bound

Marek Olšák maraeo at gmail.com
Mon Jun 12 18:18:43 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

This removes 2 loops from hot codepaths and adds 1 loop to a rare codepath
(cso_restore_fragment_samplers), and makes sanitize_hash() slightly slower,
because it now scans all PIPE_MAX_SAMPLERS slots for every shader stage.

Sampler states, when bound, are not unbound for draw calls that don't need
them. That's OK, because bound sampler states don't add any overhead.

This results in lower CPU overhead in most cases.
---
 src/gallium/auxiliary/cso_cache/cso_context.c | 59 +++++++++++----------------
 1 file changed, 23 insertions(+), 36 deletions(-)

diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index 5558385..4947b8e 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -50,21 +50,20 @@
 #include "cso_context.h"
 
 
 /**
  * Per-shader sampler information.
  */
 struct sampler_info
 {
    struct cso_sampler *cso_samplers[PIPE_MAX_SAMPLERS];
    void *samplers[PIPE_MAX_SAMPLERS];
-   unsigned nr_samplers;
 };
 
 
 
 struct cso_context {
    struct pipe_context *pipe;
    struct cso_cache *cache;
    struct u_vbuf *vbuf;
 
    boolean has_geometry_shader;
@@ -76,20 +75,25 @@ struct cso_context {
 
    struct pipe_sampler_view *fragment_views[PIPE_MAX_SHADER_SAMPLER_VIEWS];
    unsigned nr_fragment_views;
 
    struct pipe_sampler_view *fragment_views_saved[PIPE_MAX_SHADER_SAMPLER_VIEWS];
    unsigned nr_fragment_views_saved;
 
    struct sampler_info fragment_samplers_saved;
    struct sampler_info samplers[PIPE_SHADER_TYPES];
 
+   /* Highest sampler index seen in cso_single_sampler, or -1 if none.
+    * Temporary: it is reset to -1 when cso_single_sampler_done is called.
+    */
+   int max_sampler_seen;
+
    struct pipe_vertex_buffer aux_vertex_buffer_current;
    struct pipe_vertex_buffer aux_vertex_buffer_saved;
    unsigned aux_vertex_buffer_index;
 
    struct pipe_constant_buffer aux_constbuf_current[PIPE_SHADER_TYPES];
    struct pipe_constant_buffer aux_constbuf_saved[PIPE_SHADER_TYPES];
 
    struct pipe_image_view fragment_image0_current;
    struct pipe_image_view fragment_image0_saved;
 
@@ -233,21 +237,21 @@ sanitize_hash(struct cso_hash *hash, enum cso_cache_type type,
    if (type == CSO_SAMPLER) {
       int i, j;
 
       samplers_to_restore = MALLOC(PIPE_SHADER_TYPES * PIPE_MAX_SAMPLERS *
                                    sizeof(*samplers_to_restore));
 
       /* Temporarily remove currently bound sampler states from the hash
        * table, to prevent them from being deleted
        */
       for (i = 0; i < PIPE_SHADER_TYPES; i++) {
-         for (j = 0; j < ctx->samplers[i].nr_samplers; j++) {
+         for (j = 0; j < PIPE_MAX_SAMPLERS; j++) {
             struct cso_sampler *sampler = ctx->samplers[i].cso_samplers[j];
 
             if (sampler && cso_hash_take(hash, sampler->hash_key))
                samplers_to_restore[to_restore++] = sampler;
          }
       }
    }
 
    iter = cso_hash_first_node(hash);
    while (to_remove) {
@@ -327,20 +331,21 @@ cso_create_context(struct pipe_context *pipe, unsigned u_vbuf_flags)
                                         PIPE_SHADER_CAP_SUPPORTED_IRS);
       if (supported_irs & (1 << PIPE_SHADER_IR_TGSI)) {
          ctx->has_compute_shader = TRUE;
       }
    }
    if (pipe->screen->get_param(pipe->screen,
                                PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS) != 0) {
       ctx->has_streamout = TRUE;
    }
 
+   ctx->max_sampler_seen = -1;
    return ctx;
 
 out:
    cso_destroy_context( ctx );
    return NULL;
 }
 
 /**
  * Free the CSO context.
  */
@@ -1223,116 +1228,98 @@ cso_single_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage,
             FREE(cso);
             return PIPE_ERROR_OUT_OF_MEMORY;
          }
       }
       else {
          cso = cso_hash_iter_data(iter);
       }
 
       ctx->samplers[shader_stage].cso_samplers[idx] = cso;
       ctx->samplers[shader_stage].samplers[idx] = cso->data;
-   } else {
-      ctx->samplers[shader_stage].cso_samplers[idx] = NULL;
-      ctx->samplers[shader_stage].samplers[idx] = NULL;
+      ctx->max_sampler_seen = MAX2(ctx->max_sampler_seen, (int)idx);
    }
 
    return PIPE_OK;
 }
 
 
 /**
  * Send staged sampler state to the driver.
  */
 void
 cso_single_sampler_done(struct cso_context *ctx,
                         enum pipe_shader_type shader_stage)
 {
    struct sampler_info *info = &ctx->samplers[shader_stage];
-   const unsigned old_nr_samplers = info->nr_samplers;
-   unsigned i;
 
-   /* find highest non-null sampler */
-   for (i = PIPE_MAX_SAMPLERS; i > 0; i--) {
-      if (info->samplers[i - 1] != NULL)
-         break;
-   }
+   if (ctx->max_sampler_seen == -1)
+      return;
 
-   info->nr_samplers = i;
    ctx->pipe->bind_sampler_states(ctx->pipe, shader_stage, 0,
-                                  MAX2(old_nr_samplers, info->nr_samplers),
+                                  ctx->max_sampler_seen + 1,
                                   info->samplers);
+   ctx->max_sampler_seen = -1;
 }
 
 
 /*
  * If the function encouters any errors it will return the
  * last one. Done to always try to set as many samplers
  * as possible.
  */
 enum pipe_error
 cso_set_samplers(struct cso_context *ctx,
                  enum pipe_shader_type shader_stage,
                  unsigned nr,
                  const struct pipe_sampler_state **templates)
 {
-   struct sampler_info *info = &ctx->samplers[shader_stage];
    unsigned i;
    enum pipe_error temp, error = PIPE_OK;
 
    for (i = 0; i < nr; i++) {
       temp = cso_single_sampler(ctx, shader_stage, i, templates[i]);
       if (temp != PIPE_OK)
          error = temp;
    }
 
-   for ( ; i < info->nr_samplers; i++) {
-      temp = cso_single_sampler(ctx, shader_stage, i, NULL);
-      if (temp != PIPE_OK)
-         error = temp;
-   }
-
    cso_single_sampler_done(ctx, shader_stage);
 
    return error;
 }
 
 static void
 cso_save_fragment_samplers(struct cso_context *ctx)
 {
    struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
    struct sampler_info *saved = &ctx->fragment_samplers_saved;
 
-   saved->nr_samplers = info->nr_samplers;
-   memcpy(saved->cso_samplers, info->cso_samplers, info->nr_samplers *
-          sizeof(*info->cso_samplers));
-   memcpy(saved->samplers, info->samplers, info->nr_samplers *
-          sizeof(*info->samplers));
+   memcpy(saved->cso_samplers, info->cso_samplers,
+          sizeof(info->cso_samplers));
+   memcpy(saved->samplers, info->samplers, sizeof(info->samplers));
 }
 
 
 static void
 cso_restore_fragment_samplers(struct cso_context *ctx)
 {
    struct sampler_info *info = &ctx->samplers[PIPE_SHADER_FRAGMENT];
    struct sampler_info *saved = &ctx->fragment_samplers_saved;
-   int delta = (int)info->nr_samplers - saved->nr_samplers;
 
    memcpy(info->cso_samplers, saved->cso_samplers,
-          saved->nr_samplers * sizeof(*info->cso_samplers));
-   memcpy(info->samplers, saved->samplers,
-          saved->nr_samplers * sizeof(*info->samplers));
-
-   if (delta > 0) {
-      memset(&info->cso_samplers[saved->nr_samplers], 0,
-             delta * sizeof(*info->cso_samplers));
-      memset(&info->samplers[saved->nr_samplers], 0,
-             delta * sizeof(*info->samplers));
+          sizeof(info->cso_samplers));
+   memcpy(info->samplers, saved->samplers, sizeof(info->samplers));
+
+   for (int i = PIPE_MAX_SAMPLERS - 1; i >= 0; i--) {
+      if (info->samplers[i]) {
+         ctx->max_sampler_seen = i;
+         break;
+      }
    }
 
    cso_single_sampler_done(ctx, PIPE_SHADER_FRAGMENT);
 }
 
 
 void
 cso_set_sampler_views(struct cso_context *ctx,
                       enum pipe_shader_type shader_stage,
                       unsigned count,
-- 
2.7.4



More information about the mesa-dev mailing list