Mesa (master): vc4: Speed up glGenerateMipmaps by avoiding shadow baselevel.

Eric Anholt anholt at kemper.freedesktop.org
Fri Jul 15 21:00:20 UTC 2016


Module: Mesa
Branch: master
Commit: 3bcd0f1912a60cc9d3813923d18d29465e41ff56
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3bcd0f1912a60cc9d3813923d18d29465e41ff56

Author: Eric Anholt <eric at anholt.net>
Date:   Thu Jul 14 17:26:43 2016 -0700

vc4: Speed up glGenerateMipmaps by avoiding shadow baselevel.

To support general GL_TEXTURE_BASE_LEVEL we have to copy to a temporary
miptree.  However, if a single level is being selected, we can use the
existing miptree and force all the sampling to be from that particular
level.

This avoids a ton of software fallbacks in glGenerateMipmap(), whose blit
implementation in gallium relies on base levels.  Improves "glmark2 -b
terrain" from 2 fps to 3 (a more precise fps figure would be useful here),
and cuts its CPU usage during the benchmarking from ~30% to ~10% (total
CPU time from 8.8s to 7.6s).

---

 src/gallium/drivers/vc4/vc4_context.h  |  1 +
 src/gallium/drivers/vc4/vc4_program.c  | 11 +++++++++++
 src/gallium/drivers/vc4/vc4_qir.h      |  1 +
 src/gallium/drivers/vc4/vc4_resource.c |  4 +++-
 src/gallium/drivers/vc4/vc4_state.c    |  9 +++++++--
 5 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_context.h b/src/gallium/drivers/vc4/vc4_context.h
index 7da2b55..751f043 100644
--- a/src/gallium/drivers/vc4/vc4_context.h
+++ b/src/gallium/drivers/vc4/vc4_context.h
@@ -74,6 +74,7 @@ struct vc4_sampler_view {
         struct pipe_sampler_view base;
         uint32_t texture_p0;
         uint32_t texture_p1;
+        bool force_first_level;
 };
 
 struct vc4_sampler_state {
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index b4b62e3..4ee49a2 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -393,6 +393,12 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
                 }
         }
 
+        if (c->key->tex[unit].forced_first_level) {
+                lod = qir_uniform_f(c, c->key->tex[unit].forced_first_level);
+                is_txl = true;
+                is_txb = false;
+        }
+
         struct qreg texture_u[] = {
                 qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                 qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
@@ -2313,6 +2319,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
 {
         for (int i = 0; i < texstate->num_textures; i++) {
                 struct pipe_sampler_view *sampler = texstate->textures[i];
+                struct vc4_sampler_view *vc4_sampler = vc4_sampler_view(sampler);
                 struct pipe_sampler_state *sampler_state =
                         texstate->samplers[i];
 
@@ -2333,6 +2340,10 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
                         key->tex[i].compare_func = sampler_state->compare_func;
                         key->tex[i].wrap_s = sampler_state->wrap_s;
                         key->tex[i].wrap_t = sampler_state->wrap_t;
+                        if (vc4_sampler->force_first_level) {
+                                key->tex[i].forced_first_level =
+                                        sampler->u.tex.first_level;
+                        }
                 }
         }
 
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 88eda22..81b5565 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -314,6 +314,7 @@ struct vc4_key {
                                 unsigned compare_func:3;
                                 unsigned wrap_s:3;
                                 unsigned wrap_t:3;
+                                unsigned forced_first_level:8;
                         };
                         struct {
                                 uint16_t msaa_width, msaa_height;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index a07fa3d..08d7d20 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -877,7 +877,9 @@ vc4_update_shadow_baselevel_texture(struct pipe_context *pctx,
         if (shadow->writes == orig->writes && orig->bo->private)
                 return;
 
-        perf_debug("Updating shadow texture due to %s\n",
+        perf_debug("Updating %dx%d@%d shadow texture due to %s\n",
+                   orig->base.b.width0, orig->base.b.height0,
+                   view->u.tex.first_level,
                    view->u.tex.first_level ? "base level" : "raster layout");
 
         for (int i = 0; i <= shadow->base.b.last_level; i++) {
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index bf4e023..df9e1a3 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -603,7 +603,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
          * Also, Raspberry Pi doesn't support sampling from raster textures,
          * so we also have to copy to a temporary then.
          */
-        if (cso->u.tex.first_level ||
+        if ((cso->u.tex.first_level &&
+             (cso->u.tex.first_level != cso->u.tex.last_level)) ||
             rsc->vc4_format == VC4_TEXTURE_TYPE_RGBA32R) {
                 struct vc4_resource *shadow_parent = vc4_resource(prsc);
                 struct pipe_resource tmpl = shadow_parent->base.b;
@@ -626,6 +627,8 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
                 clone->writes = shadow_parent->writes - 1;
 
                 assert(clone->vc4_format != VC4_TEXTURE_TYPE_RGBA32R);
+        } else if (cso->u.tex.first_level) {
+                so->force_first_level = true;
         }
         so->base.texture = prsc;
         so->base.reference.count = 1;
@@ -634,7 +637,9 @@ vc4_create_sampler_view(struct pipe_context *pctx, struct pipe_resource *prsc,
         so->texture_p0 =
                 (VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
                  VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE) |
-                 VC4_SET_FIELD(cso->u.tex.last_level -
+                 VC4_SET_FIELD(so->force_first_level ?
+                               cso->u.tex.last_level :
+                               cso->u.tex.last_level -
                                cso->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
                  VC4_SET_FIELD(cso->target == PIPE_TEXTURE_CUBE,
                                VC4_TEX_P0_CMMODE));




More information about the mesa-commit mailing list