Mesa (master): freedreno/ir3: optimize shader key comparison

Rob Clark robclark at kemper.freedesktop.org
Wed Oct 15 19:52:02 UTC 2014


Module: Mesa
Branch: master
Commit: 368466b7b72aed74b917aeb3225d7a0a7101678c
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=368466b7b72aed74b917aeb3225d7a0a7101678c

Author: Rob Clark <robclark at freedesktop.org>
Date:   Tue Oct 14 16:23:18 2014 -0400

freedreno/ir3: optimize shader key comparison

Signed-off-by: Rob Clark <robclark at freedesktop.org>

---

 src/gallium/drivers/freedreno/a3xx/fd3_context.h |    3 ++
 src/gallium/drivers/freedreno/a3xx/fd3_draw.c    |   21 ++++----
 src/gallium/drivers/freedreno/a3xx/fd3_texture.c |   10 +++-
 src/gallium/drivers/freedreno/ir3/ir3_shader.c   |   27 ++++++----
 src/gallium/drivers/freedreno/ir3/ir3_shader.h   |   58 ++++++++++++++--------
 5 files changed, 79 insertions(+), 40 deletions(-)

diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_context.h b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
index 324edb2..77e4605 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_context.h
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_context.h
@@ -92,6 +92,9 @@ struct fd3_context {
 	struct u_upload_mgr *border_color_uploader;
 	struct pipe_resource *border_color_buf;
 
+	/* true if *any* bits are set in {v,f}saturate_{s,t,r} */
+	bool vsaturate, fsaturate;
+
 	/* bitmask of sampler which needs coords clamped for vertex
 	 * shader:
 	 */
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
index ccedb39..7cc24e5 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_draw.c
@@ -82,18 +82,20 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
 	struct fd3_context *fd3_ctx = fd3_context(ctx);
 	struct ir3_shader_key *last_key = &fd3_ctx->last_key;
 
-	if (memcmp(last_key, key, sizeof(*key))) {
+	if (!ir3_shader_key_equal(last_key, key)) {
 		ctx->dirty |= FD_DIRTY_PROG;
 
-		if ((last_key->vsaturate_s != key->vsaturate_s) ||
-				(last_key->vsaturate_t != key->vsaturate_t) ||
-				(last_key->vsaturate_r != key->vsaturate_r))
-			ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
+		if (last_key->has_per_samp || key->has_per_samp) {
+			if ((last_key->vsaturate_s != key->vsaturate_s) ||
+					(last_key->vsaturate_t != key->vsaturate_t) ||
+					(last_key->vsaturate_r != key->vsaturate_r))
+				ctx->prog.dirty |= FD_SHADER_DIRTY_VP;
 
-		if ((last_key->fsaturate_s != key->fsaturate_s) ||
-				(last_key->fsaturate_t != key->fsaturate_t) ||
-				(last_key->fsaturate_r != key->fsaturate_r))
-			ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+			if ((last_key->fsaturate_s != key->fsaturate_s) ||
+					(last_key->fsaturate_t != key->fsaturate_t) ||
+					(last_key->fsaturate_r != key->fsaturate_r))
+				ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+		}
 
 		if (last_key->color_two_side != key->color_two_side)
 			ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
@@ -124,6 +126,7 @@ fd3_draw(struct fd_context *ctx, const struct pipe_draw_info *info)
 			// TODO set .half_precision based on render target format,
 			// ie. float16 and smaller use half, float32 use full..
 			.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
+			.has_per_samp = fd3_ctx->fsaturate || fd3_ctx->vsaturate,
 			.vsaturate_s = fd3_ctx->vsaturate_s,
 			.vsaturate_t = fd3_ctx->vsaturate_t,
 			.vsaturate_r = fd3_ctx->vsaturate_r,
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
index d70b39e..39befef 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_texture.c
@@ -143,7 +143,7 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
 {
 	struct fd_context *ctx = fd_context(pctx);
 	struct fd3_context *fd3_ctx = fd3_context(ctx);
-	unsigned saturate_s = 0, saturate_t = 0, saturate_r = 0;
+	uint16_t saturate_s = 0, saturate_t = 0, saturate_r = 0;
 	unsigned i;
 
 	for (i = 0; i < nr; i++) {
@@ -162,10 +162,18 @@ fd3_sampler_states_bind(struct pipe_context *pctx,
 	fd_sampler_states_bind(pctx, shader, start, nr, hwcso);
 
 	if (shader == PIPE_SHADER_FRAGMENT) {
+		fd3_ctx->fsaturate =
+			(saturate_s != 0) ||
+			(saturate_t != 0) ||
+			(saturate_r != 0);
 		fd3_ctx->fsaturate_s = saturate_s;
 		fd3_ctx->fsaturate_t = saturate_t;
 		fd3_ctx->fsaturate_r = saturate_r;
 	} else if (shader == PIPE_SHADER_VERTEX) {
+		fd3_ctx->vsaturate =
+			(saturate_s != 0) ||
+			(saturate_t != 0) ||
+			(saturate_r != 0);
 		fd3_ctx->vsaturate_s = saturate_s;
 		fd3_ctx->vsaturate_t = saturate_t;
 		fd3_ctx->vsaturate_r = saturate_r;
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.c b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
index c77cec1..1f7e869 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.c
@@ -182,23 +182,30 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
 	 * so normalize the key to avoid constructing multiple identical
 	 * variants:
 	 */
-	if (shader->type == SHADER_FRAGMENT) {
+	switch (shader->type) {
+	case SHADER_FRAGMENT:
+	case SHADER_COMPUTE:
 		key.binning_pass = false;
-		key.vsaturate_s = 0;
-		key.vsaturate_t = 0;
-		key.vsaturate_r = 0;
-	}
-	if (shader->type == SHADER_VERTEX) {
+		if (key.has_per_samp) {
+			key.vsaturate_s = 0;
+			key.vsaturate_t = 0;
+			key.vsaturate_r = 0;
+		}
+		break;
+	case SHADER_VERTEX:
 		key.color_two_side = false;
 		key.half_precision = false;
 		key.alpha = false;
-		key.fsaturate_s = 0;
-		key.fsaturate_t = 0;
-		key.fsaturate_r = 0;
+		if (key.has_per_samp) {
+			key.fsaturate_s = 0;
+			key.fsaturate_t = 0;
+			key.fsaturate_r = 0;
+		}
+		break;
 	}
 
 	for (v = shader->variants; v; v = v->next)
-		if (!memcmp(&key, &v->key, sizeof(key)))
+		if (ir3_shader_key_equal(&key, &v->key))
 			return v;
 
 	/* compile new variant if it doesn't exist already: */
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_shader.h b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
index c531ad7..628c09e 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_shader.h
+++ b/src/gallium/drivers/freedreno/ir3/ir3_shader.h
@@ -54,36 +54,54 @@ static inline uint16_t sem2idx(ir3_semantic sem)
  * in hw (two sided color), binning-pass vertex shader, etc.
  */
 struct ir3_shader_key {
+	union {
+		struct {
+			/* do we need to check {v,f}saturate_{s,t,r}? */
+			unsigned has_per_samp : 1;
+
+			/*
+			 * Vertex shader variant parameters:
+			 */
+			unsigned binning_pass : 1;
+
+			/*
+			 * Fragment shader variant parameters:
+			 */
+			unsigned color_two_side : 1;
+			unsigned half_precision : 1;
+			/* For rendering to alpha, we need a bit of special handling
+			 * since the hw always takes gl_FragColor starting from x
+			 * component, rather than figuring out to take the w component.
+			 * We could be more clever and generate variants for other
+			 * render target formats (ie. luminance formats are xxx1), but
+			 * let's start with this and see how it goes:
+			 */
+			unsigned alpha : 1;
+		};
+		uint32_t global;
+	};
+
 	/* bitmask of sampler which needs coords clamped for vertex
 	 * shader:
 	 */
-	unsigned vsaturate_s, vsaturate_t, vsaturate_r;
+	uint16_t vsaturate_s, vsaturate_t, vsaturate_r;
 
 	/* bitmask of sampler which needs coords clamped for frag
 	 * shader:
 	 */
-	unsigned fsaturate_s, fsaturate_t, fsaturate_r;
-
-	/*
-	 * Vertex shader variant parameters:
-	 */
-	unsigned binning_pass : 1;
+	uint16_t fsaturate_s, fsaturate_t, fsaturate_r;
 
-	/*
-	 * Fragment shader variant parameters:
-	 */
-	unsigned color_two_side : 1;
-	unsigned half_precision : 1;
-	/* For rendering to alpha, we need a bit of special handling
-	 * since the hw always takes gl_FragColor starting from x
-	 * component, rather than figuring out to take the w component.
-	 * We could be more clever and generate variants for other
-	 * render target formats (ie. luminance formats are xxx1), but
-	 * let's start with this and see how it goes:
-	 */
-	unsigned alpha : 1;
 };
 
+static inline bool
+ir3_shader_key_equal(struct ir3_shader_key *a, struct ir3_shader_key *b)
+{
+	/* slow-path if we need to check {v,f}saturate_{s,t,r} */
+	if (a->has_per_samp || b->has_per_samp)
+		return memcmp(a, b, sizeof(struct ir3_shader_key)) == 0;
+	return a->global == b->global;
+}
+
 struct ir3_shader_variant {
 	struct fd_bo *bo;
 




More information about the mesa-commit mailing list