[Mesa-dev] [PATCH] i965: Micro-optimize swizzle_to_scs() and make it inlinable.
Kenneth Graunke
kenneth at whitecape.org
Fri Jan 2 19:26:28 PST 2015
brw_swizzle_to_scs has been showing up in my CPU profiling, which is
rather silly - it's a tiny amount of code. It really should be inlined,
and can easily be implemented with fewer instructions.
The enum translation is as follows:
SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
0 1 2 3 4 5
4 5 6 7 0 1
SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, h SCS_ZERO, SCS_ONE
which is simply (swizzle + 4) & 7.
Haswell needs extra textureGather workarounds to remap GREEN to BLUE,
but Broadwell and later do not.
This patch replicates swizzle_to_scs in gen7_wm_surface_state.c and
gen8_surface_state.c, since the Gen8+ code can be simplified to a mere
two instructions. Both copies can be marked static for easy inlining.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/brw_state.h | 1 -
src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 29 +++++++----------------
src/mesa/drivers/dri/i965/gen8_surface_state.c | 18 ++++++++++----
3 files changed, 22 insertions(+), 26 deletions(-)
No real performance stats, sorry...but it's likely to be barely measurable
at best, and it's negative lines of C code, and fewer assembly instructions...
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 399347c..f195407 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -225,7 +225,6 @@ int brw_get_texture_swizzle(const struct gl_context *ctx,
const struct gl_texture_object *t);
/* gen7_wm_surface_state.c */
-unsigned brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue);
uint32_t gen7_surface_tiling_mode(uint32_t tiling);
uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l);
void gen7_set_surface_mcs_info(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index c257cb7..4b62853 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -41,25 +41,12 @@
* Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
* "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)
*/
-unsigned
-brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
+static unsigned
+swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
{
- switch (swizzle) {
- case SWIZZLE_X:
- return HSW_SCS_RED;
- case SWIZZLE_Y:
- return need_green_to_blue ? HSW_SCS_BLUE : HSW_SCS_GREEN;
- case SWIZZLE_Z:
- return HSW_SCS_BLUE;
- case SWIZZLE_W:
- return HSW_SCS_ALPHA;
- case SWIZZLE_ZERO:
- return HSW_SCS_ZERO;
- case SWIZZLE_ONE:
- return HSW_SCS_ONE;
- }
+ unsigned scs = (swizzle + 4) & 7;
- unreachable("Should not get here: invalid swizzle mode");
+ return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;
}
uint32_t
@@ -353,10 +340,10 @@ gen7_update_texture_surface(struct gl_context *ctx,
const bool need_scs_green_to_blue = for_gather && tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
surf[7] |=
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A);
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A);
}
if (mt->mcs_mt) {
diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c
index 56c46b0..328c8a4 100644
--- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
@@ -38,6 +38,16 @@
#include "brw_defines.h"
#include "brw_wm.h"
+/**
+ * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
+ * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)
+ */
+static unsigned
+swizzle_to_scs(unsigned swizzle)
+{
+ return (swizzle + 4) & 7;
+}
+
static uint32_t
surface_tiling_mode(uint32_t tiling)
{
@@ -231,10 +241,10 @@ gen8_update_texture_surface(struct gl_context *ctx,
const int swizzle =
unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj);
surf[7] |=
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), false), GEN7_SURFACE_SCS_R) |
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), false), GEN7_SURFACE_SCS_G) |
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), false), GEN7_SURFACE_SCS_B) |
- SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), false), GEN7_SURFACE_SCS_A);
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |
+ SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
*((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
--
2.2.1
More information about the mesa-dev
mailing list