<p dir="ltr"><br>
On Jan 2, 2015 7:26 PM, "Kenneth Graunke" <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>> wrote:<br>
><br>
> brw_swizzle_to_scs has been showing up in my CPU profiling, which is<br>
> rather silly - it's a tiny amount of code. It really should be inlined,<br>
> and can easily be implemented with fewer instructions.<br>
><br>
> The enum translation is as follows:<br>
><br>
> SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE<br>
> 0 1 2 3 4 5<br>
> 4 5 6 7 0 1<br>
> SCS_RED, SCS_GREEN, SCS_BLUE, SCS_ALPHA, h SCS_ZERO, SCS_ONE<br>
><br>
> which is simply (swizzle + 4) & 7.</p>
<p dir="ltr">Seems fine to me but I would like to see the above table as a comment so we know what it does in 2 months (or weeks). I'm OK with micro-optimizations like this but they need to be very well documented.</p>
<p dir="ltr">With the comment added,<br>
Reviewed-by: Jason Ekstrand <<a href="mailto:jason.ekstrand@intel.com">jason.ekstrand@intel.com</a>></p>
<p dir="ltr">><br>
> Haswell needs extra textureGather workarounds to remap GREEN to BLUE,<br>
> but Broadwell and later do not.<br>
><br>
> This patch replicates swizzle_to_scs in gen7_wm_surface_state.c and<br>
> gen8_surface_state.c, since the Gen8+ code can be simplified to a mere<br>
> two instructions. Both copies can be marked static for easy inlining.<br>
><br>
> Signed-off-by: Kenneth Graunke <<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>><br>
> ---<br>
> src/mesa/drivers/dri/i965/brw_state.h | 1 -<br>
> src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 29 +++++++----------------<br>
> src/mesa/drivers/dri/i965/gen8_surface_state.c | 18 ++++++++++----<br>
> 3 files changed, 22 insertions(+), 26 deletions(-)<br>
><br>
> No real performance stats, sorry...but it's likely to be barely measurable<br>
> at best, and it's negative lines of C code, and fewer assembly instructions...<br>
><br>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h<br>
> index 399347c..f195407 100644<br>
> --- a/src/mesa/drivers/dri/i965/brw_state.h<br>
> +++ b/src/mesa/drivers/dri/i965/brw_state.h<br>
> @@ -225,7 +225,6 @@ int brw_get_texture_swizzle(const struct gl_context *ctx,<br>
> const struct gl_texture_object *t);<br>
><br>
> /* gen7_wm_surface_state.c */<br>
> -unsigned brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue);<br>
> uint32_t gen7_surface_tiling_mode(uint32_t tiling);<br>
> uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum intel_msaa_layout l);<br>
> void gen7_set_surface_mcs_info(struct brw_context *brw,<br>
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c<br>
> index c257cb7..4b62853 100644<br>
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c<br>
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c<br>
> @@ -41,25 +41,12 @@<br>
> * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+<br>
> * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)<br>
> */<br>
> -unsigned<br>
> -brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)<br>
> +static unsigned<br>
> +swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)<br>
> {<br>
> - switch (swizzle) {<br>
> - case SWIZZLE_X:<br>
> - return HSW_SCS_RED;<br>
> - case SWIZZLE_Y:<br>
> - return need_green_to_blue ? HSW_SCS_BLUE : HSW_SCS_GREEN;<br>
> - case SWIZZLE_Z:<br>
> - return HSW_SCS_BLUE;<br>
> - case SWIZZLE_W:<br>
> - return HSW_SCS_ALPHA;<br>
> - case SWIZZLE_ZERO:<br>
> - return HSW_SCS_ZERO;<br>
> - case SWIZZLE_ONE:<br>
> - return HSW_SCS_ONE;<br>
> - }<br>
> + unsigned scs = (swizzle + 4) & 7;<br>
><br>
> - unreachable("Should not get here: invalid swizzle mode");<br>
> + return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE : scs;<br>
> }<br>
><br>
> uint32_t<br>
> @@ -353,10 +340,10 @@ gen7_update_texture_surface(struct gl_context *ctx,<br>
> const bool need_scs_green_to_blue = for_gather && tex_format == BRW_SURFACEFORMAT_R32G32_FLOAT_LD;<br>
><br>
> surf[7] |=<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A);<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0), need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1), need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2), need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3), need_scs_green_to_blue), GEN7_SURFACE_SCS_A);<br>
> }<br>
><br>
> if (mt->mcs_mt) {<br>
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c<br>
> index 56c46b0..328c8a4 100644<br>
> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c<br>
> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c<br>
> @@ -38,6 +38,16 @@<br>
> #include "brw_defines.h"<br>
> #include "brw_wm.h"<br>
><br>
> +/**<br>
> + * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+<br>
> + * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)<br>
> + */<br>
> +static unsigned<br>
> +swizzle_to_scs(unsigned swizzle)<br>
> +{<br>
> + return (swizzle + 4) & 7;<br>
> +}<br>
> +<br>
> static uint32_t<br>
> surface_tiling_mode(uint32_t tiling)<br>
> {<br>
> @@ -231,10 +241,10 @@ gen8_update_texture_surface(struct gl_context *ctx,<br>
> const int swizzle =<br>
> unlikely(alpha_depth) ? SWIZZLE_XYZW : brw_get_texture_swizzle(ctx, tObj);<br>
> surf[7] |=<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), false), GEN7_SURFACE_SCS_R) |<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), false), GEN7_SURFACE_SCS_G) |<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), false), GEN7_SURFACE_SCS_B) |<br>
> - SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), false), GEN7_SURFACE_SCS_A);<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R) |<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G) |<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B) |<br>
> + SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);<br>
><br>
> *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */<br>
><br>
> --<br>
> 2.2.1<br>
><br>
> _______________________________________________<br>
> mesa-dev mailing list<br>
> <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</p>