[Mesa-dev] [PATCH] i965: Micro-optimize swizzle_to_scs() and make it inlinable.

Jason Ekstrand jason at jlekstrand.net
Fri Jan 2 23:24:35 PST 2015


On Jan 2, 2015 7:26 PM, "Kenneth Graunke" <kenneth at whitecape.org> wrote:
>
> brw_swizzle_to_scs has been showing up in my CPU profiling, which is
> rather silly - it's a tiny amount of code.  It really should be inlined,
> and can easily be implemented with fewer instructions.
>
> The enum translation is as follows:
>
> SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_ZERO, SWIZZLE_ONE
>         0          1          2          3             4            5
>         4          5          6          7             0            1
>   SCS_RED, SCS_GREEN,  SCS_BLUE, SCS_ALPHA, h   SCS_ZERO,     SCS_ONE
>
> which is simply (swizzle + 4) & 7.

Seems fine to me but I would like to see the above table as a comment so we
know what it does in 2 months (or weeks).  I'm OK with micro-optimizations
like this but they need to be very well documented.

With the comment added,
Reviewed-by: Jason Ekstrand <jason.ekstrand at intel.com>

>
> Haswell needs extra textureGather workarounds to remap GREEN to BLUE,
> but Broadwell and later do not.
>
> This patch replicates swizzle_to_scs in gen7_wm_surface_state.c and
> gen8_surface_state.c, since the Gen8+ code can be simplified to a mere
> two instructions.  Both copies can be marked static for easy inlining.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
>  src/mesa/drivers/dri/i965/brw_state.h             |  1 -
>  src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 29
+++++++----------------
>  src/mesa/drivers/dri/i965/gen8_surface_state.c    | 18 ++++++++++----
>  3 files changed, 22 insertions(+), 26 deletions(-)
>
> No real performance stats, sorry...but it's likely to be barely measurable
> at best, and it's negative lines of C code, and fewer assembly
instructions...
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h
b/src/mesa/drivers/dri/i965/brw_state.h
> index 399347c..f195407 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -225,7 +225,6 @@ int brw_get_texture_swizzle(const struct gl_context
*ctx,
>                              const struct gl_texture_object *t);
>
>  /* gen7_wm_surface_state.c */
> -unsigned brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue);
>  uint32_t gen7_surface_tiling_mode(uint32_t tiling);
>  uint32_t gen7_surface_msaa_bits(unsigned num_samples, enum
intel_msaa_layout l);
>  void gen7_set_surface_mcs_info(struct brw_context *brw,
> diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> index c257cb7..4b62853 100644
> --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
> @@ -41,25 +41,12 @@
>   * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
>   * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)
>   */
> -unsigned
> -brw_swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
> +static unsigned
> +swizzle_to_scs(GLenum swizzle, bool need_green_to_blue)
>  {
> -   switch (swizzle) {
> -   case SWIZZLE_X:
> -      return HSW_SCS_RED;
> -   case SWIZZLE_Y:
> -      return need_green_to_blue ? HSW_SCS_BLUE : HSW_SCS_GREEN;
> -   case SWIZZLE_Z:
> -      return HSW_SCS_BLUE;
> -   case SWIZZLE_W:
> -      return HSW_SCS_ALPHA;
> -   case SWIZZLE_ZERO:
> -      return HSW_SCS_ZERO;
> -   case SWIZZLE_ONE:
> -      return HSW_SCS_ONE;
> -   }
> +   unsigned scs = (swizzle + 4) & 7;
>
> -   unreachable("Should not get here: invalid swizzle mode");
> +   return (need_green_to_blue && scs == HSW_SCS_GREEN) ? HSW_SCS_BLUE :
scs;
>  }
>
>  uint32_t
> @@ -353,10 +340,10 @@ gen7_update_texture_surface(struct gl_context *ctx,
>        const bool need_scs_green_to_blue = for_gather && tex_format ==
BRW_SURFACEFORMAT_R32G32_FLOAT_LD;
>
>        surf[7] |=
> -         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0),
need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |
> -         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1),
need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |
> -         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2),
need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |
> -         SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3),
need_scs_green_to_blue), GEN7_SURFACE_SCS_A);
> +         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0),
need_scs_green_to_blue), GEN7_SURFACE_SCS_R) |
> +         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1),
need_scs_green_to_blue), GEN7_SURFACE_SCS_G) |
> +         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2),
need_scs_green_to_blue), GEN7_SURFACE_SCS_B) |
> +         SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3),
need_scs_green_to_blue), GEN7_SURFACE_SCS_A);
>     }
>
>     if (mt->mcs_mt) {
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c
b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> index 56c46b0..328c8a4 100644
> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> @@ -38,6 +38,16 @@
>  #include "brw_defines.h"
>  #include "brw_wm.h"
>
> +/**
> + * Convert an swizzle enumeration (i.e. SWIZZLE_X) to one of the Gen7.5+
> + * "Shader Channel Select" enumerations (i.e. HSW_SCS_RED)
> + */
> +static unsigned
> +swizzle_to_scs(unsigned swizzle)
> +{
> +   return (swizzle + 4) & 7;
> +}
> +
>  static uint32_t
>  surface_tiling_mode(uint32_t tiling)
>  {
> @@ -231,10 +241,10 @@ gen8_update_texture_surface(struct gl_context *ctx,
>     const int swizzle =
>        unlikely(alpha_depth) ? SWIZZLE_XYZW :
brw_get_texture_swizzle(ctx, tObj);
>     surf[7] |=
> -      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 0), false),
GEN7_SURFACE_SCS_R) |
> -      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 1), false),
GEN7_SURFACE_SCS_G) |
> -      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 2), false),
GEN7_SURFACE_SCS_B) |
> -      SET_FIELD(brw_swizzle_to_scs(GET_SWZ(swizzle, 3), false),
GEN7_SURFACE_SCS_A);
> +      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 0)), GEN7_SURFACE_SCS_R)
|
> +      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 1)), GEN7_SURFACE_SCS_G)
|
> +      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 2)), GEN7_SURFACE_SCS_B)
|
> +      SET_FIELD(swizzle_to_scs(GET_SWZ(swizzle, 3)), GEN7_SURFACE_SCS_A);
>
>     *((uint64_t *) &surf[8]) = mt->bo->offset64 + mt->offset; /* reloc */
>
> --
> 2.2.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20150102/976c3bf7/attachment-0001.html>


More information about the mesa-dev mailing list