[Mesa-dev] [PATCH 6/6] softpipe: start adding gather support

Tue May 19 17:17:00 PDT 2015

On 05/19/2015 03:48 PM, Dave Airlie wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> This adds both ARB_texture_gather and the enhanced gather
> for ARB_gpu_shader5.
>
> This passes all the piglit tests, it relies on the GLSL
> lowering pass to make textureGatherOffsets work.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>   src/gallium/drivers/softpipe/sp_screen.c     |   5 +-
>   src/gallium/drivers/softpipe/sp_tex_sample.c | 267 +++++++++++++++++++--------
>   src/gallium/drivers/softpipe/sp_tex_sample.h |   2 +
>   3 files changed, 191 insertions(+), 83 deletions(-)
>
> diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
> index b3bc177..a688d31 100644
> --- a/src/gallium/drivers/softpipe/sp_screen.c
> +++ b/src/gallium/drivers/softpipe/sp_screen.c
> @@ -191,7 +191,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
>      case PIPE_CAP_ENDIANNESS:
>         return PIPE_ENDIAN_NATIVE;
>      case PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS:
> +      return 4;
>      case PIPE_CAP_TEXTURE_GATHER_SM5:
> +      return 1;
>      case PIPE_CAP_BUFFER_MAP_PERSISTENT_COHERENT:
>      case PIPE_CAP_TEXTURE_QUERY_LOD:
>      case PIPE_CAP_SAMPLE_SHADING:
> @@ -206,8 +208,9 @@ softpipe_get_param(struct pipe_screen *screen, enum pipe_cap param)
>      case PIPE_CAP_FAKE_SW_MSAA:
>         return 1;
>      case PIPE_CAP_MIN_TEXTURE_GATHER_OFFSET:
> +      return -32;
>      case PIPE_CAP_MAX_TEXTURE_GATHER_OFFSET:
> -      return 0;
> +      return 31;
>      case PIPE_CAP_DRAW_INDIRECT:
>         return 1;
>
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
> index 02f3b37..5504189 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.c
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
> @@ -1476,6 +1476,60 @@ img_filter_1d_array_linear(struct sp_sampler_view *sp_sview,
>         rgba[TGSI_NUM_CHANNELS*c] = lerp(xw, tx0[c], tx1[c]);
>   }
>
> +/*
> + * Retrieve the gathered value, need to convert to the
> + * TGSI expected interface, and take component select
> + * and swizzling into account.

I'm not sure I understand what the function does from this comment.
Can the comment be improved?

> + */
> +static float get_gather_value(struct sp_sampler_view *sp_sview,
> +                              int chan_in, int comp_sel,
> +                              const float *tx[4])

We usually put the 'static float' part on its own line.

Can sp_sview be const-qualified?

> +{
> +   int chan;
> +   unsigned swizzle;
> +
> +   switch (chan_in) {
> +   case 0:
> +   default:

Is the default case expected to be hit?  If not, I'd assert.

Or could the switch instead be implemented with a small table?

const int channel_map[4] = { 2, 3, 1, 0 };

chan = channel_map[chan_in];

> +      chan = 2;
> +      break;
> +   case 1:
> +      chan = 3;
> +      break;
> +   case 2:
> +      chan = 1;
> +      break;
> +   case 3:
> +      chan = 0;
> +      break;
> +   }
> +
> +   switch (comp_sel) {
> +   case 0:
> +   default:
> +      swizzle = sp_sview->base.swizzle_r;
> +      break;
> +   case 1:
> +      swizzle = sp_sview->base.swizzle_g;
> +      break;
> +   case 2:
> +      swizzle = sp_sview->base.swizzle_b;
> +      break;
> +   case 3:
> +      swizzle = sp_sview->base.swizzle_a;
> +      break;
> +   }
> +
> +   switch (swizzle) {
> +   case PIPE_SWIZZLE_ZERO:
> +      return 0.0;
> +   case PIPE_SWIZZLE_ONE:
> +      return 1.0;
> +   default:
> +      return tx[chan][swizzle];
> +   }
> +}
> +
>
>   static void
>   img_filter_2d_linear(struct sp_sampler_view *sp_sview,
> @@ -1508,11 +1562,18 @@ img_filter_2d_linear(struct sp_sampler_view *sp_sview,
>      tx[2] = get_texel_2d(sp_sview, sp_samp, addr, x0, y1);
>      tx[3] = get_texel_2d(sp_sview, sp_samp, addr, x1, y1);
>
> -   /* interpolate R, G, B, A */
> -   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> -      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> -                                          tx[0][c], tx[1][c],
> -                                          tx[2][c], tx[3][c]);
> +   if (args->gather_only) {
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
> +                                                      args->gather_comp,
> +                                                      tx);
> +   } else {
> +      /* interpolate R, G, B, A */
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                             tx[0][c], tx[1][c],
> +                                             tx[2][c], tx[3][c]);
> +   }
>   }
>
>
> @@ -1549,11 +1610,18 @@ img_filter_2d_array_linear(struct sp_sampler_view *sp_sview,
>      tx[2] = get_texel_2d_array(sp_sview, sp_samp, addr, x0, y1, layer);
>      tx[3] = get_texel_2d_array(sp_sview, sp_samp, addr, x1, y1, layer);
>
> -   /* interpolate R, G, B, A */
> -   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> -      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> -                                          tx[0][c], tx[1][c],
> -                                          tx[2][c], tx[3][c]);
> +   if (args->gather_only) {
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
> +                                                      args->gather_comp,
> +                                                      tx);
> +   } else {
> +      /* interpolate R, G, B, A */
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                             tx[0][c], tx[1][c],
> +                                             tx[2][c], tx[3][c]);
> +   }
>   }
>
>
> @@ -1610,11 +1678,18 @@ img_filter_cube_linear(struct sp_sampler_view *sp_sview,
>         tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
>      }
>
> -   /* interpolate R, G, B, A */
> -   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> -      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> -                                          tx[0][c], tx[1][c],
> -                                          tx[2][c], tx[3][c]);
> +   if (args->gather_only) {
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
> +                                                      args->gather_comp,
> +                                                      tx);
> +   } else {
> +      /* interpolate R, G, B, A */
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                             tx[0][c], tx[1][c],
> +                                             tx[2][c], tx[3][c]);
> +   }
>   }
>
>
> @@ -1673,11 +1748,18 @@ img_filter_cube_array_linear(struct sp_sampler_view *sp_sview,
>         tx[3] = get_texel_cube_array(sp_sview, sp_samp, addr, x1, y1, layer + args->face_id);
>      }
>
> -   /* interpolate R, G, B, A */
> -   for (c = 0; c < TGSI_QUAD_SIZE; c++)
> -      rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> -                                          tx[0][c], tx[1][c],
> -                                          tx[2][c], tx[3][c]);
> +   if (args->gather_only) {
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = get_gather_value(sp_sview, c,
> +                                                      args->gather_comp,
> +                                                      tx);
> +   } else {
> +      /* interpolate R, G, B, A */
> +      for (c = 0; c < TGSI_QUAD_SIZE; c++)
> +         rgba[TGSI_NUM_CHANNELS*c] = lerp_2d(xw, yw,
> +                                             tx[0][c], tx[1][c],
> +                                             tx[2][c], tx[3][c]);
> +   }
>   }
>
>   static void
> @@ -1795,6 +1877,7 @@ compute_lambda_lod(struct sp_sampler_view *sp_sview,
>
>      switch (control) {
>      case tgsi_sampler_lod_none:
> +   case tgsi_sampler_gather:
>         /* XXX FIXME */
>      case tgsi_sampler_derivs_explicit:
>         lambda = sp_sview->compute_lambda(sp_sview, s, t, p) + lod_bias;
> @@ -1844,6 +1927,8 @@ mip_filter_linear(struct sp_sampler_view *sp_sview,
>      compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
>
>      args.offset = filt_args->offset;
> +   args.gather_only = filt_args->control == tgsi_sampler_gather;
> +   args.gather_comp = (*(unsigned int *)lod_in) & 0x3;

Maybe use a little inline function that converts lod_in to a gather
component?  It might be more readable that way.

>
>      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
>         int level0 = psview->u.tex.first_level + (int)lod[j];
> @@ -1907,6 +1992,8 @@ mip_filter_nearest(struct sp_sampler_view *sp_sview,
>      struct img_filter_args args;
>
>      args.offset = filt_args->offset;
> +   args.gather_only = filt_args->control == tgsi_sampler_gather;
> +   args.gather_comp = (*(unsigned int *)lod_in) & 0x3;
>      compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
>
>      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> @@ -1950,6 +2037,7 @@ mip_filter_none(struct sp_sampler_view *sp_sview,
>
>      args.level = sp_sview->base.u.tex.first_level;
>      args.offset = filt_args->offset;
> +   args.gather_only = filt_args->control == tgsi_sampler_gather;
>
>      compute_lambda_lod(sp_sview, sp_samp, s, t, p, lod_in, filt_args->control, lod);
>
> @@ -1985,6 +2073,7 @@ mip_filter_none_no_filter_select(struct sp_sampler_view *sp_sview,
>      struct img_filter_args args;
>      args.level = sp_sview->base.u.tex.first_level;
>      args.offset = filt_args->offset;
> +   args.gather_only = filt_args->control == tgsi_sampler_gather;
>      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
>         args.s = s[j];
>         args.t = t[j];
> @@ -2364,6 +2453,7 @@ mip_filter_linear_2d_linear_repeat_POT(
>         args.t = t[j];
>         args.p = p[j];
>         args.face_id = sp_sview->faces[j];
> +      args.gather_only = filt_args->control == tgsi_sampler_gather;
>         if ((unsigned)level0 >= psview->u.tex.last_level) {
>            if (level0 < 0)
>               args.level = psview->u.tex.first_level;
> @@ -2409,11 +2499,12 @@ sample_compare(struct sp_sampler_view *sp_sview,
>                  float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE])
>   {
>      const struct pipe_sampler_state *sampler = &sp_samp->base;
> -   int j;
> -   int k[4];
> +   int j, v;
> +   int k[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
>      float pc[4];
>      const struct util_format_description *format_desc;
>      unsigned chan_type;
> +   bool is_gather = (control == tgsi_sampler_gather);

I'm not sure the is_gather variable gains us much.  No big deal though.

>
>      /**
>       * Compare texcoord 'p' (aka R) against texture value 'rgba[0]'
> @@ -2457,65 +2548,74 @@ sample_compare(struct sp_sampler_view *sp_sview,
>         pc[3] = CLAMP(pc[3], 0.0F, 1.0F);
>      }
>
> -   /* compare four texcoords vs. four texture samples */
> -   switch (sampler->compare_func) {
> -   case PIPE_FUNC_LESS:
> -      k[0] = pc[0] < rgba[0][0];
> -      k[1] = pc[1] < rgba[0][1];
> -      k[2] = pc[2] < rgba[0][2];
> -      k[3] = pc[3] < rgba[0][3];
> -      break;
> -   case PIPE_FUNC_LEQUAL:
> -      k[0] = pc[0] <= rgba[0][0];
> -      k[1] = pc[1] <= rgba[0][1];
> -      k[2] = pc[2] <= rgba[0][2];
> -      k[3] = pc[3] <= rgba[0][3];
> -      break;
> -   case PIPE_FUNC_GREATER:
> -      k[0] = pc[0] > rgba[0][0];
> -      k[1] = pc[1] > rgba[0][1];
> -      k[2] = pc[2] > rgba[0][2];
> -      k[3] = pc[3] > rgba[0][3];
> -      break;
> -   case PIPE_FUNC_GEQUAL:
> -      k[0] = pc[0] >= rgba[0][0];
> -      k[1] = pc[1] >= rgba[0][1];
> -      k[2] = pc[2] >= rgba[0][2];
> -      k[3] = pc[3] >= rgba[0][3];
> -      break;
> -   case PIPE_FUNC_EQUAL:
> -      k[0] = pc[0] == rgba[0][0];
> -      k[1] = pc[1] == rgba[0][1];
> -      k[2] = pc[2] == rgba[0][2];
> -      k[3] = pc[3] == rgba[0][3];
> -      break;
> -   case PIPE_FUNC_NOTEQUAL:
> -      k[0] = pc[0] != rgba[0][0];
> -      k[1] = pc[1] != rgba[0][1];
> -      k[2] = pc[2] != rgba[0][2];
> -      k[3] = pc[3] != rgba[0][3];
> -      break;
> -   case PIPE_FUNC_ALWAYS:
> -      k[0] = k[1] = k[2] = k[3] = 1;
> -      break;
> -   case PIPE_FUNC_NEVER:
> -      k[0] = k[1] = k[2] = k[3] = 0;
> -      break;
> -   default:
> -      k[0] = k[1] = k[2] = k[3] = 0;
> -      assert(0);
> -      break;
> +   for (v = 0; v < (is_gather ? TGSI_NUM_CHANNELS : 1); v++) {
> +      /* compare four texcoords vs. four texture samples */
> +      switch (sampler->compare_func) {
> +      case PIPE_FUNC_LESS:
> +         k[v][0] = pc[0] < rgba[v][0];
> +         k[v][1] = pc[1] < rgba[v][1];
> +         k[v][2] = pc[2] < rgba[v][2];
> +         k[v][3] = pc[3] < rgba[v][3];
> +         break;
> +      case PIPE_FUNC_LEQUAL:
> +         k[v][0] = pc[0] <= rgba[v][0];
> +         k[v][1] = pc[1] <= rgba[v][1];
> +         k[v][2] = pc[2] <= rgba[v][2];
> +         k[v][3] = pc[3] <= rgba[v][3];
> +         break;
> +      case PIPE_FUNC_GREATER:
> +         k[v][0] = pc[0] > rgba[v][0];
> +         k[v][1] = pc[1] > rgba[v][1];
> +         k[v][2] = pc[2] > rgba[v][2];
> +         k[v][3] = pc[3] > rgba[v][3];
> +         break;
> +      case PIPE_FUNC_GEQUAL:
> +         k[v][0] = pc[0] >= rgba[v][0];
> +         k[v][1] = pc[1] >= rgba[v][1];
> +         k[v][2] = pc[2] >= rgba[v][2];
> +         k[v][3] = pc[3] >= rgba[v][3];
> +         break;
> +      case PIPE_FUNC_EQUAL:
> +         k[v][0] = pc[0] == rgba[v][0];
> +         k[v][1] = pc[1] == rgba[v][1];
> +         k[v][2] = pc[2] == rgba[v][2];
> +         k[v][3] = pc[3] == rgba[v][3];
> +         break;
> +      case PIPE_FUNC_NOTEQUAL:
> +         k[v][0] = pc[0] != rgba[v][0];
> +         k[v][1] = pc[1] != rgba[v][1];
> +         k[v][2] = pc[2] != rgba[v][2];
> +         k[v][3] = pc[3] != rgba[v][3];
> +         break;
> +      case PIPE_FUNC_ALWAYS:
> +         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 1;
> +         break;
> +      case PIPE_FUNC_NEVER:
> +         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
> +         break;
> +      default:
> +         k[v][0] = k[v][1] = k[v][2] = k[v][3] = 0;
> +         assert(0);
> +         break;
> +      }
>      }
>
> -   for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> -      rgba[0][j] = k[j];
> -      rgba[1][j] = k[j];
> -      rgba[2][j] = k[j];
> -      rgba[3][j] = 1.0F;
> +   if (is_gather) {
> +      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +         for (v = 0; v < TGSI_NUM_CHANNELS; v++) {
> +            rgba[v][j] = k[v][j];
> +         }
> +      }
> +   } else {
> +      for (j = 0; j < TGSI_QUAD_SIZE; j++) {
> +         rgba[0][j] = k[0][j];
> +         rgba[1][j] = k[0][j];
> +         rgba[2][j] = k[0][j];
> +         rgba[3][j] = 1.0F;
> +      }
>      }
>   }
>
> -
>   static void
>   do_swizzling(const struct pipe_sampler_view *sview,
>                float in[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE],
> @@ -2693,7 +2793,7 @@ any_swizzle(const struct pipe_sampler_view *view)
>   static img_filter_func
>   get_img_filter(const struct sp_sampler_view *sp_sview,
>                  const struct pipe_sampler_state *sampler,
> -               unsigned filter)
> +               unsigned filter, bool gather)
>   {
>      switch (sp_sview->base.target) {
>      case PIPE_BUFFER:
> @@ -2713,7 +2813,7 @@ get_img_filter(const struct sp_sampler_view *sp_sview,
>      case PIPE_TEXTURE_RECT:
>         /* Try for fast path:
>          */
> -      if (sp_sview->pot2d &&
> +      if (!gather && sp_sview->pot2d &&
>             sampler->wrap_s == sampler->wrap_t &&
>             sampler->normalized_coords)
>         {
> @@ -2790,17 +2890,20 @@ sample_mip(struct sp_sampler_view *sp_sview,
>      img_filter_func min_img_filter = NULL;
>      img_filter_func mag_img_filter = NULL;
>
> -   if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
> +   if (filt_args->control == tgsi_sampler_gather) {
> +      mip_filter = mip_filter_nearest;
> +      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, PIPE_TEX_FILTER_LINEAR, true);
> +   } else if (sp_sview->pot2d & sp_samp->min_mag_equal_repeat_linear) {
>         mip_filter = mip_filter_linear_2d_linear_repeat_POT;
>      }
>      else {
>         mip_filter = sp_samp->mip_filter;
> -      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter);
> +      min_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->min_img_filter, false);
>         if (sp_samp->min_mag_equal) {
>            mag_img_filter = min_img_filter;
>         }
>         else {
> -         mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter);
> +         mag_img_filter = get_img_filter(sp_sview, &sp_samp->base, sp_samp->base.mag_img_filter, false);
>         }
>      }
>
> @@ -2811,7 +2914,7 @@ sample_mip(struct sp_sampler_view *sp_sview,
>         sample_compare(sp_sview, sp_samp, s, t, p, c0, lod, filt_args->control, rgba);
>      }
>
> -   if (sp_sview->need_swizzle) {
> +   if (sp_sview->need_swizzle && filt_args->control != tgsi_sampler_gather) {
>         float rgba_temp[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
>         memcpy(rgba_temp, rgba, sizeof(rgba_temp));
>         do_swizzling(&sp_sview->base, rgba_temp, rgba);
> diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h
> index 2eca3fb..342d680 100644
> --- a/src/gallium/drivers/softpipe/sp_tex_sample.h
> +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h
> @@ -60,6 +60,8 @@ struct img_filter_args {
>      unsigned level;
>      unsigned face_id;
>      const int8_t *offset;
> +   bool gather_only;
> +   int gather_comp;
>   };
>
>   typedef void (*img_filter_func)(struct sp_sampler_view *sp_sview,
>