[Mesa-dev] [PATCH] st/mesa: add st fp64 support (v7)

Thu Feb 19 15:41:53 PST 2015

On Thu, Feb 19, 2015 at 6:09 PM, Dave Airlie <airlied at gmail.com> wrote:
> From: Dave Airlie <airlied at redhat.com>
>
> v2 : add double to int/unsigned conversion
> v3: handle fp64 consts better
> v4: use DRSQ
> v4.1: add d2b
> v4.2: drop DDIV
>
> v5: split out some prep patches.
> v5.1: add some comments.
> v5.2: more comments
>
> v6: simplify down the double instruction
>     generation loop.
>
> v7: Merge Ilia's two cleanup patches.
>
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/mesa/state_tracker/st_extensions.c     |   6 +
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 578 ++++++++++++++++++++++-------
>  2 files changed, 458 insertions(+), 126 deletions(-)
>
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 56502fb..003d280 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -464,7 +478,6 @@ public:
>  static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
>
>  static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
> -
>  static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
>  static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
>  static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);

Drop this hunk.

> @@ -597,22 +616,129 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
>
>     this->instructions.push_tail(inst);
>
> +   /*
> +    * This section contains the double processing.
> +    * GLSL just represents doubles as single channel values,
> +    * however most HW and TGSI represent doubles as pairs of register channels.
> +    *
> +    * so we have to fixup destination writemask/index and src swizzle/indexes.
> +    * dest writemasks need to translate from single channel write mask
> +    * to a dual-channel writemask, but also need to modify the index,
> +    * if we are touching the Z,W fields in the pre-translated writemask.
> +    *
> +    * src channels have similar index modifications along with swizzle
> +    * changes so we pick the XY, ZW pairs from the correct index.
> +    *
> +    * GLSL [0].x -> TGSI [0].xy
> +    * GLSL [0].y -> TGSI [0].zw
> +    * GLSL [0].z -> TGSI [1].xy
> +    * GLSL [0].w -> TGSI [1].zw
> +    */
> +   if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == GLSL_TYPE_DOUBLE ||
> +       inst->src[0].type == GLSL_TYPE_DOUBLE) {
> +      glsl_to_tgsi_instruction *dinst = NULL;
> +      int initial_src_swz[4], initial_src_idx[4];
> +      int initial_dst_idx[2], initial_dst_writemask[2];
> +      /* select the writemask for dst0 or dst1 */
> +      unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;
> +
> +      /* copy out the writemask, index and swizzles for all src/dsts. */
> +      for (j = 0; j < 2; j++) {
> +         initial_dst_writemask[j] = inst->dst[j].writemask;
> +         initial_dst_idx[j] = inst->dst[j].index;
> +      }
> +
> +      for (j = 0; j < 4; j++) {
> +         initial_src_swz[j] = inst->src[j].swizzle;
> +         initial_src_idx[j] = inst->src[j].index;
> +      }
> +
> +      /*
> +       * scan all the components in the dst writemask
> +       * generate an instruction for each of them if required.
> +       */
> +      while (writemask) {
> +
> +         int i = u_bit_scan(&writemask);
> +
> +         /* first time use previous instruction */
> +         if (dinst == NULL) {
> +            dinst = inst;
> +         } else {
> +            /* create a new instruction for subsequent attempts */
> +            dinst = new(mem_ctx) glsl_to_tgsi_instruction();
> +            *dinst = *inst;
> +            dinst->next = NULL;
> +            dinst->prev = NULL;
> +            this->instructions.push_tail(dinst);
> +         }
> +
> +         /* modify the destination if we are splitting */
> +         for (j = 0; j < 2; j++) {
> +            if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) {
> +               dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
> +               dinst->dst[j].index = initial_dst_idx[j];
> +               if (i > 1)
> +                     dinst->dst[j].index++;
> +            } else {
> +               /* if we aren't writing to a double, just get the bit of the initial writemask
> +                  for this channel */
> +               dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
> +            }
> +         }
> +
> +         /* modify the src registers */
> +         for (j = 0; j < 4; j++) {
> +            int swz = GET_SWZ(initial_src_swz[j], i);
> +
> +            if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
> +               dinst->src[j].index = initial_src_idx[j];
> +               if (swz > 1)
> +                  dinst->src[j].index++;
> +
> +               if (swz & 1)
> +                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
> +               else
> +                  dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
> +
> +            } else {
> +               /* some opcodes are special case in what they use as sources
> +                  - F2D is a float src0, DLDEXP is integer src1 */
> +               if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_DLDEXP || (op == TGSI_OPCODE_UCMP && dinst->dst[0].type == GLSL_TYPE_DOUBLE)) {

This line is way over 80 chars. I know some of the other lines probably
don't fit either, but... this one is really egregious.

> +                  dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
> +               }
> +            }
> +         }
> +      }
> +      inst = dinst;
> +   }
> +
> +
>     return inst;
>  }
> @@ -909,23 +1067,40 @@ glsl_to_tgsi_visitor::add_constant(gl_register_file file,
>
>     int index = 0;
>     immediate_storage *entry;
> +   int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
> +   int i;
>
>     /* Search immediate storage to see if we already have an identical
>      * immediate that we can use instead of adding a duplicate entry.
>      */
>     foreach_in_list(immediate_storage, entry, &this->immediates) {
> -      if (entry->size == size &&
> -          entry->type == datatype &&
> -          !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
> -         return index;
> +      immediate_storage *tmp = entry;
> +
> +      for (i = 0; i * 4 < size32; i++) {
> +         int slot_size = MIN2(size32 - (i * 4), 4);
> +         if (tmp->type != datatype || tmp->size32 != slot_size)
> +            break;
> +         if (memcmp(tmp->values, &values[i * 4],
> +                    slot_size * sizeof(gl_constant_value)))
> +            break;
> +
> +         /* Everything matches, keep going until the full size is matched */

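OK, this is my bad because it came from one of my 'cleanup' patches. But
there's a tmp = tmp->next; missing here (or something along those lines).
As written, tmp never advances, so every iteration of the loop compares
against the same immediate entry.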

>        }
> +
> +      /* The full value matched */
> +      if (i * 4 >= size32)
> +         return index;
> +
>        index++;
>     }

With those fixed, this is

Reviewed-by: Ilia Mirkin <imirkin at alum.mit.edu>