[virglrenderer-devel] [PATCH 4/4] renderer: add ARB_gpu_shader5 support.

Gurchetan Singh gurchetansingh at chromium.org
Thu May 10 22:29:42 UTC 2018


This patch doesn't apply cleanly to ToT ("shader: Invert y coordonate
if on gles host").  Rebase?

On Wed, May 9, 2018 at 8:44 PM, Dave Airlie <airlied at gmail.com> wrote:
> ---
>  src/gallium/include/pipe/p_state.h |   1 +
>  src/vrend_decode.c                 |   4 +-
>  src/vrend_renderer.c               |  35 ++-
>  src/vrend_shader.c                 | 496 +++++++++++++++++++++++++++++++------
>  src/vrend_shader.h                 |  16 ++
>  5 files changed, 472 insertions(+), 80 deletions(-)
>
> diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
> index 2b4d9e4..80be1b5 100644
> --- a/src/gallium/include/pipe/p_state.h
> +++ b/src/gallium/include/pipe/p_state.h
> @@ -206,6 +206,7 @@ struct pipe_stream_output_info
>        unsigned num_components:3;  /** 1 to 4 */
>        unsigned output_buffer:3;   /**< 0 to PIPE_MAX_SO_BUFFERS */
>        unsigned dst_offset:16;     /**< offset into the buffer in dwords */
> +      unsigned stream:2;
>     } output[PIPE_MAX_SO_OUTPUTS];
>  };
>
> diff --git a/src/vrend_decode.c b/src/vrend_decode.c
> index 5f62568..1e31573 100644
> --- a/src/vrend_decode.c
> +++ b/src/vrend_decode.c
> @@ -97,6 +97,8 @@ static int vrend_decode_create_shader(struct vrend_decode_ctx *ctx,
>              so_info.output[i].num_components = (tmp >> 10) & 0x7;
>              so_info.output[i].output_buffer = (tmp >> 13) & 0x7;
>              so_info.output[i].dst_offset = (tmp >> 16) & 0xffff;
> +            tmp = get_buf_entry(ctx, VIRGL_OBJ_SHADER_SO_OUTPUT0_SO(i));
> +            so_info.output[i].stream = (tmp & 0x3);
>           }
>        }
>        shader_offset += 4 + (2 * num_so_outputs);
> @@ -659,7 +661,7 @@ static int vrend_decode_create_query(struct vrend_decode_ctx *ctx, uint32_t hand
>
>     tmp = get_buf_entry(ctx, VIRGL_OBJ_QUERY_TYPE_INDEX);
>     query_type = VIRGL_OBJ_QUERY_TYPE(tmp);
> -   query_index = VIRGL_OBJ_QUERY_INDEX(tmp);
> +   query_index = (tmp >> 16) & 0xffff;
>
>     offset = get_buf_entry(ctx, VIRGL_OBJ_QUERY_OFFSET);
>     res_handle = get_buf_entry(ctx, VIRGL_OBJ_QUERY_RES_HANDLE);
> diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
> index 26bf639..00b7a8c 100644
> --- a/src/vrend_renderer.c
> +++ b/src/vrend_renderer.c
> @@ -627,6 +627,7 @@ static void vrend_destroy_shader_selector(struct vrend_shader_selector *sel)
>     free(sel->tmp_buf);
>     free(sel->sinfo.so_names);
>     free(sel->sinfo.interpinfo);
> +   free(sel->sinfo.sampler_arrays);
>     free(sel->tokens);
>     free(sel);
>  }
> @@ -771,13 +772,14 @@ static void dump_stream_out(struct pipe_stream_output_info *so)
>     printf("\n");
>     printf("outputs:\n");
>     for (i = 0; i < so->num_outputs; i++) {
> -      printf("\t%d: reg: %d sc: %d, nc: %d ob: %d do: %d\n",
> +      printf("\t%d: reg: %d sc: %d, nc: %d ob: %d do: %d st: %d\n",
>               i,
>               so->output[i].register_index,
>               so->output[i].start_component,
>               so->output[i].num_components,
>               so->output[i].output_buffer,
> -             so->output[i].dst_offset);
> +             so->output[i].dst_offset,
> +             so->output[i].stream);
>     }
>  }
>
> @@ -870,7 +872,7 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte
>                                                                struct vrend_shader *gs)
>  {
>     struct vrend_linked_shader_program *sprog = CALLOC_STRUCT(vrend_linked_shader_program);
> -   char name[32];
> +   char name[64];
>     int i;
>     GLuint prog_id;
>     GLint lret;
> @@ -993,7 +995,11 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte
>              index = 0;
>              while(mask) {
>                 i = u_bit_scan(&mask);
> -               snprintf(name, 32, "%ssamp%d", prefix, i);
> +               if (sprog->ss[id]->sel->sinfo.num_sampler_arrays) {
> +                  int arr_idx = shader_lookup_sampler_array(&sprog->ss[id]->sel->sinfo, i);
> +                  snprintf(name, 32, "%ssamp%d[%d]", prefix, arr_idx, i - sprog->ss[id]->sel->sinfo.sampler_arrays[arr_idx].first);
> +               } else
> +                  snprintf(name, 32, "%ssamp%d", prefix, i);
>                 sprog->samp_locs[id][index] = glGetUniformLocation(prog_id, name);
>                 if (sprog->ss[id]->sel->sinfo.shadow_samp_mask & (1 << i)) {
>                    snprintf(name, 32, "%sshadmask%d", prefix, i);
> @@ -1046,7 +1052,11 @@ static struct vrend_linked_shader_program *add_shader_program(struct vrend_conte
>
>           sprog->ubo_locs[id] = calloc(sprog->ss[id]->sel->sinfo.num_ubos, sizeof(uint32_t));
>           for (i = 0; i < sprog->ss[id]->sel->sinfo.num_ubos; i++) {
> -            snprintf(name, 32, "%subo%d", prefix, i + 1);
> +            if (sprog->ss[id]->sel->sinfo.ubo_indirect)
> +               snprintf(name, 32, "%subo[%d]", prefix, i);
> +            else
> +               snprintf(name, 32, "%subo%d", prefix, i + 1);
> +
>              sprog->ubo_locs[id][i] = glGetUniformBlockIndex(prog_id, name);
>           }
>        } else
> @@ -6321,7 +6331,6 @@ int vrend_create_query(struct vrend_context *ctx, uint32_t handle,
>     q->type = query_type;
>     q->index = query_index;
>     q->ctx_id = ctx->ctx_id;
> -
>     vrend_resource_reference(&q->res, res);
>
>     switch (q->type) {
> @@ -6384,7 +6393,10 @@ void vrend_begin_query(struct vrend_context *ctx, uint32_t handle)
>     if (q->gltype == GL_TIMESTAMP)
>        return;
>
> -   glBeginQuery(q->gltype, q->id);
> +   if (q->index > 0)
> +      glBeginQueryIndexed(q->gltype, q->index, q->id);
> +   else
> +      glBeginQuery(q->gltype, q->id);
>  }
>
>  void vrend_end_query(struct vrend_context *ctx, uint32_t handle)
> @@ -6406,7 +6418,10 @@ void vrend_end_query(struct vrend_context *ctx, uint32_t handle)
>        return;
>     }
>
> -   glEndQuery(q->gltype);
> +   if (q->index > 0)
> +      glEndQueryIndexed(q->gltype, q->index);
> +   else
> +      glEndQuery(q->gltype);
>  }
>
>  void vrend_get_query_result(struct vrend_context *ctx, uint32_t handle,
> @@ -6581,8 +6596,10 @@ bool vrend_renderer_fill_caps_common(uint32_t set, uint32_t version,
>           caps->v1.glsl_level = 140;
>        else if (gl_ver == 32)
>           caps->v1.glsl_level = 150;
> -      else if (gl_ver >= 33)
> +      else if (gl_ver == 33)
>           caps->v1.glsl_level = 330;
> +      else if (gl_ver >= 40)
> +         caps->v1.glsl_level = 400;
>     } else {
>        caps->v1.glsl_level = 130;
>     }
> diff --git a/src/vrend_shader.c b/src/vrend_shader.c
> index d58b943..668d187 100644
> --- a/src/vrend_shader.c
> +++ b/src/vrend_shader.c
> @@ -37,7 +37,7 @@ extern int vrend_dump_shaders;
>
>  /* start convert of tgsi to glsl */
>
> -#define INTERP_PREFIX "               "
> +#define INTERP_PREFIX "                           "
>  #define INVARI_PREFIX "invariant"
>
>  struct vrend_shader_io {
> @@ -55,6 +55,7 @@ struct vrend_shader_io {
>     bool override_no_wm;
>     bool is_int;
>     char glsl_name[64];
> +   unsigned stream;
>  };
>
>  struct vrend_shader_sampler {
> @@ -101,8 +102,13 @@ struct dump_ctx {
>
>     struct vrend_shader_sampler samplers[32];
>     uint32_t samplers_used;
> -   int num_consts;
> +   bool sviews_used;
> +
> +   struct vrend_sampler_array *sampler_arrays;
> +   int num_sampler_arrays;
> +   int last_sampler_array_idx;
>
> +   int num_consts;
>     int num_imm;
>     struct immed imm[MAX_IMMEDIATE];
>     unsigned fragcoord_input;
> @@ -151,6 +157,8 @@ struct dump_ctx {
>     bool vs_has_pervertex;
>     bool uses_sample_shading;
>     bool uses_gpu_shader5;
> +   bool write_mul_temp;
> +   bool write_interp_temp;
>  };
>
>  static inline const char *tgsi_proc_to_prefix(int shader_type)
> @@ -247,6 +255,46 @@ static struct vrend_temp_range *find_temp_range(struct dump_ctx *ctx, int index)
>     return NULL;
>  }
>
> +static int add_sampler_array(struct dump_ctx *ctx, int first, int last, int sview_type, int sview_rtype)
> +{
> +   int idx = ctx->num_sampler_arrays;
> +   ctx->num_sampler_arrays++;
> +   ctx->sampler_arrays = realloc(ctx->sampler_arrays, sizeof(struct vrend_sampler_array) * ctx->num_sampler_arrays);
> +   if (!ctx->sampler_arrays)
> +      return -1;
> +
> +   ctx->sampler_arrays[idx].first = first;
> +   ctx->sampler_arrays[idx].last = last;
> +   ctx->sampler_arrays[idx].idx = idx;
> +   ctx->sampler_arrays[idx].sview_type = sview_type;
> +   ctx->sampler_arrays[idx].sview_rtype = sview_rtype;
> +   return idx;
> +}
> +
> +int lookup_sampler_array(struct dump_ctx *ctx, int index)
> +{
> +   int i;
> +   for (i = 0; i < ctx->num_sampler_arrays; i++) {
> +      if (index >= ctx->sampler_arrays[i].first &&
> +          index <= ctx->sampler_arrays[i].last) {
> +         return ctx->sampler_arrays[i].idx;
> +      }
> +   }
> +   return -1;
> +}
> +
> +int shader_lookup_sampler_array(struct vrend_shader_info *sinfo, int index)
> +{
> +   int i;
> +   for (i = 0; i < sinfo->num_sampler_arrays; i++) {
> +      if (index >= sinfo->sampler_arrays[i].first &&
> +          index <= sinfo->sampler_arrays[i].last) {
> +         return sinfo->sampler_arrays[i].idx;
> +      }
> +   }
> +   return -1;
> +}
> +
>  static boolean
>  iter_declaration(struct tgsi_iterate_context *iter,
>                   struct tgsi_full_declaration *decl )
> @@ -270,6 +318,7 @@ iter_declaration(struct tgsi_iterate_context *iter,
>        ctx->inputs[i].name = decl->Semantic.Name;
>        ctx->inputs[i].sid = decl->Semantic.Index;
>        ctx->inputs[i].interpolate = decl->Interp.Interpolate;
> +      ctx->inputs[i].centroid = decl->Interp.Location == TGSI_INTERPOLATE_LOC_CENTROID;
>        ctx->inputs[i].first = decl->Range.First;
>        ctx->inputs[i].glsl_predefined_no_emit = false;
>        ctx->inputs[i].glsl_no_index = false;
> @@ -656,6 +705,19 @@ iter_declaration(struct tgsi_iterate_context *iter,
>           return FALSE;
>        }
>        ctx->samplers[decl->Range.First].tgsi_sampler_return = decl->SamplerView.ReturnTypeX;
> +      if (ctx->info.indirect_files & (1 << TGSI_FILE_SAMPLER)) {
> +         if (ctx->last_sampler_array_idx != -1) {
> +            if (ctx->sampler_arrays[ctx->last_sampler_array_idx].sview_type == decl->SamplerView.Resource &&
> +                ctx->sampler_arrays[ctx->last_sampler_array_idx].sview_rtype == decl->SamplerView.ReturnTypeX) {
> +               ctx->sampler_arrays[ctx->last_sampler_array_idx].last = decl->Range.Last + 1;
> +            } else {
> +               ctx->last_sampler_array_idx = add_sampler_array(ctx, decl->Range.First, decl->Range.Last + 1, decl->SamplerView.Resource, decl->SamplerView.ReturnTypeX);
> +            }
> +         } else {
> +            ctx->last_sampler_array_idx = add_sampler_array(ctx, decl->Range.First, decl->Range.Last + 1, decl->SamplerView.Resource, decl->SamplerView.ReturnTypeX);
> +         }
> +      } else
> +      ctx->sviews_used = true;
>        break;
>     case TGSI_FILE_CONSTANT:
>        if (decl->Declaration.Dimension) {
> @@ -675,7 +737,7 @@ iter_declaration(struct tgsi_iterate_context *iter,
>        }
>        break;
>     case TGSI_FILE_ADDRESS:
> -      ctx->num_address = 1;
> +      ctx->num_address = decl->Range.Last + 1;
>        break;
>     case TGSI_FILE_SYSTEM_VALUE:
>        i = ctx->num_system_values++;
> @@ -707,6 +769,20 @@ iter_declaration(struct tgsi_iterate_context *iter,
>           name_prefix = "gl_InvocationID";
>           ctx->has_ints = true;
>           ctx->uses_gpu_shader5 = true;
> +      } else if (decl->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK) {
> +         name_prefix = "gl_SampleMaskIn[0]";
> +         ctx->has_ints = true;
> +         ctx->uses_gpu_shader5 = true;
> +      } else if (decl->Semantic.Name == TGSI_SEMANTIC_PRIMID) {
> +         name_prefix = "gl_PrimitiveID";
> +         ctx->has_ints = true;
> +         ctx->uses_gpu_shader5 = true;
> +      } else if (decl->Semantic.Name == TGSI_SEMANTIC_TESSCOORD) {
> +         name_prefix = "gl_TessCoord";
> +         ctx->system_values[i].override_no_wm = false;
> +      } else if (decl->Semantic.Name == TGSI_SEMANTIC_VERTICESIN) {
> +         name_prefix = "gl_PatchVerticesIn";
> +         ctx->system_values[i].override_no_wm = false;
>        } else {
>           fprintf(stderr, "unsupported system value %d\n", decl->Semantic.Name);
>           name_prefix = "unknown";
> @@ -927,6 +1003,29 @@ static int emit_prescale(struct dump_ctx *ctx)
>     return 0;
>  }
>
> +static int prepare_so_movs(struct dump_ctx *ctx)
> +{
> +   int i;
> +   for (i = 0; i < ctx->so->num_outputs; i++) {
> +      ctx->write_so_outputs[i] = true;
> +      if (ctx->so->output[i].start_component != 0)
> +         continue;
> +      if (ctx->so->output[i].num_components != 4)
> +         continue;
> +      if (ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_CLIPDIST)
> +         continue;
> +      if (ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_POSITION)
> +         continue;
> +
> +      ctx->outputs[ctx->so->output[i].register_index].stream = ctx->so->output[i].stream;
> +      if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY && ctx->so->output[i].stream)
> +         ctx->uses_gpu_shader5 = true;
> +
> +      ctx->write_so_outputs[i] = false;
> +   }
> +   return 0;
> +}
> +
>  static int emit_so_movs(struct dump_ctx *ctx)
>  {
>     char buf[255];
> @@ -957,7 +1056,7 @@ static int emit_so_movs(struct dump_ctx *ctx)
>        } else
>           writemask[0] = 0;
>
> -      if (ctx->so->output[i].num_components == 4 && writemask[0] == 0 && !(ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_CLIPDIST) && !(ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_POSITION)) {
> +      if (!ctx->write_so_outputs[i]) {
>           if (ctx->so->output[i].register_index > ctx->num_outputs)
>              ctx->so_names[i] = NULL;
>           else if (ctx->outputs[ctx->so->output[i].register_index].name == TGSI_SEMANTIC_CLIPVERTEX && ctx->has_clipvertex) {
> @@ -965,13 +1064,10 @@ static int emit_so_movs(struct dump_ctx *ctx)
>              ctx->has_clipvertex_so = true;
>           } else
>              ctx->so_names[i] = strdup(ctx->outputs[ctx->so->output[i].register_index].glsl_name);
> -         ctx->write_so_outputs[i] = false;
> -
>        } else {
>           char ntemp[8];
>           snprintf(ntemp, 8, "tfout%d", i);
>           ctx->so_names[i] = strdup(ntemp);
> -         ctx->write_so_outputs[i] = true;
>        }
>        if (ctx->so->output[i].num_components == 1) {
>           if (ctx->outputs[ctx->so->output[i].register_index].is_int)
> @@ -1121,7 +1217,8 @@ static int translate_tex(struct dump_ctx *ctx,
>                           char  dsts[3][255],
>                           const char *writemask,
>                           const char *dstconv,
> -                         bool dst0_override_no_wm)
> +                         bool dst0_override_no_wm,
> +                         bool tg4_has_component)
>  {
>     const char *twm = "", *gwm = NULL, *txfi;
>     const char *dtypeprefix = "";
> @@ -1318,7 +1415,9 @@ static int translate_tex(struct dump_ctx *ctx,
>     case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
>     case TGSI_TEXTURE_CUBE_ARRAY:
>     default:
> -      if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && inst->Texture.Texture != TGSI_TEXTURE_CUBE_ARRAY)
> +      if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 &&
> +          inst->Texture.Texture != TGSI_TEXTURE_CUBE_ARRAY
> +          && inst->Texture.Texture != TGSI_TEXTURE_SHADOWCUBE_ARRAY)
>           twm = ".xyz";
>        else
>           twm = "";
> @@ -1376,10 +1475,9 @@ static int translate_tex(struct dump_ctx *ctx,
>        snprintf(bias, 128, ", %s%s, %s%s", srcs[1], gwm, srcs[2], gwm);
>        sampler_index = 3;
>     } else if (inst->Instruction.Opcode == TGSI_OPCODE_TG4) {
> -
>        sampler_index = 2;
>        ctx->uses_tg4 = true;
> -      if (inst->Texture.NumOffsets > 1 || is_shad)
> +      if (inst->Texture.NumOffsets > 1 || is_shad || ctx->uses_sampler_rect)
>           ctx->uses_gpu_shader5 = true;
>        if (inst->Texture.NumOffsets == 1) {
>           if (inst->TexOffsets[0].File != TGSI_FILE_IMMEDIATE)
> @@ -1387,11 +1485,26 @@ static int translate_tex(struct dump_ctx *ctx,
>        }
>        if (is_shad) {
>           if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
> -             inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY ||
> -             inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
> +             inst->Texture.Texture == TGSI_TEXTURE_SHADOW2D_ARRAY)
>              snprintf(bias, 64, ", %s.w", srcs[0]);
> +         else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE_ARRAY)
> +            snprintf(bias, 64, ", %s.x", srcs[1]);
>           else
>              snprintf(bias, 64, ", %s.z", srcs[0]);
> +      } else if (tg4_has_component) {
> +         if (inst->Texture.NumOffsets == 0) {
> +            if (inst->Texture.Texture == TGSI_TEXTURE_2D ||
> +                inst->Texture.Texture == TGSI_TEXTURE_RECT ||
> +                inst->Texture.Texture == TGSI_TEXTURE_CUBE ||
> +                inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY ||
> +                inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY)
> +               snprintf(bias, 64, ", int(%s)", srcs[1]);
> +         } else if (inst->Texture.NumOffsets) {
> +            if (inst->Texture.Texture == TGSI_TEXTURE_2D ||
> +                inst->Texture.Texture == TGSI_TEXTURE_RECT ||
> +                inst->Texture.Texture == TGSI_TEXTURE_2D_ARRAY)
> +               snprintf(bias, 64, ", int(%s)", srcs[1]);
> +         }
>        }
>     } else
>        bias[0] = 0;
> @@ -1460,13 +1573,16 @@ static int translate_tex(struct dump_ctx *ctx,
>              return false;
>           }
>        } else if (inst->TexOffsets[0].File == TGSI_FILE_TEMPORARY) {
> +         struct vrend_temp_range *range = find_temp_range(ctx, inst->TexOffsets[0].Index);
> +         int idx = inst->TexOffsets[0].Index - range->first;
>           switch (inst->Texture.Texture) {
>           case TGSI_TEXTURE_1D:
>           case TGSI_TEXTURE_1D_ARRAY:
>           case TGSI_TEXTURE_SHADOW1D:
>           case TGSI_TEXTURE_SHADOW1D_ARRAY:
> -            snprintf(offbuf, 120, ", int(floatBitsToInt(temps[%d].%c))",
> -                     inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleX));
> +            snprintf(offbuf, 120, ", int(floatBitsToInt(temp%d[%d].%c))",
> +                     range->first, idx,
> +                     get_swiz_char(inst->TexOffsets[0].SwizzleX));
>              break;
>           case TGSI_TEXTURE_RECT:
>           case TGSI_TEXTURE_SHADOWRECT:
> @@ -1474,15 +1590,20 @@ static int translate_tex(struct dump_ctx *ctx,
>           case TGSI_TEXTURE_2D_ARRAY:
>           case TGSI_TEXTURE_SHADOW2D:
>           case TGSI_TEXTURE_SHADOW2D_ARRAY:
> -            snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temps[%d].%c), floatBitsToInt(temps[%d].%c))",
> -                     inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleX),
> -                     inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleY));
> +            snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temp%d[%d].%c), floatBitsToInt(temp%d[%d].%c))",
> +                     range->first, idx,
> +                     get_swiz_char(inst->TexOffsets[0].SwizzleX),
> +                     range->first, idx,
> +                     get_swiz_char(inst->TexOffsets[0].SwizzleY));
>              break;
>           case TGSI_TEXTURE_3D:
> -            snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temps[%d].%c), floatBitsToInt(temps[%d].%c), floatBitsToInt(temps[%d].%c)",
> -                     inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleX),
> -                     inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleY),
> -                     inst->TexOffsets[0].Index, get_swiz_char(inst->TexOffsets[0].SwizzleZ));
> +            snprintf(offbuf, 120, ", ivec2(floatBitsToInt(temp%d[%d].%c), floatBitsToInt(temp%d[%d].%c), floatBitsToInt(temp%d[%d].%c)",
> +                     range->first, idx,
> +                     get_swiz_char(inst->TexOffsets[0].SwizzleX),
> +                     range->first, idx,
> +                     get_swiz_char(inst->TexOffsets[0].SwizzleY),
> +                     range->first, idx,
> +                     get_swiz_char(inst->TexOffsets[0].SwizzleZ));
>                       break;
>           default:
>              fprintf(stderr, "unhandled texture: %x\n", inst->Texture.Texture);
> @@ -1490,12 +1611,11 @@ static int translate_tex(struct dump_ctx *ctx,
>              break;
>           }
>        }
> -      if (inst->Instruction.Opcode == TGSI_OPCODE_TXL || inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || inst->Instruction.Opcode == TGSI_OPCODE_TXD) {
> +      if (inst->Instruction.Opcode == TGSI_OPCODE_TXL || inst->Instruction.Opcode == TGSI_OPCODE_TXL2 || inst->Instruction.Opcode == TGSI_OPCODE_TXD || (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && is_shad)) {
>           char tmp[128];
>           strcpy(tmp, offbuf);
>           strcpy(offbuf, bias);
>           strcpy(bias, tmp);
> -
>        }
>     }
>     if (inst->Instruction.Opcode == TGSI_OPCODE_TXF) {
> @@ -1591,8 +1711,9 @@ iter_instruction(struct tgsi_iterate_context *iter,
>     bool override_no_wm[4];
>     bool dst_override_no_wm[2];
>     char *sret;
> +   char interpSrc0[255], interpSwizzle0[10];
>     int ret;
> -
> +   bool tg4_has_component = false;
>     if (ctx->prog_type == -1)
>        ctx->prog_type = iter->processor.Processor;
>     if (dtype == TGSI_TYPE_SIGNED || dtype == TGSI_TYPE_UNSIGNED ||
> @@ -1638,6 +1759,8 @@ iter_instruction(struct tgsi_iterate_context *iter,
>           if (ret)
>              return FALSE;
>        }
> +      if (ctx->so)
> +         prepare_so_movs(ctx);
>     }
>     for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
>        const struct tgsi_full_dst_register *dst = &inst->Dst[i];
> @@ -1709,10 +1832,14 @@ iter_instruction(struct tgsi_iterate_context *iter,
>           if (!range)
>              return FALSE;
>           if (dst->Register.Indirect) {
> -            snprintf(dsts[i], 255, "temp%d[addr0 + %d]%s", range->first, dst->Register.Index - range->first, writemask);
> +            assert(dst->Indirect.File == TGSI_FILE_ADDRESS);
> +            snprintf(dsts[i], 255, "temp%d[addr%d + %d]%s", range->first, dst->Indirect.Index, dst->Register.Index - range->first, writemask);
>           } else
>              snprintf(dsts[i], 255, "temp%d[%d]%s", range->first, dst->Register.Index - range->first, writemask);
>        }
> +      else if (dst->Register.File == TGSI_FILE_ADDRESS) {
> +         snprintf(dsts[i], 255, "addr%d", dst->Register.Index);
> +      }
>     }
>
>     for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
> @@ -1732,8 +1859,13 @@ iter_instruction(struct tgsi_iterate_context *iter,
>        if (isabsolute)
>           strcpy(&prefix[pre_idx++], "abs(");
>
> -      if (src->Register.Dimension)
> -         sprintf(arrayname, "[%d]", src->Dimension.Index);
> +      if (src->Register.Dimension) {
> +         if (src->Dimension.Indirect) {
> +            assert(src->DimIndirect.File == TGSI_FILE_ADDRESS);
> +            sprintf(arrayname, "[addr%d]", src->DimIndirect.Index);
> +         } else
> +            sprintf(arrayname, "[%d]", src->Dimension.Index);
> +      }
>
>        if (src->Register.SwizzleX != TGSI_SWIZZLE_X ||
>            src->Register.SwizzleY != TGSI_SWIZZLE_Y ||
> @@ -1769,8 +1901,18 @@ iter_instruction(struct tgsi_iterate_context *iter,
>                    if (stype == TGSI_TYPE_UNSIGNED &&
>                        ctx->inputs[j].is_int)
>                       srcstypeprefix = "";
> -                  snprintf(srcs[i], 255, "%s(%s%s%s%s)",
> -                           srcstypeprefix, prefix, ctx->inputs[j].glsl_name, arrayname, ctx->inputs[j].is_int ? "" : swizzle);
> +
> +                  if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1) {
> +                     snprintf(srcs[i], 255, "floatBitsToInt(%s%s%s%s)", prefix, ctx->inputs[j].glsl_name, arrayname, swizzle);
> +                  } else
> +                     snprintf(srcs[i], 255, "%s(%s%s%s%s)", srcstypeprefix, prefix, ctx->inputs[j].glsl_name, arrayname, ctx->inputs[j].is_int ? "" : swizzle);
> +               }
> +               if ((inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE ||
> +                    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_OFFSET ||
> +                    inst->Instruction.Opcode == TGSI_OPCODE_INTERP_CENTROID) &&
> +                   i == 0) {
> +                  snprintf(interpSrc0, 255, "%s", ctx->inputs[j].glsl_name);
> +                  snprintf(interpSwizzle0, 10, "%s", swizzle);
>                 }
>                 override_no_wm[i] = ctx->inputs[j].override_no_wm;
>                 break;
> @@ -1780,8 +1922,14 @@ iter_instruction(struct tgsi_iterate_context *iter,
>           struct vrend_temp_range *range = find_temp_range(ctx, src->Register.Index);
>           if (!range)
>              return FALSE;
> +         if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1) {
> +            stprefix = true;
> +            stypeprefix = "floatBitsToInt";
> +         }
> +
>           if (src->Register.Indirect) {
> -            snprintf(srcs[i], 255, "%s%c%stemp%d[addr0 + %d]%s%c", stypeprefix, stprefix ? '(' : ' ', prefix, range->first, src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
> +            assert(src->Indirect.File == TGSI_FILE_ADDRESS);
> +            snprintf(srcs[i], 255, "%s%c%stemp%d[addr%d + %d]%s%c", stypeprefix, stprefix ? '(' : ' ', prefix, range->first, src->Indirect.Index, src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
>           } else
>              snprintf(srcs[i], 255, "%s%c%stemp%d[%d]%s%c", stypeprefix, stprefix ? '(' : ' ', prefix, range->first, src->Register.Index - range->first, swizzle, stprefix ? ')' : ' ');
>        } else if (src->Register.File == TGSI_FILE_CONSTANT) {
> @@ -1789,14 +1937,33 @@ iter_instruction(struct tgsi_iterate_context *iter,
>           int dim = 0;
>           if (src->Register.Dimension) {
>              dim = src->Dimension.Index;
> -            if (src->Register.Indirect) {
> -               snprintf(srcs[i], 255, "%s(%s%subo%dcontents[addr0 + %d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> -            } else
> -               snprintf(srcs[i], 255, "%s(%s%subo%dcontents[%d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> +            if (src->Dimension.Indirect) {
> +               assert(src->DimIndirect.File == TGSI_FILE_ADDRESS);
> +               ctx->uses_gpu_shader5 = true;
> +               if (src->Register.Indirect) {
> +                  assert(src->Indirect.File == TGSI_FILE_ADDRESS);
> +                  snprintf(srcs[i], 255, "%s(%s%suboarr[addr%d].ubocontents[addr%d + %d]%s)", stypeprefix, prefix, cname, src->DimIndirect.Index, src->Indirect.Index, src->Register.Index, swizzle);
> +               } else
> +                  snprintf(srcs[i], 255, "%s(%s%suboarr[addr%d].ubocontents[%d]%s)", stypeprefix, prefix, cname, src->DimIndirect.Index, src->Register.Index, swizzle);
> +            } else {
> +               if (ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)) {
> +                  if (src->Register.Indirect) {
> +                     snprintf(srcs[i], 255, "%s(%s%suboarr[%d].ubocontents[addr%d + %d]%s)", stypeprefix, prefix, cname, dim, src->Indirect.Index, src->Register.Index, swizzle);
> +                  } else
> +                     snprintf(srcs[i], 255, "%s(%s%suboarr[%d].ubocontents[%d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> +               } else {
> +                  if (src->Register.Indirect) {
> +                     snprintf(srcs[i], 255, "%s(%s%subo%dcontents[addr0 + %d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> +                  } else
> +                     snprintf(srcs[i], 255, "%s(%s%subo%dcontents[%d]%s)", stypeprefix, prefix, cname, dim, src->Register.Index, swizzle);
> +               }
> +            }
>           } else {
>              const char *csp;
>              ctx->has_ints = true;
> -            if (stype == TGSI_TYPE_FLOAT || stype == TGSI_TYPE_UNTYPED)
> +            if (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1)
> +               csp = "ivec4";
> +            else if (stype == TGSI_TYPE_FLOAT || stype == TGSI_TYPE_UNTYPED)
>                 csp = "uintBitsToFloat";
>              else if (stype == TGSI_TYPE_SIGNED)
>                 csp = "ivec4";
> @@ -1810,7 +1977,17 @@ iter_instruction(struct tgsi_iterate_context *iter,
>           }
>        } else if (src->Register.File == TGSI_FILE_SAMPLER) {
>           const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
> -         snprintf(srcs[i], 255, "%ssamp%d%s", cname, src->Register.Index, swizzle);
> +         if (ctx->info.indirect_files & (1 << TGSI_FILE_SAMPLER)) {
> +            int arr_idx = lookup_sampler_array(ctx, src->Register.Index);
> +            if (src->Register.Indirect) {
> +
> +               snprintf(srcs[i], 255, "%ssamp%d[addr%d+%d]%s", cname, arr_idx, src->Indirect.Index, src->Register.Index - ctx->sampler_arrays[arr_idx].first, swizzle);
> +            } else {
> +               snprintf(srcs[i], 255, "%ssamp%d[%d]%s", cname, arr_idx, src->Register.Index - ctx->sampler_arrays[arr_idx].first, swizzle);
> +            }
> +         } else {
> +            snprintf(srcs[i], 255, "%ssamp%d%s", cname, src->Register.Index, swizzle);
> +         }
>           sreg_index = src->Register.Index;
>        } else if (src->Register.File == TGSI_FILE_IMMEDIATE) {
>           if (src->Register.Index >= ARRAY_SIZE(ctx->imm)) {
> @@ -1823,6 +2000,10 @@ iter_instruction(struct tgsi_iterate_context *iter,
>           const char *vtype = "vec4";
>           const char *imm_stypeprefix = stypeprefix;
>
> +         if ((inst->Instruction.Opcode == TGSI_OPCODE_TG4 && i == 1) ||
> +             (inst->Instruction.Opcode == TGSI_OPCODE_INTERP_SAMPLE && i == 1))
> +            stype = TGSI_TYPE_SIGNED;
> +
>           if (imd->type == TGSI_IMM_UINT32 || imd->type == TGSI_IMM_INT32) {
>              if (imd->type == TGSI_IMM_UINT32)
>                 vtype = "uvec4";
> @@ -1853,6 +2034,14 @@ iter_instruction(struct tgsi_iterate_context *iter,
>                 idx = src->Register.SwizzleZ;
>              else if (j == 3)
>                 idx = src->Register.SwizzleW;
> +
> +            if (inst->Instruction.Opcode == TGSI_OPCODE_TG4 && i == 1 && j == 0) {
> +               if (imd->val[idx].ui > 0) {
> +                  tg4_has_component = true;
> +                  ctx->uses_gpu_shader5 = true;
> +               }
> +            }
> +
>              switch (imd->type) {
>              case TGSI_IMM_FLOAT32:
>                 if (isinf(imd->val[idx].f) || isnan(imd->val[idx].f)) {
> @@ -2134,7 +2323,7 @@ iter_instruction(struct tgsi_iterate_context *iter,
>     case TGSI_OPCODE_TXP:
>     case TGSI_OPCODE_TXQ:
>     case TGSI_OPCODE_LODQ:
> -      ret = translate_tex(ctx, inst, sreg_index, srcs, dsts, writemask, dstconv, dst_override_no_wm[0]);
> +      ret = translate_tex(ctx, inst, sreg_index, srcs, dsts, writemask, dstconv, dst_override_no_wm[0], tg4_has_component);
>        if (ret)
>           return FALSE;
>        break;
> @@ -2235,11 +2424,11 @@ iter_instruction(struct tgsi_iterate_context *iter,
>        EMIT_BUF_WITH_RET(ctx, "return;\n");
>        break;
>     case TGSI_OPCODE_ARL:
> -      snprintf(buf, 255, "addr0 = int(floor(%s)%s);\n", srcs[0], writemask);
> +      snprintf(buf, 255, "%s = int(floor(%s)%s);\n", dsts[0], srcs[0], writemask);
>        EMIT_BUF_WITH_RET(ctx, buf);
>        break;
>     case TGSI_OPCODE_UARL:
> -      snprintf(buf, 255, "addr0 = int(%s);\n", srcs[0]);
> +      snprintf(buf, 255, "%s = int(%s);\n", dsts[0], srcs[0]);
>        EMIT_BUF_WITH_RET(ctx, buf);
>        break;
>     case TGSI_OPCODE_XPD:
> @@ -2260,7 +2449,8 @@ iter_instruction(struct tgsi_iterate_context *iter,
>        snprintf(buf, 255, "break;\n");
>        EMIT_BUF_WITH_RET(ctx, buf);
>        break;
> -   case TGSI_OPCODE_EMIT:
> +   case TGSI_OPCODE_EMIT: {
> +      struct immed *imd = &ctx->imm[(inst->Src[0].Register.Index)];
>        if (ctx->so && ctx->key->gs_present) {
>           emit_so_movs(ctx);
>        }
> @@ -2270,12 +2460,103 @@ iter_instruction(struct tgsi_iterate_context *iter,
>        ret = emit_prescale(ctx);
>        if (ret)
>           return FALSE;
> -      snprintf(buf, 255, "EmitVertex();\n");
> +      if (imd->val[inst->Src[0].Register.SwizzleX].ui > 0) {
> +         ctx->uses_gpu_shader5 = true;
> +         snprintf(buf, 255, "EmitStreamVertex(%d);\n", imd->val[inst->Src[0].Register.SwizzleX].ui);
> +      } else
> +         snprintf(buf, 255, "EmitVertex();\n");
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      break;
> +   }
> +   case TGSI_OPCODE_ENDPRIM: {
> +      struct immed *imd = &ctx->imm[(inst->Src[0].Register.Index)];
> +      if (imd->val[inst->Src[0].Register.SwizzleX].ui > 0) {
> +         ctx->uses_gpu_shader5 = true;
> +         snprintf(buf, 255, "EndStreamPrimitive(%d);\n", imd->val[inst->Src[0].Register.SwizzleX].ui);
> +      } else
> +         snprintf(buf, 255, "EndPrimitive();\n");
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      break;
> +   }
> +   case TGSI_OPCODE_INTERP_CENTROID:
> +      snprintf(buf, 255, "interp_temp = interpolateAtCentroid(%s);\n", interpSrc0);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0], dstconv, dtypeprefix, interpSwizzle0);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->write_interp_temp = true;
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_INTERP_SAMPLE:
> +      snprintf(buf, 255, "interp_temp = interpolateAtSample(%s, %s.x);\n", interpSrc0, srcs[1]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0], dstconv, dtypeprefix, interpSwizzle0);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->write_interp_temp = true;
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_INTERP_OFFSET:
> +      snprintf(buf, 255, "interp_temp = interpolateAtOffset(%s, %s.xy);\n", interpSrc0, srcs[1]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      snprintf(buf, 255, "%s = %s(%s(interp_temp%s));\n", dsts[0], dstconv, dtypeprefix, interpSwizzle0);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->write_interp_temp = true;
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_UMUL_HI:
> +      snprintf(buf, 255, "umulExtended(%s, %s, umul_temp, mul_temp);\n", srcs[0], srcs[1]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      snprintf(buf, 255, "%s = %s(%s(umul_temp));\n", dsts[0], dstconv, dtypeprefix);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      ctx->write_mul_temp = true;
> +      break;
> +   case TGSI_OPCODE_IMUL_HI:
> +      snprintf(buf, 255, "imulExtended(%s, %s, imul_temp, mul_temp);\n", srcs[0], srcs[1]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      snprintf(buf, 255, "%s = %s(%s(imul_temp));\n", dsts[0], dstconv, dtypeprefix);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      ctx->write_mul_temp = true;
> +      break;
> +
> +   case TGSI_OPCODE_IBFE:
> +      snprintf(buf, 255, "%s = %s(%s(bitfieldExtract(%s, int(%s.x), int(%s.x))));\n", dsts[0], dstconv, dtypeprefix, srcs[0], srcs[1], srcs[2]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_UBFE:
> +      snprintf(buf, 255, "%s = %s(%s(bitfieldExtract(%s, int(%s.x), int(%s.x))));\n", dsts[0], dstconv, dtypeprefix, srcs[0], srcs[1], srcs[2]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_BFI:
> +      snprintf(buf, 255, "%s = %s(uintBitsToFloat(bitfieldInsert(%s, %s, int(%s), int(%s))));\n", dsts[0], dstconv, srcs[0], srcs[1], srcs[2], srcs[3]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_BREV:
> +      snprintf(buf, 255, "%s = %s(%s(bitfieldReverse(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_POPC:
> +      snprintf(buf, 255, "%s = %s(%s(bitCount(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
> +      EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_LSB:
> +      snprintf(buf, 255, "%s = %s(%s(findLSB(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
>        EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
>        break;
> -   case TGSI_OPCODE_ENDPRIM:
> -      snprintf(buf, 255, "EndPrimitive();\n");
> +   case TGSI_OPCODE_IMSB:
> +   case TGSI_OPCODE_UMSB:
> +      snprintf(buf, 255, "%s = %s(%s(findMSB(%s)));\n", dsts[0], dstconv, dtypeprefix, srcs[0]);
>        EMIT_BUF_WITH_RET(ctx, buf);
> +      ctx->uses_gpu_shader5 = true;
> +      break;
> +   case TGSI_OPCODE_BARRIER:
> +      snprintf(buf, 255, "barrier();\n");
>        break;
>     default:
>        fprintf(stderr,"failed to convert opcode %d\n", inst->Instruction.Opcode);
> @@ -2419,13 +2700,29 @@ static const char *get_interp_string(struct vrend_shader_cfg *cfg, int interpola
>     }
>  }
>
> +static const char *get_aux_string(struct vrend_shader_cfg *cfg, bool centroid)
> +{
> +   return centroid ? "centroid " : "";
> +}
> +
> +static const char get_return_type_prefix(enum tgsi_return_type type)
> +{
> +   if (type == TGSI_RETURN_TYPE_SINT)
> +      return 'i';
> +   if (type == TGSI_RETURN_TYPE_UINT)
> +      return 'u';
> +   return ' ';
> +}
> +
>  static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>  {
>     int i;
>     char buf[255];
>     char postfix[8];
> -   const char *prefix = "";
> +   const char *prefix = "", *auxprefix = "";
>     bool fcolor_emitted[2], bcolor_emitted[2];
> +   int nsamp;
> +   const char *sname = tgsi_proc_to_prefix(ctx->prog_type);
>     ctx->num_interps = 0;
>
>     if (ctx->so && ctx->so->num_outputs >= PIPE_MAX_SO_OUTPUTS) {
> @@ -2473,6 +2770,7 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>              prefix = get_interp_string(ctx->cfg, ctx->inputs[i].interpolate, ctx->key->flatshade);
>              if (!prefix)
>                 prefix = "";
> +           auxprefix = get_aux_string(ctx->cfg, ctx->inputs[i].centroid);
>              ctx->num_interps++;
>           }
>
> @@ -2480,7 +2778,7 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>              snprintf(postfix, 8, "[%d]", gs_input_prim_to_size(ctx->gs_in_prim));
>           } else
>              postfix[0] = 0;
> -         snprintf(buf, 255, "%sin vec4 %s%s;\n", prefix, ctx->inputs[i].glsl_name, postfix);
> +         snprintf(buf, 255, "%s%sin vec4 %s%s;\n", prefix, auxprefix, ctx->inputs[i].glsl_name, postfix);
>           STRCAT_WITH_RET(glsl_hdr, buf);
>        }
>     }
> @@ -2507,7 +2805,10 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>              } else
>                 prefix = "";
>              /* ugly leave spaces to patch interp in later */
> -            snprintf(buf, 255, "%s%sout vec4 %s;\n", prefix, ctx->outputs[i].invariant ? "invariant " : "", ctx->outputs[i].glsl_name);
> +            if (ctx->prog_type == TGSI_PROCESSOR_GEOMETRY && ctx->outputs[i].stream)
> +               snprintf(buf, 255, "layout (stream = %d) %s%sout vec4 %s;\n", ctx->outputs[i].stream, prefix, ctx->outputs[i].invariant ? "invariant " : "", ctx->outputs[i].glsl_name);
> +            else
> +               snprintf(buf, 255, "%s%sout vec4 %s;\n", prefix, ctx->outputs[i].invariant ? "invariant " : "", ctx->outputs[i].glsl_name);
>              STRCAT_WITH_RET(glsl_hdr, buf);
>           } else if (ctx->outputs[i].invariant) {
>              snprintf(buf, 255, "invariant %s;\n", ctx->outputs[i].glsl_name);
> @@ -2629,7 +2930,10 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>              snprintf(outtype, 6, "float");
>           else
>              snprintf(outtype, 6, "vec%d", ctx->so->output[i].num_components);
> -         snprintf(buf, 255, "out %s tfout%d;\n", outtype, i);
> +         if (ctx->so->output[i].stream && ctx->prog_type == TGSI_PROCESSOR_GEOMETRY)
> +            snprintf(buf, 255, "layout (stream=%d) out %s tfout%d;\n", ctx->so->output[i].stream, outtype, i);
> +         else
> +            snprintf(buf, 255, "out %s tfout%d;\n", outtype, i);
>           STRCAT_WITH_RET(glsl_hdr, buf);
>        }
>     }
> @@ -2638,6 +2942,20 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>        STRCAT_WITH_RET(glsl_hdr, buf);
>     }
>
> +   if (ctx->write_mul_temp) {
> +      snprintf(buf, 255, "uvec4 mul_temp;\n");
> +      STRCAT_WITH_RET(glsl_hdr, buf);
> +      snprintf(buf, 255, "uvec4 umul_temp;\n");
> +      STRCAT_WITH_RET(glsl_hdr, buf);
> +      snprintf(buf, 255, "ivec4 imul_temp;\n");
> +      STRCAT_WITH_RET(glsl_hdr, buf);
> +   }
> +
> +   if (ctx->write_interp_temp) {
> +      snprintf(buf, 255, "vec4 interp_temp;\n");
> +      STRCAT_WITH_RET(glsl_hdr, buf);
> +   }
> +
>     for (i = 0; i < ctx->num_address; i++) {
>        snprintf(buf, 255, "int addr%d;\n", i);
>        STRCAT_WITH_RET(glsl_hdr, buf);
> @@ -2659,27 +2977,49 @@ static char *emit_ios(struct dump_ctx *ctx, char *glsl_hdr)
>        }
>     }
>     if (ctx->num_ubo) {
> -      for (i = 0; i < ctx->num_ubo; i++) {
> -         const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
> -         snprintf(buf, 255, "uniform %subo%d { vec4 %subo%dcontents[%d]; };\n", cname, ctx->ubo_idx[i], cname, ctx->ubo_idx[i], ctx->ubo_sizes[i]);
> +      const char *cname = tgsi_proc_to_prefix(ctx->prog_type);
> +
> +      if (ctx->info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT)) {
> +         ctx->glsl_ver_required = 150;
> +         snprintf(buf, 255, "uniform %subo { vec4 ubocontents[%d]; } %suboarr[%d];\n", cname, ctx->ubo_sizes[0], cname, ctx->num_ubo);
>           STRCAT_WITH_RET(glsl_hdr, buf);
> +      } else {
> +         for (i = 0; i < ctx->num_ubo; i++) {
> +            snprintf(buf, 255, "uniform %subo%d { vec4 %subo%dcontents[%d]; };\n", cname, ctx->ubo_idx[i], cname, ctx->ubo_idx[i], ctx->ubo_sizes[i]);
> +            STRCAT_WITH_RET(glsl_hdr, buf);
> +         }
>        }
>     }
> -   for (i = 0; i < 32; i++) {
> -      int is_shad = 0;
> -      const char *stc;
> -      char ptc;
>
> -      if ((ctx->samplers_used & (1 << i)) == 0)
> -         continue;
> +   if (ctx->info.indirect_files & (1 << TGSI_FILE_SAMPLER)) {
> +      for (i = 0; i < ctx->num_sampler_arrays; i++) {
> +         int is_shad = 0;
> +         const char *stc;
> +         stc = vrend_shader_samplertypeconv(ctx->sampler_arrays[i].sview_type, &is_shad);
> +         if (!stc)
> +            continue;
> +         snprintf(buf, 255, "uniform %csampler%s %ssamp%d[%d];\n",
> +                  get_return_type_prefix(ctx->sampler_arrays[i].sview_rtype),
> +                  stc, sname, ctx->sampler_arrays[i].idx,
> +                  ctx->sampler_arrays[i].last - ctx->sampler_arrays[i].first);
> +         STRCAT_WITH_RET(glsl_hdr, buf);
> +      }
> +   } else {
> +      nsamp = util_last_bit(ctx->samplers_used);
> +      for (i = 0; i < nsamp; i++) {
> +         int is_shad = 0;
> +         const char *stc;
> +         char ptc;
>
> -      ptc = vrend_shader_samplerreturnconv(ctx->samplers[i].tgsi_sampler_return);
> -      stc = vrend_shader_samplertypeconv(ctx->samplers[i].tgsi_sampler_type, &is_shad);
> +         if ((ctx->samplers_used & (1 << i)) == 0)
> +            continue;
>
> -      if (stc) {
>           const char *sname;
>           const char *precision;
>
> +         ptc = vrend_shader_samplerreturnconv(ctx->samplers[i].tgsi_sampler_return);
> +         stc = vrend_shader_samplertypeconv(ctx->samplers[i].tgsi_sampler_type, &is_shad);
> +
>           sname = tgsi_proc_to_prefix(ctx->prog_type);
>
>           if (ctx->cfg->use_gles) {
> @@ -2733,6 +3073,7 @@ static boolean fill_fragment_interpolants(struct dump_ctx *ctx, struct vrend_sha
>        sinfo->interpinfo[index].semantic_name = ctx->inputs[i].name;
>        sinfo->interpinfo[index].semantic_index = ctx->inputs[i].sid;
>        sinfo->interpinfo[index].interpolate = ctx->inputs[i].interpolate;
> +      sinfo->interpinfo[index].centroid = ctx->inputs[i].centroid;
>        index++;
>     }
>     return TRUE;
> @@ -2782,7 +3123,9 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>     ctx.key = key;
>     ctx.cfg = cfg;
>     ctx.prog_type = -1;
> -
> +   ctx.num_sampler_arrays = 0;
> +   ctx.sampler_arrays = NULL;
> +   ctx.last_sampler_array_idx = -1;
>     tgsi_scan_shader(tokens, &ctx.info);
>     /* if we are in core profile mode we should use GLSL 1.40 */
>     if (cfg->use_core_profile && cfg->glsl_version >= 140)
> @@ -2796,6 +3139,12 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>     } else
>        ctx.so_names = NULL;
>
> +   if (ctx.info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT))
> +      ctx.glsl_ver_required = 150;
> +
> +   if (ctx.info.indirect_files & (1 << TGSI_FILE_SAMPLER))
> +      ctx.uses_gpu_shader5 = true;
> +
>     ctx.glsl_main = malloc(4096);
>     if (!ctx.glsl_main)
>        goto fail;
> @@ -2842,6 +3191,7 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>     sinfo->samplers_used_mask = ctx.samplers_used;
>     sinfo->num_consts = ctx.num_consts;
>     sinfo->num_ubos = ctx.num_ubo;
> +   sinfo->ubo_indirect = ctx.info.dimension_indirect_files & (1 << TGSI_FILE_CONSTANT);
>     sinfo->num_inputs = ctx.num_inputs;
>     sinfo->num_interps = ctx.num_interps;
>     sinfo->num_outputs = ctx.num_outputs;
> @@ -2850,6 +3200,8 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>     sinfo->gs_out_prim = ctx.gs_out_prim;
>     sinfo->so_names = ctx.so_names;
>     sinfo->attrib_input_mask = ctx.attrib_input_mask;
> +   sinfo->sampler_arrays = ctx.sampler_arrays;
> +   sinfo->num_sampler_arrays = ctx.num_sampler_arrays;
>     return glsl_final;
>   fail:
>     free(ctx.glsl_main);
> @@ -2862,7 +3214,7 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>
>  static void replace_interp(char *program,
>                             const char *var_name,
> -                           const char *pstring)
> +                           const char *pstring, const char *auxstring)
>  {
>     char *ptr;
>     int mylen = strlen(INTERP_PREFIX) + strlen("out vec4 ");
> @@ -2874,7 +3226,9 @@ static void replace_interp(char *program,
>
>     ptr -= mylen;
>
> +   memset(ptr, ' ', strlen(INTERP_PREFIX));
>     memcpy(ptr, pstring, strlen(pstring));
> +   memcpy(ptr + strlen(pstring), auxstring, strlen(auxstring));
>  }
>
>  bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg, char *program,
> @@ -2882,7 +3236,7 @@ bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg, char *
>                                              struct vrend_shader_info *fs_info, const char *oprefix, bool flatshade)
>  {
>     int i;
> -   const char *pstring;
> +   const char *pstring, *auxstring;
>     char glsl_name[64];
>     if (!vs_info || !fs_info)
>        return true;
> @@ -2895,27 +3249,29 @@ bool vrend_patch_vertex_shader_interpolants(struct vrend_shader_cfg *cfg, char *
>        if (!pstring)
>           continue;
>
> +      auxstring = get_aux_string(cfg, fs_info->interpinfo[i].centroid);
> +
>        switch (fs_info->interpinfo[i].semantic_name) {
>        case TGSI_SEMANTIC_COLOR:
>           /* color is a bit trickier */
>           if (fs_info->glsl_ver < 140) {
>              if (fs_info->interpinfo[i].semantic_index == 1) {
> -               replace_interp(program, "gl_FrontSecondaryColor", pstring);
> -               replace_interp(program, "gl_BackSecondaryColor", pstring);
> +              replace_interp(program, "gl_FrontSecondaryColor", pstring, auxstring);
> +               replace_interp(program, "gl_BackSecondaryColor", pstring, auxstring);
>              } else {
> -               replace_interp(program, "gl_FrontColor", pstring);
> -               replace_interp(program, "gl_BackColor", pstring);
> +               replace_interp(program, "gl_FrontColor", pstring, auxstring);
> +               replace_interp(program, "gl_BackColor", pstring, auxstring);
>              }
>           } else {
>              snprintf(glsl_name, 64, "ex_c%d", fs_info->interpinfo[i].semantic_index);
> -            replace_interp(program, glsl_name, pstring);
> +            replace_interp(program, glsl_name, pstring, auxstring);
>              snprintf(glsl_name, 64, "ex_bc%d", fs_info->interpinfo[i].semantic_index);
> -            replace_interp(program, glsl_name, pstring);
> +            replace_interp(program, glsl_name, pstring, auxstring);
>           }
>           break;
>        case TGSI_SEMANTIC_GENERIC:
>           snprintf(glsl_name, 64, "%s_g%d", oprefix, fs_info->interpinfo[i].semantic_index);
> -         replace_interp(program, glsl_name, pstring);
> +         replace_interp(program, glsl_name, pstring, auxstring);
>           break;
>        default:
>           fprintf(stderr,"unhandled semantic: %x\n", fs_info->interpinfo[i].semantic_name);
> diff --git a/src/vrend_shader.h b/src/vrend_shader.h
> index e54a85c..1a6d3d8 100644
> --- a/src/vrend_shader.h
> +++ b/src/vrend_shader.h
> @@ -32,6 +32,15 @@ struct vrend_interp_info {
>     int semantic_name;
>     int semantic_index;
>     int interpolate;
> +   bool centroid;
> +};
> +
> +struct vrend_sampler_array {
> +   int first;
> +   int last;
> +   int idx;
> +   int sview_type;
> +   int sview_rtype;
>  };
>
>  struct vrend_shader_info {
> @@ -41,6 +50,7 @@ struct vrend_shader_info {
>     int num_interps;
>     int num_outputs;
>     int num_ubos;
> +   bool ubo_indirect;
>     int num_ucp;
>     int glsl_ver;
>     bool has_pervertex_out;
> @@ -49,6 +59,10 @@ struct vrend_shader_info {
>     uint32_t shadow_samp_mask;
>     int gs_out_prim;
>     uint32_t attrib_input_mask;
> +
> +   struct vrend_sampler_array *sampler_arrays;
> +   int num_sampler_arrays;
> +
>     struct pipe_stream_output_info so_info;
>
>     struct vrend_interp_info *interpinfo;
> @@ -90,4 +104,6 @@ char *vrend_convert_shader(struct vrend_shader_cfg *cfg,
>                             struct vrend_shader_key *key,
>                             struct vrend_shader_info *sinfo);
>  const char *vrend_shader_samplertypeconv(int sampler_type, int *is_shad);
> +
> +int shader_lookup_sampler_array(struct vrend_shader_info *sinfo, int index);
>  #endif
> --
> 2.14.3
>
> _______________________________________________
> virglrenderer-devel mailing list
> virglrenderer-devel at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/virglrenderer-devel


More information about the virglrenderer-devel mailing list