[Mesa-dev] [PATCH] mesa/st: add support for dynamic sampler offsets

Ilia Mirkin imirkin at alum.mit.edu
Wed Aug 6 08:53:47 PDT 2014


      pc->MaxAddressRegs     = pc->MaxNativeAddressRegs     =
         _min(screen->get_shader_param(screen, sh, PIPE_SHADER_CAP_MAX_ADDRS),
              MAX_PROGRAM_ADDRESS_REGS);

Not really sure what that's referring to... ARB_vp/fp or something?

Anyways, this is definitely a bit of a violation of that. OTOH, so are
the indirect UBO indexing and indirect GS input access (assuming
that's allowed), since those would use ADDR[1] and every driver
(except nv30) returns 1, and sometimes 0, for PIPE_SHADER_CAP_MAX_ADDRS
-- including nv50/nvc0/r600/radeonsi.

So... dunno what the proper way to proceed is. Fix drivers to claim
higher numbers? Continue the tradition of ignoring it and relying on
the fact that GPUs that don't support it also won't support the
features that cause it to get used?

On Wed, Aug 6, 2014 at 11:45 AM, Marek Olšák <maraeo at gmail.com> wrote:
> I guess PIPE_SHADER_CAP_MAX_ADDRS is now useless, because it can be
> derived from GLSL_FEATURE_LEVEL, right?
>
> Marek
>
> On Wed, Aug 6, 2014 at 5:25 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
>> Replace the plain sampler index with a register reference to a sampler.
>> We also need to keep track of the sampler array size when there is a
>> relative reference so that we can mark the whole array used.
>>
>> To facilitate implementation, we add a separate ADDR register that
>> exclusively handles the sampler relative address. Other approaches would
>> be more invasive.
>>
>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>> ---
>>
>> _mesa_get_sampler_array_nonconst_index is a function added by a patch that
>> ChrisF is working on... basically it returns NULL unless it's a nonconst
>> access.
>>
>> I've done a very modest amount of piglit testing, but I definitely need to do
>> some more. The nvc0 bits aren't 100% ready -- I noticed that in some odd
>> situations the arguments to the tex instruction will get all mangled. But for
>> a simple case that mixes non-array and array samplers, it looks something like
>> this:
>>
>> FRAG
>> PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
>> DCL IN[0], GENERIC[0], PERSPECTIVE
>> DCL OUT[0], COLOR
>> DCL SAMP[0]
>> DCL SAMP[1]
>> DCL SAMP[2]
>> DCL SAMP[3]
>> DCL CONST[0..1]
>> DCL TEMP[0..1], LOCAL
>> DCL ADDR[0..2]
>> IMM[0] FLT32 {    0.0000,     0.0000,     0.0000,     0.0000}
>>   0: MOV TEMP[0].xy, IN[0].xyyy
>>   1: TEX TEMP[0], TEMP[0], SAMP[0], 2D
>>   2: MOV TEMP[1].xy, IN[0].xyyy
>>   3: UARL ADDR[2].x, CONST[1].xxxx
>>   4: TEX TEMP[1], TEMP[1], SAMP[ADDR[2].x+1], 2D
>>   5: MUL TEMP[1], TEMP[1], CONST[0].xxxx
>>   6: MAD TEMP[0], TEMP[0], IMM[0].xxxx, TEMP[1]
>>   7: MOV OUT[0], TEMP[0]
>>   8: END
>>
>>  src/gallium/auxiliary/tgsi/tgsi_ureg.c | 2 +-
>>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 60 +++++++++++++++++++++---------
>>  2 files changed, 44 insertions(+), 18 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> index dcf0cb5..6d3ac91 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
>> @@ -78,7 +78,7 @@ struct ureg_tokens {
>>  #define UREG_MAX_OUTPUT PIPE_MAX_SHADER_OUTPUTS
>>  #define UREG_MAX_CONSTANT_RANGE 32
>>  #define UREG_MAX_IMMEDIATE 4096
>> -#define UREG_MAX_ADDR 2
>> +#define UREG_MAX_ADDR 3
>>  #define UREG_MAX_PRED 1
>>  #define UREG_MAX_ARRAY_TEMPS 256
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index c5e2eb5..0d5c3ed 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -245,7 +245,8 @@ public:
>>     ir_instruction *ir;
>>     GLboolean cond_update;
>>     bool saturate;
>> -   int sampler; /**< sampler index */
>> +   st_src_reg sampler; /**< sampler register */
>> +   int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
>>     int tex_target; /**< One of TEXTURE_*_INDEX */
>>     GLboolean tex_shadow;
>>
>> @@ -476,6 +477,7 @@ static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_T
>>
>>  static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
>>  static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
>> +static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
>>
>>  static void
>>  fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
>> @@ -2799,6 +2801,8 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>>     glsl_to_tgsi_instruction *inst = NULL;
>>     unsigned opcode = TGSI_OPCODE_NOP;
>>     const glsl_type *sampler_type = ir->sampler->type;
>> +   ir_rvalue *sampler_index =
>> +      _mesa_get_sampler_array_nonconst_index(ir->sampler);
>>     bool is_cube_array = false;
>>     unsigned i;
>>
>> @@ -3016,6 +3020,11 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>>        coord_dst.writemask = WRITEMASK_XYZW;
>>     }
>>
>> +   if (sampler_index) {
>> +      sampler_index->accept(this);
>> +      emit_arl(ir, sampler_reladdr, this->result);
>> +   }
>> +
>>     if (opcode == TGSI_OPCODE_TXD)
>>        inst = emit(ir, opcode, result_dst, coord, dx, dy);
>>     else if (opcode == TGSI_OPCODE_TXQ) {
>> @@ -3045,9 +3054,18 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir)
>>     if (ir->shadow_comparitor)
>>        inst->tex_shadow = GL_TRUE;
>>
>> -   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
>> -                                                  this->shader_program,
>> -                                                  this->prog);
>> +   inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler,
>> +                                                         this->shader_program,
>> +                                                         this->prog);
>> +   if (sampler_index) {
>> +      inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
>> +      memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
>> +      inst->sampler_array_size =
>> +         ir->sampler->as_dereference_array()
>> +            ->array->variable_referenced()->type->length;
>> +   } else {
>> +      inst->sampler_array_size = 1;
>> +   }
>>
>>     if (ir->offset) {
>>        for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
>> @@ -3215,10 +3233,12 @@ count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
>>
>>     foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
>>        if (is_tex_instruction(inst->op)) {
>> -         v->samplers_used |= 1 << inst->sampler;
>> +         for (int i = 0; i < inst->sampler_array_size; i++) {
>> +            v->samplers_used |= 1 << (inst->sampler.index + i);
>>
>> -         if (inst->tex_shadow) {
>> -            prog->ShadowSamplers |= 1 << inst->sampler;
>> +            if (inst->tex_shadow) {
>> +               prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
>> +            }
>>           }
>>        }
>>     }
>> @@ -4024,7 +4044,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
>>     src0 = v->get_temp(glsl_type::vec4_type);
>>     dst0 = st_dst_reg(src0);
>>     inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
>> -   inst->sampler = 0;
>> +   inst->sampler_array_size = 1;
>>     inst->tex_target = TEXTURE_2D_INDEX;
>>
>>     prog->InputsRead |= VARYING_BIT_TEX0;
>> @@ -4063,14 +4083,16 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp,
>>        /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
>>        temp_dst.writemask = WRITEMASK_XY; /* write R,G */
>>        inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
>> -      inst->sampler = 1;
>> +      inst->sampler.index = 1;
>> +      inst->sampler_array_size = 1;
>>        inst->tex_target = TEXTURE_2D_INDEX;
>>
>>        /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
>>        src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
>>        temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
>>        inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
>> -      inst->sampler = 1;
>> +      inst->sampler.index = 1;
>> +      inst->sampler_array_size = 1;
>>        inst->tex_target = TEXTURE_2D_INDEX;
>>
>>        prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
>> @@ -4151,7 +4173,8 @@ get_bitmap_visitor(struct st_fragment_program *fp,
>>     src0 = v->get_temp(glsl_type::vec4_type);
>>     dst0 = st_dst_reg(src0);
>>     inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
>> -   inst->sampler = samplerIndex;
>> +   inst->sampler.index = samplerIndex;
>> +   inst->sampler_array_size = 1;
>>     inst->tex_target = TEXTURE_2D_INDEX;
>>
>>     prog->InputsRead |= VARYING_BIT_TEX0;
>> @@ -4207,7 +4230,7 @@ struct st_translate {
>>     struct ureg_src *immediates;
>>     struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
>>     struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
>> -   struct ureg_dst address[2];
>> +   struct ureg_dst address[3];
>>     struct ureg_src samplers[PIPE_MAX_SAMPLERS];
>>     struct ureg_src systemValues[SYSTEM_VALUE_MAX];
>>     struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
>> @@ -4610,7 +4633,11 @@ compile_tgsi_instruction(struct st_translate *t,
>>     case TGSI_OPCODE_TXL2:
>>     case TGSI_OPCODE_TG4:
>>     case TGSI_OPCODE_LODQ:
>> -      src[num_src++] = t->samplers[inst->sampler];
>> +      src[num_src] = t->samplers[inst->sampler.index];
>> +      if (inst->sampler.reladdr)
>> +         src[num_src] =
>> +            ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
>> +      num_src++;
>>        for (i = 0; i < inst->tex_offset_num_offset; i++) {
>>           texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
>>        }
>> @@ -5026,10 +5053,9 @@ st_translate_program(
>>     /* Declare address register.
>>      */
>>     if (program->num_address_regs > 0) {
>> -      assert(program->num_address_regs <= 2);
>> -      t->address[0] = ureg_DECL_address(ureg);
>> -      if (program->num_address_regs == 2)
>> -         t->address[1] = ureg_DECL_address(ureg);
>> +      assert(program->num_address_regs <= 3);
>> +      for (int i = 0; i < program->num_address_regs; i++)
>> +         t->address[i] = ureg_DECL_address(ureg);
>>     }
>>
>>     /* Declare misc input registers
>> --
>> 1.8.5.5
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev


More information about the mesa-dev mailing list