[Mesa-dev] [PATCH 11/19] i965/fs: Use LOAD_PAYLOAD in emit_texture_gen7().

Matt Turner mattst88 at gmail.com
Thu Apr 24 08:40:41 PDT 2014


On Wed, Apr 23, 2014 at 11:19 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On 04/18/2014 11:56 AM, Matt Turner wrote:
>> ---
>>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 135 +++++++++++++++------------
>>  1 file changed, 73 insertions(+), 62 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index 2aa3acd..91bbe0a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -1257,8 +1257,11 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>     int reg_width = dispatch_width / 8;
>>     bool header_present = false;
>>
>> -   fs_reg payload = fs_reg(this, glsl_type::float_type);
>> -   fs_reg next = payload;
>> +   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, MAX_SAMPLER_MESSAGE_SIZE);
>> +   for (int i = 0; i < MAX_SAMPLER_MESSAGE_SIZE; i++) {
>> +      sources[i] = fs_reg(this, glsl_type::float_type);
>> +   }
>> +   int length = 0;
>>
>>     if (ir->op == ir_tg4 || (ir->offset && ir->op != ir_txf) || sampler >= 16) {
>>        /* For general texture offsets (no txf workaround), we need a header to
>> @@ -1272,12 +1275,13 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>         * need to offset the Sampler State Pointer in the header.
>>         */
>>        header_present = true;
>> -      next.reg_offset++;
>> +      sources[length] = reg_undef;
>> +      length++;
>>     }
>
> So...if you don't take the header_present = true path...then it looks
> like the next thing (say, shadow_c) will land in sources[0], rather than
> leaving sources[0] as BAD_FILE and putting the first part of the payload
> in sources[1].
>
> Is sources[0] special and reserved for the header or isn't it?

It's the header if there is a header, and it's a regular source if
there isn't a header. We can't make it always a header, because if the
header doesn't exist we'll end up asking the register allocator for a
set of registers one too large.

>>
>>     if (ir->shadow_comparitor) {
>> -      emit(MOV(next, shadow_c));
>> -      next.reg_offset++;
>> +      emit(MOV(sources[length], shadow_c));
>
> I'm confused by the fact that these MOVs are still here.  I would have
> expected:
>
> sources[length] = sample_c;
>
> When we visit expression trees, we generate results into various
> registers.  The point of these MOVs is to put them into a large-VGRF we
> can SEND from, at the right ref_offset.
>
> Now, you have a new set of MAX_SAMPLER_MESSAGE_SIZE registers, and copy
> from the original registers to these new temporaries, then LOAD_PAYLOAD
> does a second copy from those into the new ones.  It seems like we could
> just use the original registers and do a single copy.
>
> I'm probably missing something here, but I can't think of what.

I seem to remember needing to do this for some reason. I'll probably
have to try without the MOVs and see what breaks to remember.

>> +      length++;
>>     }
>>
>>     bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
>> @@ -1289,12 +1293,12 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>     case ir_lod:
>>        break;
>>     case ir_txb:
>> -      emit(MOV(next, lod));
>> -      next.reg_offset++;
>> +      emit(MOV(sources[length], lod));
>> +      length++;
>>        break;
>>     case ir_txl:
>> -      emit(MOV(next, lod));
>> -      next.reg_offset++;
>> +      emit(MOV(sources[length], lod));
>> +      length++;
>>        break;
>>     case ir_txd: {
>>        no16("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");
>> @@ -1303,21 +1307,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>         * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
>>         */
>>        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
>> -      emit(MOV(next, coordinate));
>> +      emit(MOV(sources[length], coordinate));
>>        coordinate.reg_offset++;
>> -      next.reg_offset++;
>> +      length++;
>>
>>           /* For cube map array, the coordinate is (u,v,r,ai) but there are
>>            * only derivatives for (u, v, r).
>>            */
>>           if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
>> -            emit(MOV(next, lod));
>> +            emit(MOV(sources[length], lod));
>>              lod.reg_offset++;
>> -            next.reg_offset++;
>> +            length++;
>>
>> -            emit(MOV(next, lod2));
>> +            emit(MOV(sources[length], lod2));
>>              lod2.reg_offset++;
>> -            next.reg_offset++;
>> +            length++;
>>           }
>>        }
>>
>> @@ -1325,45 +1329,45 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>        break;
>>     }
>>     case ir_txs:
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), lod));
>> -      next.reg_offset++;
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), lod));
>> +      length++;
>>        break;
>>     case ir_query_levels:
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
>> -      next.reg_offset++;
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), fs_reg(0u)));
>> +      length++;
>>        break;
>>     case ir_txf:
>>        /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
>>        coordinate.reg_offset++;
>> -      next.reg_offset++;
>> +      length++;
>>
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_D), lod));
>> -      next.reg_offset++;
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), lod));
>> +      length++;
>>
>>        for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
>>        coordinate.reg_offset++;
>> -      next.reg_offset++;
>> +      length++;
>>        }
>>
>>        coordinate_done = true;
>>        break;
>>     case ir_txf_ms:
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), sample_index));
>> -      next.reg_offset++;
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), sample_index));
>> +      length++;
>>
>>        /* data from the multisample control surface */
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_UD), mcs));
>> -      next.reg_offset++;
>> +      emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_UD), mcs));
>> +      length++;
>>
>>        /* there is no offsetting for this message; just copy in the integer
>>         * texture coordinates
>>         */
>>        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
>> -         emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
>> +         emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), coordinate));
>>           coordinate.reg_offset++;
>> -         next.reg_offset++;
>> +         length++;
>>        }
>>
>>        coordinate_done = true;
>> @@ -1378,21 +1382,21 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>           fs_reg offset_value = this->result;
>>
>>           for (int i = 0; i < 2; i++) { /* u, v */
>> -            emit(MOV(next, coordinate));
>> +            emit(MOV(sources[length], coordinate));
>>              coordinate.reg_offset++;
>> -            next.reg_offset++;
>> +            length++;
>>           }
>>
>>           for (int i = 0; i < 2; i++) { /* offu, offv */
>> -            emit(MOV(retype(next, BRW_REGISTER_TYPE_D), offset_value));
>> +            emit(MOV(retype(sources[length], BRW_REGISTER_TYPE_D), offset_value));
>>              offset_value.reg_offset++;
>> -            next.reg_offset++;
>> +            length++;
>>           }
>>
>>           if (ir->coordinate->type->vector_elements == 3) { /* r if present */
>> -            emit(MOV(next, coordinate));
>> +            emit(MOV(sources[length], coordinate));
>>              coordinate.reg_offset++;
>> -            next.reg_offset++;
>> +            length++;
>>           }
>>
>>           coordinate_done = true;
>> @@ -1403,40 +1407,44 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
>>     /* Set up the coordinate (except for cases where it was done above) */
>>     if (ir->coordinate && !coordinate_done) {
>>        for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
>> -         emit(MOV(next, coordinate));
>> +         emit(MOV(sources[length], coordinate));
>>           coordinate.reg_offset++;
>> -         next.reg_offset++;
>> +         length++;
>>        }
>>     }
>>
>> +   fs_reg src_payload = fs_reg(this, glsl_type::float_type);
>> +   virtual_grf_sizes[src_payload.reg] = length;
>> +   emit(SHADER_OPCODE_LOAD_PAYLOAD, src_payload, sources, length);
>> +
>
> I think I would prefer to see this called "payload", "msg_payload" or
> simply "message".  There are enough things called src/sources/...

Right. I don't know, I thought "load the sources into the source
payload" read pretty well. All of the other names suggest that they're
already a contiguous set of registers.

>
> Perhaps also use the virtual_grf_alloc paradigm, as you do in
> emit_mcs_fetch?  i.e.
>
> fs_reg msg_payload(GRF, virtual_grf_alloc(length), BRW_REGISTER_TYPE_F);

Yep, will do.

>>     /* Generate the SEND */
>> -   fs_inst *inst = NULL;
>> +   enum opcode opcode;
>>     switch (ir->op) {
>> -   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst, payload); break;
>> -   case ir_txb: inst = emit(FS_OPCODE_TXB, dst, payload); break;
>> -   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst, payload); break;
>> -   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst, payload); break;
>> -   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst, payload); break;
>> -   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_CMS, dst, payload); break;
>> -   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
>> -   case ir_query_levels: inst = emit(SHADER_OPCODE_TXS, dst, payload); break;
>> -   case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst, payload); break;
>> +   case ir_tex: opcode = SHADER_OPCODE_TEX; break;
>> +   case ir_txb: opcode = FS_OPCODE_TXB; break;
>> +   case ir_txl: opcode = SHADER_OPCODE_TXL; break;
>> +   case ir_txd: opcode = SHADER_OPCODE_TXD; break;
>> +   case ir_txf: opcode = SHADER_OPCODE_TXF; break;
>> +   case ir_txf_ms: opcode = SHADER_OPCODE_TXF_CMS; break;
>> +   case ir_txs: opcode = SHADER_OPCODE_TXS; break;
>> +   case ir_query_levels: opcode = SHADER_OPCODE_TXS; break;
>> +   case ir_lod: opcode = SHADER_OPCODE_LOD; break;
>>     case ir_tg4:
>>        if (has_nonconstant_offset)
>> -         inst = emit(SHADER_OPCODE_TG4_OFFSET, dst, payload);
>> +         opcode = SHADER_OPCODE_TG4_OFFSET;
>>        else
>> -         inst = emit(SHADER_OPCODE_TG4, dst, payload);
>> +         opcode = SHADER_OPCODE_TG4;
>>        break;
>>     }
>> +   fs_inst *inst = emit(opcode, dst, src_payload);
>>     inst->base_mrf = -1;
>>     if (reg_width == 2)
>> -      inst->mlen = next.reg_offset * reg_width - header_present;
>> +      inst->mlen = length * reg_width - header_present;
>>     else
>> -      inst->mlen = next.reg_offset * reg_width;
>> +      inst->mlen = length * reg_width;
>>     inst->header_present = header_present;
>>     inst->regs_written = 4;
>>
>> -   virtual_grf_sizes[payload.reg] = next.reg_offset;
>>     if (inst->mlen > MAX_SAMPLER_MESSAGE_SIZE) {
>>        fail("Message length >" STRINGIFY(MAX_SAMPLER_MESSAGE_SIZE)
>>             " disallowed by hardware\n");
>> @@ -1551,21 +1559,24 @@ fs_reg
>>  fs_visitor::emit_mcs_fetch(ir_texture *ir, fs_reg coordinate, int sampler)
>>  {
>>     int reg_width = dispatch_width / 8;
>> -   fs_reg payload = fs_reg(this, glsl_type::float_type);
>> +   int length = ir->coordinate->type->vector_elements;
>> +   fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length),
>> +                           BRW_REGISTER_TYPE_F);
>
> FWIW, in C++, you don't need to do
>
>    fs_reg name = fs_reg(...);
>
> you can just do:
>
>    fs_reg name(...);
>
> Hopefully the compiler is smart enough not to construct an fs_reg with
> the default constructor, destruct it, and assign over it using the
> assignment operator...but I don't know that it is.
>
> It's also just a bit shorter.

Yep, will do.

>
>>     fs_reg dest = fs_reg(this, glsl_type::uvec4_type);
>> -   fs_reg next = payload;
>> +   fs_reg *sources = ralloc_array(mem_ctx, fs_reg, length);
>>
>> -   /* parameters are: u, v, r, lod; missing parameters are treated as zero */
>> -   for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
>> -      emit(MOV(retype(next, BRW_REGISTER_TYPE_D), coordinate));
>> +   /* parameters are: u, v, r; missing parameters are treated as zero */
>> +   for (int i = 0; i < length; i++) {
>> +      sources[i] = fs_reg(this, glsl_type::float_type);
>> +      emit(MOV(retype(sources[i], BRW_REGISTER_TYPE_D), coordinate));
>>        coordinate.reg_offset++;
>> -      next.reg_offset++;
>>     }
>>
>> +   emit(SHADER_OPCODE_LOAD_PAYLOAD, payload, sources, length);
>> +
>>     fs_inst *inst = emit(SHADER_OPCODE_TXF_MCS, dest, payload);
>> -   virtual_grf_sizes[payload.reg] = next.reg_offset;
>>     inst->base_mrf = -1;
>> -   inst->mlen = next.reg_offset * reg_width;
>> +   inst->mlen = length * reg_width;
>>     inst->header_present = false;
>>     inst->regs_written = 4 * reg_width; /* we only care about one reg of response,
>>                                          * but the sampler always writes 4/8
>>
>
>


More information about the mesa-dev mailing list