[Mesa-dev] [PATCH 09/10] i965/fs: Track output regs on a split virtual GRF basis.

Kenneth Graunke kenneth at whitecape.org
Fri Mar 28 09:29:49 PDT 2014


On 03/26/2014 02:23 PM, Eric Anholt wrote:
> Basically, replace the output_components[] array with per-channel tracking
> of the register storing that channel, or a BAD_FILE undefined reg.
> 
> Right now var->data.location_frac is always 0, but I'm going to use that
> in vector_splitting next.
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp         |  2 +-
>  src/mesa/drivers/dri/i965/brw_fs.h           |  5 +--
>  src/mesa/drivers/dri/i965/brw_fs_fp.cpp      | 18 +++++----
>  src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55 ++++++++++++++--------------
>  4 files changed, 40 insertions(+), 40 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 0d24f59..eee0c8a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1732,7 +1732,7 @@ fs_visitor::compact_virtual_grfs()
>        { &pixel_y, 1 },
>        { &pixel_w, 1 },
>        { &wpos_w, 1 },
> -      { &dual_src_output, 1 },
> +      { dual_src_output, ARRAY_SIZE(dual_src_output) },
>        { outputs, ARRAY_SIZE(outputs) },
>        { delta_x, ARRAY_SIZE(delta_x) },
>        { delta_y, ARRAY_SIZE(delta_y) },
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index f410733..d47bc28 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -526,9 +526,8 @@ public:
>     struct hash_table *variable_ht;
>     fs_reg frag_depth;
>     fs_reg sample_mask;
> -   fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
> -   unsigned output_components[BRW_MAX_DRAW_BUFFERS];
> -   fs_reg dual_src_output;
> +   fs_reg outputs[BRW_MAX_DRAW_BUFFERS * 4];
> +   fs_reg dual_src_output[4];
>     bool do_dual_src;
>     int first_non_payload_grf;
>     /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> index 49eaf05..19483e3 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> @@ -646,25 +646,27 @@ fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
>           return frag_depth;
>        } else if (dst->Index == FRAG_RESULT_COLOR) {
>           if (outputs[0].file == BAD_FILE) {
> -            outputs[0] = fs_reg(this, glsl_type::vec4_type);
> -            output_components[0] = 4;
> +            fs_reg reg = fs_reg(this, glsl_type::vec4_type);
>  
>              /* Tell emit_fb_writes() to smear fragment.color across all the
>               * color attachments.
>               */
>              for (int i = 1; i < c->key.nr_color_regions; i++) {
> -               outputs[i] = outputs[0];
> -               output_components[i] = output_components[0];
> +               for (int j = 0; j < 4; j++) {
> +                  outputs[i * 4 + j] = offset(reg, j);
> +               }
>              }
>           }
>           return outputs[0];

This sure doesn't look right.

We check if outputs[0].file == BAD_FILE, and then proceed to initialize
outputs[4], outputs[5], etc...but never outputs[0] through outputs[3].
Then we return outputs[0], giving them a bogus register...

Did you mean to change the loop condition to 'int i = 0'?

>        } else {

I'm pretty the above bug means this else case will never happen...

The GLSL code looks mostly reasonable.

>           int output_index = dst->Index - FRAG_RESULT_DATA0;
> -         if (outputs[output_index].file == BAD_FILE) {
> -            outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
> +         if (outputs[output_index * 4].file == BAD_FILE) {
> +            fs_reg reg = fs_reg(this, glsl_type::vec4_type);
> +            for (int i = 0; i < 4; i++) {
> +               outputs[output_index * 4 + i] = offset(reg, i);
> +            }
>           }
> -         output_components[output_index] = 4;
> -         return outputs[output_index];
> +         return outputs[output_index * 4];
>        }
>  
>     case PROGRAM_UNDEFINED:
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 047ec21..d9bb4de 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -70,17 +70,25 @@ fs_visitor::visit(ir_variable *ir)
>     } else if (ir->data.mode == ir_var_shader_out) {
>        reg = new(this->mem_ctx) fs_reg(this, ir->type);
>  
> +      int vector_elements =
> +         ir->type->is_array() ? ir->type->fields.array->vector_elements
> +         : ir->type->vector_elements;
> +
>        if (ir->data.index > 0) {
> -	 assert(ir->data.location == FRAG_RESULT_DATA0);
> -	 assert(ir->data.index == 1);
> -	 this->dual_src_output = *reg;
> +         assert(ir->data.location == FRAG_RESULT_DATA0);
> +         assert(ir->data.index == 1);
> +         for (unsigned i = 0; i < vector_elements; i++) {
> +            this->dual_src_output[i + ir->data.location_frac] = offset(*reg, i);
> +         }
>           this->do_dual_src = true;
>        } else if (ir->data.location == FRAG_RESULT_COLOR) {
>  	 /* Writing gl_FragColor outputs to all color regions. */
> -	 for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
> -	    this->outputs[i] = *reg;
> -	    this->output_components[i] = 4;
> -	 }
> +         for (unsigned i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
> +            for (int j = 0; j < vector_elements; j++) {
> +               this->outputs[i * 4 + j + ir->data.location_frac] = offset(*reg,
> +                                                                          j);
> +            }
> +         }
>        } else if (ir->data.location == FRAG_RESULT_DEPTH) {
>  	 this->frag_depth = *reg;
>        } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
> @@ -90,16 +98,14 @@ fs_visitor::visit(ir_variable *ir)
>  	 assert(ir->data.location >= FRAG_RESULT_DATA0 &&
>  		ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
>  
> -	 int vector_elements =
> -	    ir->type->is_array() ? ir->type->fields.array->vector_elements
> -				 : ir->type->vector_elements;
> -
>  	 /* General color output. */
>  	 for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
>  	    int output = ir->data.location - FRAG_RESULT_DATA0 + i;
> -	    this->outputs[output] = *reg;
> -	    this->outputs[output].reg_offset += vector_elements * i;
> -	    this->output_components[output] = vector_elements;
> +
> +            for (int j = 0; j < vector_elements; j++) {
> +               this->outputs[4 * output + j + ir->data.location_frac] =
> +                  offset(*reg, vector_elements * i + j);
> +            }
>  	 }
>        }
>     } else if (ir->data.mode == ir_var_uniform) {
> @@ -2591,15 +2597,13 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
>  {
>     int reg_width = dispatch_width / 8;
>     fs_inst *inst;
> -   fs_reg color = outputs[target];
> +   fs_reg color = outputs[target * 4 + index];
>     fs_reg mrf;
>  
>     /* If there's no color data to be written, skip it. */
>     if (color.file == BAD_FILE)
>        return;
>  
> -   color.reg_offset += index;
> -
>     if (dispatch_width == 8 || brw->gen >= 6) {
>        /* SIMD8 write looks like:
>         * m + 0: r0
> @@ -2700,8 +2704,7 @@ fs_visitor::emit_alpha_test()
>                       BRW_CONDITIONAL_NEQ));
>     } else {
>        /* RT0 alpha */
> -      fs_reg color = outputs[0];
> -      color.reg_offset += 3;
> +      fs_reg color = outputs[3];
>  
>        /* f0.1 &= func(color, ref) */
>        cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref),
> @@ -2806,23 +2809,20 @@ fs_visitor::emit_fb_writes()
>     }
>  
>     if (do_dual_src) {
> -      fs_reg src0 = this->outputs[0];
> -      fs_reg src1 = this->dual_src_output;
> -
>        this->current_annotation = ralloc_asprintf(this->mem_ctx,
>  						 "FB write src0");
>        for (int i = 0; i < 4; i++) {
> +         fs_reg src0 = this->outputs[i];

Do we need to check for BAD_FILE here?  Say, dual source blending on
vec3 outputs (rather than vec4)?  (Or, just use emit_color_write?)

>  	 fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
> -	 src0.reg_offset++;
>  	 inst->saturate = c->key.clamp_fragment_color;
>        }
>  
>        this->current_annotation = ralloc_asprintf(this->mem_ctx,
>  						 "FB write src1");
>        for (int i = 0; i < 4; i++) {
> +         fs_reg src1 = this->dual_src_output[i];
>  	 fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
>                                    src1));
> -	 src1.reg_offset++;
>  	 inst->saturate = c->key.clamp_fragment_color;
>        }
>  
> @@ -2855,8 +2855,7 @@ fs_visitor::emit_fb_writes()
>        int write_color_mrf = color_mrf;
>        if (src0_alpha_to_render_target && target != 0) {
>           fs_inst *inst;
> -         fs_reg color = outputs[0];
> -         color.reg_offset += 3;
> +         fs_reg color = outputs[3];
>  
>           inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
>                           color));
> @@ -2864,7 +2863,7 @@ fs_visitor::emit_fb_writes()
>           write_color_mrf = color_mrf + reg_width;
>        }
>  
> -      for (unsigned i = 0; i < this->output_components[target]; i++)
> +      for (unsigned i = 0; i < 4; i++)
>           emit_color_write(target, i, write_color_mrf);
>  
>        bool eot = false;
> @@ -2957,7 +2956,7 @@ fs_visitor::fs_visitor(struct brw_context *brw,
>                                         hash_table_pointer_compare);
>  
>     memset(this->outputs, 0, sizeof(this->outputs));
> -   memset(this->output_components, 0, sizeof(this->output_components));
> +   memset(this->dual_src_output, 0, sizeof(this->dual_src_output));
>     this->first_non_payload_grf = 0;
>     this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
>  
> 


-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140328/694bee5d/attachment.sig>


More information about the mesa-dev mailing list