[Mesa-dev] [PATCH 09/10] i965/fs: Track output regs on a split virtual GRF basis.
Kenneth Graunke
kenneth at whitecape.org
Fri Mar 28 09:29:49 PDT 2014
On 03/26/2014 02:23 PM, Eric Anholt wrote:
> Basically, replace the output_components[] array with per-channel tracking
> of the register storing that channel, or a BAD_FILE undefined reg.
>
> Right now var->data.location_frac is always 0, but I'm going to use that
> in vector_splitting next.
> ---
> src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
> src/mesa/drivers/dri/i965/brw_fs.h | 5 +--
> src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 18 +++++----
> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 55 ++++++++++++++--------------
> 4 files changed, 40 insertions(+), 40 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 0d24f59..eee0c8a 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1732,7 +1732,7 @@ fs_visitor::compact_virtual_grfs()
> { &pixel_y, 1 },
> { &pixel_w, 1 },
> { &wpos_w, 1 },
> - { &dual_src_output, 1 },
> + { dual_src_output, ARRAY_SIZE(dual_src_output) },
> { outputs, ARRAY_SIZE(outputs) },
> { delta_x, ARRAY_SIZE(delta_x) },
> { delta_y, ARRAY_SIZE(delta_y) },
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
> index f410733..d47bc28 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.h
> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
> @@ -526,9 +526,8 @@ public:
> struct hash_table *variable_ht;
> fs_reg frag_depth;
> fs_reg sample_mask;
> - fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
> - unsigned output_components[BRW_MAX_DRAW_BUFFERS];
> - fs_reg dual_src_output;
> + fs_reg outputs[BRW_MAX_DRAW_BUFFERS * 4];
> + fs_reg dual_src_output[4];
> bool do_dual_src;
> int first_non_payload_grf;
> /** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> index 49eaf05..19483e3 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
> @@ -646,25 +646,27 @@ fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
> return frag_depth;
> } else if (dst->Index == FRAG_RESULT_COLOR) {
> if (outputs[0].file == BAD_FILE) {
> - outputs[0] = fs_reg(this, glsl_type::vec4_type);
> - output_components[0] = 4;
> + fs_reg reg = fs_reg(this, glsl_type::vec4_type);
>
> /* Tell emit_fb_writes() to smear fragment.color across all the
> * color attachments.
> */
> for (int i = 1; i < c->key.nr_color_regions; i++) {
> - outputs[i] = outputs[0];
> - output_components[i] = output_components[0];
> + for (int j = 0; j < 4; j++) {
> + outputs[i * 4 + j] = offset(reg, j);
> + }
> }
> }
> return outputs[0];
This sure doesn't look right.
We check if outputs[0].file == BAD_FILE, and then proceed to initialize
outputs[4], outputs[5], etc...but never outputs[0] through outputs[3].
Then we return outputs[0], giving them a bogus register...
Did you mean to change the loop condition to 'int i = 0'?
> } else {
I'm pretty the above bug means this else case will never happen...
The GLSL code looks mostly reasonable.
> int output_index = dst->Index - FRAG_RESULT_DATA0;
> - if (outputs[output_index].file == BAD_FILE) {
> - outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
> + if (outputs[output_index * 4].file == BAD_FILE) {
> + fs_reg reg = fs_reg(this, glsl_type::vec4_type);
> + for (int i = 0; i < 4; i++) {
> + outputs[output_index * 4 + i] = offset(reg, i);
> + }
> }
> - output_components[output_index] = 4;
> - return outputs[output_index];
> + return outputs[output_index * 4];
> }
>
> case PROGRAM_UNDEFINED:
> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> index 047ec21..d9bb4de 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
> @@ -70,17 +70,25 @@ fs_visitor::visit(ir_variable *ir)
> } else if (ir->data.mode == ir_var_shader_out) {
> reg = new(this->mem_ctx) fs_reg(this, ir->type);
>
> + int vector_elements =
> + ir->type->is_array() ? ir->type->fields.array->vector_elements
> + : ir->type->vector_elements;
> +
> if (ir->data.index > 0) {
> - assert(ir->data.location == FRAG_RESULT_DATA0);
> - assert(ir->data.index == 1);
> - this->dual_src_output = *reg;
> + assert(ir->data.location == FRAG_RESULT_DATA0);
> + assert(ir->data.index == 1);
> + for (unsigned i = 0; i < vector_elements; i++) {
> + this->dual_src_output[i + ir->data.location_frac] = offset(*reg, i);
> + }
> this->do_dual_src = true;
> } else if (ir->data.location == FRAG_RESULT_COLOR) {
> /* Writing gl_FragColor outputs to all color regions. */
> - for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
> - this->outputs[i] = *reg;
> - this->output_components[i] = 4;
> - }
> + for (unsigned i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
> + for (int j = 0; j < vector_elements; j++) {
> + this->outputs[i * 4 + j + ir->data.location_frac] = offset(*reg,
> + j);
> + }
> + }
> } else if (ir->data.location == FRAG_RESULT_DEPTH) {
> this->frag_depth = *reg;
> } else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
> @@ -90,16 +98,14 @@ fs_visitor::visit(ir_variable *ir)
> assert(ir->data.location >= FRAG_RESULT_DATA0 &&
> ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
>
> - int vector_elements =
> - ir->type->is_array() ? ir->type->fields.array->vector_elements
> - : ir->type->vector_elements;
> -
> /* General color output. */
> for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
> int output = ir->data.location - FRAG_RESULT_DATA0 + i;
> - this->outputs[output] = *reg;
> - this->outputs[output].reg_offset += vector_elements * i;
> - this->output_components[output] = vector_elements;
> +
> + for (int j = 0; j < vector_elements; j++) {
> + this->outputs[4 * output + j + ir->data.location_frac] =
> + offset(*reg, vector_elements * i + j);
> + }
> }
> }
> } else if (ir->data.mode == ir_var_uniform) {
> @@ -2591,15 +2597,13 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
> {
> int reg_width = dispatch_width / 8;
> fs_inst *inst;
> - fs_reg color = outputs[target];
> + fs_reg color = outputs[target * 4 + index];
> fs_reg mrf;
>
> /* If there's no color data to be written, skip it. */
> if (color.file == BAD_FILE)
> return;
>
> - color.reg_offset += index;
> -
> if (dispatch_width == 8 || brw->gen >= 6) {
> /* SIMD8 write looks like:
> * m + 0: r0
> @@ -2700,8 +2704,7 @@ fs_visitor::emit_alpha_test()
> BRW_CONDITIONAL_NEQ));
> } else {
> /* RT0 alpha */
> - fs_reg color = outputs[0];
> - color.reg_offset += 3;
> + fs_reg color = outputs[3];
>
> /* f0.1 &= func(color, ref) */
> cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref),
> @@ -2806,23 +2809,20 @@ fs_visitor::emit_fb_writes()
> }
>
> if (do_dual_src) {
> - fs_reg src0 = this->outputs[0];
> - fs_reg src1 = this->dual_src_output;
> -
> this->current_annotation = ralloc_asprintf(this->mem_ctx,
> "FB write src0");
> for (int i = 0; i < 4; i++) {
> + fs_reg src0 = this->outputs[i];
Do we need to check for BAD_FILE here? Say, dual source blending on
vec3 outputs (rather than vec4)? (Or, just use emit_color_write?)
> fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
> - src0.reg_offset++;
> inst->saturate = c->key.clamp_fragment_color;
> }
>
> this->current_annotation = ralloc_asprintf(this->mem_ctx,
> "FB write src1");
> for (int i = 0; i < 4; i++) {
> + fs_reg src1 = this->dual_src_output[i];
> fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
> src1));
> - src1.reg_offset++;
> inst->saturate = c->key.clamp_fragment_color;
> }
>
> @@ -2855,8 +2855,7 @@ fs_visitor::emit_fb_writes()
> int write_color_mrf = color_mrf;
> if (src0_alpha_to_render_target && target != 0) {
> fs_inst *inst;
> - fs_reg color = outputs[0];
> - color.reg_offset += 3;
> + fs_reg color = outputs[3];
>
> inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
> color));
> @@ -2864,7 +2863,7 @@ fs_visitor::emit_fb_writes()
> write_color_mrf = color_mrf + reg_width;
> }
>
> - for (unsigned i = 0; i < this->output_components[target]; i++)
> + for (unsigned i = 0; i < 4; i++)
> emit_color_write(target, i, write_color_mrf);
>
> bool eot = false;
> @@ -2957,7 +2956,7 @@ fs_visitor::fs_visitor(struct brw_context *brw,
> hash_table_pointer_compare);
>
> memset(this->outputs, 0, sizeof(this->outputs));
> - memset(this->output_components, 0, sizeof(this->output_components));
> + memset(this->dual_src_output, 0, sizeof(this->dual_src_output));
> this->first_non_payload_grf = 0;
> this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
>
>
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 836 bytes
Desc: OpenPGP digital signature
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20140328/694bee5d/attachment.sig>
More information about the mesa-dev
mailing list