[Mesa-dev] [PATCH] i965/fs: Track output regs on a split virtual GRF basis.
Eric Anholt
eric at anholt.net
Tue Apr 8 15:09:55 PDT 2014
v2: Fix fragment program fragment.color output, fix smaller-than-vec4 dual
source output codegen, and use offset() a bit more.
---
src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_fs.h | 5 +--
src/mesa/drivers/dri/i965/brw_fs_fp.cpp | 18 ++++----
src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 64 +++++++++++++++-------------
4 files changed, 46 insertions(+), 43 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 85a5463..972d4a8 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1732,7 +1732,7 @@ fs_visitor::compact_virtual_grfs()
{ &pixel_y, 1 },
{ &pixel_w, 1 },
{ &wpos_w, 1 },
- { &dual_src_output, 1 },
+ { dual_src_output, ARRAY_SIZE(dual_src_output) },
{ outputs, ARRAY_SIZE(outputs) },
{ delta_x, ARRAY_SIZE(delta_x) },
{ delta_y, ARRAY_SIZE(delta_y) },
diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
index 3d21ee5..d6dfde4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -526,9 +526,8 @@ public:
struct hash_table *variable_ht;
fs_reg frag_depth;
fs_reg sample_mask;
- fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
- unsigned output_components[BRW_MAX_DRAW_BUFFERS];
- fs_reg dual_src_output;
+ fs_reg outputs[BRW_MAX_DRAW_BUFFERS * 4];
+ fs_reg dual_src_output[4];
bool do_dual_src;
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
index 49eaf05..c6f063e 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp
@@ -646,25 +646,25 @@ fs_visitor::get_fp_dst_reg(const prog_dst_register *dst)
return frag_depth;
} else if (dst->Index == FRAG_RESULT_COLOR) {
if (outputs[0].file == BAD_FILE) {
- outputs[0] = fs_reg(this, glsl_type::vec4_type);
- output_components[0] = 4;
+ fs_reg reg = fs_reg(this, glsl_type::vec4_type);
/* Tell emit_fb_writes() to smear fragment.color across all the
* color attachments.
*/
- for (int i = 1; i < c->key.nr_color_regions; i++) {
- outputs[i] = outputs[0];
- output_components[i] = output_components[0];
+ for (int i = 0; i < c->key.nr_color_regions; i++) {
+ for (int j = 0; j < 4; j++)
+ outputs[i * 4 + j] = offset(reg, j);
}
}
return outputs[0];
} else {
int output_index = dst->Index - FRAG_RESULT_DATA0;
- if (outputs[output_index].file == BAD_FILE) {
- outputs[output_index] = fs_reg(this, glsl_type::vec4_type);
+ if (outputs[output_index * 4].file == BAD_FILE) {
+ fs_reg reg = fs_reg(this, glsl_type::vec4_type);
+ for (int i = 0; i < 4; i++)
+ outputs[output_index * 4 + i] = offset(reg, i);
}
- output_components[output_index] = 4;
- return outputs[output_index];
+ return outputs[output_index * 4];
}
case PROGRAM_UNDEFINED:
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 63a0ae5..a017d55 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -70,17 +70,25 @@ fs_visitor::visit(ir_variable *ir)
} else if (ir->data.mode == ir_var_shader_out) {
reg = new(this->mem_ctx) fs_reg(this, ir->type);
+ int vector_elements =
+ ir->type->is_array() ? ir->type->fields.array->vector_elements
+ : ir->type->vector_elements;
+
if (ir->data.index > 0) {
- assert(ir->data.location == FRAG_RESULT_DATA0);
- assert(ir->data.index == 1);
- this->dual_src_output = *reg;
+ assert(ir->data.location == FRAG_RESULT_DATA0);
+ assert(ir->data.index == 1);
+ for (unsigned i = 0; i < vector_elements; i++)
+ this->dual_src_output[i + ir->data.location_frac] = offset(*reg, i);
this->do_dual_src = true;
} else if (ir->data.location == FRAG_RESULT_COLOR) {
+ fs_reg chan = *reg;
/* Writing gl_FragColor outputs to all color regions. */
- for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
- this->outputs[i] = *reg;
- this->output_components[i] = 4;
- }
+ for (int j = 0; j < vector_elements; j++) {
+ for (unsigned i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
+ this->outputs[i * 4 + j + ir->data.location_frac] = chan;
+ }
+ chan.reg_offset++;
+ }
} else if (ir->data.location == FRAG_RESULT_DEPTH) {
this->frag_depth = *reg;
} else if (ir->data.location == FRAG_RESULT_SAMPLE_MASK) {
@@ -90,16 +98,16 @@ fs_visitor::visit(ir_variable *ir)
assert(ir->data.location >= FRAG_RESULT_DATA0 &&
ir->data.location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);
- int vector_elements =
- ir->type->is_array() ? ir->type->fields.array->vector_elements
- : ir->type->vector_elements;
-
/* General color output. */
for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
int output = ir->data.location - FRAG_RESULT_DATA0 + i;
- this->outputs[output] = *reg;
- this->outputs[output].reg_offset += vector_elements * i;
- this->output_components[output] = vector_elements;
+ fs_reg out = *reg;
+ out.reg_offset += vector_elements * i;
+
+ for (int j = 0; j < vector_elements; j++) {
+ this->outputs[4 * output + j + ir->data.location_frac] = out;
+ out.reg_offset++;
+ }
}
}
} else if (ir->data.mode == ir_var_uniform) {
@@ -2600,15 +2608,13 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
int reg_width = dispatch_width / 8;
fs_inst *inst;
- fs_reg color = outputs[target];
+ fs_reg color = outputs[target * 4 + index];
fs_reg mrf;
/* If there's no color data to be written, skip it. */
if (color.file == BAD_FILE)
return;
- color.reg_offset += index;
-
if (dispatch_width == 8 || brw->gen >= 6) {
/* SIMD8 write looks like:
* m + 0: r0
@@ -2709,8 +2715,7 @@ fs_visitor::emit_alpha_test()
BRW_CONDITIONAL_NEQ));
} else {
/* RT0 alpha */
- fs_reg color = outputs[0];
- color.reg_offset += 3;
+ fs_reg color = outputs[3];
/* f0.1 &= func(color, ref) */
cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref),
@@ -2815,23 +2820,23 @@ fs_visitor::emit_fb_writes()
}
if (do_dual_src) {
- fs_reg src0 = this->outputs[0];
- fs_reg src1 = this->dual_src_output;
-
this->current_annotation = ralloc_asprintf(this->mem_ctx,
"FB write src0");
for (int i = 0; i < 4; i++) {
- fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0));
- src0.reg_offset++;
- inst->saturate = c->key.clamp_fragment_color;
+ fs_reg src0 = this->outputs[0 * 4 + i];
+ if (src0.file != BAD_FILE) {
+ fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type),
+ src0));
+ inst->saturate = c->key.clamp_fragment_color;
+ }
}
this->current_annotation = ralloc_asprintf(this->mem_ctx,
"FB write src1");
for (int i = 0; i < 4; i++) {
+ fs_reg src1 = this->dual_src_output[i];
fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type),
src1));
- src1.reg_offset++;
inst->saturate = c->key.clamp_fragment_color;
}
@@ -2864,8 +2869,7 @@ fs_visitor::emit_fb_writes()
int write_color_mrf = color_mrf;
if (src0_alpha_to_render_target && target != 0) {
fs_inst *inst;
- fs_reg color = outputs[0];
- color.reg_offset += 3;
+ fs_reg color = outputs[3];
inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type),
color));
@@ -2873,7 +2877,7 @@ fs_visitor::emit_fb_writes()
write_color_mrf = color_mrf + reg_width;
}
- for (unsigned i = 0; i < this->output_components[target]; i++)
+ for (unsigned i = 0; i < 4; i++)
emit_color_write(target, i, write_color_mrf);
bool eot = false;
@@ -2966,7 +2970,7 @@ fs_visitor::fs_visitor(struct brw_context *brw,
hash_table_pointer_compare);
memset(this->outputs, 0, sizeof(this->outputs));
- memset(this->output_components, 0, sizeof(this->output_components));
+ memset(this->dual_src_output, 0, sizeof(this->dual_src_output));
this->first_non_payload_grf = 0;
this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
--
1.9.1
More information about the mesa-dev
mailing list