On 20 November 2012 21:40, Kenneth Graunke <span dir="ltr">&lt;<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>&gt;</span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ unsigned dispatch_width; /** 8 or 16 */<br>
+<br></blockquote><div><br>While we're at it, why don't we make it const? That will prevent us from accidentally modifying it at some inopportune time, and it *may* even cause the compiler to generate more efficient code.<br>
<br>Note: if we do that, we'll have to use the C++ "member initializer list" syntax to initialize it (a const member cannot be assigned to in the constructor body), e.g.:<br><br>fs_visitor::fs_visitor(...)<br> : dispatch_width(dispatch_width)<br>{<br> ...<br>} <br> </div>
<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
int force_uncompressed_stack;<br>
int force_sechalf_stack;<br>
};<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
index 29c73cf..7fdf526 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp<br>
@@ -93,7 +93,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)<br>
<br>
if (this->dual_src_output.file != BAD_FILE)<br>
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;<br>
- else if (c->dispatch_width == 16)<br>
+ else if (dispatch_width == 16)<br>
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;<br>
else<br>
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;<br>
@@ -101,7 +101,7 @@ fs_visitor::generate_fb_write(fs_inst *inst)<br>
brw_pop_insn_state(p);<br>
<br>
brw_fb_WRITE(p,<br>
- c->dispatch_width,<br>
+ dispatch_width,<br>
inst->base_mrf,<br>
implied_header,<br>
msg_control,<br>
@@ -133,7 +133,7 @@ fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x)<br>
deltas = brw_imm_v(0x11001100);<br>
}<br>
<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
dst = vec16(dst);<br>
}<br>
<br>
@@ -203,7 +203,7 @@ fs_visitor::generate_math1_gen6(fs_inst *inst,<br>
BRW_MATH_DATA_VECTOR,<br>
BRW_MATH_PRECISION_FULL);<br>
<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);<br>
brw_math(p, sechalf(dst),<br>
op,<br>
@@ -227,7 +227,7 @@ fs_visitor::generate_math2_gen6(fs_inst *inst,<br>
brw_set_compression_control(p, BRW_COMPRESSION_NONE);<br>
brw_math2(p, dst, op, src0, src1);<br>
<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);<br>
brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1));<br>
brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED);<br>
@@ -250,7 +250,7 @@ fs_visitor::generate_math_gen4(fs_inst *inst,<br>
BRW_MATH_DATA_VECTOR,<br>
BRW_MATH_PRECISION_FULL);<br>
<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);<br>
brw_math(p, sechalf(dst),<br>
op,<br>
@@ -282,7 +282,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)<br>
break;<br>
}<br>
<br>
- if (c->dispatch_width == 16)<br>
+ if (dispatch_width == 16)<br>
simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;<br>
<br>
if (intel->gen >= 5) {<br>
@@ -328,7 +328,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)<br>
/* Note that G45 and older determines shadow compare and dispatch width<br>
* from message length for most messages.<br>
*/<br>
- assert(c->dispatch_width == 8);<br>
+ assert(dispatch_width == 8);<br>
msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE;<br>
if (inst->shadow_compare) {<br>
assert(inst->mlen == 6);<br>
@@ -731,10 +731,10 @@ fs_visitor::generate_code()<br>
if (unlikely(INTEL_DEBUG & DEBUG_WM)) {<br>
if (shader) {<br>
printf("Native code for fragment shader %d (%d-wide dispatch):\n",<br>
- prog->Name, c->dispatch_width);<br>
+ prog->Name, dispatch_width);<br>
} else {<br>
printf("Native code for fragment program %d (%d-wide dispatch):\n",<br>
- c->fp->program.Base.Id, c->dispatch_width);<br>
+ c->fp->program.Base.Id, dispatch_width);<br>
}<br>
}<br>
<br>
@@ -807,7 +807,7 @@ fs_visitor::generate_code()<br>
brw_set_predicate_inverse(p, inst->predicate_inverse);<br>
brw_set_saturate(p, inst->saturate);<br>
<br>
- if (inst->force_uncompressed || c->dispatch_width == 8) {<br>
+ if (inst->force_uncompressed || dispatch_width == 8) {<br>
brw_set_compression_control(p, BRW_COMPRESSION_NONE);<br>
} else if (inst->force_sechalf) {<br>
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);<br>
@@ -833,7 +833,7 @@ fs_visitor::generate_code()<br>
<br>
case BRW_OPCODE_MAD:<br>
brw_set_access_mode(p, BRW_ALIGN_16);<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
brw_set_compression_control(p, BRW_COMPRESSION_NONE);<br>
brw_MAD(p, dst, src[0], src[1], src[2]);<br>
brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF);<br>
@@ -893,7 +893,7 @@ fs_visitor::generate_code()<br>
assert(intel->gen == 6);<br>
gen6_IF(p, inst->conditional_mod, src[0], src[1]);<br>
} else {<br>
- brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);<br>
+ brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8);<br>
}<br>
break;<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp<br>
index 88b0976..dc5a386 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp<br>
@@ -587,7 +587,7 @@ fs_visitor::setup_fp_regs()<br>
fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type);<br>
<br>
/* PROGRAM_STATE_VAR etc. */<br>
- if (c->dispatch_width == 8) {<br>
+ if (dispatch_width == 8) {<br>
for (unsigned p = 0;<br>
p < c->fp->program.Base.Parameters->NumParameters; p++) {<br>
for (unsigned int i = 0; i < 4; i++) {<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp<br>
index d7bb721..db8f397 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp<br>
@@ -280,10 +280,10 @@ fs_visitor::virtual_grf_interferes(int a, int b)<br>
* so our second half values in g6 got overwritten in the first<br>
* half.<br>
*/<br>
- if (c->dispatch_width == 16 && (this->pixel_x.reg == a ||<br>
- this->pixel_x.reg == b ||<br>
- this->pixel_y.reg == a ||<br>
- this->pixel_y.reg == b)) {<br>
+ if (dispatch_width == 16 && (this->pixel_x.reg == a ||<br>
+ this->pixel_x.reg == b ||<br>
+ this->pixel_y.reg == a ||<br>
+ this->pixel_y.reg == b)) {<br>
return start <= end;<br>
}<br>
<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
index f87cbbc..c5fd6dc 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp<br>
@@ -45,7 +45,7 @@ fs_visitor::assign_regs_trivial()<br>
{<br>
int hw_reg_mapping[this->virtual_grf_count + 1];<br>
int i;<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
<br>
/* Note that compressed instructions require alignment to 2 registers. */<br>
hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width);<br>
@@ -215,7 +215,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,<br>
int payload_node_count,<br>
int first_payload_node)<br>
{<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
int loop_depth = 0;<br>
int loop_end_ip = 0;<br>
<br>
@@ -337,7 +337,7 @@ void<br>
fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node)<br>
{<br>
int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START;<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
<br>
/* Identify all the MRFs used in the program. */<br>
bool mrf_used[mrf_count];<br>
@@ -393,7 +393,7 @@ fs_visitor::assign_regs()<br>
* registers it's allocating be contiguous physical pairs of regs<br>
* for reg_width == 2.<br>
*/<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
int hw_reg_mapping[this->virtual_grf_count];<br>
int payload_node_count = (ALIGN(this->first_non_payload_grf, reg_width) /<br>
reg_width);<br>
@@ -450,7 +450,7 @@ fs_visitor::assign_regs()<br>
<br>
if (reg == -1) {<br>
fail("no register to spill\n");<br>
- } else if (c->dispatch_width == 16) {<br>
+ } else if (dispatch_width == 16) {<br>
fail("Failure to register allocate. Reduce number of live scalar "<br>
"values to avoid this.");<br>
} else {<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp<br>
index 11e9858..6b7c412 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp<br>
@@ -239,7 +239,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n)<br>
bool<br>
instruction_scheduler::is_compressed(fs_inst *inst)<br>
{<br>
- return (v->c->dispatch_width == 16 &&<br>
+ return (v->dispatch_width == 16 &&<br>
!inst->force_uncompressed &&<br>
!inst->force_sechalf);<br>
}<br>
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
index 5d94181..28c7c5a 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp<br>
@@ -110,7 +110,7 @@ fs_visitor::visit(ir_variable *ir)<br>
if (ir->uniform_block != -1)<br>
return;<br>
<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
if (!variable_storage(ir)) {<br>
fail("Failed to find uniform '%s' in 16-wide\n", ir->name);<br>
}<br>
@@ -381,7 +381,7 @@ fs_visitor::visit(ir_expression *ir)<br>
* FINISHME: Emit just the MUL if we know an operand is small<br>
* enough.<br>
*/<br>
- if (intel->gen >= 7 && c->dispatch_width == 16)<br>
+ if (intel->gen >= 7 && dispatch_width == 16)<br>
fail("16-wide explicit accumulator operands unsupported\n");<br>
<br>
struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);<br>
@@ -394,7 +394,7 @@ fs_visitor::visit(ir_expression *ir)<br>
}<br>
break;<br>
case ir_binop_div:<br>
- if (intel->gen >= 7 && c->dispatch_width == 16)<br>
+ if (intel->gen >= 7 && dispatch_width == 16)<br>
fail("16-wide INTDIV unsupported\n");<br>
<br>
/* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */<br>
@@ -402,7 +402,7 @@ fs_visitor::visit(ir_expression *ir)<br>
emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);<br>
break;<br>
case ir_binop_mod:<br>
- if (intel->gen >= 7 && c->dispatch_width == 16)<br>
+ if (intel->gen >= 7 && dispatch_width == 16)<br>
fail("16-wide INTDIV unsupported\n");<br>
<br>
/* Floating point should be lowered by MOD_TO_FRACT in the compiler. */<br>
@@ -888,7 +888,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,<br>
{<br>
int mlen = 0;<br>
int base_mrf = 2;<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
bool header_present = false;<br>
const int vector_elements =<br>
ir->coordinate ? ir->coordinate->type->vector_elements : 0;<br>
@@ -1005,7 +1005,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,<br>
{<br>
int mlen = 0;<br>
int base_mrf = 2;<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
bool header_present = false;<br>
int offsets[3];<br>
<br>
@@ -1036,7 +1036,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,<br>
mlen += reg_width;<br>
break;<br>
case ir_txd: {<br>
- if (c->dispatch_width == 16)<br>
+ if (dispatch_width == 16)<br>
fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");<br>
<br>
/* Load dPdx and the coordinate together:<br>
@@ -1149,7 +1149,7 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,<br>
0<br>
};<br>
<br>
- if (c->dispatch_width == 16) {<br>
+ if (dispatch_width == 16) {<br>
fail("rectangle scale uniform setup not supported on 16-wide\n");<br>
return coordinate;<br>
}<br>
@@ -1615,7 +1615,7 @@ fs_visitor::emit_if_gen6(ir_if *ir)<br>
void<br>
fs_visitor::visit(ir_if *ir)<br>
{<br>
- if (intel->gen < 6 && c->dispatch_width == 16) {<br>
+ if (intel->gen < 6 && dispatch_width == 16) {<br>
fail("Can't support (non-uniform) control flow on 16-wide\n");<br>
}<br>
<br>
@@ -1658,7 +1658,7 @@ fs_visitor::visit(ir_loop *ir)<br>
{<br>
fs_reg counter = reg_undef;<br>
<br>
- if (intel->gen < 6 && c->dispatch_width == 16) {<br>
+ if (intel->gen < 6 && dispatch_width == 16) {<br>
fail("Can't support (non-uniform) control flow on 16-wide\n");<br>
}<br>
<br>
@@ -1790,7 +1790,7 @@ fs_visitor::emit(fs_inst *inst)<br>
void<br>
fs_visitor::emit_dummy_fs()<br>
{<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
<br>
/* Everyone's favorite color. */<br>
emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f)));<br>
@@ -1911,7 +1911,7 @@ fs_visitor::emit_interpolation_setup_gen6()<br>
void<br>
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)<br>
{<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
fs_inst *inst;<br>
fs_reg color = outputs[target];<br>
fs_reg mrf;<br>
@@ -1922,7 +1922,7 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf)<br>
<br>
color.reg_offset += index;<br>
<br>
- if (c->dispatch_width == 8 || intel->gen >= 6) {<br>
+ if (dispatch_width == 8 || intel->gen >= 6) {<br>
/* SIMD8 write looks like:<br>
* m + 0: r0<br>
* m + 1: r1<br>
@@ -1992,11 +1992,11 @@ fs_visitor::emit_fb_writes()<br>
*/<br>
int base_mrf = 1;<br>
int nr = base_mrf;<br>
- int reg_width = c->dispatch_width / 8;<br>
+ int reg_width = dispatch_width / 8;<br>
bool do_dual_src = this->dual_src_output.file != BAD_FILE;<br>
bool src0_alpha_to_render_target = false;<br>
<br>
- if (c->dispatch_width == 16 && do_dual_src) {<br>
+ if (dispatch_width == 16 && do_dual_src) {<br>
fail("GL_ARB_blend_func_extended not yet supported in 16-wide.");<br>
do_dual_src = false;<br>
}<br>
@@ -2040,7 +2040,7 @@ fs_visitor::emit_fb_writes()<br>
nr += reg_width;<br>
<br>
if (c->source_depth_to_render_target) {<br>
- if (intel->gen == 6 && c->dispatch_width == 16) {<br>
+ if (intel->gen == 6 && dispatch_width == 16) {<br>
/* For outputting oDepth on gen6, SIMD8 writes have to be<br>
* used. This would require 8-wide moves of each half to<br>
* message regs, kind of like pre-gen5 SIMD16 FB writes.<br>
@@ -2175,7 +2175,7 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg)<br>
}<br>
<br>
fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,<br>
- struct brw_shader *shader)<br>
+ struct brw_shader *shader, unsigned dispatch_width)<br>
{<br>
this->c = c;<br>
this->p = &c->func;<br>
@@ -2186,6 +2186,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog,<br>
this->ctx = &intel->ctx;<br>
this->mem_ctx = ralloc_context(NULL);<br>
this->shader = shader;<br>
+ this->dispatch_width = dispatch_width;<br>
this->failed = false;<br>
this->variable_ht = hash_table_ctor(0,<br>
hash_table_pointer_hash,<br>
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h<br>
index b8d8df3..a5ebddd 100644<br>
--- a/src/mesa/drivers/dri/i965/brw_wm.h<br>
+++ b/src/mesa/drivers/dri/i965/brw_wm.h<br>
@@ -93,8 +93,6 @@ struct brw_wm_compile {<br>
GLuint runtime_check_aads_emit:1;<br>
<br>
GLuint last_scratch;<br>
-<br>
- GLuint dispatch_width;<br>
};<br>
<br>
bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c,<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.8.0<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div>