On 20 November 2012 21:40, Kenneth Graunke <<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>> wrote: <div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + unsigned dispatch_width; /** 8 or 16 */ + </blockquote><div> While we're at it, why don't we make it const? That will prevent us from accidentally modifying it at some inopportune time, and it *may* even cause the compiler to generate more efficient code. Note: if we do that we'll have to use the C++ "initialization list" syntax to initialize it, e.g.: fs_visitor::fs_visitor(...) : dispatch_width(dispatch_width) { ... } </div> <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> int force_uncompressed_stack; int force_sechalf_stack; }; diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp index 29c73cf..7fdf526 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp @@ -93,7 +93,7 @@ fs_visitor::generate_fb_write(fs_inst *inst) if (this->dual_src_output.file != BAD_FILE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; - else if (c->dispatch_width == 16) + else if (dispatch_width == 16) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; @@ -101,7 +101,7 @@ fs_visitor::generate_fb_write(fs_inst *inst) brw_pop_insn_state(p); brw_fb_WRITE(p, - c->dispatch_width, + dispatch_width, inst->base_mrf, implied_header, msg_control, @@ -133,7 +133,7 @@ fs_visitor::generate_pixel_xy(struct brw_reg dst, bool is_x) deltas = brw_imm_v(0x11001100); } - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { dst = vec16(dst); } @@ -203,7 +203,7 @@ fs_visitor::generate_math1_gen6(fs_inst *inst, BRW_MATH_DATA_VECTOR, 
BRW_MATH_PRECISION_FULL); - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, sechalf(dst), op, @@ -227,7 +227,7 @@ fs_visitor::generate_math2_gen6(fs_inst *inst, brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_math2(p, dst, op, src0, src1); - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1)); brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); @@ -250,7 +250,7 @@ fs_visitor::generate_math_gen4(fs_inst *inst, BRW_MATH_DATA_VECTOR, BRW_MATH_PRECISION_FULL); - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); brw_math(p, sechalf(dst), op, @@ -282,7 +282,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) break; } - if (c->dispatch_width == 16) + if (dispatch_width == 16) simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; if (intel->gen >= 5) { @@ -328,7 +328,7 @@ fs_visitor::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) /* Note that G45 and older determines shadow compare and dispatch width * from message length for most messages. 
*/ - assert(c->dispatch_width == 8); + assert(dispatch_width == 8); msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; if (inst->shadow_compare) { assert(inst->mlen == 6); @@ -731,10 +731,10 @@ fs_visitor::generate_code() if (unlikely(INTEL_DEBUG & DEBUG_WM)) { if (shader) { printf("Native code for fragment shader %d (%d-wide dispatch):\n", - prog->Name, c->dispatch_width); + prog->Name, dispatch_width); } else { printf("Native code for fragment program %d (%d-wide dispatch):\n", - c->fp->program.Base.Id, c->dispatch_width); + c->fp->program.Base.Id, dispatch_width); } } @@ -807,7 +807,7 @@ fs_visitor::generate_code() brw_set_predicate_inverse(p, inst->predicate_inverse); brw_set_saturate(p, inst->saturate); - if (inst->force_uncompressed || c->dispatch_width == 8) { + if (inst->force_uncompressed || dispatch_width == 8) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); } else if (inst->force_sechalf) { brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -833,7 +833,7 @@ fs_visitor::generate_code() case BRW_OPCODE_MAD: brw_set_access_mode(p, BRW_ALIGN_16); - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { brw_set_compression_control(p, BRW_COMPRESSION_NONE); brw_MAD(p, dst, src[0], src[1], src[2]); brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); @@ -893,7 +893,7 @@ fs_visitor::generate_code() assert(intel->gen == 6); gen6_IF(p, inst->conditional_mod, src[0], src[1]); } else { - brw_IF(p, c->dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8); + brw_IF(p, dispatch_width == 16 ? 
BRW_EXECUTE_16 : BRW_EXECUTE_8); } break; diff --git a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp index 88b0976..dc5a386 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_fp.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_fp.cpp @@ -587,7 +587,7 @@ fs_visitor::setup_fp_regs() fp_temp_regs[i] = fs_reg(this, glsl_type::vec4_type); /* PROGRAM_STATE_VAR etc. */ - if (c->dispatch_width == 8) { + if (dispatch_width == 8) { for (unsigned p = 0; p < c->fp->program.Base.Parameters->NumParameters; p++) { for (unsigned int i = 0; i < 4; i++) { diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp index d7bb721..db8f397 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp @@ -280,10 +280,10 @@ fs_visitor::virtual_grf_interferes(int a, int b) * so our second half values in g6 got overwritten in the first * half. */ - if (c->dispatch_width == 16 && (this->pixel_x.reg == a || - this->pixel_x.reg == b || - this->pixel_y.reg == a || - this->pixel_y.reg == b)) { + if (dispatch_width == 16 && (this->pixel_x.reg == a || + this->pixel_x.reg == b || + this->pixel_y.reg == a || + this->pixel_y.reg == b)) { return start <= end; } diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index f87cbbc..c5fd6dc 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -45,7 +45,7 @@ fs_visitor::assign_regs_trivial() { int hw_reg_mapping[this->virtual_grf_count + 1]; int i; - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; /* Note that compressed instructions require alignment to 2 registers. 
*/ hw_reg_mapping[0] = ALIGN(this->first_non_payload_grf, reg_width); @@ -215,7 +215,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, int payload_node_count, int first_payload_node) { - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; int loop_depth = 0; int loop_end_ip = 0; @@ -337,7 +337,7 @@ void fs_visitor::setup_mrf_hack_interference(struct ra_graph *g, int first_mrf_node) { int mrf_count = BRW_MAX_GRF - GEN7_MRF_HACK_START; - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; /* Identify all the MRFs used in the program. */ bool mrf_used[mrf_count]; @@ -393,7 +393,7 @@ fs_visitor::assign_regs() * registers it's allocating be contiguous physical pairs of regs * for reg_width == 2. */ - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; int hw_reg_mapping[this->virtual_grf_count]; int payload_node_count = (ALIGN(this->first_non_payload_grf, reg_width) / reg_width); @@ -450,7 +450,7 @@ fs_visitor::assign_regs() if (reg == -1) { fail("no register to spill\n"); - } else if (c->dispatch_width == 16) { + } else if (dispatch_width == 16) { fail("Failure to register allocate. 
Reduce number of live scalar " "values to avoid this."); } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp index 11e9858..6b7c412 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_schedule_instructions.cpp @@ -239,7 +239,7 @@ instruction_scheduler::add_barrier_deps(schedule_node *n) bool instruction_scheduler::is_compressed(fs_inst *inst) { - return (v->c->dispatch_width == 16 && + return (v->dispatch_width == 16 && !inst->force_uncompressed && !inst->force_sechalf); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 5d94181..28c7c5a 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -110,7 +110,7 @@ fs_visitor::visit(ir_variable *ir) if (ir->uniform_block != -1) return; - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { if (!variable_storage(ir)) { fail("Failed to find uniform '%s' in 16-wide\n", ir->name); } @@ -381,7 +381,7 @@ fs_visitor::visit(ir_expression *ir) * FINISHME: Emit just the MUL if we know an operand is small * enough. */ - if (intel->gen >= 7 && c->dispatch_width == 16) + if (intel->gen >= 7 && dispatch_width == 16) fail("16-wide explicit accumulator operands unsupported\n"); struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D); @@ -394,7 +394,7 @@ fs_visitor::visit(ir_expression *ir) } break; case ir_binop_div: - if (intel->gen >= 7 && c->dispatch_width == 16) + if (intel->gen >= 7 && dispatch_width == 16) fail("16-wide INTDIV unsupported\n"); /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. 
*/ @@ -402,7 +402,7 @@ fs_visitor::visit(ir_expression *ir) emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]); break; case ir_binop_mod: - if (intel->gen >= 7 && c->dispatch_width == 16) + if (intel->gen >= 7 && dispatch_width == 16) fail("16-wide INTDIV unsupported\n"); /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */ @@ -888,7 +888,7 @@ fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate, { int mlen = 0; int base_mrf = 2; - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; bool header_present = false; const int vector_elements = ir->coordinate ? ir->coordinate->type->vector_elements : 0; @@ -1005,7 +1005,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, { int mlen = 0; int base_mrf = 2; - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; bool header_present = false; int offsets[3]; @@ -1036,7 +1036,7 @@ fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate, mlen += reg_width; break; case ir_txd: { - if (c->dispatch_width == 16) + if (dispatch_width == 16) fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode."); /* Load dPdx and the coordinate together: @@ -1149,7 +1149,7 @@ fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate, 0 }; - if (c->dispatch_width == 16) { + if (dispatch_width == 16) { fail("rectangle scale uniform setup not supported on 16-wide\n"); return coordinate; } @@ -1615,7 +1615,7 @@ fs_visitor::emit_if_gen6(ir_if *ir) void fs_visitor::visit(ir_if *ir) { - if (intel->gen < 6 && c->dispatch_width == 16) { + if (intel->gen < 6 && dispatch_width == 16) { fail("Can't support (non-uniform) control flow on 16-wide\n"); } @@ -1658,7 +1658,7 @@ fs_visitor::visit(ir_loop *ir) { fs_reg counter = reg_undef; - if (intel->gen < 6 && c->dispatch_width == 16) { + if (intel->gen < 6 && dispatch_width == 16) { fail("Can't support (non-uniform) control flow on 16-wide\n"); } 
@@ -1790,7 +1790,7 @@ fs_visitor::emit(fs_inst *inst) void fs_visitor::emit_dummy_fs() { - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; /* Everyone's favorite color. */ emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f))); @@ -1911,7 +1911,7 @@ fs_visitor::emit_interpolation_setup_gen6() void fs_visitor::emit_color_write(int target, int index, int first_color_mrf) { - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; fs_inst *inst; fs_reg color = outputs[target]; fs_reg mrf; @@ -1922,7 +1922,7 @@ fs_visitor::emit_color_write(int target, int index, int first_color_mrf) color.reg_offset += index; - if (c->dispatch_width == 8 || intel->gen >= 6) { + if (dispatch_width == 8 || intel->gen >= 6) { /* SIMD8 write looks like: * m + 0: r0 * m + 1: r1 @@ -1992,11 +1992,11 @@ fs_visitor::emit_fb_writes() */ int base_mrf = 1; int nr = base_mrf; - int reg_width = c->dispatch_width / 8; + int reg_width = dispatch_width / 8; bool do_dual_src = this->dual_src_output.file != BAD_FILE; bool src0_alpha_to_render_target = false; - if (c->dispatch_width == 16 && do_dual_src) { + if (dispatch_width == 16 && do_dual_src) { fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); do_dual_src = false; } @@ -2040,7 +2040,7 @@ fs_visitor::emit_fb_writes() nr += reg_width; if (c->source_depth_to_render_target) { - if (intel->gen == 6 && c->dispatch_width == 16) { + if (intel->gen == 6 && dispatch_width == 16) { /* For outputting oDepth on gen6, SIMD8 writes have to be * used. This would require 8-wide moves of each half to * message regs, kind of like pre-gen5 SIMD16 FB writes. 
@@ -2175,7 +2175,7 @@ fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) } fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, - struct brw_shader *shader) + struct brw_shader *shader, unsigned dispatch_width) { this->c = c; this->p = &c->func; @@ -2186,6 +2186,7 @@ fs_visitor::fs_visitor(struct brw_wm_compile *c, struct gl_shader_program *prog, this->ctx = &intel->ctx; this->mem_ctx = ralloc_context(NULL); this->shader = shader; + this->dispatch_width = dispatch_width; this->failed = false; this->variable_ht = hash_table_ctor(0, hash_table_pointer_hash, diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index b8d8df3..a5ebddd 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -93,8 +93,6 @@ struct brw_wm_compile { GLuint runtime_check_aads_emit:1; GLuint last_scratch; - - GLuint dispatch_width; }; bool brw_wm_fs_emit(struct brw_context *brw, struct brw_wm_compile *c, -- 1.8.0 _______________________________________________ mesa-dev mailing list <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a> </blockquote></div> </div>