[Mesa-dev] [PATCH v2 14/16] i965: Add fs_visitor::run_vs() to generate scalar vertex shader code
Kristian Høgsberg
krh at bitplanet.net
Tue Nov 25 15:43:14 PST 2014
On Fri, Nov 14, 2014 at 4:08 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> On Thursday, November 13, 2014 04:28:20 PM Kristian Høgsberg wrote:
>> This patch uses the previous refactoring to add a new run_vs() method
>> that generates vertex shader code using the scalar visitor and
>> optimizer.
>>
>> Signed-off-by: Kristian Høgsberg <krh at bitplanet.net>
>> ---
>> src/mesa/drivers/dri/i965/brw_fs.cpp | 99 ++++++++-
>> src/mesa/drivers/dri/i965/brw_fs.h | 21 +-
>> src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 303 ++++++++++++++++++++++++++-
>> 3 files changed, 412 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> index 4dce0a2..8007977 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
>> @@ -1828,6 +1828,56 @@ fs_visitor::assign_urb_setup()
>> urb_start + prog_data->num_varying_inputs * 2;
>> }
>>
>> +void
>> +fs_visitor::assign_vs_urb_setup()
>> +{
>> + brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
>> + int grf, count, slot, channel, attr;
>> +
>> + assert(stage == MESA_SHADER_VERTEX);
>> + count = _mesa_bitcount_64(vs_prog_data->inputs_read);
>> + if (vs_prog_data->uses_vertexid || vs_prog_data->uses_instanceid)
>> + count++;
>> +
>> + /* Each attribute is 4 regs. */
>> + this->first_non_payload_grf =
>> + payload.num_regs + prog_data->curb_read_length + count * 4;
>> +
>> + unsigned vue_entries =
>> + MAX2(count, vs_prog_data->base.vue_map.num_slots);
>> +
>> + vs_prog_data->base.urb_entry_size = ALIGN(vue_entries, 4) / 4;
>> + vs_prog_data->base.urb_read_length = (count + 1) / 2;
>> +
>> + assert(vs_prog_data->base.urb_read_length <= 15);
>> +
>> + /* Rewrite all ATTR file references to the hw grf that they land in. */
>> + foreach_block_and_inst(block, fs_inst, inst, cfg) {
>> + for (int i = 0; i < inst->sources; i++) {
>> + if (inst->src[i].file == ATTR) {
>> +
>> + if (inst->src[i].reg == VERT_ATTRIB_MAX) {
>> + slot = count - 1;
>> + } else {
>> + attr = inst->src[i].reg + inst->src[i].reg_offset / 4;
>> + slot = _mesa_bitcount_64(vs_prog_data->inputs_read &
>> + BITFIELD64_MASK(attr));
>
> I'm having trouble understanding this code - can you explain?
>
> Reading ir_set_program_inouts.cpp:98 I see that incoming vertex attributes
> are always vec4 slots, except for matrices and arrays, which use multiple
> vec4 slots.
>
> I expected your ATTR registers to always be size 4, so reg_offset would have
> valid values of 0..3. But I must be mistaken, since you're doing
> reg_offset / 4, which would always be 0. Are ATTRs 4*N where N == the # of
> matrix columns or array length?
There were cases where reg_offset was > 3, which is why I did it this
way. That may be the real problem, though, and I shouldn't work around
it here... let me assert reg_offset < 4 there and find the piglit cases
that triggered this.
> Even still - I don't see how applying BITFIELD64_MASK to a potentially
> non-power-of-two number and then doing a bitcount will give you a single
> accurate slot value.
The slot computation is functionally the same as attribute_map[attr].
vec4_vs_visitor::setup_attributes computes, in attribute_map[attr], the
number of enabled attributes lower than attr. That's the number of bits
set in inputs_read below bit attr (i.e. below 1 << attr). We can select
just those bits by ANDing with BITFIELD64_MASK(attr) and count them
using bitcount.
> Adding a comment would also be nice to future maintainers.
Yea, fair point.
> This was the main spot where I got confused - otherwise most of the code
> looks good to me.
>
>> + }
>> +
>> + channel = inst->src[i].reg_offset & 3;
>> +
>> + grf = payload.num_regs +
>> + prog_data->curb_read_length +
>> + slot * 4 + channel;
>> +
>> + inst->src[i].file = HW_REG;
>> + inst->src[i].fixed_hw_reg =
>> + retype(brw_vec8_grf(grf, 0), inst->src[i].type);
>> + }
>> + }
>> + }
>> +}
>> +
>> /**
>> * Split large virtual GRFs into separate components if we can.
>> *
>> @@ -3405,6 +3455,13 @@ fs_visitor::setup_payload_gen6()
>> }
>>
>> void
>> +fs_visitor::setup_vs_payload()
>> +{
>> + /* R0: thread header, R1: urb handles */
>> + payload.num_regs = 2;
>> +}
>> +
>> +void
>> fs_visitor::assign_binding_table_offsets()
>> {
>> assert(stage == MESA_SHADER_FRAGMENT);
>> @@ -3471,6 +3528,8 @@ fs_visitor::opt_drop_redundant_mov_to_flags()
>> void
>> fs_visitor::optimize()
>> {
>> + const char *stage_name = stage == MESA_SHADER_VERTEX ? "vs" : "fs";
>> +
>> calculate_cfg();
>>
>> split_virtual_grfs();
>> @@ -3487,8 +3546,8 @@ fs_visitor::optimize()
>> \
>> if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
>> char filename[64]; \
>> - snprintf(filename, 64, "fs%d-%04d-%02d-%02d-" #pass, \
>> - dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
>> + snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \
>> + stage_name, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
>> \
>> backend_visitor::dump_instructions(filename); \
>> } \
>> @@ -3498,8 +3557,8 @@ fs_visitor::optimize()
>>
>> if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
>> char filename[64];
>> - snprintf(filename, 64, "fs%d-%04d-00-start",
>> - dispatch_width, shader_prog ? shader_prog->Name : 0);
>> + snprintf(filename, 64, "%s%d-%04d-00-start",
>> + stage_name, dispatch_width, shader_prog ? shader_prog->Name : 0);
>>
>> backend_visitor::dump_instructions(filename);
>> }
>> @@ -3608,6 +3667,38 @@ fs_visitor::allocate_registers()
>> }
>>
>> bool
>> +fs_visitor::run_vs()
>> +{
>> + assert(stage == MESA_SHADER_VERTEX);
>> +
>> + assign_common_binding_table_offsets(0);
>> + setup_vs_payload();
>> +
>> + if (INTEL_DEBUG & DEBUG_SHADER_TIME)
>> + emit_shader_time_begin();
>> +
>> + foreach_in_list(ir_instruction, ir, shader->base.ir) {
>> + base_ir = ir;
>> + this->result = reg_undef;
>> + ir->accept(this);
>> + }
>> + base_ir = NULL;
>> + if (failed)
>> + return false;
>> +
>> + emit_urb_writes();
>> +
>> + optimize();
>> +
>> + assign_curb_setup();
>> + assign_vs_urb_setup();
>> +
>> + allocate_registers();
>> +
>> + return !failed;
>> +}
>> +
>> +bool
>> fs_visitor::run()
>> {
>> sanity_param_count = prog->Parameters->NumParameters;
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h
>> index bb6f767..6888cdd 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs.h
>> +++ b/src/mesa/drivers/dri/i965/brw_fs.h
>> @@ -310,12 +310,23 @@ public:
>> struct gl_shader_program *shader_prog,
>> struct gl_fragment_program *fp,
>> unsigned dispatch_width);
>> +
>> + fs_visitor(struct brw_context *brw,
>> + void *mem_ctx,
>> + const struct brw_vs_prog_key *key,
>> + struct brw_vs_prog_data *prog_data,
>> + struct gl_shader_program *shader_prog,
>> + struct gl_vertex_program *cp,
>> + unsigned dispatch_width);
>> +
>> ~fs_visitor();
>> void init();
>>
>> fs_reg *variable_storage(ir_variable *var);
>> int virtual_grf_alloc(int size);
>> void import_uniforms(fs_visitor *v);
>> + void setup_uniform_clipplane_values();
>> + void compute_clip_distance();
>>
>> void visit(ir_variable *ir);
>> void visit(ir_assignment *ir);
>> @@ -406,14 +417,17 @@ public:
>> uint32_t const_offset);
>>
>> bool run();
>> + bool run_vs();
>> void optimize();
>> void allocate_registers();
>> void assign_binding_table_offsets();
>> void setup_payload_gen4();
>> void setup_payload_gen6();
>> + void setup_vs_payload();
>> void assign_curb_setup();
>> void calculate_urb_setup();
>> void assign_urb_setup();
>> + void assign_vs_urb_setup();
>> bool assign_regs(bool allow_spilling);
>> void assign_regs_trivial();
>> void get_used_mrfs(bool *mrf_used);
>> @@ -471,6 +485,7 @@ public:
>> fs_reg *emit_samplepos_setup();
>> fs_reg *emit_sampleid_setup();
>> fs_reg *emit_general_interpolation(ir_variable *ir);
>> + fs_reg *emit_vs_system_value(enum brw_reg_type type, int location);
>> void emit_interpolation_setup_gen4();
>> void emit_interpolation_setup_gen6();
>> void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
>> @@ -557,6 +572,7 @@ public:
>> fs_inst *emit_single_fb_write(fs_reg color1, fs_reg color2,
>> fs_reg src0_alpha, unsigned components);
>> void emit_fb_writes();
>> + void emit_urb_writes();
>>
>> void emit_shader_time_begin();
>> void emit_shader_time_end();
>> @@ -632,8 +648,8 @@ public:
>> struct hash_table *variable_ht;
>> fs_reg frag_depth;
>> fs_reg sample_mask;
>> - fs_reg outputs[BRW_MAX_DRAW_BUFFERS];
>> - unsigned output_components[BRW_MAX_DRAW_BUFFERS];
>> + fs_reg outputs[VARYING_SLOT_MAX];
>> + unsigned output_components[VARYING_SLOT_MAX];
>> fs_reg dual_src_output;
>> bool do_dual_src;
>> int first_non_payload_grf;
>> @@ -680,6 +696,7 @@ public:
>> fs_reg delta_x[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>> fs_reg delta_y[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT];
>> fs_reg shader_start_time;
>> + fs_reg userplane[MAX_CLIP_PLANES];
>>
>> int grf_used;
>> bool spilled_any_registers;
>> diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> index 0cc51f3..df70340 100644
>> --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
>> @@ -43,11 +43,40 @@ extern "C" {
>> #include "brw_eu.h"
>> #include "brw_wm.h"
>> }
>> +#include "brw_vec4.h"
>> #include "brw_fs.h"
>> #include "main/uniforms.h"
>> #include "glsl/glsl_types.h"
>> #include "glsl/ir_optimization.h"
>>
>> +fs_reg *
>> +fs_visitor::emit_vs_system_value(enum brw_reg_type type, int location)
>> +{
>> + fs_reg *reg = new(this->mem_ctx)
>> + fs_reg(ATTR, VERT_ATTRIB_MAX, type);
>> + brw_vs_prog_data *vs_prog_data = (brw_vs_prog_data *) prog_data;
>> +
>> + switch (location) {
>> + case SYSTEM_VALUE_BASE_VERTEX:
>> + reg->reg_offset = 0;
>> + vs_prog_data->uses_vertexid = true;
>> + break;
>> + case SYSTEM_VALUE_VERTEX_ID:
>> + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
>> + reg->reg_offset = 2;
>> + vs_prog_data->uses_vertexid = true;
>> + break;
>> + case SYSTEM_VALUE_INSTANCE_ID:
>> + reg->reg_offset = 3;
>> + vs_prog_data->uses_instanceid = true;
>> + break;
>> + default:
>> + unreachable("not reached");
>> + }
>> +
>> + return reg;
>> +}
>> +
>> void
>> fs_visitor::visit(ir_variable *ir)
>> {
>> @@ -57,7 +86,11 @@ fs_visitor::visit(ir_variable *ir)
>> return;
>>
>> if (ir->data.mode == ir_var_shader_in) {
>> - if (!strcmp(ir->name, "gl_FragCoord")) {
>> + if (stage == MESA_SHADER_VERTEX) {
>> + reg = new(this->mem_ctx)
>> + fs_reg(ATTR, ir->data.location,
>> + brw_type_for_base_type(ir->type->get_scalar_type()));
>> + } else if (!strcmp(ir->name, "gl_FragCoord")) {
>> reg = emit_fragcoord_interpolation(ir);
>> } else if (!strcmp(ir->name, "gl_FrontFacing")) {
>> reg = emit_frontfacing_interpolation();
>> @@ -70,7 +103,19 @@ fs_visitor::visit(ir_variable *ir)
>> } else if (ir->data.mode == ir_var_shader_out) {
>> reg = new(this->mem_ctx) fs_reg(this, ir->type);
>>
>> - if (ir->data.index > 0) {
>> + if (stage == MESA_SHADER_VERTEX) {
>> + int vector_elements =
>> + ir->type->is_array() ? ir->type->fields.array->vector_elements
>> + : ir->type->vector_elements;
>> +
>> + for (int i = 0; i < (type_size(ir->type) + 3) / 4; i++) {
>> + int output = ir->data.location + i;
>> + this->outputs[output] = *reg;
>> + this->outputs[output].reg_offset = i * 4;
>> + this->output_components[output] = vector_elements;
>> + }
>> +
>> + } else if (ir->data.index > 0) {
>> assert(ir->data.location == FRAG_RESULT_DATA0);
>> assert(ir->data.index == 1);
>> this->dual_src_output = *reg;
>> @@ -134,15 +179,26 @@ fs_visitor::visit(ir_variable *ir)
>> reg->type = brw_type_for_base_type(ir->type);
>>
>> } else if (ir->data.mode == ir_var_system_value) {
>> - if (ir->data.location == SYSTEM_VALUE_SAMPLE_POS) {
>> + switch (ir->data.location) {
>> + case SYSTEM_VALUE_BASE_VERTEX:
>> + case SYSTEM_VALUE_VERTEX_ID:
>> + case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
>> + case SYSTEM_VALUE_INSTANCE_ID:
>> + reg = emit_vs_system_value(brw_type_for_base_type(ir->type),
>> + ir->data.location);
>> + break;
>> + case SYSTEM_VALUE_SAMPLE_POS:
>> reg = emit_samplepos_setup();
>> - } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_ID) {
>> + break;
>> + case SYSTEM_VALUE_SAMPLE_ID:
>> reg = emit_sampleid_setup();
>> - } else if (ir->data.location == SYSTEM_VALUE_SAMPLE_MASK_IN) {
>> + break;
>> + case SYSTEM_VALUE_SAMPLE_MASK_IN:
>> assert(brw->gen >= 7);
>> reg = new(mem_ctx)
>> fs_reg(retype(brw_vec8_grf(payload.sample_mask_in_reg, 0),
>> BRW_REGISTER_TYPE_D));
>> + break;
>> }
>> }
>>
>> @@ -1709,6 +1765,8 @@ get_tex(gl_shader_stage stage, const void *key)
>> switch (stage) {
>> case MESA_SHADER_FRAGMENT:
>> return &((brw_wm_prog_key*) key)->tex;
>> + case MESA_SHADER_VERTEX:
>> + return &((brw_vec4_prog_key*) key)->tex;
>
> Doesn't compile. &((brw_vue_prog_key *) key)->tex;
>
>> default:
>> unreachable("unhandled shader stage");
>> }
>> @@ -3394,6 +3452,222 @@ fs_visitor::emit_fb_writes()
>> }
>>
>> void
>> +fs_visitor::setup_uniform_clipplane_values()
>> +{
>> + gl_clip_plane *clip_planes = brw_select_clip_planes(ctx);
>> + const struct brw_vec4_prog_key *key =
>> + (const struct brw_vec4_prog_key *) this->key;
>> +
>> + for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
>> + this->userplane[i] = fs_reg(UNIFORM, uniforms);
>> + for (int j = 0; j < 4; ++j) {
>> + stage_prog_data->param[uniforms + j] =
>> + (gl_constant_value *) &clip_planes[i][j];
>> + }
>> + uniforms += 4;
>> + }
>> +}
>> +
>> +void fs_visitor::compute_clip_distance()
>> +{
>> + struct brw_vue_prog_data *vue_prog_data =
>> + (struct brw_vue_prog_data *) prog_data;
>> + const struct brw_vec4_prog_key *key =
>> + (const struct brw_vec4_prog_key *) this->key;
>> +
>
> brw_vue_prog_key.
>
> Please also copy and paste Paul's comment:
>
> /* From the GLSL 1.30 spec, section 7.1 (Vertex Shader Special Variables):
> *
> * "If a linked set of shaders forming the vertex stage contains no
> * static write to gl_ClipVertex or gl_ClipDistance, but the
> * application has requested clipping against user clip planes through
> * the API, then the coordinate written to gl_Position is used for
> * comparison against the user clip planes."
> *
> * This function is only called if the shader didn't write to
> * gl_ClipDistance. Accordingly, we use gl_ClipVertex to perform clipping
> * if the user wrote to it; otherwise we use gl_Position.
> */
Right, done.
>> + gl_varying_slot clip_vertex = VARYING_SLOT_CLIP_VERTEX;
>> + if (!(vec4_prog_data->vue_map.slots_valid & VARYING_BIT_CLIP_VERTEX))
>> + clip_vertex = VARYING_SLOT_POS;
>> +
>> + /* If the clip vertex isn't written, skip this. Typically this means
>> + * the GS will set up clipping. */
>
> */ goes on its own line (same feedback applies in many places).
>
>> + if (outputs[clip_vertex].file == BAD_FILE)
>> + return;
>> +
>> + setup_uniform_clipplane_values();
>> +
>> + current_annotation = "user clip distances";
>> +
>> + this->outputs[VARYING_SLOT_CLIP_DIST0] = fs_reg(this, glsl_type::vec4_type);
>> + this->outputs[VARYING_SLOT_CLIP_DIST1] = fs_reg(this, glsl_type::vec4_type);
>> +
>> + for (int i = 0; i < key->nr_userclip_plane_consts; i++) {
>> + fs_reg u = userplane[i];
>> + fs_reg output = outputs[VARYING_SLOT_CLIP_DIST0 + i / 4];
>> + output.reg_offset = i & 3;
>> +
>> + emit(MUL(output, outputs[clip_vertex], u));
>> + for (int j = 1; j < 4; j++) {
>> + u.reg = userplane[i].reg + j;
>> + emit(MAD(output, output, offset(outputs[clip_vertex], j), u));
>> + }
>> + }
>> +}
>> +
>> +void
>> +fs_visitor::emit_urb_writes()
>> +{
>> + int slot, urb_offset, length;
>> + struct brw_vue_prog_data *vue_prog_data =
>> + (struct brw_vue_prog_data *) prog_data;
>> + const struct brw_vec4_prog_key *key =
>> + (const struct brw_vec4_prog_key *) this->key;
>> + const GLbitfield64 psiz_mask =
>> + VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT | VARYING_BIT_PSIZ;
>> + bool flush;
>> + fs_reg sources[8];
>> +
>> + /* Lower legacy ff and ClipVertex clipping to clip distances */
>> + if (key->userclip_active && !prog->UsesClipDistanceOut)
>> + compute_clip_distance();
>> +
>> + /* If we don't have any valid slots to write, just do a minimal urb write
>> + * send to terminate the shader. */
>> + if (vec4_prog_data->vue_map.slots_valid == 0) {
>> +
>> + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
>> + fs_inst *inst = emit(MOV(payload, fs_reg(retype(brw_vec8_grf(1, 0),
>> + BRW_REGISTER_TYPE_UD))));
>> + inst->force_writemask_all = true;
>> +
>> + inst = emit(VS_OPCODE_URB_WRITE, reg_undef, payload);
>> + inst->eot = true;
>> + inst->mlen = 1;
>> + inst->offset = 1;
>> + return;
>> + }
>> +
>> + length = 0;
>> + urb_offset = 0;
>> + flush = false;
>> + for (slot = 0; slot < vec4_prog_data->vue_map.num_slots; slot++) {
>> + fs_reg reg, src, zero;
>> +
>> + int varying = vec4_prog_data->vue_map.slot_to_varying[slot];
>> + switch (varying) {
>> + case VARYING_SLOT_PSIZ:
>> +
>> + /* The point size varying slot is the vue header and is always in the
>> + * vue map. But often none of the special varyings that live there
>> + * are written and in that case we can skip writing to the vue
>> + * header, provided the corresponding state properly clamps the
>> + * values further down the pipeline. */
>> + if ((vec4_prog_data->vue_map.slots_valid & psiz_mask) == 0) {
>> + assert(length == 0);
>> + urb_offset++;
>> + break;
>> + }
>> +
>> + zero = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_UD);
>> + emit(MOV(zero, fs_reg(0u)));
>> +
>> + sources[length++] = zero;
>> + if (vec4_prog_data->vue_map.slots_valid & VARYING_BIT_LAYER)
>> + sources[length++] = this->outputs[VARYING_SLOT_LAYER];
>> + else
>> + sources[length++] = zero;
>> +
>> + if (vec4_prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT)
>> + sources[length++] = this->outputs[VARYING_SLOT_VIEWPORT];
>> + else
>> + sources[length++] = zero;
>> +
>> + if (vec4_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ)
>> + sources[length++] = this->outputs[VARYING_SLOT_PSIZ];
>> + else
>> + sources[length++] = zero;
>> + break;
>> +
>> + case BRW_VARYING_SLOT_NDC:
>> + case VARYING_SLOT_EDGE:
>> + unreachable("unexpected scalar vs output");
>> + break;
>> +
>> + case BRW_VARYING_SLOT_PAD:
>> + break;
>> +
>> + default:
>> + /* gl_Position is always in the vue map, but isn't always written by
>> + * the shader. Other varyings (clip distances) get added to the vue
>
> Really? I guess that's true, but it's a link error to not write it prior to
> GLSL 1.40, so virtually all shaders do write it...
I got this from a couple of piglit tests that use an empty vertex
shader and generate the position in the geometry shader.
> I suppose you're right, though, and we need this code for clip distance
> regardless, so my point is moot. Nevermind.
>
>> + * map but doesn't always get written. In those cases, the
>
> "don't always get written"
>
>> + * corresponding this->output slot will be invalid we can skip the
>
> ", and" ^
>
>> + * urb write for the varying. If we've already queued up a vue slot
>> + * for writing we flush a mlen 5 urb write, otherwise we just advance
>> + * the urb_offset.
>> + */
>> + if (this->outputs[varying].file == BAD_FILE) {
>> + if (length > 0)
>> + flush = true;
>> + else
>> + urb_offset++;
>> + break;
>> + }
>> +
>> + for (int i = 0; i < 4; i++) {
>> + if ((varying == VARYING_SLOT_COL0 ||
>> + varying == VARYING_SLOT_COL1 ||
>> + varying == VARYING_SLOT_BFC0 ||
>> + varying == VARYING_SLOT_BFC1) &&
>> + key->clamp_vertex_color) {
>> + /* We need to clamp these guys, so do a saturating MOV into a
>> + * temp register and use that for the payload.
>> + */
>> + reg = fs_reg(GRF, virtual_grf_alloc(1), BRW_REGISTER_TYPE_F);
>> + reg.type = this->outputs[varying].type;
>
> Could be written more succinctly as:
>
> reg = fs_reg(GRF, virtual_grf_alloc(1), outputs[varying].type);
Heh, right...
>> + src = offset(this->outputs[varying], i);
>> + fs_inst *inst = emit(MOV(reg, src));
>> + inst->saturate = true;
>> + sources[length++] = reg;
>> + } else {
>> + sources[length++] = offset(this->outputs[varying], i);
>> + }
>> + }
>
> I'd push the loop into the then/else blocks, i.e.
>
> if (key->clamp_vertex_color && (COL0 || COL1 || BFC0 || BFC1)) {
> for (int i = 0; i < 4; i++) {
> ...
> }
> } else {
> for (int i = 0; i < 4; i++)
> sources[length++] = offset(this->outputs[varying], i);
> }
Yeah, that feels a little less awkward.
>> + break;
>> + }
>> +
>> + current_annotation = "URB write";
>> +
>> + /* If we've queued up 8 registers of payload (2 VUE slots), if this is
>> + * the last slot or if we need to flush (see BAD_FILE varying case
>> + * above), emit a URB write send now to flush out the data.
>> + */
>> + int last = slot == vec4_prog_data->vue_map.num_slots - 1;
>> + if (length == 8 || last)
>> + flush = true;
>> + if (flush) {
>> + if (last && (INTEL_DEBUG & DEBUG_SHADER_TIME))
>> + emit_shader_time_end();
>> +
>> + fs_reg *payload_sources = ralloc_array(mem_ctx, fs_reg, length + 1);
>> + fs_reg payload = fs_reg(GRF, virtual_grf_alloc(length + 1),
>> + BRW_REGISTER_TYPE_F);
>> +
>> + /* We need WE_all on the MOV for the message header (the URB handles)
>> + * so do a MOV to a dummy register and set force_writemask_all on the
>> + * MOV. LOAD_PAYLOAD will preserve that.
>> + */
>> + fs_reg dummy = fs_reg(GRF, virtual_grf_alloc(1),
>> + BRW_REGISTER_TYPE_UD);
>> + fs_inst *inst = emit(MOV(dummy, fs_reg(retype(brw_vec8_grf(1, 0),
>> + BRW_REGISTER_TYPE_UD))));
>> + inst->force_writemask_all = true;
>> + payload_sources[0] = dummy;
>> +
>> + memcpy(&payload_sources[1], sources, length * sizeof sources[0]);
>> + emit(LOAD_PAYLOAD(payload, payload_sources, length + 1));
>> +
>> + inst = emit(SHADER_OPCODE_URB_WRITE_SIMD8, reg_undef, payload);
>> + inst->eot = last;
>> + inst->mlen = length + 1;
>> + inst->offset = urb_offset;
>> + urb_offset = slot + 1;
>> + length = 0;
>> + flush = false;
>> + }
>> + }
>> +}
>> +
>> +void
>> fs_visitor::resolve_ud_negate(fs_reg *reg)
>> {
>> if (reg->type != BRW_REGISTER_TYPE_UD ||
>> @@ -3437,6 +3711,25 @@ fs_visitor::fs_visitor(struct brw_context *brw,
>> init();
>> }
>>
>> +fs_visitor::fs_visitor(struct brw_context *brw,
>> + void *mem_ctx,
>> + const struct brw_vs_prog_key *key,
>> + struct brw_vs_prog_data *prog_data,
>> + struct gl_shader_program *shader_prog,
>> + struct gl_vertex_program *cp,
>> + unsigned dispatch_width)
>> + : backend_visitor(brw, shader_prog, &cp->Base, &prog_data->base.base,
>> + MESA_SHADER_VERTEX),
>> + reg_null_f(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_F)),
>> + reg_null_d(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_D)),
>> + reg_null_ud(retype(brw_null_vec(dispatch_width), BRW_REGISTER_TYPE_UD)),
>> + key(key), prog_data(&prog_data->base.base),
>> + dispatch_width(dispatch_width)
>> +{
>> + this->mem_ctx = mem_ctx;
>> + init();
>> +}
>> +
>> void
>> fs_visitor::init()
>> {
>>
More information about the mesa-dev
mailing list