[Mesa-dev] [PATCH v2 39/82] i965/vec4: Implement unsized array's length calculation
Iago Toral Quiroga
itoral at igalia.com
Wed Jun 3 00:01:29 PDT 2015
From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
Note that Skylake requires a header in the sampler message, so this
implementation will need some tweaks before it works there.
Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
---
src/glsl/lower_ubo_reference.cpp | 182 +++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_defines.h | 3 +
src/mesa/drivers/dri/i965/brw_shader.cpp | 3 +
src/mesa/drivers/dri/i965/brw_vec4.cpp | 1 +
src/mesa/drivers/dri/i965/brw_vec4.h | 6 +
src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 31 ++++
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 46 +++++-
7 files changed, 270 insertions(+), 2 deletions(-)
diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 58e6921..77c4384 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -165,6 +165,16 @@ public:
bool row_major, int matrix_columns,
unsigned write_mask);
+ ir_visitor_status visit_enter(class ir_expression *); /* lowers length() operands, then runs rvalue_visit() */
+ void check_ssbo_unsized_array_length_expression(class ir_expression *);
+ void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
+ /* Helpers used by the two checks above: process_* gathers the offset and
+  * stride of the unsized array and returns the replacement expression that
+  * emit_* builds; calculate_* returns the stride of one array element.
+  */
+ ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, ir_dereference *, ir_variable *);
+ ir_expression *emit_ssbo_unsized_array_length(ir_variable *base_offset,
+ unsigned int deref_offset,
+ unsigned int unsized_array_stride);
+ unsigned calculate_unsized_array_stride(ir_dereference *deref);
+
void *mem_ctx;
struct gl_shader *shader;
struct gl_uniform_buffer_variable *ubo_var;
@@ -659,6 +669,177 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
row_major, matrix_columns, write_mask);
}
+/**
+ * Expression hook: first lower any unsized-SSBO-array length() operands of
+ * \p ir, then hand the node to the generic rvalue visit and forward its
+ * status.
+ */
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+   this->check_ssbo_unsized_array_length_expression(ir);
+
+   const ir_visitor_status status = rvalue_visit(ir);
+   return status;
+}
+
+/**
+ * Lower length() calls on unsized SSBO arrays that appear as direct operands
+ * of \p former_ir.
+ *
+ * Each operand that is an ir_unop_ssbo_unsized_array_length expression whose
+ * argument dereferences an unsized array inside a shader storage block is
+ * replaced by the expression returned from
+ * process_ssbo_unsized_array_length(), and the old unop is deleted.  Operands
+ * that do not qualify are left as they are.
+ */
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *former_ir)
+{
+   if (former_ir->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+      /* Don't replace this unop if it is found alone. It is going to be
+       * removed by the optimization passes or replaced if it is part of
+       * an ir_assignment or another ir_expression.
+       */
+      return;
+   }
+
+   /* Scan all four operand slots; the NULL test skips the unused ones. */
+   for (unsigned i = 0; i < 4; i++) {
+      ir_rvalue *operand = former_ir->operands[i];
+      if (!operand || operand->ir_type != ir_type_expression)
+         continue;
+
+      ir_expression *ir = (ir_expression *) operand;
+      if (ir->operation != ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+         continue;
+
+      /* From here on, an operand that cannot be lowered is skipped with
+       * "continue" rather than "return": bailing out of the whole function
+       * would leave any later length() operand of the same expression
+       * unlowered.
+       */
+      ir_rvalue *rvalue = ir->operands[0]->as_rvalue();
+      if (!rvalue || !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+         continue;
+
+      ir_dereference *deref = ir->operands[0]->as_dereference();
+      if (!deref)
+         continue;
+
+      ir_variable *var = ir->operands[0]->variable_referenced();
+      if (!var || !var->is_in_shader_storage_block())
+         continue;
+
+      /* Now replace the unop instruction for the triop */
+      ir_expression *temp = process_ssbo_unsized_array_length(&rvalue, deref, var);
+      delete ir;
+      former_ir->operands[i] = temp;
+   }
+}
+
+/**
+ * Assignment counterpart of the expression check: when the right-hand side
+ * of \p ir is itself an ir_unop_ssbo_unsized_array_length applied to a
+ * dereference of an unsized array in a shader storage block, swap it for the
+ * lowered expression built by process_ssbo_unsized_array_length() and delete
+ * the old unop.  Any other right-hand side is left alone.
+ */
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
+{
+   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
+      return;
+
+   ir_expression *length_expr = (ir_expression *) ir->rhs;
+   if (length_expr->operation != ir_expression_operation(ir_unop_ssbo_unsized_array_length))
+      return;
+
+   /* The argument must be an unsized array... */
+   ir_rvalue *array = length_expr->operands[0]->as_rvalue();
+   const bool is_unsized_array =
+      array && array->type->is_array() && array->type->is_unsized_array();
+   if (!is_unsized_array)
+      return;
+
+   /* ...reached through a dereference... */
+   ir_dereference *array_deref = length_expr->operands[0]->as_dereference();
+   if (!array_deref)
+      return;
+
+   /* ...of a variable that lives in a shader storage block. */
+   ir_variable *block_var = length_expr->operands[0]->variable_referenced();
+   if (!block_var || !block_var->is_in_shader_storage_block())
+      return;
+
+   /* Replace the unop with the lowered triop. */
+   ir_expression *lowered =
+      process_ssbo_unsized_array_length(&array, array_deref, block_var);
+   delete length_expr;
+   ir->rhs = lowered;
+}
+
+/**
+ * Build the int-typed ir_triop_ssbo_unsized_array_length expression.
+ *
+ * Its three operands are a clone of this->uniform_block, the array offset
+ * (\p base_offset plus the constant \p deref_offset) and the constant
+ * \p unsized_array_stride.  Everything is allocated out of mem_ctx.
+ */
+ir_expression *
+lower_ubo_reference_visitor::emit_ssbo_unsized_array_length(ir_variable *base_offset,
+                                                            unsigned int deref_offset,
+                                                            unsigned int unsized_array_stride)
+{
+   ir_constant *const_part = new(mem_ctx) ir_constant(deref_offset);
+   ir_rvalue *array_offset = add(base_offset, const_part);
+
+   ir_rvalue *array_stride = new(mem_ctx) ir_constant(unsized_array_stride);
+
+   ir_rvalue *block = this->uniform_block->clone(mem_ctx, NULL);
+
+   ir_expression *length =
+      new(mem_ctx) ir_expression(ir_triop_ssbo_unsized_array_length,
+                                 glsl_type::int_type,
+                                 block, array_offset, array_stride);
+   return length;
+}
+
+/**
+ * Compute the stride, in the units returned by std140_size(), between
+ * consecutive elements of the unsized array reached through \p deref.
+ *
+ * The stride is the element type's std140_size() rounded up to a multiple
+ * of 16.
+ *
+ * \param deref  either a variable dereference of the array itself, or a
+ *               record dereference whose record's last field is the array.
+ * \return the stride; 0 for any other dereference kind when assert() is
+ *         compiled out.
+ */
+unsigned
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref)
+{
+   const struct glsl_type *unsized_array_type = NULL;
+   bool array_row_major = false;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_variable:
+   {
+      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
+      /* An unsized array can be sized by other lowering passes, so pick
+       * the first field of the array which has the data type of the unsized
+       * array.
+       */
+      unsized_array_type = deref_var->var->type->fields.array;
+
+      /* Whether or not the field is row-major (because it might be a
+       * bvec2 or something) does not affect the array itself. We need
+       * to know whether an array element in its entirety is row-major.
+       */
+      array_row_major = is_dereferenced_thing_row_major(deref_var);
+      break;
+   }
+   case ir_type_dereference_record:
+   {
+      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
+
+      /* The record must itself be a dereference; check instead of
+       * dereferencing a possibly-NULL as_dereference() result.
+       */
+      ir_dereference *interface_deref = deref_record->record->as_dereference();
+      assert(interface_deref != NULL);
+
+      const struct glsl_type *deref_record_type = interface_deref->type;
+      unsigned record_length = deref_record_type->length;
+      assert(record_length > 0);
+
+      /* Unsized array is always the last element of the interface */
+      unsized_array_type =
+         deref_record_type->fields.structure[record_length - 1].type->fields.array;
+
+      array_row_major = is_dereferenced_thing_row_major(deref_record);
+      break;
+   }
+   default:
+      assert(!"Not reached");
+      return 0;
+   }
+
+   /* Shared by both cases: element size, padded to a multiple of 16. */
+   unsigned array_stride = unsized_array_type->std140_size(array_row_major);
+   return glsl_align(array_stride, 16);
+}
+
+/**
+ * Build the lowered replacement for a length() call on an unsized SSBO array.
+ *
+ * \param rvalue  the array operand of the original length unop; its ralloc
+ *                parent becomes this visitor's mem_ctx.
+ * \param deref   the same operand, as a dereference.
+ * \param var     the variable that dereference refers to.
+ * \return a new ir_triop_ssbo_unsized_array_length expression.  It is not
+ *         inserted anywhere here; the caller installs it in place of the
+ *         unop.
+ *
+ * Side effect: a uint temporary holding the non-constant part of the array
+ * offset is declared and assigned immediately before base_ir.
+ */
+ir_expression *
+lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
+                                                               ir_dereference *deref,
+                                                               ir_variable *var)
+{
+   mem_ctx = ralloc_parent(*rvalue);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   bool is_shader_storage;
+   unsigned unsized_array_stride = calculate_unsized_array_stride(deref);
+
+   /* Compute the offset to the start of the dereference as well as other
+    * information we need to configure the length.  row_major and
+    * matrix_columns are only needed as out-parameters of the call.
+    */
+   setup_for_load_or_write(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns,
+                           &is_shader_storage);
+   assert(is_shader_storage);
+
+   /* Keep the non-constant part of the offset in a temporary.  The callers
+    * splice the returned expression into the instruction being visited
+    * (assumed to be base_ir), so the temporary has to be declared and
+    * assigned *before* that instruction.  Two insert_before() calls on the
+    * same node keep the declaration ahead of the assignment; insert_after()
+    * would put the assignment ahead of the declaration and both behind the
+    * instruction that reads the value.
+    */
+   ir_variable *write_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
+                                                        "ssbo_length_temp_offset",
+                                                        ir_var_temporary);
+   base_ir->insert_before(write_offset);
+   base_ir->insert_before(assign(write_offset, offset));
+
+   return emit_ssbo_unsized_array_length(write_offset,
+                                         const_offset,
+                                         unsized_array_stride);
+}
+
void
lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir)
{
@@ -698,6 +879,7 @@ lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir)
ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
+ check_ssbo_unsized_array_length_assignment(ir); /* may replace ir->rhs when it is a length() of an unsized SSBO array */
check_for_ssbo_write(ir);
return rvalue_visit(ir);
}
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f6da305..29a40c9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -976,6 +976,9 @@ enum opcode {
VS_OPCODE_PULL_CONSTANT_LOAD,
VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+ VS_OPCODE_UNSIZED_ARRAY_LENGTH,
+
VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
/**
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6222d52..136b2c9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -598,6 +598,9 @@ brw_instruction_name(enum opcode op)
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
return "set_simd4x2_header_gen9";
+ case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
+ return "vs_unsized_array_length";
+
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
return "unpack_flags_simd4x2";
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index bcca93b..3f66b17 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -326,6 +326,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
+ case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
return inst->header_size;
default:
unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index e6b356d..5a6c66f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -507,6 +507,12 @@ private:
struct brw_reg offset);
void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
struct brw_reg dst);
+
+ void generate_unsized_array_length(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src,
+ struct brw_reg index);
+
void generate_unpack_flags(struct brw_reg dst);
struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a2b0685..7a4bbb4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1025,6 +1025,32 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
}
void
+vec4_generator::generate_unsized_array_length(vec4_instruction *inst,
+                                              struct brw_reg dst,
+                                              struct brw_reg src,
+                                              struct brw_reg surf_index)
+{
+   /* Emits a SIMD4x2 sampler RESINFO message against the surface named by
+    * the immediate UD in surf_index, with a one-register SINT32 response
+    * written to dst, and records that surface as used.
+    */
+   assert(brw->gen >= 7);
+   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   const unsigned surface = surf_index.dw1.ud;
+   const bool has_header = inst->header_size > 0;
+
+   brw_SAMPLE(p, dst, inst->base_mrf, src,
+              surface,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              1, /* response length */
+              inst->mlen,
+              has_header,
+              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surface);
+}
+
+void
vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg surf_index,
@@ -1397,6 +1423,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
generate_set_simd4x2_header_gen9(inst, dst);
break;
+
+ case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
+ generate_unsized_array_length(inst, dst, src[0], src[1]);
+ break;
+
case GS_OPCODE_URB_WRITE:
generate_gs_urb_write(inst);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 389d6b4..95f6209 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1931,9 +1931,51 @@ vec4_visitor::visit(ir_expression *ir)
emit(BFE(result_dst, op[2], op[1], op[0]));
break;
- case ir_triop_ssbo_unsized_array_length:
- unreachable("not reached: not implemented");
+ case ir_triop_ssbo_unsized_array_length: { /* operands: [0] block index, [1] array offset, [2] array stride */
+ ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+ unsigned ubo_index = const_uniform_block->value.u[0]; /* NOTE(review): no NULL check; assumes operand 0 is always a constant -- confirm */
+ ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+ int const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; /* NOTE(review): a non-constant offset is silently treated as 0 */
+ ir_constant *const_stride_ir = ir->operands[2]->as_constant();
+ int unsized_array_stride = const_stride_ir ? const_stride_ir->value.u[0] : 1; /* falls back to 1 when the stride is not a constant */
+
+ assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+ src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+ ubo_index);
+
+ dst_reg buffer_size = dst_reg(this, ir->type);
+
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(
+ VS_OPCODE_UNSIZED_ARRAY_LENGTH, buffer_size);
+
+ inst->base_mrf = 2;
+ inst->mlen = 1; /* always at least one */
+ inst->src[1] = src_reg(surf_index);
+
+ /* MRF for the first parameter: the single payload register, at
+  * inst->base_mrf, gets the constant 0 written to its .x channel.
+  */
+ src_reg lod = src_reg(0);
+ int param_base = inst->base_mrf;
+ int writemask = WRITEMASK_X;
+ emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+ emit(inst);
+
+ /* array.length() =
+ max((buffer_object_size - offset_of_array) / stride_of_array, 0) */
+ emit(ADD(buffer_size, src_reg(buffer_size), brw_imm_d(-const_offset))); /* buffer_size -= offset_of_array */
+
+ assert(unsized_array_stride > 0);
+
+ src_reg stride = src_reg(unsized_array_stride);
+ dst_reg temp = dst_reg(this, glsl_type::int_type);
+ emit_math(SHADER_OPCODE_INT_QUOTIENT,
+ temp,
+ src_reg(buffer_size),
+ stride);
+ emit_minmax(BRW_CONDITIONAL_GE, result_dst, src_reg(temp), brw_imm_d(0)); /* presumably the max(..., 0) of the formula above -- confirm GE selects the larger value */
break;
+ }
case ir_triop_vector_insert:
unreachable("should have been lowered by lower_vector_insert");
--
1.9.1
More information about the mesa-dev
mailing list