[Mesa-dev] [PATCH v2 39/82] i965/vec4: Implement unsized array's length calculation

Iago Toral Quiroga itoral at igalia.com
Wed Jun 3 00:01:29 PDT 2015


From: Samuel Iglesias Gonsalvez <siglesias at igalia.com>

Note that Skylake needs a header in the sampler message, so this
will need some tweaks to work there.

Signed-off-by: Samuel Iglesias Gonsalvez <siglesias at igalia.com>
---
 src/glsl/lower_ubo_reference.cpp                 | 182 +++++++++++++++++++++++
 src/mesa/drivers/dri/i965/brw_defines.h          |   3 +
 src/mesa/drivers/dri/i965/brw_shader.cpp         |   3 +
 src/mesa/drivers/dri/i965/brw_vec4.cpp           |   1 +
 src/mesa/drivers/dri/i965/brw_vec4.h             |   6 +
 src/mesa/drivers/dri/i965/brw_vec4_generator.cpp |  31 ++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   |  46 +++++-
 7 files changed, 270 insertions(+), 2 deletions(-)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index 58e6921..77c4384 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -165,6 +165,16 @@ public:
                              bool row_major, int matrix_columns,
                              unsigned write_mask);
 
+   ir_visitor_status visit_enter(class ir_expression *);
+   void check_ssbo_unsized_array_length_expression(class ir_expression *);
+   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);
+
+   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, ir_dereference *, ir_variable *);
+   ir_expression *emit_ssbo_unsized_array_length(ir_variable *base_offset,
+                                                 unsigned int deref_offset,
+                                                 unsigned int unsized_array_stride);
+   unsigned calculate_unsized_array_stride(ir_dereference *deref);
+
    void *mem_ctx;
    struct gl_shader *shader;
    struct gl_uniform_buffer_variable *ubo_var;
@@ -659,6 +669,177 @@ lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref,
                         row_major, matrix_columns, write_mask);
 }
 
+ir_visitor_status
+lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
+{
+   check_ssbo_unsized_array_length_expression(ir);
+   return rvalue_visit(ir);
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *former_ir)
+{
+   if (former_ir->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+         /* Don't replace this unop if it is found alone. It is going to be
+          * removed by the optimization passes or replaced if it is part of
+          * an ir_assignment or another ir_expression.
+          */
+         return;
+   }
+
+   for (unsigned i = 0; i < 4; i++) {
+      if (!former_ir->operands[i] || former_ir->operands[i]->ir_type != ir_type_expression)
+         continue;
+      ir_expression *ir = (ir_expression *) former_ir->operands[i];
+      if (ir->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+         ir_rvalue *rvalue = ir->operands[0]->as_rvalue();
+         if (!rvalue || !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+            return;
+
+         ir_dereference *deref = ir->operands[0]->as_dereference();
+         if (!deref)
+            return;
+
+         ir_variable *var = ir->operands[0]->variable_referenced();
+         if (!var || !var->is_in_shader_storage_block())
+            return;
+         /* Now replace the unop instruction with the triop */
+         ir_expression *temp = process_ssbo_unsized_array_length(&rvalue, deref, var);
+         delete ir;
+         former_ir->operands[i] = temp;
+      }
+   }
+}
+
+void
+lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
+{
+   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
+      return;
+
+   ir_expression *expr = (ir_expression *) ir->rhs;
+   if (expr->operation == ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
+      ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
+      if (!rvalue || !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
+         return;
+
+      ir_dereference *deref = expr->operands[0]->as_dereference();
+      if (!deref)
+         return;
+
+      ir_variable *var = expr->operands[0]->variable_referenced();
+      if (!var || !var->is_in_shader_storage_block())
+         return;
+      /* Now replace the unop instruction with the triop */
+      ir_expression *temp = process_ssbo_unsized_array_length(&rvalue, deref, var);
+      delete expr;
+      ir->rhs = temp;
+      return;
+   }
+   return;
+}
+
+ir_expression *
+lower_ubo_reference_visitor::emit_ssbo_unsized_array_length(ir_variable *base_offset,
+                                                            unsigned int deref_offset,
+                                                            unsigned int unsized_array_stride)
+{
+   ir_rvalue *offset =
+      add(base_offset, new(mem_ctx) ir_constant(deref_offset));
+   ir_rvalue *stride = new(mem_ctx) ir_constant(unsized_array_stride);
+   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
+   return new(mem_ctx) ir_expression(ir_triop_ssbo_unsized_array_length,
+                                     glsl_type::int_type, block_ref, offset, stride);
+}
+
+unsigned
+lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref)
+{
+   unsigned array_stride = 0;
+
+   switch (deref->ir_type) {
+   case ir_type_dereference_variable:
+   {
+      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
+      const struct glsl_type *unsized_array_type = NULL;
+      /* An unsized array can be sized by other lowering passes, so take
+       * the element type of the array, which is the data type of the
+       * elements of the unsized array.
+       */
+      unsized_array_type = deref_var->var->type->fields.array;
+
+      /* Whether or not the field is row-major (because it might be a
+       * bvec2 or something) does not affect the array itself. We need
+       * to know whether an array element in its entirety is row-major.
+       */
+      const bool array_row_major =
+         is_dereferenced_thing_row_major(deref_var);
+
+      array_stride = unsized_array_type->std140_size(array_row_major);
+      array_stride = glsl_align(array_stride, 16);
+      break;
+   }
+   case ir_type_dereference_record:
+   {
+      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
+      const struct glsl_type *deref_record_type =
+         deref_record->record->as_dereference()->type;
+      unsigned record_length = deref_record_type->length;
+      /* The unsized array is always the last field of the interface */
+      const struct glsl_type *unsized_array_type =
+         deref_record_type->fields.structure[record_length - 1].type->fields.array;
+
+      const bool array_row_major =
+         is_dereferenced_thing_row_major(deref_record);
+      array_stride = unsized_array_type->std140_size(array_row_major);
+      array_stride = glsl_align(array_stride, 16);
+      break;
+   }
+   default:
+      assert(!"Not reached");
+   }
+   return array_stride;
+}
+
+ir_expression *
+lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
+                                                               ir_dereference *deref,
+                                                               ir_variable *var)
+{
+   mem_ctx = ralloc_parent(*rvalue);
+
+   ir_rvalue *offset = NULL;
+   unsigned const_offset;
+   bool row_major;
+   int matrix_columns;
+   bool is_shader_storage;
+   unsigned unsized_array_stride = calculate_unsized_array_stride(deref);
+
+   /* Compute the offset to the start of the dereference as well as other
+    * information we need to compute the length.
+    */
+   setup_for_load_or_write(var, deref,
+                           &offset, &const_offset,
+                           &row_major, &matrix_columns,
+                           &is_shader_storage);
+   assert(is_shader_storage);
+
+   /* Now that we've calculated the offset to the start of the
+    * dereference, store it in a temporary and emit the length expression.
+    */
+   ir_variable *write_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
+                                                        "ssbo_length_temp_offset",
+                                                        ir_var_temporary);
+   base_ir->insert_after(write_offset);
+   base_ir->insert_after(assign(write_offset, offset));
+
+   ir_expression *new_ssbo = emit_ssbo_unsized_array_length(write_offset,
+                                                            const_offset,
+                                                            unsized_array_stride);
+
+   return new_ssbo;
+}
+
 void
 lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir)
 {
@@ -698,6 +879,7 @@ lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir)
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
+   check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_write(ir);
    return rvalue_visit(ir);
 }
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index f6da305..29a40c9 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -976,6 +976,9 @@ enum opcode {
    VS_OPCODE_PULL_CONSTANT_LOAD,
    VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
    VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+
+   VS_OPCODE_UNSIZED_ARRAY_LENGTH,
+
    VS_OPCODE_UNPACK_FLAGS_SIMD4X2,
 
    /**
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 6222d52..136b2c9 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -598,6 +598,9 @@ brw_instruction_name(enum opcode op)
    case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
       return "set_simd4x2_header_gen9";
 
+   case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
+      return "vs_unsized_array_length";
+
    case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
       return "unpack_flags_simd4x2";
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index bcca93b..3f66b17 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -326,6 +326,7 @@ vec4_visitor::implied_mrf_writes(vec4_instruction *inst)
    case SHADER_OPCODE_TXS:
    case SHADER_OPCODE_TG4:
    case SHADER_OPCODE_TG4_OFFSET:
+   case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
       return inst->header_size;
    default:
       unreachable("not reached");
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index e6b356d..5a6c66f 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -507,6 +507,12 @@ private:
                                          struct brw_reg offset);
    void generate_set_simd4x2_header_gen9(vec4_instruction *inst,
                                          struct brw_reg dst);
+
+   void generate_unsized_array_length(vec4_instruction *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg src,
+                                      struct brw_reg index);
+
    void generate_unpack_flags(struct brw_reg dst);
 
    struct brw_context *brw;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
index a2b0685..7a4bbb4 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp
@@ -1025,6 +1025,32 @@ vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
 }
 
 void
+vec4_generator::generate_unsized_array_length(vec4_instruction *inst,
+                                                 struct brw_reg dst,
+                                                 struct brw_reg src,
+                                                 struct brw_reg surf_index)
+{
+   assert(brw->gen >= 7);
+   assert(surf_index.type == BRW_REGISTER_TYPE_UD &&
+          surf_index.file == BRW_IMMEDIATE_VALUE);
+
+   brw_SAMPLE(p,
+              dst,
+              inst->base_mrf,
+              src,
+              surf_index.dw1.ud,
+              0,
+              GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO,
+              1, /* response length */
+              inst->mlen,
+              inst->header_size > 0,
+              BRW_SAMPLER_SIMD_MODE_SIMD4X2,
+              BRW_SAMPLER_RETURN_FORMAT_SINT32);
+
+   brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+void
 vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
                                                  struct brw_reg dst,
                                                  struct brw_reg surf_index,
@@ -1397,6 +1423,11 @@ vec4_generator::generate_code(const cfg_t *cfg)
          generate_set_simd4x2_header_gen9(inst, dst);
          break;
 
+
+      case VS_OPCODE_UNSIZED_ARRAY_LENGTH:
+         generate_unsized_array_length(inst, dst, src[0], src[1]);
+         break;
+
       case GS_OPCODE_URB_WRITE:
          generate_gs_urb_write(inst);
          break;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 389d6b4..95f6209 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1931,9 +1931,51 @@ vec4_visitor::visit(ir_expression *ir)
       emit(BFE(result_dst, op[2], op[1], op[0]));
       break;
 
-   case ir_triop_ssbo_unsized_array_length:
-      unreachable("not reached: not implemented");
+   case ir_triop_ssbo_unsized_array_length: {
+      ir_constant *const_uniform_block = ir->operands[0]->as_constant();
+      unsigned ubo_index = const_uniform_block->value.u[0];
+      ir_constant *const_offset_ir = ir->operands[1]->as_constant();
+      int const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
+      ir_constant *const_stride_ir = ir->operands[2]->as_constant();
+      int unsized_array_stride = const_stride_ir ? const_stride_ir->value.u[0] : 1;
+
+      assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
+
+      src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
+                                   ubo_index);
+
+      dst_reg buffer_size = dst_reg(this, ir->type);
+
+      vec4_instruction *inst = new(mem_ctx) vec4_instruction(
+         VS_OPCODE_UNSIZED_ARRAY_LENGTH, buffer_size);
+
+      inst->base_mrf = 2;
+      inst->mlen = 1; /* always at least one */
+      inst->src[1] = src_reg(surf_index);
+
+      /* MRF for the first parameter */
+      src_reg lod = src_reg(0);
+      int param_base = inst->base_mrf;
+      int writemask = WRITEMASK_X;
+      emit(MOV(dst_reg(MRF, param_base, glsl_type::int_type, writemask), lod));
+
+      emit(inst);
+
+      /* array.length() =
+          max((buffer_object_size - offset_of_array) / stride_of_array, 0) */
+      emit(ADD(buffer_size, src_reg(buffer_size), brw_imm_d(-const_offset)));
+
+      assert(unsized_array_stride > 0);
+
+      src_reg stride = src_reg(unsized_array_stride);
+      dst_reg temp = dst_reg(this, glsl_type::int_type);
+      emit_math(SHADER_OPCODE_INT_QUOTIENT,
+                temp,
+                src_reg(buffer_size),
+                stride);
+      emit_minmax(BRW_CONDITIONAL_GE, result_dst, src_reg(temp), brw_imm_d(0));
       break;
+   }
 
    case ir_triop_vector_insert:
       unreachable("should have been lowered by lower_vector_insert");
-- 
1.9.1



More information about the mesa-dev mailing list