<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Mon, Nov 14, 2016 at 5:41 PM, Kenneth Graunke &lt;<a href="mailto:kenneth@whitecape.org" target="_blank">kenneth@whitecape.org</a>&gt; wrote: <blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Certain built-in arrays, such as gl_ClipDistance[], gl_CullDistance[], gl_TessLevelInner[], and gl_TessLevelOuter[] are specified as scalar arrays. Normal scalar arrays are sparse - each array element usually occupies a whole vec4 slot. However, most hardware assumes these built-in arrays are tightly packed. The new var->data.compact flag indicates that a scalar array should be tightly packed, so a float[4] array would take up a single vec4 slot, and a float[8] array would take up two slots. They are still arrays, not vec4s, however. nir_lower_io will generate intrinsics using ARB_enhanced_layouts style component qualifiers. Signed-off-by: Kenneth Graunke &lt;<a href="mailto:kenneth@whitecape.org">kenneth@whitecape.org</a>&gt; --- src/compiler/glsl/glsl_to_nir.cpp | 1 + src/compiler/nir/nir.h | 7 +++++ src/compiler/nir/nir_gather_info.c | 9 ++++-- src/compiler/nir/nir_lower_indirect_derefs.c | 8 +++-- src/compiler/nir/nir_lower_io.c | 44 ++++++++++++++++++++-------- src/compiler/nir/nir_print.c | 4 ++- 6 files changed, 55 insertions(+), 18 deletions(-) diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp index 6ca760b..ed1c739 100644 --- a/src/compiler/glsl/glsl_to_nir.cpp +++ b/src/compiler/glsl/glsl_to_nir.cpp @@ -331,6 +331,7 @@ nir_visitor::visit(ir_variable *ir) var->data.explicit_index = ir->data.explicit_index; var->data.explicit_binding = ir->data.explicit_binding; var->data.has_initializer = ir->data.has_initializer; + var->data.compact = false; var->data.location_frac = ir->data.location_frac; switch (ir->data.depth_layout) { diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h index 3d46384..0b78242 100644 --- 
a/src/compiler/nir/nir.h +++ b/src/compiler/nir/nir.h @@ -230,6 +230,13 @@ typedef struct nir_variable { unsigned location_frac:2; /** + * If true, this variable represents an array of scalars that should + * be tightly packed. In other words, consecutive array elements + * should be stored one component apart, rather than one slot apart. + */ + unsigned compact:1; </blockquote><div> </div><div>Should this 1-bit integer be a bool? </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + + /** * Whether this is a fragment shader output implicitly initialized with * the previous contents of the specified render target at the * framebuffer location corresponding to this shader invocation. diff --git a/src/compiler/nir/nir_gather_info.c b/src/compiler/nir/nir_gather_info.c index 63c8a42..4d07dda 100644 --- a/src/compiler/nir/nir_gather_info.c +++ b/src/compiler/nir/nir_gather_info.c @@ -94,8 +94,11 @@ mark_whole_variable(nir_shader *shader, nir_variable *var) var->data.mode == nir_var_shader_in) is_vertex_input = true; - set_io_mask(shader, var, 0, - glsl_count_attribute_slots(type, is_vertex_input)); + const unsigned slots = + var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4) + : glsl_count_attribute_slots(type, is_vertex_input); </blockquote><div> </div><div>By using glsl_get_length(), you're assuming that all compact things are 1-D arrays with no structs. Is that your intention? If so, we should probably assert so that we catch it if we ever change this in the future. </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + + set_io_mask(shader, var, 0, slots); } static unsigned @@ -150,7 +153,7 @@ try_mask_partial_io(nir_shader *shader, nir_deref_var *deref) * here marking the entire variable as used. 
*/ if (!(glsl_type_is_matrix(type) || - (glsl_type_is_array(type) && + (glsl_type_is_array(type) && !var->data.compact && (glsl_type_is_numeric(glsl_without_array(type)) || glsl_type_is_boolean(glsl_without_array(type)))))) { diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c index 356373e..5c97dc8e 100644 --- a/src/compiler/nir/nir_lower_indirect_derefs.c +++ b/src/compiler/nir/nir_lower_indirect_derefs.c @@ -175,8 +175,12 @@ lower_indirect_block(nir_block *block, nir_builder *b, if (!deref_has_indirect(intrin->variables[0])) continue; - /* Only lower variables whose mode is in the mask */ - if (!(modes & intrin->variables[0]->var->data.mode)) + /* Only lower variables whose mode is in the mask, or compact + * array variables. (We can't handle indirects on tightly packed + * scalar arrays, so we need to lower them regardless.) </blockquote><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + */ + if (!(modes & intrin->variables[0]->var->data.mode) && + !intrin->variables[0]->var->data.compact) continue; b->cursor = nir_before_instr(&intrin->instr); diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c index a7e7f14..6628947 100644 --- a/src/compiler/nir/nir_lower_io.c +++ b/src/compiler/nir/nir_lower_io.c @@ -88,7 +88,8 @@ nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage) static nir_ssa_def * get_io_offset(nir_builder *b, nir_deref_var *deref, nir_ssa_def **vertex_index, - int (*type_size)(const struct glsl_type *)) + int (*type_size)(const struct glsl_type *), + unsigned *component) { nir_deref *tail = &deref->deref; @@ -106,6 +107,19 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, *vertex_index = vtx; } + if (deref->var->data.compact) { + assert(tail->child->deref_type == nir_deref_type_array); + assert(glsl_type_is_scalar(glsl_without_array(deref->var->type))); + nir_deref_array *deref_array = 
nir_deref_as_array(tail->child); + /* We always lower indirect dereferences for "compact" array vars. */ + assert(deref_array->deref_array_type == nir_deref_array_type_direct); + + const unsigned total_offset = *component + deref_array->base_offset; + const unsigned slot_offset = total_offset / 4; + *component = total_offset % 4; + return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset); + } + /* Just emit code and let constant-folding go to town */ nir_ssa_def *offset = nir_imm_int(b, 0); @@ -143,7 +157,8 @@ get_io_offset(nir_builder *b, nir_deref_var *deref, static nir_intrinsic_instr * lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *vertex_index, nir_ssa_def *offset) + nir_ssa_def *vertex_index, nir_ssa_def *offset, + unsigned component) { const nir_shader *nir = state->builder.shader; nir_variable *var = intrin->variables[0]->var; @@ -194,7 +209,7 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_set_base(load, var->data.driver_location); if (mode == nir_var_shader_in || mode == nir_var_shader_out) - nir_intrinsic_set_component(load, var->data.location_frac); + nir_intrinsic_set_component(load, component); if (load->intrinsic == nir_intrinsic_load_uniform) nir_intrinsic_set_range(load, state->type_size(var->type)); @@ -214,7 +229,8 @@ lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *vertex_index, nir_ssa_def *offset) + nir_ssa_def *vertex_index, nir_ssa_def *offset, + unsigned component) { nir_variable *var = intrin->variables[0]->var; nir_variable_mode mode = var->data.mode; @@ -236,7 +252,7 @@ lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state, nir_intrinsic_set_base(store, var->data.driver_location); if (mode == nir_var_shader_out) - nir_intrinsic_set_component(store, var->data.location_frac); + 
nir_intrinsic_set_component(store, component); nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin)); @@ -289,7 +305,7 @@ lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state, static nir_intrinsic_instr * lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, - nir_ssa_def *offset) + nir_ssa_def *offset, unsigned component) { nir_variable *var = intrin->variables[0]->var; @@ -297,7 +313,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, /* Ignore interpolateAt() for flat variables - flat is flat. */ if (var->data.interpolation == INTERP_MODE_FLAT) - return lower_load(intrin, state, NULL, offset); + return lower_load(intrin, state, NULL, offset, component); nir_intrinsic_op bary_op; switch (intrin->intrinsic) { @@ -333,7 +349,7 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state, load->num_components = intrin->num_components; nir_intrinsic_set_base(load, var->data.driver_location); - nir_intrinsic_set_component(load, var->data.location_frac); + nir_intrinsic_set_component(load, component); load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa); load->src[1] = nir_src_for_ssa(offset); @@ -398,20 +414,23 @@ nir_lower_io_block(nir_block *block, nir_ssa_def *offset; nir_ssa_def *vertex_index = NULL; + unsigned component_offset = var->data.location_frac; offset = get_io_offset(b, intrin->variables[0], per_vertex ? 
&vertex_index : NULL, - state->type_size); + state->type_size, &component_offset); nir_intrinsic_instr *replacement; switch (intrin->intrinsic) { case nir_intrinsic_load_var: - replacement = lower_load(intrin, state, vertex_index, offset); + replacement = lower_load(intrin, state, vertex_index, offset, + component_offset); break; case nir_intrinsic_store_var: - replacement = lower_store(intrin, state, vertex_index, offset); + replacement = lower_store(intrin, state, vertex_index, offset, + component_offset); break; case nir_intrinsic_var_atomic_add: @@ -432,7 +451,8 @@ nir_lower_io_block(nir_block *block, case nir_intrinsic_interp_var_at_sample: case nir_intrinsic_interp_var_at_offset: assert(vertex_index == NULL); - replacement = lower_interpolate_at(intrin, state, offset); + replacement = lower_interpolate_at(intrin, state, offset, + component_offset); break; default: diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c index 242bffb..475e3f2 100644 --- a/src/compiler/nir/nir_print.c +++ b/src/compiler/nir/nir_print.c @@ -432,9 +432,11 @@ print_var_decl(nir_variable *var, print_state *state) loc = buf; } - fprintf(fp, " (%s, %u)", loc, var->data.driver_location); + fprintf(fp, " (%s, %u)%s", loc, var->data.driver_location, + var->data.compact ? " compact" : ""); } + </blockquote><div> </div><div>Stray newline </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> if (var->constant_initializer) { fprintf(fp, " = { "); print_constant(var->constant_initializer, var->type, state); -- 2.10.2 _______________________________________________ mesa-dev mailing list <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a> <a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a> </blockquote></div> </div></div>