[Mesa-dev] [PATCH 5/7] i965: Apply VS attribute workarounds in NIR.
Kenneth Graunke
kenneth at whitecape.org
Wed Jan 13 20:33:15 PST 2016
This patch re-implements the pre-Haswell VS attribute workarounds.
Instead of emitting shader code in the vec4 backend, we now simply
call a NIR pass to emit the necessary code.
This simplifies the vec4 backend. Beyond deleting code, it removes
the primary use of ATTR as a destination. It also eliminates the
requirement that the vec4 VS backend express the ATTR file in terms
of VERT_ATTRIB_* locations, giving us a bit more flexibility.
This approach is a little different: rather than munging the attributes
at the top, we emit code to fix them up when they're accessed. However,
we run the optimizer afterwards, so CSE should eliminate the redundant
math. It may even be able to fuse it with other calculations based on
the input value.
shader-db does not handle non-default NOS settings, so I have no
statistics about this patch.
Note that the scalar backend does not implement VS attribute
workarounds, as they are unnecessary on hardware which allows SIMD8 VS.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/Makefile.sources | 1 +
src/mesa/drivers/dri/i965/brw_nir.c | 19 ++-
src/mesa/drivers/dri/i965/brw_nir.h | 7 +-
.../dri/i965/brw_nir_attribute_workarounds.c | 178 +++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_shader.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_vec4.cpp | 3 +
src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp | 2 +-
src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp | 109 -------------
8 files changed, 204 insertions(+), 117 deletions(-)
create mode 100644 src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources
index 5aeeca5..c654f94 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -42,6 +42,7 @@ i965_compiler_FILES = \
brw_nir.h \
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
+ brw_nir_attribute_workarounds.c \
brw_nir_opt_peephole_ffma.c \
brw_nir_uniforms.cpp \
brw_packed_float.c \
diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
index 935529a..cdecc3d 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.c
+++ b/src/mesa/drivers/dri/i965/brw_nir.c
@@ -205,7 +205,9 @@ remap_patch_urb_offsets(nir_block *block, void *closure)
static void
brw_nir_lower_inputs(nir_shader *nir,
const struct brw_device_info *devinfo,
- bool is_scalar)
+ bool is_scalar,
+ bool use_legacy_snorm_formula,
+ const uint8_t *vs_attrib_wa_flags)
{
switch (nir->stage) {
case MESA_SHADER_VERTEX:
@@ -225,6 +227,9 @@ brw_nir_lower_inputs(nir_shader *nir,
add_const_offset_to_base(nir, nir_var_shader_in);
+ brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
+ vs_attrib_wa_flags);
+
if (is_scalar) {
/* Finally, translate VERT_ATTRIB_* values into the actual registers.
*
@@ -497,12 +502,15 @@ brw_preprocess_nir(nir_shader *nir, bool is_scalar)
nir_shader *
brw_nir_lower_io(nir_shader *nir,
const struct brw_device_info *devinfo,
- bool is_scalar)
+ bool is_scalar,
+ bool use_legacy_snorm_formula,
+ const uint8_t *vs_attrib_wa_flags)
{
bool progress; /* Written by OPT and OPT_V */
(void)progress;
- OPT_V(brw_nir_lower_inputs, devinfo, is_scalar);
+ OPT_V(brw_nir_lower_inputs, devinfo, is_scalar,
+ use_legacy_snorm_formula, vs_attrib_wa_flags);
OPT_V(brw_nir_lower_outputs, devinfo, is_scalar);
OPT_V(nir_lower_io, nir_var_all, is_scalar ? type_size_scalar : type_size_vec4);
@@ -613,9 +621,10 @@ brw_create_nir(struct brw_context *brw,
OPT_V(nir_lower_atomics, shader_prog);
}
- if (nir->stage != MESA_SHADER_TESS_CTRL &&
+ if (nir->stage != MESA_SHADER_VERTEX &&
+ nir->stage != MESA_SHADER_TESS_CTRL &&
nir->stage != MESA_SHADER_TESS_EVAL) {
- nir = brw_nir_lower_io(nir, devinfo, is_scalar);
+ nir = brw_nir_lower_io(nir, devinfo, is_scalar, false, NULL);
}
return nir;
diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h
index 78b139b..5bfe40f 100644
--- a/src/mesa/drivers/dri/i965/brw_nir.h
+++ b/src/mesa/drivers/dri/i965/brw_nir.h
@@ -84,11 +84,16 @@ nir_shader *brw_create_nir(struct brw_context *brw,
nir_shader *brw_preprocess_nir(nir_shader *nir, bool is_scalar);
nir_shader *brw_nir_lower_io(nir_shader *nir,
const struct brw_device_info *devinfo,
- bool is_scalar);
+ bool is_scalar,
+ bool use_legacy_snorm_formula,
+ const uint8_t *vs_attrib_wa_flags);
nir_shader *brw_postprocess_nir(nir_shader *nir,
const struct brw_device_info *devinfo,
bool is_scalar);
+bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
+ bool use_legacy_snorm_formula,
+ const uint8_t *attrib_wa_flags);
nir_shader *brw_nir_apply_sampler_key(nir_shader *nir,
const struct brw_device_info *devinfo,
diff --git a/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c b/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
new file mode 100644
index 0000000..475fa1d
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_nir_attribute_workarounds.c
@@ -0,0 +1,178 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "glsl/nir/nir_builder.h"
+#include "brw_nir.h"
+#include "brw_vs.h"
+
+/**
+ * Prior to Haswell, the hardware can't natively support GL_FIXED or
+ * 2_10_10_10_REV vertex formats. This pass inserts extra shader code
+ * to produce the correct values.
+ */
+
+struct attr_wa_state {
+ nir_builder builder;
+ bool impl_progress;
+ bool use_legacy_snorm_formula;
+ const uint8_t *wa_flags;
+};
+
+static bool
+apply_attr_wa_block(nir_block *block, void *void_state)
+{
+ struct attr_wa_state *state = void_state;
+ nir_builder *b = &state->builder;
+
+ nir_foreach_instr_safe(block, instr) {
+ if (instr->type != nir_instr_type_intrinsic)
+ continue;
+
+ nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_load_input)
+ continue;
+
+ uint8_t wa_flags = state->wa_flags[intrin->const_index[0]];
+ if (wa_flags == 0)
+ continue;
+
+ b->cursor = nir_after_instr(instr);
+
+ nir_ssa_def *val = &intrin->dest.ssa;
+
+ /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
+ * come in as floating point conversions of the integer values.
+ */
+ if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
+ nir_ssa_def *comps[4];
+ nir_ssa_def *factor = nir_imm_float(b, 1.0f / 65536.0f);
+ for (int i = 0; i < val->num_components; i++) {
+ if (i < (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) {
+ comps[i] = nir_fmul(b, nir_channel(b, val, i), factor);
+ } else {
+ comps[i] = nir_channel(b, val, i);
+ }
+ }
+ val = nir_vec(b, comps, val->num_components);
+ }
+
+ /* Do sign recovery for 2101010 formats if required. */
+ if (wa_flags & BRW_ATTRIB_WA_SIGN) {
+ /* sign recovery shift: <22, 22, 22, 30> */
+ nir_ssa_def *shift = nir_imm_ivec4(b, 22, 22, 22, 30);
+ val = nir_ishr(b, nir_ishl(b, val, shift), shift);
+ }
+
+ /* Apply BGRA swizzle if required. */
+ if (wa_flags & BRW_ATTRIB_WA_BGRA) {
+ val = nir_swizzle(b, val, (unsigned[4]){2,1,0,3}, 4, true);
+ }
+
+ if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
+ /* ES 3.0 has different rules for converting signed normalized
+ * fixed-point numbers than desktop GL.
+ */
+ if ((wa_flags & BRW_ATTRIB_WA_SIGN) &&
+ !state->use_legacy_snorm_formula) {
+ /* According to equation 2.2 of the ES 3.0 specification,
+ * signed normalization conversion is done by:
+ *
+ * f = c / (2^(b-1)-1)
+ */
+ nir_ssa_def *es3_normalize_factor =
+ nir_imm_vec4(b, 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 9) - 1),
+ 1.0f / ((1 << 9) - 1), 1.0f / ((1 << 1) - 1));
+ val = nir_fmax(b,
+ nir_fmul(b, nir_i2f(b, val), es3_normalize_factor),
+ nir_imm_float(b, -1.0f));
+ } else {
+ /* The following equations are from the OpenGL 3.2 specification:
+ *
+ * 2.1 unsigned normalization
+ * f = c/(2^n-1)
+ *
+ * 2.2 signed normalization
+ * f = (2c+1)/(2^n-1)
+ *
+ * Both of these share a common divisor, which we handle by
+ * multiplying by 1 / (2^b - 1) for b = <10, 10, 10, 2>.
+ */
+ nir_ssa_def *normalize_factor =
+ nir_imm_vec4(b, 1.0f / ((1 << 10) - 1), 1.0f / ((1 << 10) - 1),
+ 1.0f / ((1 << 10) - 1), 1.0f / ((1 << 2) - 1));
+
+ if (wa_flags & BRW_ATTRIB_WA_SIGN) {
+ /* For signed normalization, the numerator is 2c+1. */
+ nir_ssa_def *two = nir_imm_float(b, 2.0f);
+ nir_ssa_def *one = nir_imm_float(b, 1.0f);
+ val = nir_fadd(b, nir_fmul(b, nir_i2f(b, val), two), one);
+ } else {
+ /* For unsigned normalization, the numerator is just c. */
+ val = nir_u2f(b, val);
+ }
+ val = nir_fmul(b, val, normalize_factor);
+ }
+ }
+
+ if (wa_flags & BRW_ATTRIB_WA_SCALE) {
+ val = (wa_flags & BRW_ATTRIB_WA_SIGN) ? nir_i2f(b, val)
+ : nir_u2f(b, val);
+ }
+
+ nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, nir_src_for_ssa(val),
+ val->parent_instr);
+ state->impl_progress = true;
+ }
+
+ return true;
+}
+
+bool
+brw_nir_apply_attribute_workarounds(nir_shader *shader,
+ bool use_legacy_snorm_formula,
+ const uint8_t *attrib_wa_flags)
+{
+ bool progress = false;
+ struct attr_wa_state state = {
+ .use_legacy_snorm_formula = use_legacy_snorm_formula,
+ .wa_flags = attrib_wa_flags,
+ };
+
+ nir_foreach_function(shader, func) {
+ if (!func->impl)
+ continue;
+
+ nir_builder_init(&state.builder, func->impl);
+ state.impl_progress = false;
+
+ nir_foreach_block(func->impl, apply_attr_wa_block, &state);
+
+ if (state.impl_progress) {
+ nir_metadata_preserve(func->impl, nir_metadata_block_index |
+ nir_metadata_dominance);
+ progress = true;
+ }
+ }
+
+ return progress;
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 0ac3f4a..b6f67d5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -1354,7 +1354,7 @@ brw_compile_tes(const struct brw_compiler *compiler,
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
nir->info.inputs_read = key->inputs_read;
nir->info.patch_inputs_read = key->patch_inputs_read;
- nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
+ nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL);
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp
index ca27066..d2c27ff 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp
@@ -1992,6 +1992,9 @@ brw_compile_vs(const struct brw_compiler *compiler, void *log_data,
nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
shader = brw_nir_apply_sampler_key(shader, compiler->devinfo, &key->tex,
is_scalar);
+ shader = brw_nir_lower_io(shader, compiler->devinfo, is_scalar,
+ use_legacy_snorm_formula,
+ key->gl_attrib_wa_flags);
shader = brw_postprocess_nir(shader, compiler->devinfo, is_scalar);
const unsigned *assembly = NULL;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
index fb6ca8e..1e6c7b8 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_tcs.cpp
@@ -516,7 +516,7 @@ brw_compile_tcs(const struct brw_compiler *compiler,
nir = brw_nir_apply_sampler_key(nir, devinfo, &key->tex, is_scalar);
nir->info.outputs_written = key->outputs_written;
nir->info.patch_outputs_written = key->patch_outputs_written;
- nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar);
+ nir = brw_nir_lower_io(nir, compiler->devinfo, is_scalar, false, NULL);
nir = brw_postprocess_nir(nir, compiler->devinfo, is_scalar);
prog_data->instances = DIV_ROUND_UP(nir->info.tcs.vertices_out, 2);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
index 1d69149..f3cfc88 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp
@@ -30,115 +30,6 @@ namespace brw {
void
vec4_vs_visitor::emit_prolog()
{
- dst_reg sign_recovery_shift;
- dst_reg normalize_factor;
- dst_reg es3_normalize_factor;
-
- for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (vs_prog_data->inputs_read & BITFIELD64_BIT(i)) {
- uint8_t wa_flags = key->gl_attrib_wa_flags[i];
- dst_reg reg(ATTR, i);
- dst_reg reg_d = reg;
- reg_d.type = BRW_REGISTER_TYPE_D;
- dst_reg reg_ud = reg;
- reg_ud.type = BRW_REGISTER_TYPE_UD;
-
- /* Do GL_FIXED rescaling for GLES2.0. Our GL_FIXED attributes
- * come in as floating point conversions of the integer values.
- */
- if (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK) {
- dst_reg dst = reg;
- dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- dst.writemask = (1 << (wa_flags & BRW_ATTRIB_WA_COMPONENT_MASK)) - 1;
- emit(MUL(dst, src_reg(dst), brw_imm_f(1.0f / 65536.0f)));
- }
-
- /* Do sign recovery for 2101010 formats if required. */
- if (wa_flags & BRW_ATTRIB_WA_SIGN) {
- if (sign_recovery_shift.file == BAD_FILE) {
- /* shift constant: <22,22,22,30> */
- sign_recovery_shift = dst_reg(this, glsl_type::uvec4_type);
- emit(MOV(writemask(sign_recovery_shift, WRITEMASK_XYZ), brw_imm_ud(22u)));
- emit(MOV(writemask(sign_recovery_shift, WRITEMASK_W), brw_imm_ud(30u)));
- }
-
- emit(SHL(reg_ud, src_reg(reg_ud), src_reg(sign_recovery_shift)));
- emit(ASR(reg_d, src_reg(reg_d), src_reg(sign_recovery_shift)));
- }
-
- /* Apply BGRA swizzle if required. */
- if (wa_flags & BRW_ATTRIB_WA_BGRA) {
- src_reg temp = src_reg(reg);
- temp.swizzle = BRW_SWIZZLE4(2,1,0,3);
- emit(MOV(reg, temp));
- }
-
- if (wa_flags & BRW_ATTRIB_WA_NORMALIZE) {
- /* ES 3.0 has different rules for converting signed normalized
- * fixed-point numbers than desktop GL.
- */
- if ((wa_flags & BRW_ATTRIB_WA_SIGN) && !use_legacy_snorm_formula) {
- /* According to equation 2.2 of the ES 3.0 specification,
- * signed normalization conversion is done by:
- *
- * f = c / (2^(b-1)-1)
- */
- if (es3_normalize_factor.file == BAD_FILE) {
- /* mul constant: 1 / (2^(b-1) - 1) */
- es3_normalize_factor = dst_reg(this, glsl_type::vec4_type);
- emit(MOV(writemask(es3_normalize_factor, WRITEMASK_XYZ),
- brw_imm_f(1.0f / ((1<<9) - 1))));
- emit(MOV(writemask(es3_normalize_factor, WRITEMASK_W),
- brw_imm_f(1.0f / ((1<<1) - 1))));
- }
-
- dst_reg dst = reg;
- dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- emit(MOV(dst, src_reg(reg_d)));
- emit(MUL(dst, src_reg(dst), src_reg(es3_normalize_factor)));
- emit_minmax(BRW_CONDITIONAL_GE, dst, src_reg(dst), brw_imm_f(-1.0f));
- } else {
- /* The following equations are from the OpenGL 3.2 specification:
- *
- * 2.1 unsigned normalization
- * f = c/(2^n-1)
- *
- * 2.2 signed normalization
- * f = (2c+1)/(2^n-1)
- *
- * Both of these share a common divisor, which is represented by
- * "normalize_factor" in the code below.
- */
- if (normalize_factor.file == BAD_FILE) {
- /* 1 / (2^b - 1) for b=<10,10,10,2> */
- normalize_factor = dst_reg(this, glsl_type::vec4_type);
- emit(MOV(writemask(normalize_factor, WRITEMASK_XYZ),
- brw_imm_f(1.0f / ((1<<10) - 1))));
- emit(MOV(writemask(normalize_factor, WRITEMASK_W),
- brw_imm_f(1.0f / ((1<<2) - 1))));
- }
-
- dst_reg dst = reg;
- dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
-
- /* For signed normalization, we want the numerator to be 2c+1. */
- if (wa_flags & BRW_ATTRIB_WA_SIGN) {
- emit(MUL(dst, src_reg(dst), brw_imm_f(2.0f)));
- emit(ADD(dst, src_reg(dst), brw_imm_f(1.0f)));
- }
-
- emit(MUL(dst, src_reg(dst), src_reg(normalize_factor)));
- }
- }
-
- if (wa_flags & BRW_ATTRIB_WA_SCALE) {
- dst_reg dst = reg;
- dst.type = brw_type_for_base_type(glsl_type::vec4_type);
- emit(MOV(dst, src_reg((wa_flags & BRW_ATTRIB_WA_SIGN) ? reg_d : reg_ud)));
- }
- }
- }
}
--
2.7.0
More information about the mesa-dev
mailing list