Mesa (main): nir: Add blend lowering pass

Fri May 7 17:49:24 UTC 2021

Module: Mesa
Branch: main
Commit: f3de2bd6c2dd4c6b4bdbd63e7e5d47f326d2f494
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f3de2bd6c2dd4c6b4bdbd63e7e5d47f326d2f494

Author: Alyssa Rosenzweig <alyssa at collabora.com>
Date:   Tue May  4 14:11:12 2021 -0400

nir: Add blend lowering pass

This pass was originally developed for Panfrost, where it passes the
relevant dEQP tests. Upstreaming so it can be extended and then shared
with:

* Asahi, for blending
* Zink, for logic ops
* Lavapipe, for advanced blending

Note that using this with MRT in a fragment shader (as non-panfrost
drivers will) has not yet been tested. Logic ops with integer
framebuffers are probably todo. It's been enough for Panfrost, will
suffice for ES2 on Asahi, and provides an upstream base for kusma's work
on advanced blending, so overall the merge is a net benefit.

v2: Remove bogus assert that the format layout is PLAIN. We need to
render R11G11B10, which Mesa reports as layout OTHER. The code is still
correct.

Signed-off-by: Alyssa Rosenzweig <alyssa at collabora.com>
Reviewed-by: Erik Faye-Lund <erik.faye-lund at collabora.com> [v1]
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10601>

---

 src/compiler/Makefile.sources      |   1 +
 src/compiler/nir/meson.build       |   1 +
 src/compiler/nir/nir_lower_blend.c | 381 +++++++++++++++++++++++++++++++++++++
 src/compiler/nir/nir_lower_blend.h |  69 +++++++
 4 files changed, 452 insertions(+)

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 73ae56eb686..d6bfa32c701 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -242,6 +242,7 @@ NIR_FILES = \
 	nir/nir_lower_array_deref_of_vec.c \
 	nir/nir_lower_atomics_to_ssbo.c \
 	nir/nir_lower_bitmap.c \
+	nir/nir_lower_blend.c \
 	nir/nir_lower_bit_size.c \
 	nir/nir_lower_bool_to_bitsize.c \
 	nir/nir_lower_bool_to_float.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index 847142de0d2..9b83c3be476 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -130,6 +130,7 @@ files_libnir = files(
   'nir_lower_array_deref_of_vec.c',
   'nir_lower_atomics_to_ssbo.c',
   'nir_lower_bitmap.c',
+  'nir_lower_blend.c',
   'nir_lower_bool_to_bitsize.c',
   'nir_lower_bool_to_float.c',
   'nir_lower_bool_to_int32.c',
diff --git a/src/compiler/nir/nir_lower_blend.c b/src/compiler/nir/nir_lower_blend.c
new file mode 100644
index 00000000000..05245337240
--- /dev/null
+++ b/src/compiler/nir/nir_lower_blend.c
@@ -0,0 +1,381 @@
+/*
+ * Copyright (C) 2019-2021 Collabora, Ltd.
+ * Copyright (C) 2019 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file
+ *
+ * Implements the fragment pipeline (blending and writeout) in software, to be
+ * run as a dedicated "blend shader" stage on Midgard/Bifrost, or as a fragment
+ * shader variant on typical GPUs. This pass is useful if hardware lacks
+ * fixed-function blending in part or in full.
+ */
+
+#include "compiler/nir/nir.h"
+#include "compiler/nir/nir_builder.h"
+#include "compiler/nir/nir_format_convert.h"
+#include "nir_lower_blend.h"
+
+/* Given processed factors, combine them per a blend function */
+
+static nir_ssa_def *
+nir_blend_func(
+   nir_builder *b,
+   enum blend_func func,
+   nir_ssa_def *src, nir_ssa_def *dst)
+{
+   switch (func) {
+   case BLEND_FUNC_ADD:
+      return nir_fadd(b, src, dst);
+   case BLEND_FUNC_SUBTRACT:
+      return nir_fsub(b, src, dst);
+   case BLEND_FUNC_REVERSE_SUBTRACT:
+      return nir_fsub(b, dst, src);
+   case BLEND_FUNC_MIN:
+      return nir_fmin(b, src, dst);
+   case BLEND_FUNC_MAX:
+      return nir_fmax(b, src, dst);
+   }
+
+   unreachable("Invalid blend function");
+}
+
+/* Does this blend function multiply by a blend factor? */
+
+static bool
+nir_blend_factored(enum blend_func func)
+{
+   switch (func) {
+   case BLEND_FUNC_ADD:
+   case BLEND_FUNC_SUBTRACT:
+   case BLEND_FUNC_REVERSE_SUBTRACT:
+      return true;
+   default:
+      return false;
+   }
+}
+
+/* Compute a src_alpha_saturate factor */
+static nir_ssa_def *
+nir_alpha_saturate(
+   nir_builder *b,
+   nir_ssa_def *src, nir_ssa_def *dst,
+   unsigned chan)
+{
+   nir_ssa_def *Asrc = nir_channel(b, src, 3);
+   nir_ssa_def *Adst = nir_channel(b, dst, 3);
+   nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, src->bit_size);
+   nir_ssa_def *Adsti = nir_fsub(b, one, Adst);
+
+   return (chan < 3) ? nir_fmin(b, Asrc, Adsti) : one;
+}
+
+/* Returns a scalar single factor, unmultiplied */
+
+static nir_ssa_def *
+nir_blend_factor_value(
+   nir_builder *b,
+   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
+   unsigned chan,
+   enum blend_factor factor)
+{
+   switch (factor) {
+   case BLEND_FACTOR_ZERO:
+      return nir_imm_floatN_t(b, 0.0, src->bit_size);
+   case BLEND_FACTOR_SRC_COLOR:
+      return nir_channel(b, src, chan);
+   case BLEND_FACTOR_SRC1_COLOR:
+      return nir_channel(b, src1, chan);
+   case BLEND_FACTOR_DST_COLOR:
+      return nir_channel(b, dst, chan);
+   case BLEND_FACTOR_SRC_ALPHA:
+      return nir_channel(b, src, 3);
+   case BLEND_FACTOR_SRC1_ALPHA:
+      return nir_channel(b, src1, 3);
+   case BLEND_FACTOR_DST_ALPHA:
+      return nir_channel(b, dst, 3);
+   case BLEND_FACTOR_CONSTANT_COLOR:
+      return nir_channel(b, bconst, chan);
+   case BLEND_FACTOR_CONSTANT_ALPHA:
+      return nir_channel(b, bconst, 3);
+   case BLEND_FACTOR_SRC_ALPHA_SATURATE:
+      return nir_alpha_saturate(b, src, dst, chan);
+   }
+
+   unreachable("Invalid blend factor");
+}
+
+static nir_ssa_def *
+nir_blend_factor(
+   nir_builder *b,
+   nir_ssa_def *raw_scalar,
+   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst, nir_ssa_def *bconst,
+   unsigned chan,
+   enum blend_factor factor,
+   bool inverted)
+{
+   nir_ssa_def *f =
+      nir_blend_factor_value(b, src, src1, dst, bconst, chan, factor);
+
+   if (inverted)
+      f = nir_fadd_imm(b, nir_fneg(b, f), 1.0);
+
+   return nir_fmul(b, raw_scalar, f);
+}
+
+/* Given a colormask, "blend" with the destination */
+
+static nir_ssa_def *
+nir_color_mask(
+   nir_builder *b,
+   unsigned mask,
+   nir_ssa_def *src,
+   nir_ssa_def *dst)
+{
+   return nir_vec4(b,
+         nir_channel(b, (mask & (1 << 0)) ? src : dst, 0),
+         nir_channel(b, (mask & (1 << 1)) ? src : dst, 1),
+         nir_channel(b, (mask & (1 << 2)) ? src : dst, 2),
+         nir_channel(b, (mask & (1 << 3)) ? src : dst, 3));
+}
+
+static nir_ssa_def *
+nir_logicop_func(
+   nir_builder *b,
+   unsigned func,
+   nir_ssa_def *src, nir_ssa_def *dst)
+{
+   switch (func) {
+   case PIPE_LOGICOP_CLEAR:
+      return nir_imm_ivec4(b, 0, 0, 0, 0);
+   case PIPE_LOGICOP_NOR:
+      return nir_inot(b, nir_ior(b, src, dst));
+   case PIPE_LOGICOP_AND_INVERTED:
+      return nir_iand(b, nir_inot(b, src), dst);
+   case PIPE_LOGICOP_COPY_INVERTED:
+      return nir_inot(b, src);
+   case PIPE_LOGICOP_AND_REVERSE:
+      return nir_iand(b, src, nir_inot(b, dst));
+   case PIPE_LOGICOP_INVERT:
+      return nir_inot(b, dst);
+   case PIPE_LOGICOP_XOR:
+      return nir_ixor(b, src, dst);
+   case PIPE_LOGICOP_NAND:
+      return nir_inot(b, nir_iand(b, src, dst));
+   case PIPE_LOGICOP_AND:
+      return nir_iand(b, src, dst);
+   case PIPE_LOGICOP_EQUIV:
+      return nir_inot(b, nir_ixor(b, src, dst));
+   case PIPE_LOGICOP_NOOP:
+      return dst;
+   case PIPE_LOGICOP_OR_INVERTED:
+      return nir_ior(b, nir_inot(b, src), dst);
+   case PIPE_LOGICOP_COPY:
+      return src;
+   case PIPE_LOGICOP_OR_REVERSE:
+      return nir_ior(b, src, nir_inot(b, dst));
+   case PIPE_LOGICOP_OR:
+      return nir_ior(b, src, dst);
+   case PIPE_LOGICOP_SET:
+      return nir_imm_ivec4(b, ~0, ~0, ~0, ~0);
+   }
+
+   unreachable("Invalid logciop function");
+}
+
+static nir_ssa_def *
+nir_blend_logicop(
+   nir_builder *b,
+   nir_lower_blend_options options,
+   unsigned rt,
+   nir_ssa_def *src, nir_ssa_def *dst)
+{
+   unsigned bit_size = src->bit_size;
+   const struct util_format_description *format_desc =
+      util_format_description(options.format[rt]);
+
+   if (bit_size != 32) {
+      src = nir_f2f32(b, src);
+      dst = nir_f2f32(b, dst);
+   }
+
+   assert(src->num_components <= 4);
+   assert(dst->num_components <= 4);
+
+   unsigned bits[4];
+   for (int i = 0; i < 4; ++i)
+       bits[i] = format_desc->channel[i].size;
+
+   src = nir_format_float_to_unorm(b, src, bits);
+   dst = nir_format_float_to_unorm(b, dst, bits);
+
+   nir_ssa_def *out = nir_logicop_func(b, options.logicop_func, src, dst);
+
+   if (bits[0] < 32) {
+       nir_const_value mask[4];
+       for (int i = 0; i < 4; ++i)
+           mask[i] = nir_const_value_for_int((1u << bits[i]) - 1, 32);
+
+       out = nir_iand(b, out, nir_build_imm(b, 4, 32, mask));
+   }
+
+   out = nir_format_unorm_to_float(b, out, bits);
+
+   if (bit_size == 16)
+      out = nir_f2f16(b, out);
+
+   return out;
+}
+
+/* Given a blend state, the source color, and the destination color,
+ * return the blended color
+ */
+
+static nir_ssa_def *
+nir_blend(
+   nir_builder *b,
+   nir_lower_blend_options options,
+   unsigned rt,
+   nir_ssa_def *src, nir_ssa_def *src1, nir_ssa_def *dst)
+{
+   /* Grab the blend constant ahead of time */
+   nir_ssa_def *bconst;
+   if (options.scalar_blend_const) {
+      bconst = nir_vec4(b,
+                        nir_load_blend_const_color_r_float(b),
+                        nir_load_blend_const_color_g_float(b),
+                        nir_load_blend_const_color_b_float(b),
+                        nir_load_blend_const_color_a_float(b));
+   } else {
+      bconst = nir_load_blend_const_color_rgba(b);
+   }
+
+   if (src->bit_size == 16)
+      bconst = nir_f2f16(b, bconst);
+
+   /* Fixed-point framebuffers require their inputs clamped. */
+   enum pipe_format format = options.format[rt];
+
+   if (!util_format_is_float(format))
+      src = nir_fsat(b, src);
+
+   /* DST_ALPHA reads back 1.0 if there is no alpha channel */
+   const struct util_format_description *desc =
+      util_format_description(format);
+
+   if (desc->nr_channels < 4) {
+      nir_ssa_def *zero = nir_imm_floatN_t(b, 0.0, dst->bit_size);
+      nir_ssa_def *one = nir_imm_floatN_t(b, 1.0, dst->bit_size);
+
+      dst = nir_vec4(b, nir_channel(b, dst, 0),
+            desc->nr_channels > 1 ? nir_channel(b, dst, 1) : zero,
+            desc->nr_channels > 2 ? nir_channel(b, dst, 2) : zero,
+            desc->nr_channels > 3 ? nir_channel(b, dst, 3) : one);
+   }
+
+   /* We blend per channel and recombine later */
+   nir_ssa_def *channels[4];
+
+   for (unsigned c = 0; c < 4; ++c) {
+      /* Decide properties based on channel */
+      nir_lower_blend_channel chan =
+         (c < 3) ? options.rt[rt].rgb : options.rt[rt].alpha;
+
+      nir_ssa_def *psrc = nir_channel(b, src, c);
+      nir_ssa_def *pdst = nir_channel(b, dst, c);
+
+      if (nir_blend_factored(chan.func)) {
+         psrc = nir_blend_factor(
+                   b, psrc,
+                   src, src1, dst, bconst, c,
+                   chan.src_factor, chan.invert_src_factor);
+
+         pdst = nir_blend_factor(
+                   b, pdst,
+                   src, src1, dst, bconst, c,
+                   chan.dst_factor, chan.invert_dst_factor);
+      }
+
+      channels[c] = nir_blend_func(b, chan.func, psrc, pdst);
+   }
+
+   return nir_vec(b, channels, 4);
+}
+
+static bool
+nir_lower_blend_instr(nir_builder *b, nir_instr *instr, void *data)
+{
+   nir_lower_blend_options *options = data;
+   if (instr->type != nir_instr_type_intrinsic)
+      return false;
+
+   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
+   if (intr->intrinsic != nir_intrinsic_store_deref)
+      return false;
+
+   nir_variable *var = nir_intrinsic_get_var(intr, 0);
+   if (var->data.mode != nir_var_shader_out ||
+         (var->data.location != FRAG_RESULT_COLOR &&
+         var->data.location < FRAG_RESULT_DATA0))
+      return false;
+
+   /* Determine render target for per-RT blending */
+   unsigned rt =
+      (var->data.location == FRAG_RESULT_COLOR) ? 0 :
+      (var->data.location - FRAG_RESULT_DATA0);
+
+   b->cursor = nir_before_instr(instr);
+
+   /* Grab the input color */
+   nir_ssa_def *src = nir_ssa_for_src(b, intr->src[1], 4);
+
+   /* Grab the previous fragment color */
+   var->data.fb_fetch_output = true;
+   b->shader->info.outputs_read |= BITFIELD64_BIT(var->data.location);
+   b->shader->info.fs.uses_fbfetch_output = true;
+   nir_ssa_def *dst = nir_load_var(b, var);
+
+   /* Blend the two colors per the passed options */
+   nir_ssa_def *blended = src;
+
+   if (options->logicop_enable)
+      blended = nir_blend_logicop(b, *options, rt, src, dst);
+   else if (!util_format_is_pure_integer(options->format[rt]))
+      blended = nir_blend(b, *options, rt, src, options->src1, dst);
+
+   /* Apply a colormask */
+   blended = nir_color_mask(b, options->rt[rt].colormask, blended, dst);
+
+   /* Write out the final color instead of the input */
+   nir_instr_rewrite_src_ssa(instr, &intr->src[1], blended);
+   return true;
+}
+
+void
+nir_lower_blend(nir_shader *shader, nir_lower_blend_options options)
+{
+   assert(shader->info.stage == MESA_SHADER_FRAGMENT);
+
+   nir_shader_instructions_pass(shader, nir_lower_blend_instr,
+         nir_metadata_block_index | nir_metadata_dominance, &options);
+}
diff --git a/src/compiler/nir/nir_lower_blend.h b/src/compiler/nir/nir_lower_blend.h
new file mode 100644
index 00000000000..c0f5c8ee1be
--- /dev/null
+++ b/src/compiler/nir/nir_lower_blend.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright Â 2019 Alyssa Rosenzweig
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#ifndef NIR_BLEND_H
+#define NIR_BLEND_H
+
+#include "compiler/nir/nir.h"
+#include "pipe/p_format.h"
+
+/* These structs encapsulates the blend state such that it can be lowered
+ * cleanly
+ */
+
+typedef struct {
+   enum blend_func func;
+
+   enum blend_factor src_factor;
+   bool invert_src_factor;
+
+   enum blend_factor dst_factor;
+   bool invert_dst_factor;
+} nir_lower_blend_channel;
+
+typedef struct {
+   nir_lower_blend_channel rgb;
+   nir_lower_blend_channel alpha;
+
+   /* 4-bit colormask. 0x0 for none, 0xF for RGBA, 0x1 for R */
+   unsigned colormask;
+} nir_lower_blend_rt;
+
+typedef struct {
+   nir_lower_blend_rt rt[8];
+   enum pipe_format format[8];
+
+   bool logicop_enable;
+   unsigned logicop_func;
+
+   nir_ssa_def *src1;
+
+   /* If set, will use load_blend_const_color_{r,g,b,a}_float instead of
+    * load_blend_const_color_rgba */
+   bool scalar_blend_const;
+} nir_lower_blend_options;
+
+void nir_lower_blend(nir_shader *shader, nir_lower_blend_options options);
+
+#endif