Mesa (main): nir, nir/algebraic: add byte/word insertion instructions

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Jun 8 09:19:34 UTC 2021


Module: Mesa
Branch: main
Commit: 1cbcfb8b38c308297a1284c572f5cc4df41b0a00
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=1cbcfb8b38c308297a1284c572f5cc4df41b0a00

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Mar 25 15:38:06 2020 +0000

nir, nir/algebraic: add byte/word insertion instructions

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3151>

---

 src/amd/vulkan/radv_shader.c                             |  2 ++
 src/asahi/compiler/agx_compile.h                         |  2 ++
 src/broadcom/vulkan/v3dv_pipeline.c                      |  2 ++
 src/compiler/nir/nir.h                                   |  2 ++
 src/compiler/nir/nir_opcodes.py                          |  4 ++++
 src/compiler/nir/nir_opt_algebraic.py                    | 13 +++++++++++++
 src/freedreno/ir3/ir3_nir.c                              |  4 ++++
 src/gallium/auxiliary/nir/nir_to_tgsi.c                  |  6 ++++++
 src/gallium/drivers/etnaviv/etnaviv_screen.c             |  2 ++
 src/gallium/drivers/freedreno/a2xx/ir2_nir.c             |  2 ++
 src/gallium/drivers/lima/lima_program.c                  |  4 ++++
 src/gallium/drivers/llvmpipe/lp_screen.c                 |  2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp |  2 ++
 src/gallium/drivers/nouveau/nv50/nv50_screen.c           |  2 ++
 src/gallium/drivers/r600/r600_pipe_common.c              |  2 ++
 src/gallium/drivers/radeonsi/si_get.c                    |  2 ++
 src/gallium/drivers/softpipe/sp_screen.c                 |  2 ++
 src/gallium/drivers/v3d/v3d_screen.c                     |  2 ++
 src/gallium/drivers/vc4/vc4_program.c                    |  2 ++
 src/gallium/drivers/zink/zink_compiler.c                 |  2 ++
 src/intel/compiler/brw_compiler.c                        |  2 ++
 src/microsoft/compiler/nir_to_dxil.c                     |  2 ++
 src/panfrost/bifrost/bifrost_compile.h                   |  2 ++
 src/panfrost/midgard/midgard_compile.h                   |  2 ++
 24 files changed, 69 insertions(+)

diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3b7adc38a6b..083e0ca5620 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -74,6 +74,8 @@ static const struct nir_shader_compiler_options nir_options = {
    .lower_unpack_half_2x16 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_ffma16 = true,
    .lower_ffma32 = true,
    .lower_ffma64 = true,
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 70aed6e48a8..5c90e44a144 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -155,6 +155,8 @@ static const nir_shader_compiler_options agx_nir_options = {
    .lower_fsign = true,
    .lower_rotate = true,
    .lower_pack_split = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_uniforms_to_ubo = true,
    .lower_cs_local_index_from_id = true,
 
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index a4e1d97dbd7..99d7779038b 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -188,6 +188,8 @@ const nir_shader_compiler_options v3dv_nir_options = {
    .lower_all_io_to_temps = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_bitfield_insert_to_shifts = true,
    .lower_bitfield_extract_to_shifts = true,
    .lower_bitfield_reverse = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7981031b54a..1bad96e1f37 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3259,6 +3259,8 @@ typedef struct nir_shader_compiler_options {
 
    bool lower_extract_byte;
    bool lower_extract_word;
+   bool lower_insert_byte;
+   bool lower_insert_word;
 
    bool lower_all_io_to_temps;
    bool lower_all_io_to_elements;
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index bb8fc5bea86..eda8d11cb3d 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -972,6 +972,10 @@ binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
 binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
 binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
 
+# Byte/word insertion
+binop("insert_u8", tuint, "", "(src0 & 0xff) << (src1 * 8)")
+binop("insert_u16", tuint, "", "(src0 & 0xffff) << (src1 * 16)")
+
 
 def triop(name, ty, alg_props, const_expr):
    opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, alg_props, const_expr)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 6339ebb5079..8dc20390491 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2429,6 +2429,19 @@ for N in [16, 32]:
                 ((x2xN, ('i2i16', aN)), (extract_x16, a, 0), '!options->lower_extract_word'),
             ])
 
+# Byte insertion
+late_optimizations.extend([(('ishl', ('extract_u8', 'a@32', 0), 8 * i), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
+late_optimizations.extend([(('iand', ('ishl', 'a@32', 8 * i), 0xff << (8 * i)), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
+late_optimizations.append((('ishl', 'a@32', 24), ('insert_u8', a, 3), '!options->lower_insert_byte'))
+
+late_optimizations += [
+   # Word insertion
+   (('ishl', 'a@32', 16), ('insert_u16', a, 1), '!options->lower_insert_word'),
+
+   # Extract and then insert
+   (('insert_u8', ('extract_u8', 'a', 0), b), ('insert_u8', a, b)),
+   (('insert_u16', ('extract_u16', 'a', 0), b), ('insert_u16', a, b)),
+]
 
 # Integer sizes
 for s in [8, 16, 32, 64]:
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index a45274b82df..4410cae41de 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -53,6 +53,8 @@ static const nir_shader_compiler_options options = {
 		.vertex_id_zero_based = true,
 		.lower_extract_byte = true,
 		.lower_extract_word = true,
+		.lower_insert_byte = true,
+		.lower_insert_word = true,
 		.lower_helper_invocation = true,
 		.lower_bitfield_insert_to_shifts = true,
 		.lower_bitfield_extract_to_shifts = true,
@@ -107,6 +109,8 @@ static const nir_shader_compiler_options options_a6xx = {
 		.vertex_id_zero_based = false,
 		.lower_extract_byte = true,
 		.lower_extract_word = true,
+		.lower_insert_byte = true,
+		.lower_insert_word = true,
 		.lower_helper_invocation = true,
 		.lower_bitfield_insert_to_shifts = true,
 		.lower_bitfield_extract_to_shifts = true,
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 409e9188be8..2b12d802618 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2659,6 +2659,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
 
    if (!options->lower_extract_byte ||
        !options->lower_extract_word ||
+       !options->lower_insert_byte ||
+       !options->lower_insert_word ||
        !options->lower_fdph ||
        !options->lower_flrp64 ||
        !options->lower_fmod ||
@@ -2671,6 +2673,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
 
       new_options->lower_extract_byte = true;
       new_options->lower_extract_word = true;
+      new_options->lower_insert_byte = true;
+      new_options->lower_insert_word = true;
       new_options->lower_fdph = true;
       new_options->lower_flrp64 = true;
       new_options->lower_fmod = true;
@@ -2835,6 +2839,8 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
    .fuse_ffma64 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_fdph = true,
    .lower_flrp64 = true,
    .lower_fmod = true,
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 467c17f26b9..82a0e152bfc 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -1067,6 +1067,8 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
       .lower_fmod = true,
       .lower_vector_cmp = true,
       .lower_fdph = true,
+      .lower_insert_byte = true,
+      .lower_insert_word = true,
       .lower_fdiv = true, /* !screen->specs.has_new_transcendentals */
       .lower_fsign = !screen->specs.has_sign_floor_ceil,
       .lower_ffloor = !screen->specs.has_sign_floor_ceil,
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 40998acaebb..46c7ad7ff0f 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -47,6 +47,8 @@ static const nir_shader_compiler_options options = {
    .lower_fdph = true,
    .has_fsub = true,
    .has_isub = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
 };
 
 const nir_shader_compiler_options *
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 40af5c029be..562586b851e 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -59,6 +59,8 @@ static const nir_shader_compiler_options vs_nir_options = {
    .lower_rotate = true,
    .lower_sincos = true,
    .lower_fceil = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
 };
 
 static const nir_shader_compiler_options fs_nir_options = {
@@ -74,6 +76,8 @@ static const nir_shader_compiler_options fs_nir_options = {
    .lower_rotate = true,
    .lower_fdot = true,
    .lower_fdph = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_bitops = true,
    .lower_vector_cmp = true,
 };
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index c1d642bc299..acf3e5d9224 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -577,6 +577,8 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
    .lower_unpack_half_2x16 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_rotate = true,
    .lower_uadd_carry = true,
    .lower_usub_borrow = true,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 90150065f86..bb2e8c7d062 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3265,6 +3265,8 @@ nvir_nir_shader_compiler_options(int chipset)
    op.lower_pack_split = false;
    op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
    op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
+   op.lower_insert_byte = true;
+   op.lower_insert_word = true;
    op.lower_all_io_to_temps = false;
    op.lower_all_io_to_elements = false;
    op.vertex_id_zero_based = false;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index a8ad5526d82..cd9946baed6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -978,6 +978,8 @@ static const nir_shader_compiler_options nir_options = {
    .lower_unpack_snorm_4x8 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_all_io_to_temps = false,
    .lower_cs_local_index_from_id = true,
    .lower_rotate = true,
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 5c6a02087d4..a23cf2acf48 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1334,6 +1334,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
 		.lower_int64_options = ~0,
 		.lower_extract_byte = true,
 		.lower_extract_word = true,
+		.lower_insert_byte = true,
+		.lower_insert_word = true,
 		.lower_rotate = true,
 		.max_unroll_iterations = 32,
 		.lower_interpolate_at = true,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 817d0e6cc64..569515a87d1 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1004,6 +1004,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
       .lower_unpack_unorm_4x8 = true,
       .lower_extract_byte = true,
       .lower_extract_word = true,
+      .lower_insert_byte = true,
+      .lower_insert_word = true,
       .lower_rotate = true,
       .lower_to_scalar = true,
       .optimize_sample_mask_in = true,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index b652b4d260e..eecc5c1072c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -78,6 +78,8 @@ static const nir_shader_compiler_options sp_compiler_options = {
    .fuse_ffma64 = true,
    .lower_extract_byte = true,
    .lower_extract_word = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
    .lower_fdph = true,
    .lower_flrp64 = true,
    .lower_fmod = true,
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 5b7d8d3c3ee..6112e4b1290 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -643,6 +643,8 @@ static const nir_shader_compiler_options v3d_nir_options = {
         .lower_all_io_to_temps = true,
         .lower_extract_byte = true,
         .lower_extract_word = true,
+        .lower_insert_byte = true,
+        .lower_insert_word = true,
         .lower_bitfield_insert_to_shifts = true,
         .lower_bitfield_extract_to_shifts = true,
         .lower_bitfield_reverse = true,
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 3ba8e6e5a94..f3c57942621 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2173,6 +2173,8 @@ static const nir_shader_compiler_options nir_options = {
         .lower_all_io_to_temps = true,
         .lower_extract_byte = true,
         .lower_extract_word = true,
+        .lower_insert_byte = true,
+        .lower_insert_word = true,
         .lower_fdiv = true,
         .lower_ffma16 = true,
         .lower_ffma32 = true,
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index c5a592fa76e..5fff56d3dff 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -379,6 +379,8 @@ zink_screen_init_compiler(struct zink_screen *screen)
       .lower_fsat = true,
       .lower_extract_byte = true,
       .lower_extract_word = true,
+      .lower_insert_byte = true,
+      .lower_insert_word = true,
       .lower_mul_high = true,
       .lower_rotate = true,
       .lower_uadd_carry = true,
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 8b368bc0766..5ec16f958cf 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -45,6 +45,8 @@
    .lower_device_index_to_zero = true,                                        \
    .vectorize_io = true,                                                      \
    .use_interpolated_input_intrinsics = true,                                 \
+   .lower_insert_byte = true,                                                 \
+   .lower_insert_word = true,                                                 \
    .vertex_id_zero_based = true,                                              \
    .lower_base_vertex = true,                                                 \
    .use_scoped_barrier = true,                                                \
diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c
index 9952c395e52..c25d3fc9ad2 100644
--- a/src/microsoft/compiler/nir_to_dxil.c
+++ b/src/microsoft/compiler/nir_to_dxil.c
@@ -90,6 +90,8 @@ nir_options = {
    .lower_bitfield_extract_to_shifts = true,
    .lower_extract_word = true,
    .lower_extract_byte = true,
+   .lower_insert_word = true,
+   .lower_insert_byte = true,
    .lower_all_io_to_elements = true,
    .lower_all_io_to_temps = true,
    .lower_hadd = true,
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index 53cb19d7323..88433f1a292 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -56,6 +56,8 @@ static const nir_shader_compiler_options bifrost_nir_options = {
         .lower_bitfield_extract_to_shifts = true,
         .lower_extract_byte = true,
         .lower_extract_word = true,
+        .lower_insert_byte = true,
+        .lower_insert_word = true,
         .lower_rotate = true,
 
         .lower_pack_half_2x16 = true,
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index a4c9e1626bc..242ac116216 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -68,6 +68,8 @@ static const nir_shader_compiler_options midgard_nir_options = {
         .lower_bitfield_extract_to_shifts = true,
         .lower_extract_byte = true,
         .lower_extract_word = true,
+        .lower_insert_byte = true,
+        .lower_insert_word = true,
         .lower_rotate = true,
 
         .lower_pack_half_2x16 = true,



More information about the mesa-commit mailing list