Mesa (main): nir, nir/algebraic: add byte/word insertion instructions
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Jun 8 09:19:34 UTC 2021
Module: Mesa
Branch: main
Commit: 1cbcfb8b38c308297a1284c572f5cc4df41b0a00
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=1cbcfb8b38c308297a1284c572f5cc4df41b0a00
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Wed Mar 25 15:38:06 2020 +0000
nir, nir/algebraic: add byte/word insertion instructions
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Reviewed-by: Ian Romanick <ian.d.romanick at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/3151>
---
src/amd/vulkan/radv_shader.c | 2 ++
src/asahi/compiler/agx_compile.h | 2 ++
src/broadcom/vulkan/v3dv_pipeline.c | 2 ++
src/compiler/nir/nir.h | 2 ++
src/compiler/nir/nir_opcodes.py | 4 ++++
src/compiler/nir/nir_opt_algebraic.py | 13 +++++++++++++
src/freedreno/ir3/ir3_nir.c | 4 ++++
src/gallium/auxiliary/nir/nir_to_tgsi.c | 6 ++++++
src/gallium/drivers/etnaviv/etnaviv_screen.c | 2 ++
src/gallium/drivers/freedreno/a2xx/ir2_nir.c | 2 ++
src/gallium/drivers/lima/lima_program.c | 4 ++++
src/gallium/drivers/llvmpipe/lp_screen.c | 2 ++
src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp | 2 ++
src/gallium/drivers/nouveau/nv50/nv50_screen.c | 2 ++
src/gallium/drivers/r600/r600_pipe_common.c | 2 ++
src/gallium/drivers/radeonsi/si_get.c | 2 ++
src/gallium/drivers/softpipe/sp_screen.c | 2 ++
src/gallium/drivers/v3d/v3d_screen.c | 2 ++
src/gallium/drivers/vc4/vc4_program.c | 2 ++
src/gallium/drivers/zink/zink_compiler.c | 2 ++
src/intel/compiler/brw_compiler.c | 2 ++
src/microsoft/compiler/nir_to_dxil.c | 2 ++
src/panfrost/bifrost/bifrost_compile.h | 2 ++
src/panfrost/midgard/midgard_compile.h | 2 ++
24 files changed, 69 insertions(+)
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 3b7adc38a6b..083e0ca5620 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -74,6 +74,8 @@ static const struct nir_shader_compiler_options nir_options = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
.lower_ffma64 = true,
diff --git a/src/asahi/compiler/agx_compile.h b/src/asahi/compiler/agx_compile.h
index 70aed6e48a8..5c90e44a144 100644
--- a/src/asahi/compiler/agx_compile.h
+++ b/src/asahi/compiler/agx_compile.h
@@ -155,6 +155,8 @@ static const nir_shader_compiler_options agx_nir_options = {
.lower_fsign = true,
.lower_rotate = true,
.lower_pack_split = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_uniforms_to_ubo = true,
.lower_cs_local_index_from_id = true,
diff --git a/src/broadcom/vulkan/v3dv_pipeline.c b/src/broadcom/vulkan/v3dv_pipeline.c
index a4e1d97dbd7..99d7779038b 100644
--- a/src/broadcom/vulkan/v3dv_pipeline.c
+++ b/src/broadcom/vulkan/v3dv_pipeline.c
@@ -188,6 +188,8 @@ const nir_shader_compiler_options v3dv_nir_options = {
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 7981031b54a..1bad96e1f37 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3259,6 +3259,8 @@ typedef struct nir_shader_compiler_options {
bool lower_extract_byte;
bool lower_extract_word;
+ bool lower_insert_byte;
+ bool lower_insert_word;
bool lower_all_io_to_temps;
bool lower_all_io_to_elements;
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index bb8fc5bea86..eda8d11cb3d 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -972,6 +972,10 @@ binop("extract_i8", tint, "", "(int8_t)(src0 >> (src1 * 8))")
binop("extract_u16", tuint, "", "(uint16_t)(src0 >> (src1 * 16))")
binop("extract_i16", tint, "", "(int16_t)(src0 >> (src1 * 16))")
+# Byte/word insertion
+binop("insert_u8", tuint, "", "(src0 & 0xff) << (src1 * 8)")
+binop("insert_u16", tuint, "", "(src0 & 0xffff) << (src1 * 16)")
+
def triop(name, ty, alg_props, const_expr):
opcode(name, 0, ty, [0, 0, 0], [ty, ty, ty], False, alg_props, const_expr)
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 6339ebb5079..8dc20390491 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -2429,6 +2429,19 @@ for N in [16, 32]:
((x2xN, ('i2i16', aN)), (extract_x16, a, 0), '!options->lower_extract_word'),
])
+# Byte insertion
+late_optimizations.extend([(('ishl', ('extract_u8', 'a@32', 0), 8 * i), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
+late_optimizations.extend([(('iand', ('ishl', 'a@32', 8 * i), 0xff << (8 * i)), ('insert_u8', a, i), '!options->lower_insert_byte') for i in range(1, 4)])
+late_optimizations.append((('ishl', 'a@32', 24), ('insert_u8', a, 3), '!options->lower_insert_byte'))
+
+late_optimizations += [
+ # Word insertion
+ (('ishl', 'a@32', 16), ('insert_u16', a, 1), '!options->lower_insert_word'),
+
+ # Extract and then insert
+ (('insert_u8', ('extract_u8', 'a', 0), b), ('insert_u8', a, b)),
+ (('insert_u16', ('extract_u16', 'a', 0), b), ('insert_u16', a, b)),
+]
# Integer sizes
for s in [8, 16, 32, 64]:
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index a45274b82df..4410cae41de 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -53,6 +53,8 @@ static const nir_shader_compiler_options options = {
.vertex_id_zero_based = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_helper_invocation = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
@@ -107,6 +109,8 @@ static const nir_shader_compiler_options options_a6xx = {
.vertex_id_zero_based = false,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_helper_invocation = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
diff --git a/src/gallium/auxiliary/nir/nir_to_tgsi.c b/src/gallium/auxiliary/nir/nir_to_tgsi.c
index 409e9188be8..2b12d802618 100644
--- a/src/gallium/auxiliary/nir/nir_to_tgsi.c
+++ b/src/gallium/auxiliary/nir/nir_to_tgsi.c
@@ -2659,6 +2659,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
if (!options->lower_extract_byte ||
!options->lower_extract_word ||
+ !options->lower_insert_byte ||
+ !options->lower_insert_word ||
!options->lower_fdph ||
!options->lower_flrp64 ||
!options->lower_fmod ||
@@ -2671,6 +2673,8 @@ ntt_fix_nir_options(struct pipe_screen *screen, struct nir_shader *s)
new_options->lower_extract_byte = true;
new_options->lower_extract_word = true;
+ new_options->lower_insert_byte = true;
+ new_options->lower_insert_word = true;
new_options->lower_fdph = true;
new_options->lower_flrp64 = true;
new_options->lower_fmod = true;
@@ -2835,6 +2839,8 @@ static const nir_shader_compiler_options nir_to_tgsi_compiler_options = {
.fuse_ffma64 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,
diff --git a/src/gallium/drivers/etnaviv/etnaviv_screen.c b/src/gallium/drivers/etnaviv/etnaviv_screen.c
index 467c17f26b9..82a0e152bfc 100644
--- a/src/gallium/drivers/etnaviv/etnaviv_screen.c
+++ b/src/gallium/drivers/etnaviv/etnaviv_screen.c
@@ -1067,6 +1067,8 @@ etna_screen_create(struct etna_device *dev, struct etna_gpu *gpu,
.lower_fmod = true,
.lower_vector_cmp = true,
.lower_fdph = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_fdiv = true, /* !screen->specs.has_new_transcendentals */
.lower_fsign = !screen->specs.has_sign_floor_ceil,
.lower_ffloor = !screen->specs.has_sign_floor_ceil,
diff --git a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
index 40998acaebb..46c7ad7ff0f 100644
--- a/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
+++ b/src/gallium/drivers/freedreno/a2xx/ir2_nir.c
@@ -47,6 +47,8 @@ static const nir_shader_compiler_options options = {
.lower_fdph = true,
.has_fsub = true,
.has_isub = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
};
const nir_shader_compiler_options *
diff --git a/src/gallium/drivers/lima/lima_program.c b/src/gallium/drivers/lima/lima_program.c
index 40af5c029be..562586b851e 100644
--- a/src/gallium/drivers/lima/lima_program.c
+++ b/src/gallium/drivers/lima/lima_program.c
@@ -59,6 +59,8 @@ static const nir_shader_compiler_options vs_nir_options = {
.lower_rotate = true,
.lower_sincos = true,
.lower_fceil = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
};
static const nir_shader_compiler_options fs_nir_options = {
@@ -74,6 +76,8 @@ static const nir_shader_compiler_options fs_nir_options = {
.lower_rotate = true,
.lower_fdot = true,
.lower_fdph = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_bitops = true,
.lower_vector_cmp = true,
};
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index c1d642bc299..acf3e5d9224 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -577,6 +577,8 @@ static const struct nir_shader_compiler_options gallivm_nir_options = {
.lower_unpack_half_2x16 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_rotate = true,
.lower_uadd_carry = true,
.lower_usub_borrow = true,
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
index 90150065f86..bb2e8c7d062 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
@@ -3265,6 +3265,8 @@ nvir_nir_shader_compiler_options(int chipset)
op.lower_pack_split = false;
op.lower_extract_byte = (chipset < NVISA_GM107_CHIPSET);
op.lower_extract_word = (chipset < NVISA_GM107_CHIPSET);
+ op.lower_insert_byte = true;
+ op.lower_insert_word = true;
op.lower_all_io_to_temps = false;
op.lower_all_io_to_elements = false;
op.vertex_id_zero_based = false;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index a8ad5526d82..cd9946baed6 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -978,6 +978,8 @@ static const nir_shader_compiler_options nir_options = {
.lower_unpack_snorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_all_io_to_temps = false,
.lower_cs_local_index_from_id = true,
.lower_rotate = true,
diff --git a/src/gallium/drivers/r600/r600_pipe_common.c b/src/gallium/drivers/r600/r600_pipe_common.c
index 5c6a02087d4..a23cf2acf48 100644
--- a/src/gallium/drivers/r600/r600_pipe_common.c
+++ b/src/gallium/drivers/r600/r600_pipe_common.c
@@ -1334,6 +1334,8 @@ bool r600_common_screen_init(struct r600_common_screen *rscreen,
.lower_int64_options = ~0,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_rotate = true,
.max_unroll_iterations = 32,
.lower_interpolate_at = true,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index 817d0e6cc64..569515a87d1 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -1004,6 +1004,8 @@ void si_init_screen_get_functions(struct si_screen *sscreen)
.lower_unpack_unorm_4x8 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_rotate = true,
.lower_to_scalar = true,
.optimize_sample_mask_in = true,
diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c
index b652b4d260e..eecc5c1072c 100644
--- a/src/gallium/drivers/softpipe/sp_screen.c
+++ b/src/gallium/drivers/softpipe/sp_screen.c
@@ -78,6 +78,8 @@ static const nir_shader_compiler_options sp_compiler_options = {
.fuse_ffma64 = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_fdph = true,
.lower_flrp64 = true,
.lower_fmod = true,
diff --git a/src/gallium/drivers/v3d/v3d_screen.c b/src/gallium/drivers/v3d/v3d_screen.c
index 5b7d8d3c3ee..6112e4b1290 100644
--- a/src/gallium/drivers/v3d/v3d_screen.c
+++ b/src/gallium/drivers/v3d/v3d_screen.c
@@ -643,6 +643,8 @@ static const nir_shader_compiler_options v3d_nir_options = {
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_bitfield_insert_to_shifts = true,
.lower_bitfield_extract_to_shifts = true,
.lower_bitfield_reverse = true,
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 3ba8e6e5a94..f3c57942621 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -2173,6 +2173,8 @@ static const nir_shader_compiler_options nir_options = {
.lower_all_io_to_temps = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_fdiv = true,
.lower_ffma16 = true,
.lower_ffma32 = true,
diff --git a/src/gallium/drivers/zink/zink_compiler.c b/src/gallium/drivers/zink/zink_compiler.c
index c5a592fa76e..5fff56d3dff 100644
--- a/src/gallium/drivers/zink/zink_compiler.c
+++ b/src/gallium/drivers/zink/zink_compiler.c
@@ -379,6 +379,8 @@ zink_screen_init_compiler(struct zink_screen *screen)
.lower_fsat = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_mul_high = true,
.lower_rotate = true,
.lower_uadd_carry = true,
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 8b368bc0766..5ec16f958cf 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -45,6 +45,8 @@
.lower_device_index_to_zero = true, \
.vectorize_io = true, \
.use_interpolated_input_intrinsics = true, \
+ .lower_insert_byte = true, \
+ .lower_insert_word = true, \
.vertex_id_zero_based = true, \
.lower_base_vertex = true, \
.use_scoped_barrier = true, \
diff --git a/src/microsoft/compiler/nir_to_dxil.c b/src/microsoft/compiler/nir_to_dxil.c
index 9952c395e52..c25d3fc9ad2 100644
--- a/src/microsoft/compiler/nir_to_dxil.c
+++ b/src/microsoft/compiler/nir_to_dxil.c
@@ -90,6 +90,8 @@ nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_extract_word = true,
.lower_extract_byte = true,
+ .lower_insert_word = true,
+ .lower_insert_byte = true,
.lower_all_io_to_elements = true,
.lower_all_io_to_temps = true,
.lower_hadd = true,
diff --git a/src/panfrost/bifrost/bifrost_compile.h b/src/panfrost/bifrost/bifrost_compile.h
index 53cb19d7323..88433f1a292 100644
--- a/src/panfrost/bifrost/bifrost_compile.h
+++ b/src/panfrost/bifrost/bifrost_compile.h
@@ -56,6 +56,8 @@ static const nir_shader_compiler_options bifrost_nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,
diff --git a/src/panfrost/midgard/midgard_compile.h b/src/panfrost/midgard/midgard_compile.h
index a4c9e1626bc..242ac116216 100644
--- a/src/panfrost/midgard/midgard_compile.h
+++ b/src/panfrost/midgard/midgard_compile.h
@@ -68,6 +68,8 @@ static const nir_shader_compiler_options midgard_nir_options = {
.lower_bitfield_extract_to_shifts = true,
.lower_extract_byte = true,
.lower_extract_word = true,
+ .lower_insert_byte = true,
+ .lower_insert_word = true,
.lower_rotate = true,
.lower_pack_half_2x16 = true,
More information about the mesa-commit mailing list