Mesa (main): driconf: Add a limit_trig_input_range option

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri May 13 07:23:58 UTC 2022


Module: Mesa
Branch: main
Commit: 55c71217ec7a184753d64560323c18acd50b0fcf
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=55c71217ec7a184753d64560323c18acd50b0fcf

Author: Vadym Shovkoplias <vadym.shovkoplias at globallogic.com>
Date:   Fri May  6 18:52:47 2022 +0300

driconf: Add a limit_trig_input_range option

With this option enabled, the range of input values for fsin and fcos is
limited to [-2*pi : 2*pi] by computing the remainder of division by 2*pi
(fmod). This helps to improve calculation precision for large input
arguments on Intel.

-v2: Add limit_trig_input_range option to prog_key to update shader
     cache (Lionel)

Signed-off-by: Vadym Shovkoplias <vadym.shovkoplias at globallogic.com>
Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16388>

---

 src/gallium/drivers/crocus/crocus_program.c    |  7 ++-
 src/gallium/drivers/crocus/crocus_screen.c     |  2 +
 src/gallium/drivers/crocus/crocus_screen.h     |  1 +
 src/gallium/drivers/crocus/driinfo_crocus.h    |  1 +
 src/gallium/drivers/iris/driinfo_iris.h        |  1 +
 src/gallium/drivers/iris/iris_context.h        |  1 +
 src/gallium/drivers/iris/iris_program.c        | 85 ++++++++++++++------------
 src/gallium/drivers/iris/iris_screen.c         |  2 +
 src/gallium/drivers/iris/iris_screen.h         |  1 +
 src/intel/compiler/brw_compiler.h              |  7 +++
 src/intel/compiler/brw_nir.c                   |  3 +
 src/intel/compiler/brw_nir.h                   |  2 +
 src/intel/compiler/brw_nir_trig_workarounds.py |  7 +++
 src/intel/vulkan/anv_device.c                  |  3 +
 src/intel/vulkan/anv_pipeline.c                | 59 +++++++++---------
 src/intel/vulkan/anv_private.h                 |  1 +
 src/util/driconf.h                             |  4 ++
 17 files changed, 117 insertions(+), 70 deletions(-)

diff --git a/src/gallium/drivers/crocus/crocus_program.c b/src/gallium/drivers/crocus/crocus_program.c
index 08e408cb334..f1ee9b9c816 100644
--- a/src/gallium/drivers/crocus/crocus_program.c
+++ b/src/gallium/drivers/crocus/crocus_program.c
@@ -51,7 +51,10 @@
    .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
    .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
    .base.tex.compressed_multisample_layout_mask = ~0
-#define KEY_INIT() .base.program_string_id = ish->program_id, KEY_INIT_NO_ID()
+#define KEY_INIT()                                                        \
+   .base.program_string_id = ish->program_id,                             \
+   .base.limit_trig_input_range = screen->driconf.limit_trig_input_range, \
+   KEY_INIT_NO_ID()
 
 static void
 crocus_sanitize_tex_key(struct brw_sampler_prog_key_data *key)
@@ -1660,8 +1663,8 @@ crocus_update_compiled_tes(struct crocus_context *ice)
    struct crocus_shader_state *shs = &ice->state.shaders[MESA_SHADER_TESS_EVAL];
    struct crocus_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
-   struct brw_tes_prog_key key = { KEY_INIT() };
    struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
+   struct brw_tes_prog_key key = { KEY_INIT() };
    const struct intel_device_info *devinfo = &screen->devinfo;
 
    if (ish->nos & (1ull << CROCUS_NOS_TEXTURES))
diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c
index 52a54a1e248..2c0e82d3ec8 100644
--- a/src/gallium/drivers/crocus/crocus_screen.c
+++ b/src/gallium/drivers/crocus/crocus_screen.c
@@ -762,6 +762,8 @@ crocus_screen_create(int fd, const struct pipe_screen_config *config)
       driQueryOptionb(config->options, "disable_throttling");
    screen->driconf.always_flush_cache =
       driQueryOptionb(config->options, "always_flush_cache");
+   screen->driconf.limit_trig_input_range =
+      driQueryOptionb(config->options, "limit_trig_input_range");
 
    screen->precompile = env_var_as_boolean("shader_precompile", true);
 
diff --git a/src/gallium/drivers/crocus/crocus_screen.h b/src/gallium/drivers/crocus/crocus_screen.h
index c5b5a2c8df2..31d4fad158c 100644
--- a/src/gallium/drivers/crocus/crocus_screen.h
+++ b/src/gallium/drivers/crocus/crocus_screen.h
@@ -199,6 +199,7 @@ struct crocus_screen {
       bool dual_color_blend_by_location;
       bool disable_throttling;
       bool always_flush_cache;
+      bool limit_trig_input_range;
    } driconf;
 
    uint64_t aperture_bytes;
diff --git a/src/gallium/drivers/crocus/driinfo_crocus.h b/src/gallium/drivers/crocus/driinfo_crocus.h
index 829bf7f818c..71fc5a3dc4e 100644
--- a/src/gallium/drivers/crocus/driinfo_crocus.h
+++ b/src/gallium/drivers/crocus/driinfo_crocus.h
@@ -4,6 +4,7 @@ DRI_CONF_SECTION_DEBUG
    DRI_CONF_DUAL_COLOR_BLEND_BY_LOCATION(false)
    DRI_CONF_DISABLE_THROTTLING(false)
    DRI_CONF_ALWAYS_FLUSH_CACHE(false)
+   DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
 DRI_CONF_SECTION_END
 
 DRI_CONF_SECTION_PERFORMANCE
diff --git a/src/gallium/drivers/iris/driinfo_iris.h b/src/gallium/drivers/iris/driinfo_iris.h
index 15ede27e4e3..ff19011b954 100644
--- a/src/gallium/drivers/iris/driinfo_iris.h
+++ b/src/gallium/drivers/iris/driinfo_iris.h
@@ -5,6 +5,7 @@ DRI_CONF_SECTION_DEBUG
    DRI_CONF_DISABLE_THROTTLING(false)
    DRI_CONF_ALWAYS_FLUSH_CACHE(false)
    DRI_CONF_OPT_B(sync_compile, false, "Always compile synchronously (will cause stalls)")
+   DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
 DRI_CONF_SECTION_END
 
 DRI_CONF_SECTION_PERFORMANCE
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 7b73c7be06b..648c27411dc 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -208,6 +208,7 @@ enum iris_nos_dep {
 
 struct iris_base_prog_key {
    unsigned program_string_id;
+   bool limit_trig_input_range;
 };
 
 /**
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index 1ea9be793e0..02423c0d308 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -47,9 +47,12 @@
 #include "iris_context.h"
 #include "nir/tgsi_to_nir.h"
 
-#define KEY_ID(prefix) .prefix.program_string_id = ish->program_id
-#define BRW_KEY_INIT(gen, prog_id)                       \
+#define KEY_INIT(prefix)                                                   \
+   .prefix.program_string_id = ish->program_id,                            \
+   .prefix.limit_trig_input_range = screen->driconf.limit_trig_input_range
+#define BRW_KEY_INIT(gen, prog_id, limit_trig_input)     \
    .base.program_string_id = prog_id,                    \
+   .base.limit_trig_input_range = limit_trig_input,      \
    .base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM, \
    .base.tex.swizzles[0 ... MAX_SAMPLERS - 1] = 0x688,   \
    .base.tex.compressed_multisample_layout_mask = ~0,    \
@@ -95,11 +98,12 @@ iris_finalize_program(struct iris_compiled_shader *shader,
 }
 
 static struct brw_vs_prog_key
-iris_to_brw_vs_key(const struct intel_device_info *devinfo,
+iris_to_brw_vs_key(const struct iris_screen *screen,
                    const struct iris_vs_prog_key *key)
 {
    return (struct brw_vs_prog_key) {
-      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
+      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
+                   key->vue.base.limit_trig_input_range),
 
       /* Don't tell the backend about our clip plane constants, we've
        * already lowered them in NIR and don't want it doing it again.
@@ -109,11 +113,12 @@ iris_to_brw_vs_key(const struct intel_device_info *devinfo,
 }
 
 static struct brw_tcs_prog_key
-iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
+iris_to_brw_tcs_key(const struct iris_screen *screen,
                     const struct iris_tcs_prog_key *key)
 {
    return (struct brw_tcs_prog_key) {
-      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
+      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
+                   key->vue.base.limit_trig_input_range),
       ._tes_primitive_mode = key->_tes_primitive_mode,
       .input_vertices = key->input_vertices,
       .patch_outputs_written = key->patch_outputs_written,
@@ -123,31 +128,34 @@ iris_to_brw_tcs_key(const struct intel_device_info *devinfo,
 }
 
 static struct brw_tes_prog_key
-iris_to_brw_tes_key(const struct intel_device_info *devinfo,
+iris_to_brw_tes_key(const struct iris_screen *screen,
                     const struct iris_tes_prog_key *key)
 {
    return (struct brw_tes_prog_key) {
-      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
+      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
+                   key->vue.base.limit_trig_input_range),
       .patch_inputs_read = key->patch_inputs_read,
       .inputs_read = key->inputs_read,
    };
 }
 
 static struct brw_gs_prog_key
-iris_to_brw_gs_key(const struct intel_device_info *devinfo,
+iris_to_brw_gs_key(const struct iris_screen *screen,
                    const struct iris_gs_prog_key *key)
 {
    return (struct brw_gs_prog_key) {
-      BRW_KEY_INIT(devinfo->ver, key->vue.base.program_string_id),
+      BRW_KEY_INIT(screen->devinfo.ver, key->vue.base.program_string_id,
+                   key->vue.base.limit_trig_input_range),
    };
 }
 
 static struct brw_wm_prog_key
-iris_to_brw_fs_key(const struct intel_device_info *devinfo,
+iris_to_brw_fs_key(const struct iris_screen *screen,
                    const struct iris_fs_prog_key *key)
 {
    return (struct brw_wm_prog_key) {
-      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
+      BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
+                   key->base.limit_trig_input_range),
       .nr_color_regions = key->nr_color_regions,
       .flat_shade = key->flat_shade,
       .alpha_test_replicate_alpha = key->alpha_test_replicate_alpha,
@@ -164,11 +172,12 @@ iris_to_brw_fs_key(const struct intel_device_info *devinfo,
 }
 
 static struct brw_cs_prog_key
-iris_to_brw_cs_key(const struct intel_device_info *devinfo,
+iris_to_brw_cs_key(const struct iris_screen *screen,
                    const struct iris_cs_prog_key *key)
 {
    return (struct brw_cs_prog_key) {
-      BRW_KEY_INIT(devinfo->ver, key->base.program_string_id),
+      BRW_KEY_INIT(screen->devinfo.ver, key->base.program_string_id,
+                   key->base.limit_trig_input_range),
    };
 }
 
@@ -1113,7 +1122,6 @@ iris_debug_recompile(struct iris_screen *screen,
             || list_is_singular(&ish->variants))
       return;
 
-   const struct intel_device_info *devinfo = &screen->devinfo;
    const struct brw_compiler *c = screen->compiler;
    const struct shader_info *info = &ish->nir->info;
 
@@ -1130,22 +1138,22 @@ iris_debug_recompile(struct iris_screen *screen,
 
    switch (info->stage) {
    case MESA_SHADER_VERTEX:
-      old_key.vs = iris_to_brw_vs_key(devinfo, old_iris_key);
+      old_key.vs = iris_to_brw_vs_key(screen, old_iris_key);
       break;
    case MESA_SHADER_TESS_CTRL:
-      old_key.tcs = iris_to_brw_tcs_key(devinfo, old_iris_key);
+      old_key.tcs = iris_to_brw_tcs_key(screen, old_iris_key);
       break;
    case MESA_SHADER_TESS_EVAL:
-      old_key.tes = iris_to_brw_tes_key(devinfo, old_iris_key);
+      old_key.tes = iris_to_brw_tes_key(screen, old_iris_key);
       break;
    case MESA_SHADER_GEOMETRY:
-      old_key.gs = iris_to_brw_gs_key(devinfo, old_iris_key);
+      old_key.gs = iris_to_brw_gs_key(screen, old_iris_key);
       break;
    case MESA_SHADER_FRAGMENT:
-      old_key.wm = iris_to_brw_fs_key(devinfo, old_iris_key);
+      old_key.wm = iris_to_brw_fs_key(screen, old_iris_key);
       break;
    case MESA_SHADER_COMPUTE:
-      old_key.cs = iris_to_brw_cs_key(devinfo, old_iris_key);
+      old_key.cs = iris_to_brw_cs_key(screen, old_iris_key);
       break;
    default:
       unreachable("invalid shader stage");
@@ -1342,7 +1350,7 @@ iris_compile_vs(struct iris_screen *screen,
                        &vue_prog_data->vue_map, nir->info.outputs_written,
                        nir->info.separate_shader, /* pos_slots */ 1);
 
-   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(devinfo, key);
+   struct brw_vs_prog_key brw_key = iris_to_brw_vs_key(screen, key);
 
    struct brw_compile_vs_params params = {
       .nir = nir,
@@ -1395,7 +1403,7 @@ iris_update_compiled_vs(struct iris_context *ice)
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_VERTEX];
 
-   struct iris_vs_prog_key key = { KEY_ID(vue.base) };
+   struct iris_vs_prog_key key = { KEY_INIT(vue.base) };
    screen->vtbl.populate_vs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
 
    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_VS];
@@ -1501,7 +1509,7 @@ iris_compile_tcs(struct iris_screen *screen,
    struct iris_binding_table bt;
 
    const struct iris_tcs_prog_key *const key = &shader->key.tcs;
-   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(devinfo, key);
+   struct brw_tcs_prog_key brw_key = iris_to_brw_tcs_key(screen, key);
 
    if (ish) {
       nir = nir_shader_clone(mem_ctx, ish->nir);
@@ -1709,7 +1717,7 @@ iris_compile_tes(struct iris_screen *screen,
    brw_compute_tess_vue_map(&input_vue_map, key->inputs_read,
                             key->patch_inputs_read);
 
-   struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(devinfo, key);
+   struct brw_tes_prog_key brw_key = iris_to_brw_tes_key(screen, key);
 
    struct brw_compile_tes_params params = {
       .nir = nir,
@@ -1763,7 +1771,7 @@ iris_update_compiled_tes(struct iris_context *ice)
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_TESS_EVAL];
 
-   struct iris_tes_prog_key key = { KEY_ID(vue.base) };
+   struct iris_tes_prog_key key = { KEY_INIT(vue.base) };
    get_unified_tess_slots(ice, &key.inputs_read, &key.patch_inputs_read);
    screen->vtbl.populate_tes_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
 
@@ -1848,7 +1856,7 @@ iris_compile_gs(struct iris_screen *screen,
                        &vue_prog_data->vue_map, nir->info.outputs_written,
                        nir->info.separate_shader, /* pos_slots */ 1);
 
-   struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(devinfo, key);
+   struct brw_gs_prog_key brw_key = iris_to_brw_gs_key(screen, key);
 
    struct brw_compile_gs_params params = {
       .nir = nir,
@@ -1904,7 +1912,7 @@ iris_update_compiled_gs(struct iris_context *ice)
    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
 
    if (ish) {
-      struct iris_gs_prog_key key = { KEY_ID(vue.base) };
+      struct iris_gs_prog_key key = { KEY_INIT(vue.base) };
       screen->vtbl.populate_gs_key(ice, &ish->nir->info, last_vue_stage(ice), &key);
 
       bool added;
@@ -1984,7 +1992,7 @@ iris_compile_fs(struct iris_screen *screen,
 
    brw_nir_analyze_ubo_ranges(compiler, nir, NULL, prog_data->ubo_ranges);
 
-   struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(devinfo, key);
+   struct brw_wm_prog_key brw_key = iris_to_brw_fs_key(screen, key);
 
    struct brw_compile_fs_params params = {
       .nir = nir,
@@ -2035,8 +2043,8 @@ iris_update_compiled_fs(struct iris_context *ice)
    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
-   struct iris_fs_prog_key key = { KEY_ID(base) };
    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   struct iris_fs_prog_key key = { KEY_INIT(base) };
    screen->vtbl.populate_fs_key(ice, &ish->nir->info, &key);
 
    struct brw_vue_map *last_vue_map =
@@ -2261,7 +2269,7 @@ iris_compile_cs(struct iris_screen *screen,
    iris_setup_binding_table(devinfo, nir, &bt, /* num_render_targets */ 0,
                             num_system_values, num_cbufs);
 
-   struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(devinfo, key);
+   struct brw_cs_prog_key brw_key = iris_to_brw_cs_key(screen, key);
 
    struct brw_compile_cs_params params = {
       .nir = nir,
@@ -2303,9 +2311,8 @@ iris_update_compiled_cs(struct iris_context *ice)
    struct u_upload_mgr *uploader = ice->shaders.uploader_driver;
    struct iris_uncompiled_shader *ish =
       ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
-
-   struct iris_cs_prog_key key = { KEY_ID(base) };
    struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   struct iris_cs_prog_key key = { KEY_INIT(base) };
    screen->vtbl.populate_cs_key(ice, &key);
 
    struct iris_compiled_shader *old = ice->shaders.prog[IRIS_CACHE_CS];
@@ -2519,7 +2526,7 @@ iris_create_compute_state(struct pipe_context *ctx,
    // XXX: disallow more than 64KB of shared variables
 
    if (screen->precompile) {
-      struct iris_cs_prog_key key = { KEY_ID(base) };
+      struct iris_cs_prog_key key = { KEY_INIT(base) };
 
       struct iris_compiled_shader *shader =
          iris_create_shader_variant(screen, NULL, IRIS_CACHE_CS,
@@ -2599,13 +2606,13 @@ iris_create_shader_state(struct pipe_context *ctx,
       if (info->clip_distance_array_size == 0)
          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
 
-      key.vs = (struct iris_vs_prog_key) { KEY_ID(vue.base) };
+      key.vs = (struct iris_vs_prog_key) { KEY_INIT(vue.base) };
       key_size = sizeof(key.vs);
       break;
 
    case MESA_SHADER_TESS_CTRL: {
       key.tcs = (struct iris_tcs_prog_key) {
-         KEY_ID(vue.base),
+         KEY_INIT(vue.base),
          // XXX: make sure the linker fills this out from the TES...
          ._tes_primitive_mode =
          info->tess._primitive_mode ? info->tess._primitive_mode
@@ -2632,7 +2639,7 @@ iris_create_shader_state(struct pipe_context *ctx,
          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
 
       key.tes = (struct iris_tes_prog_key) {
-         KEY_ID(vue.base),
+         KEY_INIT(vue.base),
          // XXX: not ideal, need TCS output/TES input unification
          .inputs_read = info->inputs_read,
          .patch_inputs_read = info->patch_inputs_read,
@@ -2646,7 +2653,7 @@ iris_create_shader_state(struct pipe_context *ctx,
       if (info->clip_distance_array_size == 0)
          ish->nos |= (1ull << IRIS_NOS_RASTERIZER);
 
-      key.gs = (struct iris_gs_prog_key) { KEY_ID(vue.base) };
+      key.gs = (struct iris_gs_prog_key) { KEY_INIT(vue.base) };
       key_size = sizeof(key.gs);
       break;
 
@@ -2672,7 +2679,7 @@ iris_create_shader_state(struct pipe_context *ctx,
       const struct intel_device_info *devinfo = &screen->devinfo;
 
       key.fs = (struct iris_fs_prog_key) {
-         KEY_ID(base),
+         KEY_INIT(base),
          .nr_color_regions = util_bitcount(color_outputs),
          .coherent_fb_fetch = devinfo->ver >= 9,
          .input_slots_valid =
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 180de3b9250..df0c9012991 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -843,6 +843,8 @@ iris_screen_create(int fd, const struct pipe_screen_config *config)
       driQueryOptionb(config->options, "always_flush_cache");
    screen->driconf.sync_compile =
       driQueryOptionb(config->options, "sync_compile");
+   screen->driconf.limit_trig_input_range =
+      driQueryOptionb(config->options, "limit_trig_input_range");
 
    screen->precompile = env_var_as_boolean("shader_precompile", true);
 
diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h
index 92b77e9cb86..c62322cd142 100644
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -181,6 +181,7 @@ struct iris_screen {
       bool disable_throttling;
       bool always_flush_cache;
       bool sync_compile;
+      bool limit_trig_input_range;
    } driconf;
 
    /** Does the kernel support various features (KERNEL_HAS_* bitfield)? */
diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h
index 3691c4325d6..0caeafa8a3f 100644
--- a/src/intel/compiler/brw_compiler.h
+++ b/src/intel/compiler/brw_compiler.h
@@ -247,6 +247,13 @@ struct brw_base_prog_key {
 
    enum brw_subgroup_size_type subgroup_size_type;
    bool robust_buffer_access;
+
+   /**
+    * Apply workarounds for SIN and COS input range problems.
+    * This limits input range for SIN and COS to [-2*pi : 2*pi] to
+    * avoid precision issues.
+    */
+   bool limit_trig_input_range;
    struct brw_sampler_prog_key_data tex;
 };
 
diff --git a/src/intel/compiler/brw_nir.c b/src/intel/compiler/brw_nir.c
index 81930286604..0381c67c6e5 100644
--- a/src/intel/compiler/brw_nir.c
+++ b/src/intel/compiler/brw_nir.c
@@ -1409,6 +1409,9 @@ brw_nir_apply_key(nir_shader *nir,
    };
    OPT(nir_lower_subgroups, &subgroups_options);
 
+   if (key->limit_trig_input_range)
+      OPT(brw_nir_limit_trig_input_range_workaround);
+
    if (progress)
       brw_nir_optimize(nir, compiler, is_scalar, false);
 }
diff --git a/src/intel/compiler/brw_nir.h b/src/intel/compiler/brw_nir.h
index 59b65301fb2..98f6c946c11 100644
--- a/src/intel/compiler/brw_nir.h
+++ b/src/intel/compiler/brw_nir.h
@@ -142,6 +142,8 @@ bool brw_nir_apply_attribute_workarounds(nir_shader *nir,
 
 bool brw_nir_apply_trig_workarounds(nir_shader *nir);
 
+bool brw_nir_limit_trig_input_range_workaround(nir_shader *nir);
+
 void brw_nir_apply_tcs_quads_workaround(nir_shader *nir);
 
 void brw_nir_apply_key(nir_shader *nir,
diff --git a/src/intel/compiler/brw_nir_trig_workarounds.py b/src/intel/compiler/brw_nir_trig_workarounds.py
index 7425ff4c6ec..5d6a7601d31 100644
--- a/src/intel/compiler/brw_nir_trig_workarounds.py
+++ b/src/intel/compiler/brw_nir_trig_workarounds.py
@@ -33,12 +33,17 @@
 
 import argparse
 import sys
+from math import pi
 
 TRIG_WORKAROUNDS = [
     (('fsin', 'x(is_not_const)'), ('fmul', ('fsin', 'x'), 0.99997)),
     (('fcos', 'x(is_not_const)'), ('fmul', ('fcos', 'x'), 0.99997)),
 ]
 
+LIMIT_TRIG_INPUT_RANGE_WORKAROUND = [
+    (('fsin', 'x(is_not_const)'), ('fsin', ('fmod', 'x', 2.0 * pi))),
+    (('fcos', 'x(is_not_const)'), ('fcos', ('fmod', 'x', 2.0 * pi))),
+]
 
 def main():
     parser = argparse.ArgumentParser()
@@ -54,6 +59,8 @@ def run():
     print('#include "brw_nir.h"')
     print(nir_algebraic.AlgebraicPass("brw_nir_apply_trig_workarounds",
                                       TRIG_WORKAROUNDS).render())
+    print(nir_algebraic.AlgebraicPass("brw_nir_limit_trig_input_range_workaround",
+                                      LIMIT_TRIG_INPUT_RANGE_WORKAROUND).render())
 
 
 if __name__ == '__main__':
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index 41f7486feeb..45c2e2b7a34 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -74,6 +74,7 @@ static const driOptionDescription anv_dri_options[] = {
    DRI_CONF_SECTION_DEBUG
       DRI_CONF_ALWAYS_FLUSH_CACHE(false)
       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
+      DRI_CONF_LIMIT_TRIG_INPUT_RANGE(false)
    DRI_CONF_SECTION_END
 };
 
@@ -1104,6 +1105,8 @@ anv_init_dri_options(struct anv_instance *instance)
 
     instance->assume_full_subgroups =
             driQueryOptionb(&instance->dri_options, "anv_assume_full_subgroups");
+    instance->limit_trig_input_range =
+            driQueryOptionb(&instance->dri_options, "limit_trig_input_range");
 }
 
 VkResult anv_CreateInstance(
diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c
index 06e3c872ab3..8d4238f8bec 100644
--- a/src/intel/vulkan/anv_pipeline.c
+++ b/src/intel/vulkan/anv_pipeline.c
@@ -314,26 +314,28 @@ populate_sampler_prog_key(const struct intel_device_info *devinfo,
 }
 
 static void
-populate_base_prog_key(const struct intel_device_info *devinfo,
+populate_base_prog_key(const struct anv_device *device,
                        enum brw_subgroup_size_type subgroup_size_type,
                        bool robust_buffer_acccess,
                        struct brw_base_prog_key *key)
 {
    key->subgroup_size_type = subgroup_size_type;
    key->robust_buffer_access = robust_buffer_acccess;
+   key->limit_trig_input_range =
+      device->physical->instance->limit_trig_input_range;
 
-   populate_sampler_prog_key(devinfo, &key->tex);
+   populate_sampler_prog_key(&device->info, &key->tex);
 }
 
 static void
-populate_vs_prog_key(const struct intel_device_info *devinfo,
+populate_vs_prog_key(const struct anv_device *device,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      struct brw_vs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type,
+   populate_base_prog_key(device, subgroup_size_type,
                           robust_buffer_acccess, &key->base);
 
    /* XXX: Handle vertex input work-arounds */
@@ -342,7 +344,7 @@ populate_vs_prog_key(const struct intel_device_info *devinfo,
 }
 
 static void
-populate_tcs_prog_key(const struct intel_device_info *devinfo,
+populate_tcs_prog_key(const struct anv_device *device,
                       enum brw_subgroup_size_type subgroup_size_type,
                       bool robust_buffer_acccess,
                       unsigned input_vertices,
@@ -350,33 +352,33 @@ populate_tcs_prog_key(const struct intel_device_info *devinfo,
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type,
+   populate_base_prog_key(device, subgroup_size_type,
                           robust_buffer_acccess, &key->base);
 
    key->input_vertices = input_vertices;
 }
 
 static void
-populate_tes_prog_key(const struct intel_device_info *devinfo,
+populate_tes_prog_key(const struct anv_device *device,
                       enum brw_subgroup_size_type subgroup_size_type,
                       bool robust_buffer_acccess,
                       struct brw_tes_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type,
+   populate_base_prog_key(device, subgroup_size_type,
                           robust_buffer_acccess, &key->base);
 }
 
 static void
-populate_gs_prog_key(const struct intel_device_info *devinfo,
+populate_gs_prog_key(const struct anv_device *device,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      struct brw_gs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type,
+   populate_base_prog_key(device, subgroup_size_type,
                           robust_buffer_acccess, &key->base);
 }
 
@@ -436,25 +438,25 @@ pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
 }
 
 static void
-populate_task_prog_key(const struct intel_device_info *devinfo,
+populate_task_prog_key(const struct anv_device *device,
                        enum brw_subgroup_size_type subgroup_size_type,
                        bool robust_buffer_access,
                        struct brw_task_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
+   populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base);
 }
 
 static void
-populate_mesh_prog_key(const struct intel_device_info *devinfo,
+populate_mesh_prog_key(const struct anv_device *device,
                        enum brw_subgroup_size_type subgroup_size_type,
                        bool robust_buffer_access,
                        struct brw_mesh_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type, robust_buffer_access, &key->base);
+   populate_base_prog_key(device, subgroup_size_type, robust_buffer_access, &key->base);
 }
 
 static void
@@ -467,11 +469,10 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                      struct brw_wm_prog_key *key)
 {
    const struct anv_device *device = pipeline->base.device;
-   const struct intel_device_info *devinfo = &device->info;
 
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, flags, robust_buffer_acccess, &key->base);
+   populate_base_prog_key(device, flags, robust_buffer_acccess, &key->base);
 
    /* We set this to 0 here and set to the actual value before we call
     * brw_compile_fs.
@@ -515,26 +516,26 @@ populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
 }
 
 static void
-populate_cs_prog_key(const struct intel_device_info *devinfo,
+populate_cs_prog_key(const struct anv_device *device,
                      enum brw_subgroup_size_type subgroup_size_type,
                      bool robust_buffer_acccess,
                      struct brw_cs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, subgroup_size_type,
+   populate_base_prog_key(device, subgroup_size_type,
                           robust_buffer_acccess, &key->base);
 }
 
 static void
-populate_bs_prog_key(const struct intel_device_info *devinfo,
+populate_bs_prog_key(const struct anv_device *device,
                      VkPipelineShaderStageCreateFlags flags,
                      bool robust_buffer_access,
                      struct brw_bs_prog_key *key)
 {
    memset(key, 0, sizeof(*key));
 
-   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
+   populate_base_prog_key(device, flags, robust_buffer_access, &key->base);
 }
 
 struct anv_pipeline_stage {
@@ -1466,26 +1467,26 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
       enum brw_subgroup_size_type subgroup_size_type =
          anv_subgroup_size_type(stage, stages[stage].module, sinfo->flags, rss_info);
 
-      const struct intel_device_info *devinfo = &pipeline->base.device->info;
+      const struct anv_device *device = pipeline->base.device;
       switch (stage) {
       case MESA_SHADER_VERTEX:
-         populate_vs_prog_key(devinfo, subgroup_size_type,
+         populate_vs_prog_key(device, subgroup_size_type,
                               pipeline->base.device->robust_buffer_access,
                               &stages[stage].key.vs);
          break;
       case MESA_SHADER_TESS_CTRL:
-         populate_tcs_prog_key(devinfo, subgroup_size_type,
+         populate_tcs_prog_key(device, subgroup_size_type,
                                pipeline->base.device->robust_buffer_access,
                                info->pTessellationState->patchControlPoints,
                                &stages[stage].key.tcs);
          break;
       case MESA_SHADER_TESS_EVAL:
-         populate_tes_prog_key(devinfo, subgroup_size_type,
+         populate_tes_prog_key(device, subgroup_size_type,
                                pipeline->base.device->robust_buffer_access,
                                &stages[stage].key.tes);
          break;
       case MESA_SHADER_GEOMETRY:
-         populate_gs_prog_key(devinfo, subgroup_size_type,
+         populate_gs_prog_key(device, subgroup_size_type,
                               pipeline->base.device->robust_buffer_access,
                               &stages[stage].key.gs);
          break;
@@ -1503,12 +1504,12 @@ anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
          break;
       }
       case MESA_SHADER_TASK:
-         populate_task_prog_key(devinfo, subgroup_size_type,
+         populate_task_prog_key(device, subgroup_size_type,
                                 pipeline->base.device->robust_buffer_access,
                                 &stages[stage].key.task);
          break;
       case MESA_SHADER_MESH:
-         populate_mesh_prog_key(devinfo, subgroup_size_type,
+         populate_mesh_prog_key(device, subgroup_size_type,
                                 pipeline->base.device->robust_buffer_access,
                                 &stages[stage].key.mesh);
          break;
@@ -1947,7 +1948,7 @@ anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
    const enum brw_subgroup_size_type subgroup_size_type =
       anv_subgroup_size_type(MESA_SHADER_COMPUTE, stage.module, info->stage.flags, rss_info);
 
-   populate_cs_prog_key(&device->info, subgroup_size_type,
+   populate_cs_prog_key(device, subgroup_size_type,
                         device->robust_buffer_access,
                         &stage.key.cs);
 
@@ -2749,7 +2750,7 @@ anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
          },
       };
 
-      populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
+      populate_bs_prog_key(pipeline->base.device, sinfo->flags,
                            pipeline->base.device->robust_buffer_access,
                            &stages[i].key.bs);
 
diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
index 3113cbdcd1e..5bb91ea8701 100644
--- a/src/intel/vulkan/anv_private.h
+++ b/src/intel/vulkan/anv_private.h
@@ -1085,6 +1085,7 @@ struct anv_instance {
      * Workarounds for game bugs.
      */
     bool                                        assume_full_subgroups;
+    bool                                        limit_trig_input_range;
 };
 
 VkResult anv_init_wsi(struct anv_physical_device *physical_device);
diff --git a/src/util/driconf.h b/src/util/driconf.h
index a5ca3f1d6c7..068ed227af0 100644
--- a/src/util/driconf.h
+++ b/src/util/driconf.h
@@ -291,6 +291,10 @@
    DRI_CONF_OPT_B(vk_dont_care_as_load, def, \
                   "Treat VK_ATTACHMENT_LOAD_OP_DONT_CARE as LOAD_OP_LOAD, workaround on tiler GPUs for games that confuse these two load ops")
 
+#define DRI_CONF_LIMIT_TRIG_INPUT_RANGE(def) \
+   DRI_CONF_OPT_B(limit_trig_input_range, def, \
+                  "Limit trig input range to [-2p : 2p] to improve sin/cos calculation precision on Intel")
+
 /**
  * \brief Image quality-related options
  */



More information about the mesa-commit mailing list