Mesa (main): r300: Request NIR shaders from mesa/st and use NIR-to-TGSI.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Dec 10 13:56:25 UTC 2021


Module: Mesa
Branch: main
Commit: 7d2ea9b0edef2176140629ac3dee6a6809c4abe2
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7d2ea9b0edef2176140629ac3dee6a6809c4abe2

Author: Emma Anholt <emma at anholt.net>
Date:   Mon Dec  6 12:11:43 2021 -0800

r300: Request NIR shaders from mesa/st and use NIR-to-TGSI.

This brings us into parity on state tracker paths with most other
supported drivers, and gets us a lot of additional optimization on our shaders.

Results on a subset of shader-db that doesn't crash:

instructions in affected programs: 59502 -> 47991 (-19.35%)
vinst in affected programs: 17633 -> 15197 (-13.82%)
sinst in affected programs: 9296 -> 7319 (-21.27%)
flowcontrol in affected programs: 627 -> 310 (-50.56%)
presub in affected programs: 4220 -> 1554 (-63.18%)
temps in affected programs: 5775 -> 8570 (48.40%)
lits in affected programs: 215 -> 37 (-82.79%)

The temps (register usage) increase is unfortunate, but it seems that
instruction counts tend to be our limit before reg counts are.

Fixes: #3354
Reviewed-by: Marek Olšák <marek.olsak at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14096>

---

 src/gallium/drivers/r300/ci/r300-rv515-fails.txt |  96 +++++------------
 src/gallium/drivers/r300/ci/r300-rv515-skips.txt |   3 +
 src/gallium/drivers/r300/meson.build             |   2 +-
 src/gallium/drivers/r300/r300_debug.c            |   1 +
 src/gallium/drivers/r300/r300_screen.c           | 131 ++++++++++++++++++++++-
 src/gallium/drivers/r300/r300_screen.h           |   2 +-
 src/gallium/drivers/r300/r300_state.c            |  19 +++-
 7 files changed, 179 insertions(+), 75 deletions(-)

diff --git a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
index fa875f06d89..c395c114c9e 100644
--- a/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
+++ b/src/gallium/drivers/r300/ci/r300-rv515-fails.txt
@@ -36,45 +36,14 @@ dEQP-GLES2.functional.rasterization.primitives.lines_wide,Fail
 dEQP-GLES2.functional.rasterization.primitives.line_strip_wide,Fail,Fail
 dEQP-GLES2.functional.rasterization.primitives.line_loop_wide,Fail
 
-# "Unknown opcode IF"
 dEQP-GLES2.functional.shaders.functions.control_flow.mixed_return_break_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.functions.control_flow.return_in_nested_loop_vertex,Fail
-dEQP-GLES2.functional.shaders.functions.control_flow.return_in_loop_if_vertex,Fail
 
-# "Ran out of temporary registers"
+# "No free temporary to use for predicate stack counter."
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_loop_read_vertex,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_loop_read_vertex,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_loop_read_vertex,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_loop_read_vertex,Fail
 
-# "Vertex program has too many instructions"
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_loop_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_read_vertex,Fail
-
-# "emit_alu: Too many instructions"
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_static_loop_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_static_loop_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_static_loop_read_fragment,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_static_loop_read_fragment,Fail
-
-# "Ran out of temporary registers"
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec2_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec3_const_write_dynamic_read_vertex,Fail
-dEQP-GLES2.functional.shaders.indexing.tmp_array.vec4_const_write_dynamic_read_vertex,Fail
-
-dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_dynamic_read_fragment,Fail
-
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_dynamic_read_vertex,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_static_loop_read_vertex,Fail
 dEQP-GLES2.functional.shaders.indexing.tmp_array.float_dynamic_loop_write_static_read_vertex,Fail
@@ -141,18 +110,34 @@ dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_loop_subscr
 dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_loop_subscript_write_static_loop_subscript_read_vertex,Fail
 dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_loop_subscript_write_dynamic_loop_subscript_read_vertex,Fail
 
+# "Rewrite of inst 0 failed Can't allocate source for Inst 4 src_type=1 new_index=1 new_mask=1"
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_component_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_direct_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_dynamic_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec2_dynamic_subscript_write_static_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_component_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_direct_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_dynamic_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_dynamic_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec3_dynamic_subscript_write_static_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_component_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_direct_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_dynamic_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_dynamic_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_static_loop_subscript_read_fragment,Fail
+dEQP-GLES2.functional.shaders.indexing.vector_subscript.vec4_dynamic_subscript_write_static_subscript_read_fragment,Fail
+
 # Bus error
 dEQP-GLES2.functional.shaders.loops.for_dynamic_iterations.sequence_vertex,Crash
 dEQP-GLES2.functional.shaders.loops.for_uniform_iterations.sequence_vertex,Crash
-dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.sequence_vertex,Crash
 dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.sequence_vertex,Crash
 dEQP-GLES2.functional.shaders.loops.do_while_uniform_iterations.sequence_vertex,Crash
 dEQP-GLES2.functional.shaders.loops.while_constant_iterations.sequence_vertex,Crash
 dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.sequence_vertex,Crash
 dEQP-GLES2.functional.shaders.loops.while_uniform_iterations.sequence_vertex,Crash
 
-dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.conditional_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.double_continue_vertex,Fail
 dEQP-GLES2.functional.shaders.loops.do_while_constant_iterations.mixed_break_continue_vertex,Fail
 dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.conditional_continue_vertex,Fail
 dEQP-GLES2.functional.shaders.loops.do_while_dynamic_iterations.double_continue_vertex,Fail
@@ -163,24 +148,10 @@ dEQP-GLES2.functional.shaders.loops.do_while_uniform_iterations.mixed_break_cont
 
 dEQP-GLES2.functional.shaders.loops.for_constant_iterations.mixed_break_continue_vertex,Fail
 dEQP-GLES2.functional.shaders.loops.for_dynamic_iterations.mixed_break_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.for_dynamic_iterations.unconditional_break_vertex,Fail
 dEQP-GLES2.functional.shaders.loops.for_uniform_iterations.mixed_break_continue_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.for_uniform_iterations.unconditional_break_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.while_constant_iterations.unconditional_break_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.while_dynamic_iterations.unconditional_break_vertex,Fail
-dEQP-GLES2.functional.shaders.loops.while_uniform_iterations.unconditional_break_vertex,Fail
 
-# "Unknown opcode IF"
-dEQP-GLES2.functional.shaders.return.return_in_dynamic_loop_always_vertex,Fail
 dEQP-GLES2.functional.shaders.return.return_in_dynamic_loop_dynamic_vertex,Fail
 
-dEQP-GLES2.functional.shaders.return.return_in_dynamic_loop_always_fragment,Fail
-
-# FS: POW channel looks good, the rest got trashed though?
-dEQP-GLES2.functional.shaders.operator.exponential.pow.highp_float_fragment,Fail
-dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_float_fragment,Fail
-
-dEQP-GLES2.functional.shaders.random.swizzle.fragment.24,Fail
 dEQP-GLES2.functional.shaders.random.texture.fragment.141,Fail
 
 # VS: Only the first channel of a POW result is right it looks like.
@@ -191,17 +162,9 @@ dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_vec2_vertex,Fail
 dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_vec3_vertex,Fail
 dEQP-GLES2.functional.shaders.operator.exponential.pow.mediump_vec4_vertex,Fail
 
-# "No free temporary to use for predicate stack counter."
 dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_struct_array_vertex,Fail
-dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_nested_struct_array_vertex,Fail
-
-# "Rewrite of inst 1 failed Can't allocate source for Inst 17 src_type=1 new_index=1 new_mask=2"
-dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_struct_array_fragment,Fail
 dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_nested_struct_array_fragment,Fail
 
-dEQP-GLES2.functional.shaders.struct.uniform.dynamic_loop_struct_array_vertex,Fail
-dEQP-GLES2.functional.shaders.struct.uniform.dynamic_loop_struct_array_fragment,Fail
-
 dEQP-GLES2.functional.texture.format.a8_cube_npot,Fail
 dEQP-GLES2.functional.texture.format.l8_cube_npot,Fail
 dEQP-GLES2.functional.texture.format.la88_cube_npot,Fail
@@ -253,15 +216,8 @@ dEQP-GLES2.functional.texture.specification.teximage2d_align.cube_rgba8888_47_2,
 dEQP-GLES2.functional.texture.specification.teximage2d_align.cube_rgba8888_47_4,Fail
 dEQP-GLES2.functional.texture.specification.teximage2d_align.cube_rgba8888_47_8,Fail
 
-# "emit_tex: Too many instructionsUsing a dummy shader instead."
-dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.mat4_mat2_both,Fail
-dEQP-GLES2.functional.uniform_api.value.initial.render.nested_structs_arrays.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.initial.render.nested_structs_arrays.mat4_mat2_both,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.array_in_struct.mat4_mat2_both,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_fragment,Fail
-dEQP-GLES2.functional.uniform_api.value.assigned.by_pointer.render.nested_structs_arrays.mat4_mat2_both,Fail
+dEQP-GLES2.functional.uniform_api.value.initial.render.basic_array.vec4_both,Fail
+dEQP-GLES2.functional.uniform_api.value.initial.render.array_in_struct.float_vec4_both,Fail
 
 KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component16,Fail
 KHR-GLES2.core.internalformat.texture2d.depth_component_unsigned_int_depth_component24,Fail
@@ -566,6 +522,12 @@ spec at arb_color_buffer_float@gl_rgba32f-render,Fail
 spec at arb_color_buffer_float@gl_rgba8_snorm-render-sanity,Fail
 spec at arb_framebuffer_object@fbo-blit-stretch,Fail
 spec at arb_framebuffer_object@fbo-drawbuffers-none glclear,Fail
+
+# The test accidentally assigns a varying output instead of an attribute input to
+# gl_Position, then it gets optimized out and then set_vertex_inputs_outputs gets
+# angry that nobody set gl_Position.
+spec at arb_separate_shader_objects@getprogrampipelineiv,Crash
+
 spec at arb_shader_texture_lod@execution at tex-miplevel-selection *gradarb 2d,Fail
 spec at arb_shader_texture_lod@execution at tex-miplevel-selection *lod 2dshadow,Fail
 spec at arb_shader_texture_lod@execution at tex-miplevel-selection *projgradarb 2d,Fail
@@ -601,8 +563,8 @@ spec at glsl-1.10@execution at interpolation@interpolation-none-gl_backcolor-smooth-ve
 spec at glsl-1.10@execution at interpolation@interpolation-none-gl_frontcolor-flat-vertex,Fail
 spec at glsl-1.10@execution at interpolation@interpolation-none-gl_frontsecondarycolor-smooth-vertex,Fail
 spec at glsl-1.10@execution at loops@glsl-vs-continue-inside-do-while,Fail
-spec at glsl-1.10@execution at vs-loop-complex-unroll-nested-break,Fail
 spec at glsl-1.20@execution at clipping@vs-clip-vertex-primitives,Fail
+spec at glsl-1.20@execution at fs-function-inout-array-of-structs,Fail
 spec at glsl-1.20@execution at tex-miplevel-selection gl2:textureproj 1dshadow,Fail
 spec at glsl-1.20@execution at tex-miplevel-selection gl2:textureproj(bias) 1d,Fail
 spec at glsl-1.20@execution at vs-nan-builtin-max,Fail
diff --git a/src/gallium/drivers/r300/ci/r300-rv515-skips.txt b/src/gallium/drivers/r300/ci/r300-rv515-skips.txt
index 86ef027fc02..c309d409ab2 100644
--- a/src/gallium/drivers/r300/ci/r300-rv515-skips.txt
+++ b/src/gallium/drivers/r300/ci/r300-rv515-skips.txt
@@ -8,3 +8,6 @@ shaders at glsl-predication-on-large-array
 
 # I'm running it with gl_rgba8_snorm
 glx@
+
+# GPU hang
+dEQP-GLES2.functional.shaders.struct.local.dynamic_loop_nested_struct_array_vertex
diff --git a/src/gallium/drivers/r300/meson.build b/src/gallium/drivers/r300/meson.build
index 41e09bd56fc..226c374e477 100644
--- a/src/gallium/drivers/r300/meson.build
+++ b/src/gallium/drivers/r300/meson.build
@@ -126,7 +126,7 @@ libr300 = static_library(
     inc_mesa,
   ],
   gnu_symbol_visibility : 'hidden',
-  dependencies : [dep_libdrm_radeon, dep_llvm, idep_mesautil],
+  dependencies : [dep_libdrm_radeon, dep_llvm, idep_mesautil, idep_nir],
 )
 
 driver_r300 = declare_dependency(
diff --git a/src/gallium/drivers/r300/r300_debug.c b/src/gallium/drivers/r300/r300_debug.c
index c86577cd251..bb595b26b1a 100644
--- a/src/gallium/drivers/r300/r300_debug.c
+++ b/src/gallium/drivers/r300/r300_debug.c
@@ -50,6 +50,7 @@ static const struct debug_named_value r300_debug_options[] = {
     { "nozmask", DBG_NO_ZMASK, "Disable zbuffer compression" },
     { "nohiz", DBG_NO_HIZ, "Disable hierarchical zbuffer" },
     { "nocmask", DBG_NO_CMASK, "Disable AA compression and fast AA clear" },
+    { "use_tgsi", DBG_USE_TGSI, "Request TGSI shaders from the state tracker" },
 
     /* must be last */
     DEBUG_NAMED_VALUE_END
diff --git a/src/gallium/drivers/r300/r300_screen.c b/src/gallium/drivers/r300/r300_screen.c
index 56353c47720..89502d46ff6 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -21,6 +21,7 @@
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE. */
 
+#include "compiler/nir/nir.h"
 #include "util/format/u_format.h"
 #include "util/format/u_format_s3tc.h"
 #include "util/u_screen.h"
@@ -315,9 +316,9 @@ static int r300_get_shader_param(struct pipe_screen *pscreen,
         case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
             return 32;
         case PIPE_SHADER_CAP_PREFERRED_IR:
-            return PIPE_SHADER_IR_TGSI;
+            return (r300screen->debug & DBG_USE_TGSI) ? PIPE_SHADER_IR_TGSI : PIPE_SHADER_IR_NIR;
         case PIPE_SHADER_CAP_SUPPORTED_IRS:
-            return 1 << PIPE_SHADER_IR_TGSI;
+            return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
         }
         break;
     case PIPE_SHADER_VERTEX:
@@ -391,9 +392,9 @@ static int r300_get_shader_param(struct pipe_screen *pscreen,
         case PIPE_SHADER_CAP_MAX_UNROLL_ITERATIONS_HINT:
             return 32;
         case PIPE_SHADER_CAP_PREFERRED_IR:
-            return PIPE_SHADER_IR_TGSI;
+            return (r300screen->debug & DBG_USE_TGSI) ? PIPE_SHADER_IR_TGSI : PIPE_SHADER_IR_NIR;
         case PIPE_SHADER_CAP_SUPPORTED_IRS:
-            return 1 << PIPE_SHADER_IR_TGSI;
+            return (1 << PIPE_SHADER_IR_NIR) | (1 << PIPE_SHADER_IR_TGSI);
         }
         break;
     default:
@@ -472,6 +473,127 @@ static int r300_get_video_param(struct pipe_screen *screen,
    }
 }
 
+static const nir_shader_compiler_options r500_vs_compiler_options = {
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .lower_bitops = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_fdiv = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_fdph = true,
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_fmod = true,
+   .lower_rotate = true,
+   .lower_uniforms_to_ubo = true,
+   .lower_vector_cmp = true,
+
+   /* Have HW loops support and 1024 max instr count, but don't unroll *too*
+    * hard.
+    */
+   .max_unroll_iterations = 32,
+
+   .use_interpolated_input_intrinsics = true,
+};
+
+static const nir_shader_compiler_options r500_fs_compiler_options = {
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .lower_bitops = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_fdiv = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_fdph = true,
+   .lower_fpow = true, /* POW is only in the VS */
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_fmod = true,
+   .lower_rotate = true,
+   .lower_uniforms_to_ubo = true,
+   .lower_vector_cmp = true,
+
+   /* Have HW loops support and 512 max instr count, but don't unroll *too*
+    * hard.
+    */
+   .max_unroll_iterations = 32,
+
+   .use_interpolated_input_intrinsics = true,
+};
+
+static const nir_shader_compiler_options r300_vs_compiler_options = {
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .lower_bitops = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_fdiv = true,
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_fdph = true,
+   .lower_fsat = true, /* No fsat in pre-r500 VS */
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_fmod = true,
+   .lower_rotate = true,
+   .lower_uniforms_to_ubo = true,
+   .lower_vector_cmp = true,
+
+   /* Note: has HW loops support, but only 256 ALU instructions. */
+   .max_unroll_iterations = 32,
+
+   .use_interpolated_input_intrinsics = true,
+};
+
+static const nir_shader_compiler_options r300_fs_compiler_options = {
+   .fuse_ffma32 = true,
+   .fuse_ffma64 = true,
+   .lower_bitops = true,
+   .lower_extract_byte = true,
+   .lower_extract_word = true,
+   .lower_fdiv = true,
+   .lower_fpow = true, /* POW is only in the VS */
+   .lower_insert_byte = true,
+   .lower_insert_word = true,
+   .lower_fdph = true,
+   .lower_flrp32 = true,
+   .lower_flrp64 = true,
+   .lower_fmod = true,
+   .lower_rotate = true,
+   .lower_uniforms_to_ubo = true,
+   .lower_vector_cmp = true,
+
+    /* No HW loops support, so set it equal to ALU instr max */
+   .max_unroll_iterations = 64,
+
+   .use_interpolated_input_intrinsics = true,
+};
+
+static const void *
+r300_get_compiler_options(struct pipe_screen *pscreen,
+                          enum pipe_shader_ir ir,
+                          enum pipe_shader_type shader)
+{
+   struct r300_screen* r300screen = r300_screen(pscreen);
+
+   assert(ir == PIPE_SHADER_IR_NIR);
+
+   if (r300screen->caps.is_r500) {
+      if (shader == PIPE_SHADER_VERTEX)
+         return &r500_vs_compiler_options;
+       else
+         return &r500_fs_compiler_options;
+   } else {
+      if (shader == PIPE_SHADER_VERTEX)
+         return &r300_vs_compiler_options;
+       else
+         return &r300_fs_compiler_options;
+   }
+}
+
 /**
  * Whether the format matches:
  *   PIPE_FORMAT_?10?10?10?2_UNORM
@@ -734,6 +856,7 @@ struct pipe_screen* r300_screen_create(struct radeon_winsys *rws,
     r300screen->screen.destroy = r300_destroy_screen;
     r300screen->screen.get_name = r300_get_name;
     r300screen->screen.get_vendor = r300_get_vendor;
+    r300screen->screen.get_compiler_options = r300_get_compiler_options;
     r300screen->screen.get_device_vendor = r300_get_device_vendor;
     r300screen->screen.get_disk_shader_cache = r300_get_disk_shader_cache;
     r300screen->screen.get_param = r300_get_param;
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 09332b3adcc..1fe9e861308 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -103,8 +103,8 @@ radeon_winsys(struct pipe_screen *screen) {
 #define DBG_NO_ZMASK    (1 << 21)
 #define DBG_NO_HIZ      (1 << 22)
 #define DBG_NO_CMASK    (1 << 23)
+#define DBG_USE_TGSI    (1 << 24)
 /*@}*/
-
 static inline boolean SCREEN_DBG_ON(struct r300_screen * screen, unsigned flags)
 {
     return (screen->debug & flags) ? TRUE : FALSE;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 39c442802af..4c9df7588bb 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -46,6 +46,7 @@
 #include "r300_fs.h"
 #include "r300_texture.h"
 #include "r300_vs.h"
+#include "nir/nir_to_tgsi.h"
 
 /* r300_state: Functions used to initialize state context by translating
  * Gallium state objects into semi-native r300 state objects. */
@@ -1041,7 +1042,14 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
 
     /* Copy state directly into shader. */
     fs->state = *shader;
-    fs->state.tokens = tgsi_dup_tokens(shader->tokens);
+
+    if (fs->state.type == PIPE_SHADER_IR_NIR) {
+       fs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen);
+    } else {
+       assert(fs->state.type == PIPE_SHADER_IR_TGSI);
+       /* we need to keep a local copy of the tokens */
+       fs->state.tokens = tgsi_dup_tokens(fs->state.tokens);
+    }
 
     /* Precompile the fragment shader at creation time to avoid jank at runtime.
      * In most cases we won't have anything in the key at draw time.
@@ -1925,7 +1933,14 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
 
     /* Copy state directly into shader. */
     vs->state = *shader;
-    vs->state.tokens = tgsi_dup_tokens(shader->tokens);
+
+    if (vs->state.type == PIPE_SHADER_IR_NIR) {
+       vs->state.tokens = nir_to_tgsi(shader->ir.nir, pipe->screen);
+    } else {
+       assert(vs->state.type == PIPE_SHADER_IR_TGSI);
+       /* we need to keep a local copy of the tokens */
+       vs->state.tokens = tgsi_dup_tokens(vs->state.tokens);
+    }
 
     if (r300->screen->caps.has_tcl) {
         r300_init_vs_outputs(r300, vs);



More information about the mesa-commit mailing list