Mesa (master): radv/gfx10: add an option to switch from legacy to NGG streamout

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Sep 16 10:12:47 UTC 2019


Module: Mesa
Branch: master
Commit: a15b3bcf1a64c79dc5c0c61841cd9b23359b1e6f
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a15b3bcf1a64c79dc5c0c61841cd9b23359b1e6f

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Mon Sep  9 10:23:30 2019 +0200

radv/gfx10: add an option to switch from legacy to NGG streamout

This internal option is turned off by default because NGG streamout
still hangs. The hangs seem to be related to GDS, as with RadeonSI.

That option will be turned on once all issues are resolved.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_cmd_buffer.c  |  8 +++++---
 src/amd/vulkan/radv_device.c      |  2 ++
 src/amd/vulkan/radv_nir_to_llvm.c | 12 +++++++++---
 src/amd/vulkan/radv_pipeline.c    | 25 +++++++++++++------------
 src/amd/vulkan/radv_private.h     |  3 +++
 src/amd/vulkan/radv_shader.c      | 16 ++++++++++------
 src/amd/vulkan/radv_shader.h      |  1 +
 7 files changed, 43 insertions(+), 24 deletions(-)

diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 7baa0b3aa36..94329a2a500 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -222,7 +222,8 @@ radv_bind_streamout_state(struct radv_cmd_buffer *cmd_buffer,
 	struct radv_streamout_state *so = &cmd_buffer->state.streamout;
 	struct radv_shader_info *info;
 
-	if (!pipeline->streamout_shader)
+	if (!pipeline->streamout_shader ||
+	    cmd_buffer->device->physical_device->use_ngg_streamout)
 		return;
 
 	info = &pipeline->streamout_shader->info;
@@ -5810,8 +5811,9 @@ radv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
 			      (so->enabled_mask << 8) |
 			      (so->enabled_mask << 12);
 
-	if ((old_streamout_enabled != so->streamout_enabled) ||
-	    (old_hw_enabled_mask != so->hw_enabled_mask))
+	if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
+	    ((old_streamout_enabled != so->streamout_enabled) ||
+	     (old_hw_enabled_mask != so->hw_enabled_mask)))
 		radv_emit_streamout_enable(cmd_buffer);
 }
 
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index bdc38a555de..53a08bcdc5a 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -364,6 +364,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 	device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
 				    device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
 
+	device->use_ngg_streamout = false;
+
 	/* Determine the number of threads per wave for all stages. */
 	device->cs_wave_size = 64;
 	device->ps_wave_size = 64;
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 88c0c514eae..d9c91f0591b 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -771,6 +771,9 @@ declare_streamout_sgprs(struct radv_shader_context *ctx, gl_shader_stage stage,
 {
 	int i;
 
+	if (ctx->options->use_ngg_streamout)
+		return;
+
 	/* Streamout SGPRs. */
 	if (ctx->shader_info->so.num_outputs) {
 		assert(stage == MESA_SHADER_VERTEX ||
@@ -2786,7 +2789,8 @@ handle_vs_outputs_post(struct radv_shader_context *ctx,
 	       sizeof(outinfo->vs_output_param_offset));
 	outinfo->pos_exports = 0;
 
-	if (ctx->shader_info->so.num_outputs &&
+	if (!ctx->options->use_ngg_streamout &&
+	    ctx->shader_info->so.num_outputs &&
 	    !ctx->is_gs_copy_shader) {
 		/* The GS copy shader emission already emits streamout. */
 		radv_emit_streamout(ctx, 0);
@@ -4479,7 +4483,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
 	LLVMValueRef stream_id;
 
 	/* Fetch the vertex stream ID. */
-	if (ctx->shader_info->so.num_outputs) {
+	if (!ctx->options->use_ngg_streamout &&
+	    ctx->shader_info->so.num_outputs) {
 		stream_id =
 			ac_unpack_param(&ctx->ac, ctx->streamout_config, 24, 2);
 	} else {
@@ -4550,7 +4555,8 @@ ac_gs_copy_shader_emit(struct radv_shader_context *ctx)
 			}
 		}
 
-		if (ctx->shader_info->so.num_outputs)
+		if (!ctx->options->use_ngg_streamout &&
+		    ctx->shader_info->so.num_outputs)
 			radv_emit_streamout(ctx, stream);
 
 		if (stream == 0) {
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 054f6ac36f8..48ea2c03929 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2350,20 +2350,21 @@ radv_fill_shader_keys(struct radv_device *device,
 				keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
 		}
 
-		/* TODO: Implement streamout support for NGG. */
-		gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
+		if (!device->physical_device->use_ngg_streamout) {
+			gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX;
 
-		for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
-			if (nir[i])
-				last_xfb_stage = i;
-		}
+			for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
+				if (nir[i])
+					last_xfb_stage = i;
+			}
 
-		if (nir[last_xfb_stage] &&
-		    radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
-			if (nir[MESA_SHADER_TESS_CTRL])
-				keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
-			else
-				keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+			if (nir[last_xfb_stage] &&
+			    radv_nir_stage_uses_xfb(nir[last_xfb_stage])) {
+				if (nir[MESA_SHADER_TESS_CTRL])
+					keys[MESA_SHADER_TESS_EVAL].vs_common_out.as_ngg = false;
+				else
+					keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg = false;
+			}
 		}
 	}
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index d6c446abd06..8b612155621 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -288,6 +288,9 @@ struct radv_physical_device {
 	/* Whether to enable the AMD_shader_ballot extension */
 	bool use_shader_ballot;
 
+	/* Whether to enable NGG streamout. */
+	bool use_ngg_streamout;
+
 	/* Number of threads per wave. */
 	uint8_t ps_wave_size;
 	uint8_t cs_wave_size;
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 473b6b0032f..c8dd54fae53 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -683,12 +683,15 @@ static void radv_postprocess_config(const struct radv_physical_device *pdevice,
 	config_out->float_mode |= V_00B028_FP_64_DENORMS;
 
 	config_out->rsrc2 = S_00B12C_USER_SGPR(info->num_user_sgprs) |
-			    S_00B12C_SCRATCH_EN(scratch_enabled) |
-			    S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
-			    S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
-			    S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
-			    S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
-			    S_00B12C_SO_EN(!!info->so.num_outputs);
+			    S_00B12C_SCRATCH_EN(scratch_enabled);
+
+	if (!pdevice->use_ngg_streamout) {
+		config_out->rsrc2 |= S_00B12C_SO_BASE0_EN(!!info->so.strides[0]) |
+				     S_00B12C_SO_BASE1_EN(!!info->so.strides[1]) |
+				     S_00B12C_SO_BASE2_EN(!!info->so.strides[2]) |
+				     S_00B12C_SO_BASE3_EN(!!info->so.strides[3]) |
+				     S_00B12C_SO_EN(!!info->so.num_outputs);
+	}
 
 	config_out->rsrc1 = S_00B848_VGPRS((num_vgprs - 1) /
 					   (info->wave_size == 32 ? 8 : 4)) |
@@ -1050,6 +1053,7 @@ shader_variant_compile(struct radv_device *device,
 	options->tess_offchip_block_dw_size = device->tess_offchip_block_dw_size;
 	options->address32_hi = device->physical_device->rad_info.address32_hi;
 	options->has_ls_vgpr_init_bug = device->physical_device->rad_info.has_ls_vgpr_init_bug;
+	options->use_ngg_streamout = device->physical_device->use_ngg_streamout;
 
 	if ((stage == MESA_SHADER_GEOMETRY && !options->key.vs_common_out.as_ngg) ||
 	    gs_copy_shader)
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 61431cc9683..874318e7dc4 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -126,6 +126,7 @@ struct radv_nir_compiler_options {
 	bool record_llvm_ir;
 	bool check_ir;
 	bool has_ls_vgpr_init_bug;
+	bool use_ngg_streamout;
 	enum radeon_family family;
 	enum chip_class chip_class;
 	uint32_t tess_offchip_block_dw_size;




More information about the mesa-commit mailing list