Mesa (master): radv: Replace supports_spill with explict_scratch_args

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Nov 25 13:46:46 UTC 2019


Module: Mesa
Branch: master
Commit: e7f4cadd02ca88fd3af5a396c71a33e91088228b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e7f4cadd02ca88fd3af5a396c71a33e91088228b

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Tue Nov 12 15:38:46 2019 +0100

radv: Replace supports_spill with explict_scratch_args

The former was always true and hence dead code. We will want to
explicitly declare the ring offset register with ACO, but we also want
to declare the scratch offset too, and we can't try to disable it since
ACO also supports spilling and the determination of whether spilling has
to happen occurs well after setting up registers. So replace
supports_spill with something that will actually be used for ACO.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

---

 .../compiler/aco_instruction_selection_setup.cpp   | 22 +++-----
 src/amd/vulkan/radv_nir_to_llvm.c                  | 14 ++---
 src/amd/vulkan/radv_shader.c                       |  5 +-
 src/amd/vulkan/radv_shader.h                       |  2 +-
 src/amd/vulkan/radv_shader_args.c                  | 59 +++++++++++++---------
 src/amd/vulkan/radv_shader_args.h                  |  1 +
 6 files changed, 49 insertions(+), 54 deletions(-)

diff --git a/src/amd/compiler/aco_instruction_selection_setup.cpp b/src/amd/compiler/aco_instruction_selection_setup.cpp
index c3f2832fa8f..fbab89417cd 100644
--- a/src/amd/compiler/aco_instruction_selection_setup.cpp
+++ b/src/amd/compiler/aco_instruction_selection_setup.cpp
@@ -95,9 +95,6 @@ struct isel_context {
       bool exec_potentially_empty = false;
    } cf_info;
 
-   /* scratch */
-   bool scratch_enabled = false;
-
    /* inputs common for merged stages */
    Temp merged_wave_info = Temp(0, s1);
 
@@ -639,8 +636,7 @@ static void allocate_user_sgprs(isel_context *ctx,
       user_sgpr_info.need_ring_offsets = true;
 
    /* 2 user sgprs will nearly always be allocated for scratch/rings */
-   if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled)
-      user_sgpr_count += 2;
+   user_sgpr_count += 2;
 
    switch (ctx->stage) {
    case vertex_vs:
@@ -895,10 +891,8 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
    arg_info args = {};
 
    /* this needs to be in sgprs 0 and 1 */
-   if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets || ctx->scratch_enabled) {
-      add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
-      set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
-   }
+   add_arg(&args, s2, &ctx->program->private_segment_buffer, 0);
+   set_loc_shader_ptr(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_info.user_sgpr_idx);
 
    unsigned vgpr_idx = 0;
    switch (ctx->stage) {
@@ -928,8 +922,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       else
          declare_streamout_sgprs(ctx, &args, &idx);
 
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+      add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
 
       declare_vs_input_vgprs(ctx, &args);
       break;
@@ -940,8 +933,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
       assert(user_sgpr_info.user_sgpr_idx == user_sgpr_info.num_sgpr);
       add_arg(&args, s1, &ctx->prim_mask, user_sgpr_info.user_sgpr_idx);
 
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
+      add_arg(&args, s1, &ctx->program->scratch_offset, user_sgpr_info.user_sgpr_idx + 1);
 
       ctx->program->config->spi_ps_input_addr = 0;
       ctx->program->config->spi_ps_input_ena = 0;
@@ -1004,8 +996,7 @@ Pseudo_instruction *add_startpgm(struct isel_context *ctx)
 
       if (ctx->program->info->cs.uses_local_invocation_idx)
          add_arg(&args, s1, &ctx->tg_size, idx++);
-      if (ctx->options->supports_spill || ctx->scratch_enabled)
-         add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
+      add_arg(&args, s1, &ctx->program->scratch_offset, idx++);
 
       add_arg(&args, v3, &ctx->local_invocation_ids, vgpr_idx++);
       break;
@@ -1357,7 +1348,6 @@ setup_isel_context(Program* program,
    unsigned scratch_size = 0;
    for (unsigned i = 0; i < shader_count; i++)
       scratch_size = std::max(scratch_size, shaders[i]->scratch_size);
-   ctx.scratch_enabled = scratch_size > 0;
    ctx.program->config->scratch_bytes_per_wave = align(scratch_size * ctx.program->wave_size, 1024);
 
    ctx.block = ctx.program->create_and_insert_block();
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 11f983974d6..5d87b9a675a 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -411,15 +411,11 @@ static void create_function(struct radv_shader_context *ctx,
 	    ctx->max_workgroup_size,
 	    ctx->args->options);
 
-	if (ctx->args->options->supports_spill) {
-		ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
-						       LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
-						       NULL, 0, AC_FUNC_ATTR_READNONE);
-		ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
-						     ac_array_in_const_addr_space(ctx->ac.v4i32), "");
-	} else if (ctx->args->ring_offsets.used) {
-		ctx->ring_offsets = ac_get_arg(&ctx->ac, ctx->args->ring_offsets);
-	}
+	ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
+					       LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_CONST),
+					       NULL, 0, AC_FUNC_ATTR_READNONE);
+	ctx->ring_offsets = LLVMBuildBitCast(ctx->ac.builder, ctx->ring_offsets,
+					     ac_array_in_const_addr_space(ctx->ac.v4i32), "");
 
 	load_descriptor_sets(ctx);
 
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 81526c7eca0..a7253976f67 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -1118,8 +1118,7 @@ shader_variant_compile(struct radv_device *device,
 		struct ac_llvm_compiler ac_llvm;
 		bool thread_compiler;
 
-		if (options->supports_spill)
-			tm_options |= AC_TM_SUPPORTS_SPILL;
+		tm_options |= AC_TM_SUPPORTS_SPILL;
 		if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
 			tm_options |= AC_TM_SISCHED;
 		if (options->check_ir)
@@ -1200,7 +1199,7 @@ radv_shader_variant_compile(struct radv_device *device,
 	if (key)
 		options.key = *key;
 
-	options.supports_spill = true;
+	options.explicit_scratch_args = use_aco;
 	options.robust_buffer_access = device->robust_buffer_access;
 
 	return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 0dde52e1303..7ffce47bdd8 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -125,7 +125,7 @@ struct radv_shader_variant_key {
 struct radv_nir_compiler_options {
 	struct radv_pipeline_layout *layout;
 	struct radv_shader_variant_key key;
-	bool supports_spill;
+	bool explicit_scratch_args;
 	bool clamp_shadow_reference;
 	bool robust_buffer_access;
 	bool dump_shader;
diff --git a/src/amd/vulkan/radv_shader_args.c b/src/amd/vulkan/radv_shader_args.c
index 949b91dcf94..bcec3e9d2e7 100644
--- a/src/amd/vulkan/radv_shader_args.c
+++ b/src/amd/vulkan/radv_shader_args.c
@@ -72,7 +72,6 @@ set_loc_desc(struct radv_shader_args *args, int idx, uint8_t *sgpr_idx)
 }
 
 struct user_sgpr_info {
-	bool need_ring_offsets;
 	bool indirect_all_descriptor_sets;
 	uint8_t remaining_sgprs;
 };
@@ -168,22 +167,8 @@ static void allocate_user_sgprs(struct radv_shader_args *args,
 
 	memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
 
-	/* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
-	if (stage == MESA_SHADER_GEOMETRY ||
-	    stage == MESA_SHADER_VERTEX ||
-	    stage == MESA_SHADER_TESS_CTRL ||
-	    stage == MESA_SHADER_TESS_EVAL ||
-	    args->is_gs_copy_shader)
-		user_sgpr_info->need_ring_offsets = true;
-
-	if (stage == MESA_SHADER_FRAGMENT &&
-	    args->shader_info->ps.needs_sample_positions)
-		user_sgpr_info->need_ring_offsets = true;
-
-	/* 2 user sgprs will nearly always be allocated for scratch/rings */
-	if (args->options->supports_spill || user_sgpr_info->need_ring_offsets) {
-		user_sgpr_count += 2;
-	}
+	/* 2 user sgprs will always be allocated for scratch/rings */
+	user_sgpr_count += 2;
 
 	switch (stage) {
 	case MESA_SHADER_COMPUTE:
@@ -464,7 +449,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
 	allocate_user_sgprs(args, stage, has_previous_stage,
 			    previous_stage, needs_view_index, &user_sgpr_info);
 
-	if (user_sgpr_info.need_ring_offsets && !args->options->supports_spill) {
+	if (args->options->explicit_scratch_args) {
 		ac_add_arg(&args->ac, AC_ARG_SGPR, 2, AC_ARG_CONST_DESC_PTR,
 			   &args->ring_offsets);
 	}
@@ -490,6 +475,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
 				   &args->ac.tg_size);
 		}
 
+		if (args->options->explicit_scratch_args) {
+			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+				   &args->scratch_offset);
+		}
+
 		ac_add_arg(&args->ac, AC_ARG_VGPR, 3, AC_ARG_INT,
 			   &args->ac.local_invocation_ids);
 		break;
@@ -513,6 +503,11 @@ radv_declare_shader_args(struct radv_shader_args *args,
 			declare_streamout_sgprs(args, stage);
 		}
 
+		if (args->options->explicit_scratch_args) {
+			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+				   &args->scratch_offset);
+		}
+
 		declare_vs_input_vgprs(args);
 		break;
 	case MESA_SHADER_TESS_CTRL:
@@ -524,7 +519,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
 				   &args->tess_factor_offset);
 
-			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
 
@@ -556,6 +551,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
 				   &args->tess_factor_offset);
+			if (args->options->explicit_scratch_args) {
+				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+					   &args->scratch_offset);
+			}
 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
 				   &args->ac.tcs_patch_id);
 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -578,6 +577,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
 			declare_streamout_sgprs(args, stage);
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
 		}
+		if (args->options->explicit_scratch_args) {
+			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+				   &args->scratch_offset);
+		}
 		declare_tes_input_vgprs(args);
 		break;
 	case MESA_SHADER_GEOMETRY:
@@ -595,7 +598,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
 				   &args->merged_wave_info);
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->oc_lds);
 
-			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // scratch offset
+			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->scratch_offset);
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, NULL); // unknown
 
@@ -638,6 +641,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
 
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs2vs_offset);
 			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->gs_wave_id);
+			if (args->options->explicit_scratch_args) {
+				ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+					   &args->scratch_offset);
+			}
 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
 				   &args->gs_vtx_offset[0]);
 			ac_add_arg(&args->ac, AC_ARG_VGPR, 1, AC_ARG_INT,
@@ -660,6 +667,10 @@ radv_declare_shader_args(struct radv_shader_args *args,
 		declare_global_input_sgprs(args, &user_sgpr_info);
 
 		ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT, &args->ac.prim_mask);
+		if (args->options->explicit_scratch_args) {
+			ac_add_arg(&args->ac, AC_ARG_SGPR, 1, AC_ARG_INT,
+				   &args->scratch_offset);
+		}
 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_sample);
 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_center);
 		ac_add_arg(&args->ac, AC_ARG_VGPR, 2, AC_ARG_INT, &args->ac.persp_centroid);
@@ -682,7 +693,7 @@ radv_declare_shader_args(struct radv_shader_args *args,
 	}
 
 	args->shader_info->num_input_vgprs = 0;
-	args->shader_info->num_input_sgprs = args->options->supports_spill ? 2 : 0;
+	args->shader_info->num_input_sgprs = 2;
 	args->shader_info->num_input_sgprs += args->ac.num_sgprs_used;
 
 	if (stage != MESA_SHADER_FRAGMENT)
@@ -690,10 +701,8 @@ radv_declare_shader_args(struct radv_shader_args *args,
 
 	uint8_t user_sgpr_idx = 0;
 
-	if (args->options->supports_spill || user_sgpr_info.need_ring_offsets) {
-		set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
-				   &user_sgpr_idx);
-	}
+	set_loc_shader_ptr(args, AC_UD_SCRATCH_RING_OFFSETS,
+			   &user_sgpr_idx);
 
 	/* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
 	 * the rw_buffers at s0/s1. With user SGPR0 = s8, lets restart the count from 0 */
diff --git a/src/amd/vulkan/radv_shader_args.h b/src/amd/vulkan/radv_shader_args.h
index a7442c617de..0f57058bd6e 100644
--- a/src/amd/vulkan/radv_shader_args.h
+++ b/src/amd/vulkan/radv_shader_args.h
@@ -34,6 +34,7 @@ struct radv_shader_args {
 
 	struct ac_arg descriptor_sets[MAX_SETS];
 	struct ac_arg ring_offsets;
+	struct ac_arg scratch_offset;
 
 	struct ac_arg vertex_buffers;
 	struct ac_arg rel_auto_id;




More information about the mesa-commit mailing list