Mesa (master): ac: set swizzled bit in cache policy as a hint not to merge loads/stores

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Mon Nov 25 21:50:19 UTC 2019


Module: Mesa
Branch: master
Commit: f671cc4d95eaf9ecfaafb216afeff7dc89f66cbf
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f671cc4d95eaf9ecfaafb216afeff7dc89f66cbf

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Nov 22 17:41:22 2019 -0500

ac: set swizzled bit in cache policy as a hint not to merge loads/stores

LLVM now merges loads and stores for all opcodes, so the swizzled bit must be
set in the cache policy of swizzled accesses to keep them from being merged.

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

---

 src/amd/llvm/ac_llvm_build.c                       | 11 ++++-----
 src/amd/llvm/ac_llvm_build.h                       |  4 ++--
 src/amd/llvm/ac_nir_to_llvm.c                      |  2 +-
 src/amd/vulkan/radv_nir_to_llvm.c                  | 20 ++++++++---------
 .../drivers/radeonsi/si_compute_prim_discard.c     |  2 +-
 src/gallium/drivers/radeonsi/si_shader.c           | 26 +++++++++++-----------
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c  |  3 +--
 7 files changed, 32 insertions(+), 36 deletions(-)

diff --git a/src/amd/llvm/ac_llvm_build.c b/src/amd/llvm/ac_llvm_build.c
index 35472900e98..60213fdd5d7 100644
--- a/src/amd/llvm/ac_llvm_build.c
+++ b/src/amd/llvm/ac_llvm_build.c
@@ -1237,8 +1237,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    LLVMValueRef voffset,
 			    LLVMValueRef soffset,
 			    unsigned inst_offset,
-			    unsigned cache_policy,
-			    bool swizzle_enable_hint)
+			    unsigned cache_policy)
 {
 	/* Split 3 channel stores, because only LLVM 9+ support 3-channel
 	 * intrinsics. */
@@ -1252,12 +1251,10 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 		v01 = ac_build_gather_values(ctx, v, 2);
 
 		ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
-					    soffset, inst_offset, cache_policy,
-					    swizzle_enable_hint);
+					    soffset, inst_offset, cache_policy);
 		ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
 					    soffset, inst_offset + 8,
-					    cache_policy,
-					    swizzle_enable_hint);
+					    cache_policy);
 		return;
 	}
 
@@ -1265,7 +1262,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 	 * (voffset is swizzled, but soffset isn't swizzled).
 	 * llvm.amdgcn.buffer.store doesn't have a separate soffset parameter.
 	 */
-	if (!swizzle_enable_hint) {
+	if (!(cache_policy & ac_swizzled)) {
 		LLVMValueRef offset = soffset;
 
 		if (inst_offset)
diff --git a/src/amd/llvm/ac_llvm_build.h b/src/amd/llvm/ac_llvm_build.h
index 2357e42035c..8f6d56ab687 100644
--- a/src/amd/llvm/ac_llvm_build.h
+++ b/src/amd/llvm/ac_llvm_build.h
@@ -299,8 +299,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
 			    LLVMValueRef voffset,
 			    LLVMValueRef soffset,
 			    unsigned inst_offset,
-			    unsigned cache_policy,
-			    bool swizzle_enable_hint);
+			    unsigned cache_policy);
 
 void
 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
@@ -533,6 +532,7 @@ enum ac_image_cache_policy {
 	ac_glc = 1 << 0, /* per-CU cache control */
 	ac_slc = 1 << 1, /* global L2 cache control */
 	ac_dlc = 1 << 2, /* per-shader-array cache control */
+	ac_swizzled = 1 << 3, /* the access is swizzled, disabling load/store merging */
 };
 
 struct ac_image_args {
diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 9e9ddf62555..2eba80a9c38 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -1650,7 +1650,7 @@ static void visit_store_ssbo(struct ac_nir_context *ctx,
 			ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
 						    num_channels, offset,
 						    ctx->ac.i32_0, 0,
-						    cache_policy, false);
+						    cache_policy);
 		}
 	}
 }
diff --git a/src/amd/vulkan/radv_nir_to_llvm.c b/src/amd/vulkan/radv_nir_to_llvm.c
index 5d87b9a675a..51422cea12b 100644
--- a/src/amd/vulkan/radv_nir_to_llvm.c
+++ b/src/amd/vulkan/radv_nir_to_llvm.c
@@ -742,13 +742,13 @@ store_tcs_output(struct ac_shader_abi *abi,
 		if (!is_tess_factor && writemask != 0xF)
 			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
 						    buf_addr, oc_lds,
-						    4 * (base + chan), ac_glc, false);
+						    4 * (base + chan), ac_glc);
 	}
 
 	if (writemask == 0xF) {
 		ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
 					    buf_addr, oc_lds,
-					    (base * 4), ac_glc, false);
+					    (base * 4), ac_glc);
 	}
 }
 
@@ -1037,7 +1037,7 @@ visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addr
 						    voffset,
 						    ac_get_arg(&ctx->ac,
 							       ctx->args->gs2vs_offset),
-						    0, ac_glc | ac_slc, true);
+						    0, ac_glc | ac_slc | ac_swizzled);
 		}
 	}
 
@@ -1768,7 +1768,7 @@ radv_emit_stream_output(struct radv_shader_context *ctx,
 	ac_build_buffer_store_dword(&ctx->ac, so_buffers[buf],
 				    vdata, num_comps, so_write_offsets[buf],
 				    ctx->ac.i32_0, offset,
-				    ac_glc | ac_slc, false);
+				    ac_glc | ac_slc);
 }
 
 static void
@@ -2173,7 +2173,7 @@ handle_es_outputs_post(struct radv_shader_context *ctx,
 				                            NULL,
 							    ac_get_arg(&ctx->ac, ctx->args->es2gs_offset),
 				                            (4 * param_index + j) * 4,
-				                            ac_glc | ac_slc, true);
+				                            ac_glc | ac_slc | ac_swizzled);
 			}
 		}
 	}
@@ -3635,7 +3635,7 @@ write_tess_factors(struct radv_shader_context *ctx)
 		ac_build_buffer_store_dword(&ctx->ac, buffer,
 					    LLVMConstInt(ctx->ac.i32, 0x80000000, false),
 					    1, ctx->ac.i32_0, tf_base,
-					    0, ac_glc, false);
+					    0, ac_glc);
 		tf_offset += 4;
 
 		ac_build_endif(&ctx->ac, 6504);
@@ -3644,11 +3644,11 @@ write_tess_factors(struct radv_shader_context *ctx)
 	/* Store the tessellation factors. */
 	ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
 				    MIN2(stride, 4), byteoffset, tf_base,
-				    tf_offset, ac_glc, false);
+				    tf_offset, ac_glc);
 	if (vec1)
 		ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
 					    stride - 4, byteoffset, tf_base,
-					    16 + tf_offset, ac_glc, false);
+					    16 + tf_offset, ac_glc);
 
 	//store to offchip for TES to read - only if TES reads them
 	if (ctx->args->options->key.tcs.tes_reads_tess_factors) {
@@ -3666,7 +3666,7 @@ write_tess_factors(struct radv_shader_context *ctx)
 		ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
 					    outer_comps, tf_outer_offset,
 					    ac_get_arg(&ctx->ac, ctx->args->oc_lds),
-					    0, ac_glc, false);
+					    0, ac_glc);
 		if (inner_comps) {
 			param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
 			tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
@@ -3677,7 +3677,7 @@ write_tess_factors(struct radv_shader_context *ctx)
 			ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
 						    inner_comps, tf_inner_offset,
 						    ac_get_arg(&ctx->ac, ctx->args->oc_lds),
-						    0, ac_glc, false);
+						    0, ac_glc);
 		}
 	}
 	
diff --git a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
index a52966f2376..0bbaf4f306d 100644
--- a/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
+++ b/src/gallium/drivers/radeonsi/si_compute_prim_discard.c
@@ -822,7 +822,7 @@ void si_build_prim_discard_compute_shader(struct si_shader_context *ctx)
 			};
 			LLVMValueRef rsrc = ac_build_gather_values(&ctx->ac, desc, 4);
 			ac_build_buffer_store_dword(&ctx->ac, rsrc, count, 1, ctx->i32_0,
-						    ctx->i32_0, 0, ac_glc | ac_slc, false);
+						    ctx->i32_0, 0, ac_glc | ac_slc);
 		} else {
 			LLVMBuildStore(builder, count,
 				       si_expand_32bit_pointer(ctx,
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c
index b63a39efe2d..bad2bfdf130 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -1309,7 +1309,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 		if (reg->Register.WriteMask != 0xF && !is_tess_factor) {
 			ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
 						    buf_addr, base,
-						    4 * chan_index, ac_glc, false);
+						    4 * chan_index, ac_glc);
 		}
 
 		/* Write tess factors into VGPRs for the epilog. */
@@ -1329,7 +1329,7 @@ static void store_output_tcs(struct lp_build_tgsi_context *bld_base,
 		LLVMValueRef value = ac_build_gather_values(&ctx->ac,
 		                                            values, 4);
 		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buf_addr,
-					    base, 0, ac_glc, false);
+					    base, 0, ac_glc);
 	}
 }
 
@@ -1432,7 +1432,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
 			ac_build_buffer_store_dword(&ctx->ac, buffer, value, 1,
 						    addr, base,
 						    4 * buffer_store_offset,
-                                                    ac_glc, false);
+                                                    ac_glc);
 		}
 
 		/* Write tess factors into VGPRs for the epilog. */
@@ -1452,7 +1452,7 @@ static void si_nir_store_output_tcs(struct ac_shader_abi *abi,
 		LLVMValueRef value = ac_build_gather_values(&ctx->ac,
 		                                            values, 4);
 		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, addr,
-					    base, 0, ac_glc, false);
+					    base, 0, ac_glc);
 	}
 }
 
@@ -2661,7 +2661,7 @@ void si_emit_streamout_output(struct si_shader_context *ctx,
 				    vdata, num_comps,
 				    so_write_offsets[buf_idx],
 				    ctx->i32_0,
-				    stream_out->dst_offset * 4, ac_glc | ac_slc, false);
+				    stream_out->dst_offset * 4, ac_glc | ac_slc);
 }
 
 /**
@@ -3066,7 +3066,7 @@ static void si_copy_tcs_inputs(struct lp_build_tgsi_context *bld_base)
 		LLVMValueRef value = lshs_lds_load(bld_base, ctx->ac.i32, ~0, lds_ptr);
 
 		ac_build_buffer_store_dword(&ctx->ac, buffer, value, 4, buffer_addr,
-					    buffer_offset, 0, ac_glc, false);
+					    buffer_offset, 0, ac_glc);
 	}
 }
 
@@ -3191,7 +3191,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 		ac_build_buffer_store_dword(&ctx->ac, buffer,
 					    LLVMConstInt(ctx->i32, 0x80000000, 0),
 					    1, ctx->i32_0, tf_base,
-					    offset, ac_glc, false);
+					    offset, ac_glc);
 		offset += 4;
 	}
 
@@ -3200,12 +3200,12 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 	/* Store the tessellation factors. */
 	ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
 				    MIN2(stride, 4), byteoffset, tf_base,
-				    offset, ac_glc, false);
+				    offset, ac_glc);
 	offset += 16;
 	if (vec1)
 		ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
 					    stride - 4, byteoffset, tf_base,
-					    offset, ac_glc, false);
+					    offset, ac_glc);
 
 	/* Store the tess factors into the offchip buffer if TES reads them. */
 	if (shader->key.part.tcs.epilog.tes_reads_tess_factors) {
@@ -3228,7 +3228,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 
 		ac_build_buffer_store_dword(&ctx->ac, buf, outer_vec,
 					    outer_comps, tf_outer_offset,
-					    base, 0, ac_glc, false);
+					    base, 0, ac_glc);
 		if (inner_comps) {
 			param_inner = si_shader_io_get_unique_index_patch(
 					      TGSI_SEMANTIC_TESSINNER, 0);
@@ -3239,7 +3239,7 @@ static void si_write_tess_factors(struct lp_build_tgsi_context *bld_base,
 				    ac_build_gather_values(&ctx->ac, inner, inner_comps);
 			ac_build_buffer_store_dword(&ctx->ac, buf, inner_vec,
 						    inner_comps, tf_inner_offset,
-						    base, 0, ac_glc, false);
+						    base, 0, ac_glc);
 		}
 	}
 
@@ -3554,7 +3554,7 @@ static void si_llvm_emit_es_epilogue(struct ac_shader_abi *abi,
 						    out_val, 1, NULL,
 						    ac_get_arg(&ctx->ac, ctx->es2gs_offset),
 						    (4 * param + chan) * 4,
-						    ac_glc | ac_slc, true);
+						    ac_glc | ac_slc | ac_swizzled);
 		}
 	}
 
@@ -4283,7 +4283,7 @@ static void si_llvm_emit_vertex(struct ac_shader_abi *abi,
 						    ctx->gsvs_ring[stream],
 						    out_val, 1,
 						    voffset, soffset, 0,
-						    ac_glc | ac_slc, true);
+						    ac_glc | ac_slc | ac_swizzled);
 		}
 	}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index 67db98d6fed..21b861b8244 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -649,8 +649,7 @@ static void store_emit_buffer(struct si_shader_context *ctx,
 		}
 
 		ac_build_buffer_store_dword(&ctx->ac, resource, data, count,
-					    voff, ctx->i32_0, 0, cache_policy,
-					    false);
+					    voff, ctx->i32_0, 0, cache_policy);
 	}
 }
 




More information about the mesa-commit mailing list