Mesa (master): ir3: Enable nir_lower_vars_to_scratch on a6xx

Thu Nov 19 17:12:47 UTC 2020

Module: Mesa
Branch: master
Commit: bac6cc586fe4c1b24351e0574d3a961eb631f6ae
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=bac6cc586fe4c1b24351e0574d3a961eb631f6ae

Author: Connor Abbott <cwabbott0 at gmail.com>
Date:   Fri Oct 30 16:38:40 2020 +0100

ir3: Enable nir_lower_vars_to_scratch on a6xx

Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7386>

---

 src/freedreno/ir3/ir3_compiler.c |  3 +++
 src/freedreno/ir3/ir3_compiler.h |  3 +++
 src/freedreno/ir3/ir3_nir.c      | 11 +++++++++++
 3 files changed, 17 insertions(+)

diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 9080ed7671d..c338af43ae0 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -97,6 +97,9 @@ ir3_compiler_create(struct fd_device *dev, uint32_t gpu_id)
 		/* TODO: implement clip+cull distances on earlier gen's */
 		compiler->has_clip_cull = true;
 
+		/* TODO: implement private memory on earlier gen's */
+		compiler->has_pvtmem = true;
+
 		if (compiler->gpu_id == 650)
 			compiler->tess_use_shared = true;
 	} else {
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 65bf48f0eb3..9924140711f 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -109,6 +109,9 @@ struct ir3_compiler {
 
 	/* Whether clip+cull distances are supported */
 	bool has_clip_cull;
+
+	/* Whether private memory is supported */
+	bool has_pvtmem;
 };
 
 void ir3_compiler_destroy(struct ir3_compiler *compiler);
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index 2faa802deb1..8a3f768d995 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -513,6 +513,17 @@ ir3_nir_lower_variant(struct ir3_shader_variant *so, nir_shader *s)
 
 	progress |= OPT(s, ir3_nir_lower_ubo_loads, so);
 
+	/* Lower large temporaries to scratch, which in Qualcomm terms is private
+	 * memory, to avoid excess register pressure. This should happen after
+	 * nir_opt_large_constants, because loading from a UBO is much, much less
+	 * expensive.
+	 */
+	if (so->shader->compiler->has_pvtmem) {
+		NIR_PASS_V(s, nir_lower_vars_to_scratch, nir_var_function_temp,
+				   16 * 16 /* bytes */, glsl_get_natural_size_align_bytes);
+	}
+
+
 	OPT_V(s, nir_lower_amul, ir3_glsl_type_size);
 
 	/* UBO offset lowering has to come after we've decided what will