Mesa (master): radv/aco: Setup alternate path in RADV to support the experimental ACO compiler

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Sep 19 10:33:40 UTC 2019


Module: Mesa
Branch: master
Commit: a70a9987181a09258406cc0d8ff5e34acc000371
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a70a9987181a09258406cc0d8ff5e34acc000371

Author: Daniel Schürmann <daniel at schuermann.dev>
Date:   Tue Sep 17 14:35:22 2019 +0200

radv/aco: Setup alternate path in RADV to support the experimental ACO compiler

LLVM remains default and ACO can be enabled with RADV_PERFTEST=aco.

Co-authored-by: Daniel Schürmann <daniel at schuermann.dev>
Co-authored-by: Rhys Perry <pendingchaos02 at gmail.com>

Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/common/ac_llvm_util.c     |   3 +
 src/amd/meson.build               |   1 +
 src/amd/vulkan/meson.build        |   2 +-
 src/amd/vulkan/radv_cmd_buffer.c  |   4 ++
 src/amd/vulkan/radv_debug.h       |  25 ++++----
 src/amd/vulkan/radv_device.c      |  90 ++++++++++++++------------
 src/amd/vulkan/radv_extensions.py |  10 +--
 src/amd/vulkan/radv_pipeline.c    |  33 ++++++++--
 src/amd/vulkan/radv_private.h     |   4 ++
 src/amd/vulkan/radv_shader.c      | 131 ++++++++++++++++++++++++++------------
 src/amd/vulkan/radv_shader.h      |   5 +-
 11 files changed, 205 insertions(+), 103 deletions(-)

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 64942670b6c..ddc8fee839b 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -49,6 +49,9 @@ static void ac_init_llvm_target()
 	/* For inline assembly. */
 	LLVMInitializeAMDGPUAsmParser();
 
+	/* For ACO disassembly. */
+	LLVMInitializeAMDGPUDisassembler();
+
 	/* Workaround for bug in llvm 4.0 that causes image intrinsics
 	 * to disappear.
 	 * https://reviews.llvm.org/D26348
diff --git a/src/amd/meson.build b/src/amd/meson.build
index f96a9aac095..1e459b26c1a 100644
--- a/src/amd/meson.build
+++ b/src/amd/meson.build
@@ -22,6 +22,7 @@ inc_amd = include_directories('.')
 
 subdir('addrlib')
 subdir('common')
+subdir('compiler')
 if with_amd_vk
   subdir('vulkan')
 endif
diff --git a/src/amd/vulkan/meson.build b/src/amd/vulkan/meson.build
index 9278f1144d9..72cb64c5847 100644
--- a/src/amd/vulkan/meson.build
+++ b/src/amd/vulkan/meson.build
@@ -156,7 +156,7 @@ libvulkan_radeon = shared_library(
   ],
   dependencies : [
     dep_llvm, dep_libdrm_amdgpu, dep_thread, dep_elf, dep_dl, dep_m,
-    dep_valgrind, radv_deps,
+    dep_valgrind, radv_deps, idep_aco,
     idep_mesautil, idep_nir, idep_vulkan_util, idep_amdgfxregs_h, idep_xmlconfig,
   ],
   c_args : [c_vis_args, no_override_init_args, radv_flags],
diff --git a/src/amd/vulkan/radv_cmd_buffer.c b/src/amd/vulkan/radv_cmd_buffer.c
index 6937eeacc49..f35053b8695 100644
--- a/src/amd/vulkan/radv_cmd_buffer.c
+++ b/src/amd/vulkan/radv_cmd_buffer.c
@@ -2844,6 +2844,10 @@ radv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer,
 			break;
 		case VK_ACCESS_SHADER_READ_BIT:
 			flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
+			/* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
+			 * invalidate the scalar cache. */
+			if (cmd_buffer->device->physical_device->use_aco)
+				flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
 
 			if (!image_is_coherent)
 				flush_bits |= RADV_CMD_FLAG_INV_L2;
diff --git a/src/amd/vulkan/radv_debug.h b/src/amd/vulkan/radv_debug.h
index 6276589d025..ca71d535f2a 100644
--- a/src/amd/vulkan/radv_debug.h
+++ b/src/amd/vulkan/radv_debug.h
@@ -58,18 +58,19 @@ enum {
 };
 
 enum {
-	RADV_PERFTEST_NO_BATCHCHAIN  =   0x1,
-	RADV_PERFTEST_SISCHED        =   0x2,
-	RADV_PERFTEST_LOCAL_BOS      =   0x4,
-	RADV_PERFTEST_OUT_OF_ORDER   =   0x8,
-	RADV_PERFTEST_DCC_MSAA       =  0x10,
-	RADV_PERFTEST_BO_LIST        =  0x20,
-	RADV_PERFTEST_SHADER_BALLOT  =  0x40,
-	RADV_PERFTEST_TC_COMPAT_CMASK = 0x80,
-	RADV_PERFTEST_CS_WAVE_32     = 0x100,
-	RADV_PERFTEST_PS_WAVE_32     = 0x200,
-	RADV_PERFTEST_GE_WAVE_32     = 0x400,
-	RADV_PERFTEST_DFSM           = 0x800,
+	RADV_PERFTEST_NO_BATCHCHAIN   =    0x1,
+	RADV_PERFTEST_SISCHED         =    0x2,
+	RADV_PERFTEST_LOCAL_BOS       =    0x4,
+	RADV_PERFTEST_OUT_OF_ORDER    =    0x8,
+	RADV_PERFTEST_DCC_MSAA        =   0x10,
+	RADV_PERFTEST_BO_LIST         =   0x20,
+	RADV_PERFTEST_SHADER_BALLOT   =   0x40,
+	RADV_PERFTEST_TC_COMPAT_CMASK =   0x80,
+	RADV_PERFTEST_CS_WAVE_32      =  0x100,
+	RADV_PERFTEST_PS_WAVE_32      =  0x200,
+	RADV_PERFTEST_GE_WAVE_32      =  0x400,
+	RADV_PERFTEST_DFSM            =  0x800,
+	RADV_PERFTEST_ACO             = 0x1000,
 };
 
 bool
diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index af8607c1559..26de979b64f 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -86,41 +86,41 @@ radv_get_device_uuid(struct radeon_info *info, void *uuid)
 }
 
 static void
-radv_get_device_name(enum radeon_family family, char *name, size_t name_len)
+radv_get_device_name(enum radeon_family family, char *name, size_t name_len, bool aco)
 {
 	const char *chip_string;
 
 	switch (family) {
-	case CHIP_TAHITI: chip_string = "AMD RADV TAHITI"; break;
-	case CHIP_PITCAIRN: chip_string = "AMD RADV PITCAIRN"; break;
-	case CHIP_VERDE: chip_string = "AMD RADV CAPE VERDE"; break;
-	case CHIP_OLAND: chip_string = "AMD RADV OLAND"; break;
-	case CHIP_HAINAN: chip_string = "AMD RADV HAINAN"; break;
-	case CHIP_BONAIRE: chip_string = "AMD RADV BONAIRE"; break;
-	case CHIP_KAVERI: chip_string = "AMD RADV KAVERI"; break;
-	case CHIP_KABINI: chip_string = "AMD RADV KABINI"; break;
-	case CHIP_HAWAII: chip_string = "AMD RADV HAWAII"; break;
-	case CHIP_TONGA: chip_string = "AMD RADV TONGA"; break;
-	case CHIP_ICELAND: chip_string = "AMD RADV ICELAND"; break;
-	case CHIP_CARRIZO: chip_string = "AMD RADV CARRIZO"; break;
-	case CHIP_FIJI: chip_string = "AMD RADV FIJI"; break;
-	case CHIP_POLARIS10: chip_string = "AMD RADV POLARIS10"; break;
-	case CHIP_POLARIS11: chip_string = "AMD RADV POLARIS11"; break;
-	case CHIP_POLARIS12: chip_string = "AMD RADV POLARIS12"; break;
-	case CHIP_STONEY: chip_string = "AMD RADV STONEY"; break;
-	case CHIP_VEGAM: chip_string = "AMD RADV VEGA M"; break;
-	case CHIP_VEGA10: chip_string = "AMD RADV VEGA10"; break;
-	case CHIP_VEGA12: chip_string = "AMD RADV VEGA12"; break;
-	case CHIP_VEGA20: chip_string = "AMD RADV VEGA20"; break;
-	case CHIP_RAVEN: chip_string = "AMD RADV RAVEN"; break;
-	case CHIP_RAVEN2: chip_string = "AMD RADV RAVEN2"; break;
-	case CHIP_NAVI10: chip_string = "AMD RADV NAVI10"; break;
-	case CHIP_NAVI12: chip_string = "AMD RADV NAVI12"; break;
-	case CHIP_NAVI14: chip_string = "AMD RADV NAVI14"; break;
-	default: chip_string = "AMD RADV unknown"; break;
-	}
-
-	snprintf(name, name_len, "%s (LLVM " MESA_LLVM_VERSION_STRING ")", chip_string);
+	case CHIP_TAHITI: chip_string = "TAHITI"; break;
+	case CHIP_PITCAIRN: chip_string = "PITCAIRN"; break;
+	case CHIP_VERDE: chip_string = "CAPE VERDE"; break;
+	case CHIP_OLAND: chip_string = "OLAND"; break;
+	case CHIP_HAINAN: chip_string = "HAINAN"; break;
+	case CHIP_BONAIRE: chip_string = "BONAIRE"; break;
+	case CHIP_KAVERI: chip_string = "KAVERI"; break;
+	case CHIP_KABINI: chip_string = "KABINI"; break;
+	case CHIP_HAWAII: chip_string = "HAWAII"; break;
+	case CHIP_TONGA: chip_string = "TONGA"; break;
+	case CHIP_ICELAND: chip_string = "ICELAND"; break;
+	case CHIP_CARRIZO: chip_string = "CARRIZO"; break;
+	case CHIP_FIJI: chip_string = "FIJI"; break;
+	case CHIP_POLARIS10: chip_string = "POLARIS10"; break;
+	case CHIP_POLARIS11: chip_string = "POLARIS11"; break;
+	case CHIP_POLARIS12: chip_string = "POLARIS12"; break;
+	case CHIP_STONEY: chip_string = "STONEY"; break;
+	case CHIP_VEGAM: chip_string = "VEGA M"; break;
+	case CHIP_VEGA10: chip_string = "VEGA10"; break;
+	case CHIP_VEGA12: chip_string = "VEGA12"; break;
+	case CHIP_VEGA20: chip_string = "VEGA20"; break;
+	case CHIP_RAVEN: chip_string = "RAVEN"; break;
+	case CHIP_RAVEN2: chip_string = "RAVEN2"; break;
+	case CHIP_NAVI10: chip_string = "NAVI10"; break;
+	case CHIP_NAVI12: chip_string = "NAVI12"; break;
+	case CHIP_NAVI14: chip_string = "NAVI14"; break;
+	default: chip_string = "unknown"; break;
+	}
+
+	snprintf(name, name_len, "AMD RADV%s %s (LLVM " MESA_LLVM_VERSION_STRING ")", aco ? "/ACO" : "", chip_string);
 }
 
 static uint64_t
@@ -327,7 +327,14 @@ radv_physical_device_init(struct radv_physical_device *device,
 
 	radv_handle_env_var_force_family(device);
 
-	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name));
+	device->use_aco = instance->perftest_flags & RADV_PERFTEST_ACO;
+	if ((device->rad_info.chip_class < GFX8 ||
+	     device->rad_info.chip_class > GFX9) && device->use_aco) {
+		fprintf(stderr, "WARNING: disabling ACO on unsupported GPUs.\n");
+		device->use_aco = false;
+	}
+
+	radv_get_device_name(device->rad_info.family, device->name, sizeof(device->name), device->use_aco);
 
 	if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
 		device->ws->destroy(device->ws);
@@ -339,7 +346,8 @@ radv_physical_device_init(struct radv_physical_device *device,
 	/* These flags affect shader compilation. */
 	uint64_t shader_env_flags =
 		(device->instance->perftest_flags & RADV_PERFTEST_SISCHED ? 0x1 : 0) |
-		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0);
+		(device->instance->debug_flags & RADV_DEBUG_UNSAFE_MATH ? 0x2 : 0) |
+		(device->use_aco ? 0x4 : 0);
 
 	/* The gpu id is already embedded in the uuid so we just pass "radv"
 	 * when creating the cache.
@@ -362,9 +370,10 @@ radv_physical_device_init(struct radv_physical_device *device,
 		(device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
 
 	device->use_shader_ballot = device->rad_info.chip_class >= GFX8 &&
-				    device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT;
+				    (device->use_aco || device->instance->perftest_flags & RADV_PERFTEST_SHADER_BALLOT);
 
 	device->use_ngg_streamout = false;
+	device->use_aco = device->instance->perftest_flags & RADV_PERFTEST_ACO;
 
 	/* Determine the number of threads per wave for all stages. */
 	device->cs_wave_size = 64;
@@ -500,6 +509,7 @@ static const struct debug_control radv_perftest_options[] = {
 	{"pswave32", RADV_PERFTEST_PS_WAVE_32},
 	{"gewave32", RADV_PERFTEST_GE_WAVE_32},
 	{"dfsm", RADV_PERFTEST_DFSM},
+	{"aco", RADV_PERFTEST_ACO},
 	{NULL, 0}
 };
 
@@ -622,6 +632,8 @@ VkResult radv_CreateInstance(
 	instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"),
 						   radv_perftest_options);
 
+	if (instance->perftest_flags & RADV_PERFTEST_ACO)
+		fprintf(stderr, "WARNING: Experimental compiler backend enabled. Here be dragons! Incorrect rendering, GPU hangs and/or resets are likely\n");
 
 	if (instance->debug_flags & RADV_DEBUG_STARTUP)
 		radv_logi("Created an instance");
@@ -832,7 +844,7 @@ void radv_GetPhysicalDeviceFeatures(
 		.shaderCullDistance                       = true,
 		.shaderFloat64                            = true,
 		.shaderInt64                              = true,
-		.shaderInt16                              = pdevice->rad_info.chip_class >= GFX9,
+		.shaderInt16                              = pdevice->rad_info.chip_class >= GFX9 && !pdevice->use_aco,
 		.sparseBinding                            = true,
 		.variableMultisampleRate                  = true,
 		.inheritedQueries                         = true,
@@ -874,7 +886,7 @@ void radv_GetPhysicalDeviceFeatures2(
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
 			VkPhysicalDevice16BitStorageFeatures *features =
 			    (VkPhysicalDevice16BitStorageFeatures*)ext;
-			bool enabled = pdevice->rad_info.chip_class >= GFX8;
+			bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
 			features->storageBuffer16BitAccess = enabled;
 			features->uniformAndStorageBuffer16BitAccess = enabled;
 			features->storagePushConstant16 = enabled;
@@ -968,7 +980,7 @@ void radv_GetPhysicalDeviceFeatures2(
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES_KHR: {
 			VkPhysicalDevice8BitStorageFeaturesKHR *features =
 			    (VkPhysicalDevice8BitStorageFeaturesKHR*)ext;
-			bool enabled = pdevice->rad_info.chip_class >= GFX8;
+			bool enabled = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
 			features->storageBuffer8BitAccess = enabled;
 			features->uniformAndStorageBuffer8BitAccess = enabled;
 			features->storagePushConstant8 = enabled;
@@ -977,8 +989,8 @@ void radv_GetPhysicalDeviceFeatures2(
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT16_INT8_FEATURES_KHR: {
 			VkPhysicalDeviceFloat16Int8FeaturesKHR *features =
 				(VkPhysicalDeviceFloat16Int8FeaturesKHR*)ext;
-			features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8;
-			features->shaderInt8 = true;
+			features->shaderFloat16 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_aco;
+			features->shaderInt8 = !pdevice->use_aco;
 			break;
 		}
 		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES_KHR: {
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index 4bb4c4dfc23..eb2505ba765 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -51,7 +51,7 @@ class Extension:
 # and dEQP-VK.api.info.device fail due to the duplicated strings.
 EXTENSIONS = [
     Extension('VK_ANDROID_native_buffer',                 5, 'ANDROID && device->rad_info.has_syncobj_wait_for_submit'),
-    Extension('VK_KHR_16bit_storage',                     1, True),
+    Extension('VK_KHR_16bit_storage',                     1, '!device->use_aco'),
     Extension('VK_KHR_bind_memory2',                      1, True),
     Extension('VK_KHR_create_renderpass2',                1, True),
     Extension('VK_KHR_dedicated_allocation',              1, True),
@@ -87,7 +87,7 @@ EXTENSIONS = [
     Extension('VK_KHR_sampler_ycbcr_conversion',          1, True),
     Extension('VK_KHR_shader_atomic_int64',               1, 'LLVM_VERSION_MAJOR >= 9'),
     Extension('VK_KHR_shader_draw_parameters',            1, True),
-    Extension('VK_KHR_shader_float16_int8',               1, True),
+    Extension('VK_KHR_shader_float16_int8',               1, '!device->use_aco'),
     Extension('VK_KHR_storage_buffer_storage_class',      1, True),
     Extension('VK_KHR_surface',                          25, 'RADV_HAS_SURFACE'),
     Extension('VK_KHR_surface_protected_capabilities',    1, 'RADV_HAS_SURFACE'),
@@ -99,7 +99,7 @@ EXTENSIONS = [
     Extension('VK_KHR_xlib_surface',                      6, 'VK_USE_PLATFORM_XLIB_KHR'),
     Extension('VK_KHR_multiview',                         1, True),
     Extension('VK_KHR_display',                          23, 'VK_USE_PLATFORM_DISPLAY_KHR'),
-    Extension('VK_KHR_8bit_storage',                      1, 'device->rad_info.chip_class >= GFX8'),
+    Extension('VK_KHR_8bit_storage',                      1, 'device->rad_info.chip_class >= GFX8 && !device->use_aco'),
     Extension('VK_EXT_direct_mode_display',               1, 'VK_USE_PLATFORM_DISPLAY_KHR'),
     Extension('VK_EXT_acquire_xlib_display',              1, 'VK_USE_PLATFORM_XLIB_XRANDR_EXT'),
     Extension('VK_EXT_buffer_device_address',             1, True),
@@ -138,8 +138,8 @@ EXTENSIONS = [
     Extension('VK_AMD_buffer_marker',                     1, True),
     Extension('VK_AMD_draw_indirect_count',               1, True),
     Extension('VK_AMD_gcn_shader',                        1, True),
-    Extension('VK_AMD_gpu_shader_half_float',             1, 'device->rad_info.chip_class >= GFX9'),
-    Extension('VK_AMD_gpu_shader_int16',                  1, 'device->rad_info.chip_class >= GFX9'),
+    Extension('VK_AMD_gpu_shader_half_float',             1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
+    Extension('VK_AMD_gpu_shader_int16',                  1, '!device->use_aco && device->rad_info.chip_class >= GFX9'),
     Extension('VK_AMD_rasterization_order',               1, 'device->rad_info.has_out_of_order_rast'),
     Extension('VK_AMD_shader_ballot',                     1, 'device->use_shader_ballot'),
     Extension('VK_AMD_shader_core_properties',            1, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 5036fa69d20..70ffc2412b3 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -167,6 +167,8 @@ static uint32_t get_hash_flags(struct radv_device *device)
 		hash_flags |= RADV_HASH_SHADER_PS_WAVE32;
 	if (device->physical_device->ge_wave_size == 32)
 		hash_flags |= RADV_HASH_SHADER_GE_WAVE32;
+	if (device->physical_device->use_aco)
+		hash_flags |= RADV_HASH_SHADER_ACO;
 	return hash_flags;
 }
 
@@ -2552,6 +2554,14 @@ void radv_stop_feedback(VkPipelineCreationFeedbackEXT *feedback, bool cache_hit)
 }
 
 static
+bool radv_aco_supported_stage(gl_shader_stage stage, bool has_gs, bool has_ts)
+{
+	return (stage == MESA_SHADER_VERTEX && !has_gs && !has_ts) ||
+	       stage == MESA_SHADER_FRAGMENT ||
+	       stage == MESA_SHADER_COMPUTE;
+}
+
+static
 void radv_create_shaders(struct radv_pipeline *pipeline,
                          struct radv_device *device,
                          struct radv_pipeline_cache *cache,
@@ -2613,6 +2623,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 		modules[MESA_SHADER_FRAGMENT] = &fs_m;
 	}
 
+	bool has_gs = modules[MESA_SHADER_GEOMETRY];
+	bool has_ts = modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL];
+	bool use_aco = device->physical_device->use_aco;
+
 	for (unsigned i = 0; i < MESA_SHADER_STAGES; ++i) {
 		const VkPipelineShaderStageCreateInfo *stage = pStages[i];
 
@@ -2621,10 +2635,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
 		radv_start_feedback(stage_feedbacks[i]);
 
+		bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
 		nir[i] = radv_shader_compile_to_nir(device, modules[i],
 						    stage ? stage->pName : "main", i,
 						    stage ? stage->pSpecializationInfo : NULL,
-						    flags, pipeline->layout);
+						    flags, pipeline->layout, aco);
 
 		/* We don't want to alter meta shaders IR directly so clone it
 		 * first.
@@ -2651,7 +2666,10 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 			                   nir_lower_non_uniform_ssbo_access |
 			                   nir_lower_non_uniform_texture_access |
 			                   nir_lower_non_uniform_image_access);
-			NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
+
+			bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
+			if (!aco)
+				NIR_PASS_V(nir[i], nir_lower_bool_to_int32);
 		}
 
 		if (radv_can_dump_shader(device, modules[i], false))
@@ -2690,11 +2708,13 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 		if (!pipeline->shaders[MESA_SHADER_FRAGMENT]) {
 			radv_start_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT]);
 
+			bool aco = use_aco && radv_aco_supported_stage(MESA_SHADER_FRAGMENT, has_gs, has_ts);
 			pipeline->shaders[MESA_SHADER_FRAGMENT] =
 			       radv_shader_variant_compile(device, modules[MESA_SHADER_FRAGMENT], &nir[MESA_SHADER_FRAGMENT], 1,
 			                                  pipeline->layout, keys + MESA_SHADER_FRAGMENT,
 							  infos + MESA_SHADER_FRAGMENT,
-			                                  keep_executable_info, &binaries[MESA_SHADER_FRAGMENT]);
+			                                  keep_executable_info, aco,
+			                                  &binaries[MESA_SHADER_FRAGMENT]);
 
 			radv_stop_feedback(stage_feedbacks[MESA_SHADER_FRAGMENT], false);
 		}
@@ -2725,7 +2745,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 			pipeline->shaders[MESA_SHADER_TESS_CTRL] = radv_shader_variant_compile(device, modules[MESA_SHADER_TESS_CTRL], combined_nir, 2,
 			                                                                      pipeline->layout,
 			                                                                      &key, &infos[MESA_SHADER_TESS_CTRL], keep_executable_info,
-			                                                                      &binaries[MESA_SHADER_TESS_CTRL]);
+			                                                                      false, &binaries[MESA_SHADER_TESS_CTRL]);
 
 			radv_stop_feedback(stage_feedbacks[MESA_SHADER_TESS_CTRL], false);
 		}
@@ -2744,7 +2764,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 			pipeline->shaders[MESA_SHADER_GEOMETRY] = radv_shader_variant_compile(device, modules[MESA_SHADER_GEOMETRY], combined_nir, 2,
 			                                                                     pipeline->layout,
 			                                                                     &keys[pre_stage], &infos[MESA_SHADER_GEOMETRY], keep_executable_info,
-			                                                                     &binaries[MESA_SHADER_GEOMETRY]);
+			                                                                     false, &binaries[MESA_SHADER_GEOMETRY]);
 
 			radv_stop_feedback(stage_feedbacks[MESA_SHADER_GEOMETRY], false);
 		}
@@ -2763,10 +2783,11 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
 			radv_start_feedback(stage_feedbacks[i]);
 
+			bool aco = use_aco && radv_aco_supported_stage(i, has_gs, has_ts);
 			pipeline->shaders[i] = radv_shader_variant_compile(device, modules[i], &nir[i], 1,
 									  pipeline->layout,
 									  keys + i, infos + i,keep_executable_info,
-									  &binaries[i]);
+									  aco, &binaries[i]);
 
 			radv_stop_feedback(stage_feedbacks[i], false);
 		}
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 03dc9e02145..0a3e7ca9d88 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -296,6 +296,9 @@ struct radv_physical_device {
 	uint8_t cs_wave_size;
 	uint8_t ge_wave_size;
 
+	/* Whether to use the experimental compiler backend */
+	bool use_aco;
+
 	/* This is the drivers on-disk cache used as a fallback as opposed to
 	 * the pipeline cache defined by apps.
 	 */
@@ -1421,6 +1424,7 @@ struct radv_shader_module;
 #define RADV_HASH_SHADER_CS_WAVE32           (1 << 4)
 #define RADV_HASH_SHADER_PS_WAVE32           (1 << 5)
 #define RADV_HASH_SHADER_GE_WAVE32           (1 << 6)
+#define RADV_HASH_SHADER_ACO                 (1 << 7)
 
 void
 radv_hash_shaders(unsigned char *hash,
diff --git a/src/amd/vulkan/radv_shader.c b/src/amd/vulkan/radv_shader.c
index 98abe8cd437..2bd4c351745 100644
--- a/src/amd/vulkan/radv_shader.c
+++ b/src/amd/vulkan/radv_shader.c
@@ -48,9 +48,11 @@
 #include "util/debug.h"
 #include "ac_exp_param.h"
 
+#include "aco_interface.h"
+
 #include "util/string_buffer.h"
 
-static const struct nir_shader_compiler_options nir_options = {
+static const struct nir_shader_compiler_options nir_options_llvm = {
 	.vertex_id_zero_based = true,
 	.lower_scmp = true,
 	.lower_flrp16 = true,
@@ -80,6 +82,36 @@ static const struct nir_shader_compiler_options nir_options = {
 	.use_interpolated_input_intrinsics = true,
 };
 
+static const struct nir_shader_compiler_options nir_options_aco = {
+	.vertex_id_zero_based = true,
+	.lower_scmp = true,
+	.lower_flrp16 = true,
+	.lower_flrp32 = true,
+	.lower_flrp64 = true,
+	.lower_device_index_to_zero = true,
+	.lower_fdiv = true,
+	.lower_bitfield_insert_to_bitfield_select = true,
+	.lower_bitfield_extract = true,
+	.lower_sub = true, /* TODO: set this to false once !1236 is merged */
+	.lower_pack_snorm_2x16 = true,
+	.lower_pack_snorm_4x8 = true,
+	.lower_pack_unorm_2x16 = true,
+	.lower_pack_unorm_4x8 = true,
+	.lower_unpack_snorm_2x16 = true,
+	.lower_unpack_snorm_4x8 = true,
+	.lower_unpack_unorm_2x16 = true,
+	.lower_unpack_unorm_4x8 = true,
+	.lower_unpack_half_2x16 = true,
+	.lower_extract_byte = true,
+	.lower_extract_word = true,
+	.lower_ffma = true,
+	.lower_fpow = true,
+	.lower_mul_2x32_64 = true,
+	.lower_rotate = true,
+	.max_unroll_iterations = 32,
+	.use_interpolated_input_intrinsics = true,
+};
+
 bool
 radv_can_dump_shader(struct radv_device *device,
 		     struct radv_shader_module *module,
@@ -257,15 +289,18 @@ radv_shader_compile_to_nir(struct radv_device *device,
 			   gl_shader_stage stage,
 			   const VkSpecializationInfo *spec_info,
 			   const VkPipelineCreateFlags flags,
-			   const struct radv_pipeline_layout *layout)
+			   const struct radv_pipeline_layout *layout,
+			   bool use_aco)
 {
 	nir_shader *nir;
+	const nir_shader_compiler_options *nir_options = use_aco ? &nir_options_aco :
+								   &nir_options_llvm;
 	if (module->nir) {
 		/* Some things such as our meta clear/blit code will give us a NIR
 		 * shader directly.  In that case, we just ignore the SPIR-V entirely
 		 * and just use the NIR shader */
 		nir = module->nir;
-		nir->options = &nir_options;
+		nir->options = nir_options;
 		nir_validate_shader(nir, "in internal shader");
 
 		assert(exec_list_length(&nir->functions) == 1);
@@ -305,13 +340,13 @@ radv_shader_compile_to_nir(struct radv_device *device,
 				.descriptor_indexing = true,
 				.device_group = true,
 				.draw_parameters = true,
-				.float16 = true,
+				.float16 = !device->physical_device->use_aco,
 				.float64 = true,
 				.geometry_streams = true,
 				.image_read_without_format = true,
 				.image_write_without_format = true,
-				.int8 = true,
-				.int16 = true,
+				.int8 = !device->physical_device->use_aco,
+				.int16 = !device->physical_device->use_aco,
 				.int64 = true,
 				.int64_atomics = true,
 				.multiview = true,
@@ -320,8 +355,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
 				.runtime_descriptor_array = true,
 				.shader_viewport_index_layer = true,
 				.stencil_export = true,
-				.storage_8bit = true,
-				.storage_16bit = true,
+				.storage_8bit = !device->physical_device->use_aco,
+				.storage_16bit = !device->physical_device->use_aco,
 				.storage_image_ms = true,
 				.subgroup_arithmetic = true,
 				.subgroup_ballot = true,
@@ -343,7 +378,7 @@ radv_shader_compile_to_nir(struct radv_device *device,
 		nir = spirv_to_nir(spirv, module->size / 4,
 				   spec_entries, num_spec_entries,
 				   stage, entrypoint_name,
-				   &spirv_options, &nir_options);
+				   &spirv_options, nir_options);
 		assert(nir->info.stage == stage);
 		nir_validate_shader(nir, "after spirv_to_nir");
 
@@ -383,6 +418,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
 		NIR_PASS_V(nir, nir_split_var_copies);
 		NIR_PASS_V(nir, nir_split_per_member_structs);
 
+		if (nir->info.stage == MESA_SHADER_FRAGMENT && use_aco)
+                        NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);
 		if (nir->info.stage == MESA_SHADER_FRAGMENT)
 			NIR_PASS_V(nir, nir_lower_input_attachments, true);
 
@@ -961,7 +998,7 @@ radv_shader_variant_create(struct radv_device *device,
 		assert(binary->type == RADV_BINARY_TYPE_LEGACY);
 		config = ((struct radv_shader_binary_legacy *)binary)->config;
 		variant->code_size = radv_get_shader_binary_size(((struct radv_shader_binary_legacy *)binary)->code_size);
-		variant->exec_size = variant->code_size;
+		variant->exec_size = ((struct radv_shader_binary_legacy *)binary)->exec_size;
 	}
 
 	variant->info = binary->info;
@@ -1049,13 +1086,12 @@ shader_variant_compile(struct radv_device *device,
 		       struct radv_nir_compiler_options *options,
 		       bool gs_copy_shader,
 		       bool keep_shader_info,
+		       bool use_aco,
 		       struct radv_shader_binary **binary_out)
 {
 	enum radeon_family chip_family = device->physical_device->rad_info.family;
-	enum ac_target_machine_options tm_options = 0;
-	struct ac_llvm_compiler ac_llvm;
 	struct radv_shader_binary *binary = NULL;
-	bool thread_compiler;
+	bool init_llvm;
 
 	options->family = chip_family;
 	options->chip_class = device->physical_device->rad_info.chip_class;
@@ -1079,32 +1115,48 @@ shader_variant_compile(struct radv_device *device,
 	else
 		options->wave_size = device->physical_device->ge_wave_size;
 
-	if (options->supports_spill)
-		tm_options |= AC_TM_SUPPORTS_SPILL;
-	if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
-		tm_options |= AC_TM_SISCHED;
-	if (options->check_ir)
-		tm_options |= AC_TM_CHECK_IR;
-	if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
-		tm_options |= AC_TM_NO_LOAD_STORE_OPT;
-
-	thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
-	ac_init_llvm_once();
-	radv_init_llvm_compiler(&ac_llvm,
-				thread_compiler,
-				chip_family, tm_options,
-				options->wave_size);
-	if (gs_copy_shader) {
-		assert(shader_count == 1);
-		radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
-					    info, options);
+	init_llvm = !use_aco || options->dump_shader;
+#ifndef NDEBUG
+	init_llvm |= options->record_llvm_ir;
+#endif
+	if (init_llvm)
+		ac_init_llvm_once();
+
+	if (use_aco) {
+		aco_compile_shader(shader_count, shaders, &binary, info, options);
+		binary->info = *info;
 	} else {
-		radv_compile_nir_shader(&ac_llvm, &binary, info,
-					shaders, shader_count, options);
-	}
-	binary->info = *info;
+		enum ac_target_machine_options tm_options = 0;
+		struct ac_llvm_compiler ac_llvm;
+		bool thread_compiler;
+
+		if (options->supports_spill)
+			tm_options |= AC_TM_SUPPORTS_SPILL;
+		if (device->instance->perftest_flags & RADV_PERFTEST_SISCHED)
+			tm_options |= AC_TM_SISCHED;
+		if (options->check_ir)
+			tm_options |= AC_TM_CHECK_IR;
+		if (device->instance->debug_flags & RADV_DEBUG_NO_LOAD_STORE_OPT)
+			tm_options |= AC_TM_NO_LOAD_STORE_OPT;
+
+		thread_compiler = !(device->instance->debug_flags & RADV_DEBUG_NOTHREADLLVM);
+		radv_init_llvm_compiler(&ac_llvm,
+					thread_compiler,
+					chip_family, tm_options,
+					options->wave_size);
+
+		if (gs_copy_shader) {
+			assert(shader_count == 1);
+			radv_compile_gs_copy_shader(&ac_llvm, *shaders, &binary,
+						    info, options);
+		} else {
+			radv_compile_nir_shader(&ac_llvm, &binary, info,
+						shaders, shader_count, options);
+		}
 
-	radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+		binary->info = *info;
+		radv_destroy_llvm_compiler(&ac_llvm, thread_compiler);
+	}
 
 	struct radv_shader_variant *variant = radv_shader_variant_create(device, binary,
 									 keep_shader_info);
@@ -1143,6 +1195,7 @@ radv_shader_variant_compile(struct radv_device *device,
 			   const struct radv_shader_variant_key *key,
 			   struct radv_shader_info *info,
 			   bool keep_shader_info,
+			   bool use_aco,
 			   struct radv_shader_binary **binary_out)
 {
 	struct radv_nir_compiler_options options = {0};
@@ -1156,7 +1209,7 @@ radv_shader_variant_compile(struct radv_device *device,
 	options.robust_buffer_access = device->robust_buffer_access;
 
 	return shader_variant_compile(device, module, shaders, shader_count, shaders[shader_count - 1]->info.stage, info,
-				     &options, false, keep_shader_info, binary_out);
+				     &options, false, keep_shader_info, use_aco, binary_out);
 }
 
 struct radv_shader_variant *
@@ -1172,7 +1225,7 @@ radv_create_gs_copy_shader(struct radv_device *device,
 	options.key.has_multiview_view_index = multiview;
 
 	return shader_variant_compile(device, NULL, &shader, 1, MESA_SHADER_VERTEX,
-				      info, &options, true, keep_shader_info, binary_out);
+				      info, &options, true, keep_shader_info, false, binary_out);
 }
 
 void
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 57f9d160ed0..459ff863a91 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -333,6 +333,7 @@ struct radv_shader_binary_legacy {
 	struct radv_shader_binary base;
 	struct ac_shader_config config;
 	unsigned code_size;
+	unsigned exec_size;
 	unsigned llvm_ir_size;
 	unsigned disasm_size;
 	
@@ -390,7 +391,8 @@ radv_shader_compile_to_nir(struct radv_device *device,
 			   gl_shader_stage stage,
 			   const VkSpecializationInfo *spec_info,
 			   const VkPipelineCreateFlags flags,
-			   const struct radv_pipeline_layout *layout);
+			   const struct radv_pipeline_layout *layout,
+			   bool use_aco);
 
 void *
 radv_alloc_shader_memory(struct radv_device *device,
@@ -412,6 +414,7 @@ radv_shader_variant_compile(struct radv_device *device,
 			    const struct radv_shader_variant_key *key,
 			    struct radv_shader_info *info,
 			    bool keep_shader_info,
+			    bool use_aco,
 			    struct radv_shader_binary **binary_out);
 
 struct radv_shader_variant *




More information about the mesa-commit mailing list