Mesa (master): radv: implement VK_EXT_subgroup_size_control

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Nov 6 08:34:13 UTC 2019


Module: Mesa
Branch: master
Commit: fb07fd4e6cb9feb8c9a812dd5f859f165f213465
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=fb07fd4e6cb9feb8c9a812dd5f859f165f213465

Author: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Date:   Thu Oct 31 10:55:37 2019 +0100

radv: implement VK_EXT_subgroup_size_control

This extension allows controlling the subgroup size by allowing a
varying subgroup size and by specifying a required subgroup size.

This implementation only allows specifying a required subgroup
size for compute shaders because there are some caveats with
other shader stages (e.g. NGG with geometry shaders). This
basically allows apps to use Wave32 for compute shaders.

This extension is enabled for all chips but only GFX10 supports
Wave32. ACO doesn't support it.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>
Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>

---

 src/amd/vulkan/radv_device.c      | 22 ++++++++++++++++++++++
 src/amd/vulkan/radv_extensions.py |  1 +
 src/amd/vulkan/radv_pipeline.c    | 29 ++++++++++++++++++++++++++---
 src/amd/vulkan/radv_private.h     |  5 +++++
 src/amd/vulkan/radv_shader.h      |  5 +++++
 5 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/src/amd/vulkan/radv_device.c b/src/amd/vulkan/radv_device.c
index ec59bfb1ea9..93763c4ae4c 100644
--- a/src/amd/vulkan/radv_device.c
+++ b/src/amd/vulkan/radv_device.c
@@ -1090,6 +1090,13 @@ void radv_GetPhysicalDeviceFeatures2(
 			features->timelineSemaphore = true;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
+			VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
+				(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
+			features->subgroupSizeControl = true;
+			features->computeFullSubgroups = true;
+			break;
+		}
 		default:
 			break;
 		}
@@ -1578,6 +1585,21 @@ void radv_GetPhysicalDeviceProperties2(
 			props->maxTimelineSemaphoreValueDifference = UINT64_MAX;
 			break;
 		}
+		case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
+			VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
+				(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
+			props->minSubgroupSize = 64;
+			props->maxSubgroupSize = 64;
+			props->maxComputeWorkgroupSubgroups = UINT32_MAX;
+			props->requiredSubgroupSizeStages = 0;
+
+			if (pdevice->rad_info.chip_class >= GFX10) {
+				/* Only GFX10+ supports wave32. */
+				props->minSubgroupSize = 32;
+				props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
+			}
+			break;
+		}
 		default:
 			break;
 		}
diff --git a/src/amd/vulkan/radv_extensions.py b/src/amd/vulkan/radv_extensions.py
index a081e2da87a..587e9820844 100644
--- a/src/amd/vulkan/radv_extensions.py
+++ b/src/amd/vulkan/radv_extensions.py
@@ -138,6 +138,7 @@ EXTENSIONS = [
     Extension('VK_EXT_shader_stencil_export',             1, True),
     Extension('VK_EXT_shader_subgroup_ballot',            1, True),
     Extension('VK_EXT_shader_subgroup_vote',              1, True),
+    Extension('VK_EXT_subgroup_size_control',             2, '!device->use_aco'),
     Extension('VK_EXT_texel_buffer_alignment',            1, True),
     Extension('VK_EXT_transform_feedback',                1, True),
     Extension('VK_EXT_vertex_attribute_divisor',          3, True),
diff --git a/src/amd/vulkan/radv_pipeline.c b/src/amd/vulkan/radv_pipeline.c
index 533e8c4b0fe..a2839b65dd5 100644
--- a/src/amd/vulkan/radv_pipeline.c
+++ b/src/amd/vulkan/radv_pipeline.c
@@ -2385,17 +2385,27 @@ radv_fill_shader_keys(struct radv_device *device,
 	keys[MESA_SHADER_FRAGMENT].fs.is_int10 = key->is_int10;
 	keys[MESA_SHADER_FRAGMENT].fs.log2_ps_iter_samples = key->log2_ps_iter_samples;
 	keys[MESA_SHADER_FRAGMENT].fs.num_samples = key->num_samples;
+
+	if (nir[MESA_SHADER_COMPUTE]) {
+		keys[MESA_SHADER_COMPUTE].cs.subgroup_size = key->compute_subgroup_size;
+	}
 }
 
 static uint8_t
 radv_get_wave_size(struct radv_device *device,
+		   const VkPipelineShaderStageCreateInfo *pStage,
 		   gl_shader_stage stage,
 		   const struct radv_shader_variant_key *key)
 {
 	if (stage == MESA_SHADER_GEOMETRY && !key->vs_common_out.as_ngg)
 		return 64;
-	else if (stage == MESA_SHADER_COMPUTE)
+	else if (stage == MESA_SHADER_COMPUTE) {
+		if (key->cs.subgroup_size) {
+			/* Return the required subgroup size if specified. */
+			return key->cs.subgroup_size;
+		}
 		return device->physical_device->cs_wave_size;
+	}
 	else if (stage == MESA_SHADER_FRAGMENT)
 		return device->physical_device->ps_wave_size;
 	else
@@ -2404,6 +2414,7 @@ radv_get_wave_size(struct radv_device *device,
 
 static void
 radv_fill_shader_info(struct radv_pipeline *pipeline,
+		      const VkPipelineShaderStageCreateInfo **pStages,
 		      struct radv_shader_variant_key *keys,
                       struct radv_shader_info *infos,
                       nir_shader **nir)
@@ -2505,7 +2516,8 @@ radv_fill_shader_info(struct radv_pipeline *pipeline,
 	for (int i = 0; i < MESA_SHADER_STAGES; i++) {
 		if (nir[i])
 			infos[i].wave_size =
-				radv_get_wave_size(pipeline->device, i, &keys[i]);
+				radv_get_wave_size(pipeline->device, pStages[i],
+						   i, &keys[i]);
 	}
 }
 
@@ -2712,7 +2724,7 @@ void radv_create_shaders(struct radv_pipeline *pipeline,
 
 	radv_fill_shader_keys(device, keys, key, nir);
 
-	radv_fill_shader_info(pipeline, keys, infos, nir);
+	radv_fill_shader_info(pipeline, pStages, keys, infos, nir);
 
 	if ((nir[MESA_SHADER_VERTEX] &&
 	     keys[MESA_SHADER_VERTEX].vs_common_out.as_ngg) ||
@@ -5100,12 +5112,23 @@ static struct radv_pipeline_key
 radv_generate_compute_pipeline_key(struct radv_pipeline *pipeline,
 				   const VkComputePipelineCreateInfo *pCreateInfo)
 {
+	const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage;
 	struct radv_pipeline_key key;
 	memset(&key, 0, sizeof(key));
 
 	if (pCreateInfo->flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT)
 		key.optimisations_disabled = 1;
 
+	const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *subgroup_size =
+		vk_find_struct_const(stage->pNext,
+				     PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);
+
+	if (subgroup_size) {
+		assert(subgroup_size->requiredSubgroupSize == 32 ||
+		       subgroup_size->requiredSubgroupSize == 64);
+		key.compute_subgroup_size = subgroup_size->requiredSubgroupSize;
+	}
+
 	return key;
 }
 
diff --git a/src/amd/vulkan/radv_private.h b/src/amd/vulkan/radv_private.h
index 99ba500b254..c4d9fe5ce37 100644
--- a/src/amd/vulkan/radv_private.h
+++ b/src/amd/vulkan/radv_private.h
@@ -411,6 +411,11 @@ struct radv_pipeline_key {
 	uint32_t has_multiview_view_index : 1;
 	uint32_t optimisations_disabled : 1;
 	uint8_t topology;
+
+	/* Non-zero if a required subgroup size is specified via
+	 * VK_EXT_subgroup_size_control.
+	 */
+	uint8_t compute_subgroup_size;
 };
 
 struct radv_shader_binary;
diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
index 0ee28b9aa3b..0482255bed5 100644
--- a/src/amd/vulkan/radv_shader.h
+++ b/src/amd/vulkan/radv_shader.h
@@ -104,12 +104,17 @@ struct radv_fs_variant_key {
 	uint32_t is_int10;
 };
 
+struct radv_cs_variant_key {
+	uint8_t subgroup_size;
+};
+
 struct radv_shader_variant_key {
 	union {
 		struct radv_vs_variant_key vs;
 		struct radv_fs_variant_key fs;
 		struct radv_tes_variant_key tes;
 		struct radv_tcs_variant_key tcs;
+		struct radv_cs_variant_key cs;
 
 		/* A common prefix of the vs and tes keys. */
 		struct radv_vs_out_key vs_common_out;




More information about the mesa-commit mailing list