Mesa (main): intel/dev: Add a max_cs_workgroup_threads field
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jul 14 23:19:12 UTC 2021
Module: Mesa
Branch: main
Commit: 6642749458b6a2e0800bb5e606dcf15e4db0c479
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=6642749458b6a2e0800bb5e606dcf15e4db0c479
Author: Jason Ekstrand <jason at jlekstrand.net>
Date: Tue Jul 13 18:21:43 2021 -0500
intel/dev: Add a max_cs_workgroup_threads field
This is distinct from max_cs_threads because it also encodes
restrictions about the way we use GPGPU/COMPUTE_WALKER. This gets rid
of the MIN2(64, devinfo->max_cs_threads) we have scattered all over the
driver and puts it in a central place.
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira at intel.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11861>
---
src/gallium/drivers/crocus/crocus_screen.c | 3 +--
src/gallium/drivers/iris/iris_screen.c | 4 +---
src/intel/compiler/brw_fs.cpp | 5 ++---
src/intel/dev/intel_device_info.c | 19 +++++++++++++++++++
src/intel/dev/intel_device_info.h | 11 +++++++++++
src/intel/vulkan/anv_device.c | 6 ++----
src/mesa/drivers/dri/i965/brw_context.c | 8 +-------
7 files changed, 37 insertions(+), 19 deletions(-)
diff --git a/src/gallium/drivers/crocus/crocus_screen.c b/src/gallium/drivers/crocus/crocus_screen.c
index c1e0cf3a294..2647fb923ee 100644
--- a/src/gallium/drivers/crocus/crocus_screen.c
+++ b/src/gallium/drivers/crocus/crocus_screen.c
@@ -545,8 +545,7 @@ crocus_get_compute_param(struct pipe_screen *pscreen,
struct crocus_screen *screen = (struct crocus_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
- const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
- const uint32_t max_invocations = 32 * max_threads;
+ const uint32_t max_invocations = 32 * devinfo->max_cs_workgroup_threads;
if (devinfo->ver < 7)
return 0;
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index f9df596dade..7b540fa0651 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -516,9 +516,7 @@ iris_get_compute_param(struct pipe_screen *pscreen,
struct iris_screen *screen = (struct iris_screen *)pscreen;
const struct intel_device_info *devinfo = &screen->devinfo;
- /* Limit max_threads to 64 for the GPGPU_WALKER command. */
- const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
- const uint32_t max_invocations = 32 * max_threads;
+ const uint32_t max_invocations = 32 * devinfo->max_cs_workgroup_threads;
#define RET(x) do { \
if (ret) \
diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp
index 7dbebfd3c83..6052d3760ef 100644
--- a/src/intel/compiler/brw_fs.cpp
+++ b/src/intel/compiler/brw_fs.cpp
@@ -10099,7 +10099,7 @@ brw_compile_cs(const struct brw_compiler *compiler,
prog_data->local_size[2];
/* Limit max_threads to 64 for the GPGPU_WALKER command */
- const uint32_t max_threads = MIN2(64, compiler->devinfo->max_cs_threads);
+ const uint32_t max_threads = compiler->devinfo->max_cs_workgroup_threads;
min_dispatch_width = util_next_power_of_two(
MAX2(8, DIV_ROUND_UP(local_workgroup_size, max_threads)));
assert(min_dispatch_width <= 32);
@@ -10316,8 +10316,7 @@ brw_cs_simd_size_for_group_size(const struct intel_device_info *devinfo,
if ((INTEL_DEBUG & DEBUG_DO32) && (mask & simd32))
return 32;
- /* Limit max_threads to 64 for the GPGPU_WALKER command */
- const uint32_t max_threads = MIN2(64, devinfo->max_cs_threads);
+ const uint32_t max_threads = devinfo->max_cs_workgroup_threads;
if ((mask & simd8) && group_size <= 8 * max_threads) {
/* Prefer SIMD16 if can do without spilling. Matches logic in
diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c
index a1d3b2c2b8e..4a7cea5a718 100644
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1228,6 +1228,21 @@ getparam(int fd, uint32_t param, int *value)
return true;
}
+static void
+update_cs_workgroup_threads(struct intel_device_info *devinfo)
+{
+ /* GPGPU_WALKER::ThreadWidthCounterMaximum is U6-1 so the most threads we
+ * can program is 64 without going up to a rectangular group. This only
+ * impacts Haswell and TGL which have higher thread counts.
+ *
+ * INTERFACE_DESCRIPTOR_DATA::NumberofThreadsinGPGPUThreadGroup on Xe-HP+
+ * is 10 bits so we have no such restrictions.
+ */
+ devinfo->max_cs_workgroup_threads =
+ devinfo->verx10 >= 125 ? devinfo->max_cs_threads :
+ MIN2(devinfo->max_cs_threads, 64);
+}
+
bool
intel_get_device_info_from_pci_id(int pci_id,
struct intel_device_info *devinfo)
@@ -1302,6 +1317,8 @@ intel_get_device_info_from_pci_id(int pci_id,
if (devinfo->verx10 == 0)
devinfo->verx10 = devinfo->ver * 10;
+ update_cs_workgroup_threads(devinfo);
+
devinfo->chipset_id = pci_id;
return true;
}
@@ -1434,6 +1451,8 @@ fixup_chv_device_info(struct intel_device_info *devinfo)
if (max_cs_threads > devinfo->max_cs_threads)
devinfo->max_cs_threads = max_cs_threads;
+ update_cs_workgroup_threads(devinfo);
+
/* Braswell is even more annoying. Its marketing name isn't determinable
* from the PCI ID and is also dependent on fusing.
*/
diff --git a/src/intel/dev/intel_device_info.h b/src/intel/dev/intel_device_info.h
index 12a62b7690b..30330f8b5b1 100644
--- a/src/intel/dev/intel_device_info.h
+++ b/src/intel/dev/intel_device_info.h
@@ -214,6 +214,17 @@ struct intel_device_info
*/
unsigned max_cs_threads;
+ /**
+ * Maximum number of threads per workgroup supported by the GPGPU_WALKER or
+ * COMPUTE_WALKER command.
+ *
+ * This may be smaller than max_cs_threads as it takes into account added
+ * restrictions on the GPGPU/COMPUTE_WALKER commands. While max_cs_threads
+ * expresses the total parallelism of the GPU, this expresses the maximum
+ * number of threads we can dispatch in a single workgroup.
+ */
+ unsigned max_cs_workgroup_threads;
+
struct {
/**
* Fixed size of the URB.
diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
index deb9c89c56c..55541fe936c 100644
--- a/src/intel/vulkan/anv_device.c
+++ b/src/intel/vulkan/anv_device.c
@@ -1899,8 +1899,7 @@ void anv_GetPhysicalDeviceProperties(
pdevice->has_bindless_images && pdevice->has_a64_buffer_access
? UINT32_MAX : MAX_BINDING_TABLE_SIZE - MAX_RTS - 1;
- /* Limit max_threads to 64 for the GPGPU_WALKER command */
- const uint32_t max_workgroup_size = 32 * MIN2(64, devinfo->max_cs_threads);
+ const uint32_t max_workgroup_size = 32 * devinfo->max_cs_workgroup_threads;
VkSampleCountFlags sample_counts =
isl_device_get_sample_counts(&pdevice->isl_dev);
@@ -2537,8 +2536,7 @@ void anv_GetPhysicalDeviceProperties2(
STATIC_ASSERT(8 <= BRW_SUBGROUP_SIZE && BRW_SUBGROUP_SIZE <= 32);
props->minSubgroupSize = 8;
props->maxSubgroupSize = 32;
- /* Limit max_threads to 64 for the GPGPU_WALKER command. */
- props->maxComputeWorkgroupSubgroups = MIN2(64, pdevice->info.max_cs_threads);
+ props->maxComputeWorkgroupSubgroups = pdevice->info.max_cs_workgroup_threads;
props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
break;
}
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index 86478c20eab..0d4c8317ea0 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -840,14 +840,8 @@ brw_initialize_cs_context_constants(struct brw_context *brw)
/* Maximum number of scalar compute shader invocations that can be run in
* parallel in the same subslice assuming SIMD32 dispatch.
- *
- * We don't advertise more than 64 threads, because we are limited to 64 by
- * our usage of thread_width_max in the gpgpu walker command. This only
- * currently impacts Haswell, which otherwise might be able to advertise 70
- * threads. With SIMD32 and 64 threads, Haswell still provides twice the
- * required the number of invocation needed for ARB_compute_shader.
*/
- const unsigned max_threads = MIN2(64, devinfo->max_cs_threads);
+ const unsigned max_threads = devinfo->max_cs_workgroup_threads;
const uint32_t max_invocations = 32 * max_threads;
ctx->Const.MaxComputeWorkGroupSize[0] = max_invocations;
ctx->Const.MaxComputeWorkGroupSize[1] = max_invocations;
More information about the mesa-commit
mailing list