Mesa (main): pvr: Fix physical device limits.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jun 24 09:25:12 UTC 2022
Module: Mesa
Branch: main
Commit: 7858c32550829e14fa665285fe41b8eb58462a37
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=7858c32550829e14fa665285fe41b8eb58462a37
Author: Karmjit Mahil <Karmjit.Mahil at imgtec.com>
Date: Thu Jun 9 13:03:30 2022 +0100
pvr: Fix physical device limits.
This commit makes changes to the physical device limits which were
missed during the 1.17 transition.
Signed-off-by: Karmjit Mahil <Karmjit.Mahil at imgtec.com>
Reviewed-by: Rajnesh Kanwal <rajnesh.kanwal at imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17206>
---
src/imagination/common/pvr_device_info.c | 12 +++++++
src/imagination/common/pvr_device_info.h | 4 +++
src/imagination/vulkan/pvr_device.c | 54 +++++++++++++++++++++++++-------
src/imagination/vulkan/pvr_limits.h | 4 +--
4 files changed, 61 insertions(+), 13 deletions(-)
diff --git a/src/imagination/common/pvr_device_info.c b/src/imagination/common/pvr_device_info.c
index 2a3773212b1..69cdb0c6dc1 100644
--- a/src/imagination/common/pvr_device_info.c
+++ b/src/imagination/common/pvr_device_info.c
@@ -46,6 +46,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_gs_rta_support = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
+ .has_max_instances_per_pds_task = true,
.has_max_multisample = true,
.has_max_partitions = true,
.has_max_usc_tasks = true,
@@ -63,6 +64,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.has_tpu_image_state_v2 = true,
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
+ .has_usc_slots = true,
.has_uvs_banks = true,
.has_uvs_pba_entries = true,
.has_uvs_vtx_entries = true,
@@ -73,6 +75,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.common_store_size_in_dwords = 1280U * 4U * 4U,
.isp_max_tiles_in_flight = 4U,
.isp_samples_per_pixel = 2U,
+ .max_instances_per_pds_task = 32U,
.max_multisample = 8U,
.max_partitions = 8U,
.max_usc_tasks = 56U,
@@ -83,6 +86,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
.tile_size_x = 32U,
.tile_size_y = 32U,
.usc_min_output_registers_per_pix = 2U,
+ .usc_slots = 32U,
.uvs_banks = 8U,
.uvs_pba_entries = 320U,
.uvs_vtx_entries = 288U,
@@ -126,6 +130,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_compute = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
+ .has_max_instances_per_pds_task = true,
.has_max_multisample = true,
.has_max_partitions = true,
.has_max_usc_tasks = true,
@@ -146,6 +151,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_pixel_partition_mask = true,
+ .has_usc_slots = true,
.has_uvs_banks = true,
.has_uvs_pba_entries = true,
.has_uvs_vtx_entries = true,
@@ -154,6 +160,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.common_store_size_in_dwords = 512U * 4U * 4U,
.isp_max_tiles_in_flight = 1U,
.isp_samples_per_pixel = 1U,
+ .max_instances_per_pds_task = 32U,
.max_multisample = 4U,
.max_partitions = 4U,
.max_usc_tasks = 24U,
@@ -165,6 +172,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
.tile_size_x = 16U,
.tile_size_y = 16U,
.usc_min_output_registers_per_pix = 1U,
+ .usc_slots = 14U,
.uvs_banks = 2U,
.uvs_pba_entries = 320U,
.uvs_vtx_entries = 288U,
@@ -199,6 +207,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_gs_rta_support = true,
.has_isp_max_tiles_in_flight = true,
.has_isp_samples_per_pixel = true,
+ .has_max_instances_per_pds_task = true,
.has_max_multisample = true,
.has_max_partitions = true,
.has_max_usc_tasks = true,
@@ -221,6 +230,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.has_usc_f16sop_u8 = true,
.has_usc_min_output_registers_per_pix = true,
.has_usc_pixel_partition_mask = true,
+ .has_usc_slots = true,
.has_uvs_banks = true,
.has_uvs_pba_entries = true,
.has_uvs_vtx_entries = true,
@@ -230,6 +240,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.common_store_size_in_dwords = 1344U * 4U * 4U,
.isp_max_tiles_in_flight = 6U,
.isp_samples_per_pixel = 4U,
+ .max_instances_per_pds_task = 32U,
.max_multisample = 4U,
.max_partitions = 16U,
.max_usc_tasks = 156U,
@@ -241,6 +252,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
.tile_size_x = 16U,
.tile_size_y = 16U,
.usc_min_output_registers_per_pix = 2U,
+ .usc_slots = 64U,
.uvs_banks = 8U,
.uvs_pba_entries = 160U,
.uvs_vtx_entries = 144U,
diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h
index 026bfd9fe80..3dd65ba1977 100644
--- a/src/imagination/common/pvr_device_info.h
+++ b/src/imagination/common/pvr_device_info.h
@@ -259,6 +259,7 @@ struct pvr_device_features {
bool has_gs_rta_support : 1;
bool has_isp_max_tiles_in_flight : 1;
bool has_isp_samples_per_pixel : 1;
+ bool has_max_instances_per_pds_task : 1;
bool has_max_multisample : 1;
bool has_max_partitions : 1;
bool has_max_usc_tasks : 1;
@@ -285,6 +286,7 @@ struct pvr_device_features {
bool has_usc_f16sop_u8 : 1;
bool has_usc_min_output_registers_per_pix : 1;
bool has_usc_pixel_partition_mask : 1;
+ bool has_usc_slots : 1;
bool has_uvs_banks : 1;
bool has_uvs_pba_entries : 1;
bool has_uvs_vtx_entries : 1;
@@ -296,6 +298,7 @@ struct pvr_device_features {
uint32_t common_store_size_in_dwords;
uint32_t isp_max_tiles_in_flight;
uint32_t isp_samples_per_pixel;
+ uint32_t max_instances_per_pds_task;
uint32_t max_multisample;
uint32_t max_partitions;
uint32_t max_usc_tasks;
@@ -307,6 +310,7 @@ struct pvr_device_features {
uint32_t tile_size_x;
uint32_t tile_size_y;
uint32_t usc_min_output_registers_per_pix;
+ uint32_t usc_slots;
uint32_t uvs_banks;
uint32_t uvs_pba_entries;
uint32_t uvs_vtx_entries;
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
index c5b8a02eed0..ebcb92788e3 100644
--- a/src/imagination/vulkan/pvr_device.c
+++ b/src/imagination/vulkan/pvr_device.c
@@ -819,6 +819,33 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
const uint32_t max_user_vertex_components =
((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
+ /* The workgroup invocations are limited by the case where we have a compute
+ * barrier - each slot has a fixed number of invocations, the whole workgroup
+ * may need to span multiple slots. As each slot will WAIT at the barrier
+ * until the last invocation completes, all have to be schedulable at the
+ * same time.
+ *
+ * Typically all Rogue cores have 16 slots. Some of the smallest cores are
+ * reduced to 14.
+ *
+ * The compute barrier slot exhaustion scenario can be tested with:
+ * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
+ * .atomicwrite*guard*comp
+ */
+
+ /* Default value based on the minimum value found in all existing cores. */
+ const uint32_t usc_slots =
+ PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14);
+
+ /* Default value based on the minimum value found in all existing cores. */
+ const uint32_t max_instances_per_pds_task =
+ PVR_GET_FEATURE_VALUE(&pdevice->dev_info,
+ max_instances_per_pds_task,
+ 32U);
+
+ const uint32_t max_compute_work_group_invocations =
+ (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
+
VkPhysicalDeviceLimits limits = {
.maxImageDimension1D = max_render_size,
.maxImageDimension2D = max_render_size,
@@ -879,28 +906,33 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
.maxTessellationEvaluationOutputComponents = 0,
/* Geometry Shader Limits */
- .maxGeometryShaderInvocations = 32U,
- .maxGeometryInputComponents = max_user_vertex_components,
- .maxGeometryOutputComponents = max_user_vertex_components,
- .maxGeometryOutputVertices = 256U,
- .maxGeometryTotalOutputComponents = 1024U,
+ .maxGeometryShaderInvocations = 0,
+ .maxGeometryInputComponents = 0,
+ .maxGeometryOutputComponents = 0,
+ .maxGeometryOutputVertices = 0,
+ .maxGeometryTotalOutputComponents = 0,
/* Fragment Shader Limits */
.maxFragmentInputComponents = max_user_vertex_components,
.maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
.maxFragmentDualSrcAttachments = 0,
- .maxFragmentCombinedOutputResources = 8U,
+ .maxFragmentCombinedOutputResources =
+ descriptor_limits->max_per_stage_storage_buffers +
+ descriptor_limits->max_per_stage_storage_images +
+ PVR_MAX_COLOR_ATTACHMENTS,
/* Compute Shader Limits */
.maxComputeSharedMemorySize = 16U * 1024U,
.maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
- .maxComputeWorkGroupInvocations = 512U,
- .maxComputeWorkGroupSize = { 512U, 512U, 64U },
+ .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
+ .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
+ max_compute_work_group_invocations,
+ 64U },
/* Rasterization Limits */
.subPixelPrecisionBits = sub_pixel_precision,
.subTexelPrecisionBits = 8U,
- .mipmapPrecisionBits = 4U,
+ .mipmapPrecisionBits = 8U,
.maxDrawIndexedIndexValue = UINT32_MAX,
.maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
@@ -921,8 +953,8 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
.minTexelOffset = -8,
.maxTexelOffset = 7U,
- .minTexelGatherOffset = 0,
- .maxTexelGatherOffset = 0,
+ .minTexelGatherOffset = -8,
+ .maxTexelGatherOffset = 7,
.minInterpolationOffset = -0.5,
.maxInterpolationOffset = 0.5,
.subPixelInterpolationOffsetBits = 4U,
diff --git a/src/imagination/vulkan/pvr_limits.h b/src/imagination/vulkan/pvr_limits.h
index cda91e5cac9..4a24701c11b 100644
--- a/src/imagination/vulkan/pvr_limits.h
+++ b/src/imagination/vulkan/pvr_limits.h
@@ -32,12 +32,12 @@
#include "pvr_device_info.h"
#include "util/u_math.h"
-#define PVR_MAX_COLOR_ATTACHMENTS 8U
+#define PVR_MAX_COLOR_ATTACHMENTS 8U /* Number of PBE emit registers. */
#define PVR_MAX_QUEUES 2U
#define PVR_MAX_VIEWPORTS 1U
#define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
-#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
+#define PVR_MAX_PUSH_CONSTANTS_SIZE 256U
#define PVR_MAX_DESCRIPTOR_SETS 4U
#define PVR_MAX_FRAMEBUFFER_LAYERS ROGUE_MAX_RENDER_TARGETS
More information about the mesa-commit
mailing list