Mesa (main): pvr: Fix physical device limits.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jun 24 09:25:12 UTC 2022


Module: Mesa
Branch: main
Commit: 7858c32550829e14fa665285fe41b8eb58462a37
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=7858c32550829e14fa665285fe41b8eb58462a37

Author: Karmjit Mahil <Karmjit.Mahil at imgtec.com>
Date:   Thu Jun  9 13:03:30 2022 +0100

pvr: Fix physical device limits.

This commit makes the changes to the physical device limits which were
missed during the 1.17 transition.

Signed-off-by: Karmjit Mahil <Karmjit.Mahil at imgtec.com>
Reviewed-by: Rajnesh Kanwal <rajnesh.kanwal at imgtec.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/17206>

---

 src/imagination/common/pvr_device_info.c | 12 +++++++
 src/imagination/common/pvr_device_info.h |  4 +++
 src/imagination/vulkan/pvr_device.c      | 54 +++++++++++++++++++++++++-------
 src/imagination/vulkan/pvr_limits.h      |  4 +--
 4 files changed, 61 insertions(+), 13 deletions(-)

diff --git a/src/imagination/common/pvr_device_info.c b/src/imagination/common/pvr_device_info.c
index 2a3773212b1..69cdb0c6dc1 100644
--- a/src/imagination/common/pvr_device_info.c
+++ b/src/imagination/common/pvr_device_info.c
@@ -46,6 +46,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
    .has_gs_rta_support = true,
    .has_isp_max_tiles_in_flight = true,
    .has_isp_samples_per_pixel = true,
+   .has_max_instances_per_pds_task = true,
    .has_max_multisample = true,
    .has_max_partitions = true,
    .has_max_usc_tasks = true,
@@ -63,6 +64,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
    .has_tpu_image_state_v2 = true,
    .has_usc_f16sop_u8 = true,
    .has_usc_min_output_registers_per_pix = true,
+   .has_usc_slots = true,
    .has_uvs_banks = true,
    .has_uvs_pba_entries = true,
    .has_uvs_vtx_entries = true,
@@ -73,6 +75,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
    .common_store_size_in_dwords = 1280U * 4U * 4U,
    .isp_max_tiles_in_flight = 4U,
    .isp_samples_per_pixel = 2U,
+   .max_instances_per_pds_task = 32U,
    .max_multisample = 8U,
    .max_partitions = 8U,
    .max_usc_tasks = 56U,
@@ -83,6 +86,7 @@ const struct pvr_device_features pvr_device_features_4_V_2_51 = {
    .tile_size_x = 32U,
    .tile_size_y = 32U,
    .usc_min_output_registers_per_pix = 2U,
+   .usc_slots = 32U,
    .uvs_banks = 8U,
    .uvs_pba_entries = 320U,
    .uvs_vtx_entries = 288U,
@@ -126,6 +130,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
    .has_compute = true,
    .has_isp_max_tiles_in_flight = true,
    .has_isp_samples_per_pixel = true,
+   .has_max_instances_per_pds_task = true,
    .has_max_multisample = true,
    .has_max_partitions = true,
    .has_max_usc_tasks = true,
@@ -146,6 +151,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
    .has_usc_f16sop_u8 = true,
    .has_usc_min_output_registers_per_pix = true,
    .has_usc_pixel_partition_mask = true,
+   .has_usc_slots = true,
    .has_uvs_banks = true,
    .has_uvs_pba_entries = true,
    .has_uvs_vtx_entries = true,
@@ -154,6 +160,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
    .common_store_size_in_dwords = 512U * 4U * 4U,
    .isp_max_tiles_in_flight = 1U,
    .isp_samples_per_pixel = 1U,
+   .max_instances_per_pds_task = 32U,
    .max_multisample = 4U,
    .max_partitions = 4U,
    .max_usc_tasks = 24U,
@@ -165,6 +172,7 @@ const struct pvr_device_features pvr_device_features_33_V_11_3 = {
    .tile_size_x = 16U,
    .tile_size_y = 16U,
    .usc_min_output_registers_per_pix = 1U,
+   .usc_slots = 14U,
    .uvs_banks = 2U,
    .uvs_pba_entries = 320U,
    .uvs_vtx_entries = 288U,
@@ -199,6 +207,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
    .has_gs_rta_support = true,
    .has_isp_max_tiles_in_flight = true,
    .has_isp_samples_per_pixel = true,
+   .has_max_instances_per_pds_task = true,
    .has_max_multisample = true,
    .has_max_partitions = true,
    .has_max_usc_tasks = true,
@@ -221,6 +230,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
    .has_usc_f16sop_u8 = true,
    .has_usc_min_output_registers_per_pix = true,
    .has_usc_pixel_partition_mask = true,
+   .has_usc_slots = true,
    .has_uvs_banks = true,
    .has_uvs_pba_entries = true,
    .has_uvs_vtx_entries = true,
@@ -230,6 +240,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
    .common_store_size_in_dwords = 1344U * 4U * 4U,
    .isp_max_tiles_in_flight = 6U,
    .isp_samples_per_pixel = 4U,
+   .max_instances_per_pds_task = 32U,
    .max_multisample = 4U,
    .max_partitions = 16U,
    .max_usc_tasks = 156U,
@@ -241,6 +252,7 @@ const struct pvr_device_features pvr_device_features_36_V_104_796 = {
    .tile_size_x = 16U,
    .tile_size_y = 16U,
    .usc_min_output_registers_per_pix = 2U,
+   .usc_slots = 64U,
    .uvs_banks = 8U,
    .uvs_pba_entries = 160U,
    .uvs_vtx_entries = 144U,
diff --git a/src/imagination/common/pvr_device_info.h b/src/imagination/common/pvr_device_info.h
index 026bfd9fe80..3dd65ba1977 100644
--- a/src/imagination/common/pvr_device_info.h
+++ b/src/imagination/common/pvr_device_info.h
@@ -259,6 +259,7 @@ struct pvr_device_features {
    bool has_gs_rta_support : 1;
    bool has_isp_max_tiles_in_flight : 1;
    bool has_isp_samples_per_pixel : 1;
+   bool has_max_instances_per_pds_task : 1;
    bool has_max_multisample : 1;
    bool has_max_partitions : 1;
    bool has_max_usc_tasks : 1;
@@ -285,6 +286,7 @@ struct pvr_device_features {
    bool has_usc_f16sop_u8 : 1;
    bool has_usc_min_output_registers_per_pix : 1;
    bool has_usc_pixel_partition_mask : 1;
+   bool has_usc_slots : 1;
    bool has_uvs_banks : 1;
    bool has_uvs_pba_entries : 1;
    bool has_uvs_vtx_entries : 1;
@@ -296,6 +298,7 @@ struct pvr_device_features {
    uint32_t common_store_size_in_dwords;
    uint32_t isp_max_tiles_in_flight;
    uint32_t isp_samples_per_pixel;
+   uint32_t max_instances_per_pds_task;
    uint32_t max_multisample;
    uint32_t max_partitions;
    uint32_t max_usc_tasks;
@@ -307,6 +310,7 @@ struct pvr_device_features {
    uint32_t tile_size_x;
    uint32_t tile_size_y;
    uint32_t usc_min_output_registers_per_pix;
+   uint32_t usc_slots;
    uint32_t uvs_banks;
    uint32_t uvs_pba_entries;
    uint32_t uvs_vtx_entries;
diff --git a/src/imagination/vulkan/pvr_device.c b/src/imagination/vulkan/pvr_device.c
index c5b8a02eed0..ebcb92788e3 100644
--- a/src/imagination/vulkan/pvr_device.c
+++ b/src/imagination/vulkan/pvr_device.c
@@ -819,6 +819,33 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
    const uint32_t max_user_vertex_components =
       ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
 
+   /* The workgroup invocations are limited by the case where we have a compute
+    * barrier - each slot has a fixed number of invocations, the whole workgroup
+    * may need to span multiple slots. As each slot will WAIT at the barrier
+    * until the last invocation completes, all have to be schedulable at the
+    * same time.
+    *
+    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
+    * reduced to 14.
+    *
+    * The compute barrier slot exhaustion scenario can be tested with:
+    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
+    *    .atomicwrite*guard*comp
+    */
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t usc_slots =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14);
+
+   /* Default value based on the minimum value found in all existing cores. */
+   const uint32_t max_instances_per_pds_task =
+      PVR_GET_FEATURE_VALUE(&pdevice->dev_info,
+                            max_instances_per_pds_task,
+                            32U);
+
+   const uint32_t max_compute_work_group_invocations =
+      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
+
    VkPhysicalDeviceLimits limits = {
       .maxImageDimension1D = max_render_size,
       .maxImageDimension2D = max_render_size,
@@ -879,28 +906,33 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
       .maxTessellationEvaluationOutputComponents = 0,
 
       /* Geometry Shader Limits */
-      .maxGeometryShaderInvocations = 32U,
-      .maxGeometryInputComponents = max_user_vertex_components,
-      .maxGeometryOutputComponents = max_user_vertex_components,
-      .maxGeometryOutputVertices = 256U,
-      .maxGeometryTotalOutputComponents = 1024U,
+      .maxGeometryShaderInvocations = 0,
+      .maxGeometryInputComponents = 0,
+      .maxGeometryOutputComponents = 0,
+      .maxGeometryOutputVertices = 0,
+      .maxGeometryTotalOutputComponents = 0,
 
       /* Fragment Shader Limits */
       .maxFragmentInputComponents = max_user_vertex_components,
       .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
       .maxFragmentDualSrcAttachments = 0,
-      .maxFragmentCombinedOutputResources = 8U,
+      .maxFragmentCombinedOutputResources =
+         descriptor_limits->max_per_stage_storage_buffers +
+         descriptor_limits->max_per_stage_storage_images +
+         PVR_MAX_COLOR_ATTACHMENTS,
 
       /* Compute Shader Limits */
       .maxComputeSharedMemorySize = 16U * 1024U,
       .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
-      .maxComputeWorkGroupInvocations = 512U,
-      .maxComputeWorkGroupSize = { 512U, 512U, 64U },
+      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
+      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
+                                   max_compute_work_group_invocations,
+                                   64U },
 
       /* Rasterization Limits */
       .subPixelPrecisionBits = sub_pixel_precision,
       .subTexelPrecisionBits = 8U,
-      .mipmapPrecisionBits = 4U,
+      .mipmapPrecisionBits = 8U,
 
       .maxDrawIndexedIndexValue = UINT32_MAX,
       .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
@@ -921,8 +953,8 @@ void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
 
       .minTexelOffset = -8,
       .maxTexelOffset = 7U,
-      .minTexelGatherOffset = 0,
-      .maxTexelGatherOffset = 0,
+      .minTexelGatherOffset = -8,
+      .maxTexelGatherOffset = 7,
       .minInterpolationOffset = -0.5,
       .maxInterpolationOffset = 0.5,
       .subPixelInterpolationOffsetBits = 4U,
diff --git a/src/imagination/vulkan/pvr_limits.h b/src/imagination/vulkan/pvr_limits.h
index cda91e5cac9..4a24701c11b 100644
--- a/src/imagination/vulkan/pvr_limits.h
+++ b/src/imagination/vulkan/pvr_limits.h
@@ -32,12 +32,12 @@
 #include "pvr_device_info.h"
 #include "util/u_math.h"
 
-#define PVR_MAX_COLOR_ATTACHMENTS 8U
+#define PVR_MAX_COLOR_ATTACHMENTS 8U /* Number of PBE emit registers. */
 #define PVR_MAX_QUEUES 2U
 #define PVR_MAX_VIEWPORTS 1U
 #define PVR_MAX_NEG_OFFSCREEN_OFFSET 4096U
 
-#define PVR_MAX_PUSH_CONSTANTS_SIZE 128U
+#define PVR_MAX_PUSH_CONSTANTS_SIZE 256U
 
 #define PVR_MAX_DESCRIPTOR_SETS 4U
 #define PVR_MAX_FRAMEBUFFER_LAYERS ROGUE_MAX_RENDER_TARGETS



More information about the mesa-commit mailing list