Mesa (main): intel/devinfo: deal with i915 topology query change

Fri Jan 14 12:17:35 UTC 2022

Module: Mesa
Branch: main
Commit: e86ce98c6a08cf6a4129132cc5094370931e0bfa
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=e86ce98c6a08cf6a4129132cc5094370931e0bfa

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Fri Jun 25 10:46:30 2021 +0300

intel/devinfo: deal with i915 topology query change

i915 does not report slices accurately anymore on Gfx12.5+. Since this
is information we need to have for performance queries, we need to
rebuild it here.

v2: Remove invalid change to pixel pipes computations (Jordan)

v3: Fix index calculation (Curro)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Francisco Jerez <currojerez at riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14297>

---

 src/intel/dev/intel_device_info.c | 82 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 77 insertions(+), 5 deletions(-)

diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c
index 4cb1c658a3a..623c6e1788f 100644
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1103,7 +1103,7 @@ update_pixel_pipes(struct intel_device_info *devinfo)
 
    /* Count the number of subslices on each pixel pipe. Assume that every
     * contiguous group of 4 subslices in the mask belong to the same pixel
-    * pipe. However note that on TGL the kernel returns a mask of enabled
+    * pipe. However note that on TGL+ the kernel returns a mask of enabled
     * *dual* subslices instead of actual subslices somewhat confusingly, so
     * each pixel pipe only takes 2 bits in the mask even though it's still 4
     * subslices.
@@ -1111,11 +1111,13 @@ update_pixel_pipes(struct intel_device_info *devinfo)
    const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;
    for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {
       const unsigned offset = p * ppipe_bits;
+      const unsigned subslice_idx = offset /
+         devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride;
       const unsigned ppipe_mask = BITFIELD_RANGE(offset % 8, ppipe_bits);
 
-      if (offset / 8 < ARRAY_SIZE(devinfo->subslice_masks))
+      if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks))
          devinfo->ppipe_subslices[p] =
-            __builtin_popcount(devinfo->subslice_masks[offset / 8] & ppipe_mask);
+            __builtin_popcount(devinfo->subslice_masks[subslice_idx] & ppipe_mask);
       else
          devinfo->ppipe_subslices[p] = 0;
    }
@@ -1141,7 +1143,7 @@ update_pixel_pipes(struct intel_device_info *devinfo)
 static void
 update_l3_banks(struct intel_device_info *devinfo)
 {
-   if (devinfo->ver != 12 || devinfo->num_slices != 1)
+   if (devinfo->ver != 12)
       return;
 
    if (devinfo->verx10 >= 125) {
@@ -1166,6 +1168,73 @@ update_l3_banks(struct intel_device_info *devinfo)
    }
 }
 
+/* At some point in time, some people decided to redefine what topology means,
+ * from useful HW related information (slice, subslice, etc...), to much less
+ * useful generic stuff that no one cares about (a single slice with lots of
+ * subslices). Of course all of this was done without asking the people who
+ * defined the topology query in the first place, to solve a lack of
+ * information on Gfx10+. This function is here to work around the fact it's
+ * not possible to change people's minds even before this stuff goes upstream.
+ * Sad times...
+ */
+static void
+update_from_single_slice_topology(struct intel_device_info *devinfo,
+                                  const struct drm_i915_query_topology_info *topology)
+{
+   assert(devinfo->verx10 >= 125);
+
+   reset_masks(devinfo);
+
+   assert(topology->max_slices == 1);
+   assert(topology->max_subslices > 0);
+   assert(topology->max_eus_per_subslice > 0);
+
+   /* i915 reports a single slice here, so rebuild the slices ourselves by
+    * treating each group of 4 consecutive dualsubslices as one slice.
+    */
+   devinfo->max_subslices_per_slice = 4;
+   devinfo->max_eus_per_subslice = 16;
+   devinfo->subslice_slice_stride = 1; /* one subslice_masks entry per slice */
+   devinfo->eu_slice_stride = DIV_ROUND_UP(16 * 4, 8); /* eu_masks entries per slice, 8 EU bits each */
+   devinfo->eu_subslice_stride = DIV_ROUND_UP(16, 8); /* eu_masks entries per subslice */
+
+   for (uint32_t ss_idx = 0; ss_idx < topology->max_subslices; ss_idx++) {
+      const bool ss_idx_available =
+         (topology->data[topology->subslice_offset + ss_idx / 8] >>
+          (ss_idx % 8)) & 1;
+
+      if (!ss_idx_available)
+         continue;
+
+      uint32_t s = ss_idx / 4; /* rebuilt slice index */
+      uint32_t ss = ss_idx % 4; /* subslice index within that slice */
+
+      devinfo->max_slices = MAX2(devinfo->max_slices, s + 1);
+      devinfo->slice_masks |= 1u << s;
+
+      devinfo->subslice_masks[s * devinfo->subslice_slice_stride +
+                              ss / 8] |= 1u << (ss % 8);
+
+      for (uint32_t eu = 0; eu < devinfo->max_eus_per_subslice; eu++) {
+         const bool eu_available =
+            (topology->data[topology->eu_offset +
+                            ss_idx * topology->eu_stride +
+                            eu / 8] >> (eu % 8)) & 1;
+
+         if (!eu_available)
+            continue;
+
+         devinfo->eu_masks[s * devinfo->eu_slice_stride +
+                           ss * devinfo->eu_subslice_stride +
+                           eu / 8] |= 1u << (eu % 8);
+      }
+   }
+
+   update_slice_subslice_counts(devinfo);
+   update_pixel_pipes(devinfo);
+   update_l3_banks(devinfo);
+}
+
 static void
 update_from_topology(struct intel_device_info *devinfo,
                      const struct drm_i915_query_topology_info *topology)
@@ -1458,7 +1527,10 @@ query_topology(struct intel_device_info *devinfo, int fd)
    if (topo_info == NULL)
       return false;
 
-   update_from_topology(devinfo, topo_info);
+   if (devinfo->verx10 >= 125)
+      update_from_single_slice_topology(devinfo, topo_info);
+   else
+      update_from_topology(devinfo, topo_info);
 
    free(topo_info);