Mesa (main): intel/devinfo: deal with i915 topology query change
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jan 14 12:17:35 UTC 2022
Module: Mesa
Branch: main
Commit: e86ce98c6a08cf6a4129132cc5094370931e0bfa
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=e86ce98c6a08cf6a4129132cc5094370931e0bfa
Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date: Fri Jun 25 10:46:30 2021 +0300
intel/devinfo: deal with i915 topology query change
i915 does not report slices accurately anymore on Gfx12.5+. Since this
is information we need to have for performance queries, we need to
rebuild it here.
v2: Remove invalid change to pixel pipes computations (Jordan)
v3: Fix index calculation (Curro)
Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Francisco Jerez <currojerez at riseup.net>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14297>
---
src/intel/dev/intel_device_info.c | 82 ++++++++++++++++++++++++++++++++++++---
1 file changed, 77 insertions(+), 5 deletions(-)
diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c
index 4cb1c658a3a..623c6e1788f 100644
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1103,7 +1103,7 @@ update_pixel_pipes(struct intel_device_info *devinfo)
/* Count the number of subslices on each pixel pipe. Assume that every
* contiguous group of 4 subslices in the mask belong to the same pixel
- * pipe. However note that on TGL the kernel returns a mask of enabled
+ * pipe. However note that on TGL+ the kernel returns a mask of enabled
* *dual* subslices instead of actual subslices somewhat confusingly, so
* each pixel pipe only takes 2 bits in the mask even though it's still 4
* subslices.
@@ -1111,11 +1111,13 @@ update_pixel_pipes(struct intel_device_info *devinfo)
const unsigned ppipe_bits = devinfo->ver >= 12 ? 2 : 4;
for (unsigned p = 0; p < INTEL_DEVICE_MAX_PIXEL_PIPES; p++) {
const unsigned offset = p * ppipe_bits;
+ const unsigned subslice_idx = offset /
+ devinfo->max_subslices_per_slice * devinfo->subslice_slice_stride;
const unsigned ppipe_mask = BITFIELD_RANGE(offset % 8, ppipe_bits);
- if (offset / 8 < ARRAY_SIZE(devinfo->subslice_masks))
+ if (subslice_idx < ARRAY_SIZE(devinfo->subslice_masks))
devinfo->ppipe_subslices[p] =
- __builtin_popcount(devinfo->subslice_masks[offset / 8] & ppipe_mask);
+ __builtin_popcount(devinfo->subslice_masks[subslice_idx] & ppipe_mask);
else
devinfo->ppipe_subslices[p] = 0;
}
@@ -1141,7 +1143,7 @@ update_pixel_pipes(struct intel_device_info *devinfo)
static void
update_l3_banks(struct intel_device_info *devinfo)
{
- if (devinfo->ver != 12 || devinfo->num_slices != 1)
+ if (devinfo->ver != 12)
return;
if (devinfo->verx10 >= 125) {
@@ -1166,6 +1168,73 @@ update_l3_banks(struct intel_device_info *devinfo)
}
}
+/* At some point in time, some people decided to redefine what topology means,
+ * from useful HW-related information (slice, subslice, etc...), to much less
+ * useful generic stuff that no one cares about (a single slice with lots of
+ * subslices). Of course all of this was done without asking the people who
+ * defined the topology query in the first place, to solve a lack of
+ * information on Gfx10+. This function is here to work around the fact it's
+ * not possible to change people's mind even before this stuff goes upstream.
+ * Sad times...
+ */
+static void
+update_from_single_slice_topology(struct intel_device_info *devinfo,
+ const struct drm_i915_query_topology_info *topology)
+{
+ assert(devinfo->verx10 >= 125);
+
+ reset_masks(devinfo);
+
+ assert(topology->max_slices == 1);
+ assert(topology->max_subslices > 0);
+ assert(topology->max_eus_per_subslice > 0);
+
+ /* i915 gives us only one slice so we have to rebuild that out of groups of
+ * 4 dualsubslices.
+ */
+ devinfo->max_subslices_per_slice = 4;
+ devinfo->max_eus_per_subslice = 16;
+ devinfo->subslice_slice_stride = 1;
+ devinfo->eu_slice_stride = DIV_ROUND_UP(16 * 4, 8);
+ devinfo->eu_subslice_stride = DIV_ROUND_UP(16, 8);
+
+ for (uint32_t ss_idx = 0; ss_idx < topology->max_subslices; ss_idx++) {
+ const bool ss_idx_available =
+ (topology->data[topology->subslice_offset + ss_idx / 8] >>
+ (ss_idx % 8)) & 1;
+
+ if (!ss_idx_available)
+ continue;
+
+ uint32_t s = ss_idx / 4;
+ uint32_t ss = ss_idx % 4;
+
+ devinfo->max_slices = MAX2(devinfo->max_slices, s + 1);
+ devinfo->slice_masks |= 1u << s;
+
+ devinfo->subslice_masks[s * devinfo->subslice_slice_stride +
+ ss / 8] |= 1u << (ss % 8);
+
+ for (uint32_t eu = 0; eu < devinfo->max_eus_per_subslice; eu++) {
+ const bool eu_available =
+ (topology->data[topology->eu_offset +
+ ss_idx * topology->eu_stride +
+ eu / 8] >> (eu % 8)) & 1;
+
+ if (!eu_available)
+ continue;
+
+ devinfo->eu_masks[s * devinfo->eu_slice_stride +
+ ss * devinfo->eu_subslice_stride +
+ eu / 8] |= 1u << (eu % 8);
+ }
+ }
+
+ update_slice_subslice_counts(devinfo);
+ update_pixel_pipes(devinfo);
+ update_l3_banks(devinfo);
+}
+
static void
update_from_topology(struct intel_device_info *devinfo,
const struct drm_i915_query_topology_info *topology)
@@ -1458,7 +1527,10 @@ query_topology(struct intel_device_info *devinfo, int fd)
if (topo_info == NULL)
return false;
- update_from_topology(devinfo, topo_info);
+ if (devinfo->verx10 >= 125)
+ update_from_single_slice_topology(devinfo, topo_info);
+ else
+ update_from_topology(devinfo, topo_info);
free(topo_info);
More information about the mesa-commit
mailing list