Mesa (main): intel/dev: fix subslice/eu total computations with some fused configurations

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Nov 5 10:36:24 UTC 2021


Module: Mesa
Branch: main
Commit: a543a9440437df8d7498bc76353e828fcc66f5e5
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a543a9440437df8d7498bc76353e828fcc66f5e5

Author: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Date:   Sat Apr  3 13:28:50 2021 +0300

intel/dev: fix subslice/eu total computations with some fused configurations

When a device has its first slice/subslice fused off, the enabled bits in
the mask arrays do not start at index 0, so we can't use the number of
enabled slices/subslices as the bound when iterating the mask arrays.

v2: Fix spelling (Marcin)
    Use size_t for iterator (Marcin)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reported-by: Matt Roper <matthew.d.roper at intel.com>
Cc: <mesa-stable at lists.freedesktop.org>
Reviewed-by: Francisco Jerez <currojerez at riseup.net>
Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/5601
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/10015>

---

 src/gallium/drivers/iris/iris_screen.c |  5 +----
 src/intel/dev/intel_dev_info.c         |  8 ++++----
 src/intel/dev/intel_device_info.c      |  5 ++++-
 src/intel/dev/intel_device_info.h      | 31 +++++++++++++++++++++++++++++++
 src/intel/dev/intel_device_info_test.c | 33 +++++++++++++++++++++++++++++++++
 5 files changed, 73 insertions(+), 9 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 6a5c30401fa..7aec05e418a 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -572,10 +572,7 @@ iris_get_compute_param(struct pipe_screen *pscreen,
       RET((uint32_t []) { 400 }); /* TODO */
 
    case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: {
-      unsigned total_num_subslices = 0;
-      for (unsigned i = 0; i < devinfo->num_slices; i++)
-         total_num_subslices += devinfo->num_subslices[i];
-      RET((uint32_t []) { total_num_subslices });
+      RET((uint32_t []) { intel_device_info_subslice_total(devinfo) });
    }
 
    case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE:
diff --git a/src/intel/dev/intel_dev_info.c b/src/intel/dev/intel_dev_info.c
index 1d8cbefd262..91eefb07daf 100644
--- a/src/intel/dev/intel_dev_info.c
+++ b/src/intel/dev/intel_dev_info.c
@@ -79,18 +79,18 @@ main(int argc, char *argv[])
 
       const char *subslice_name = devinfo.ver >= 12 ? "dualsubslice" : "subslice";
       uint32_t n_s = 0, n_ss = 0, n_eus = 0;
-      for (unsigned s = 0; s < devinfo.num_slices; s++) {
+      for (unsigned s = 0; s < devinfo.max_slices; s++) {
          n_s += (devinfo.slice_masks & (1u << s)) ? 1 : 0;
-         for (unsigned ss = 0; ss < devinfo.num_subslices[s]; ss++) {
+         for (unsigned ss = 0; ss < devinfo.max_subslices_per_slice; ss++) {
             fprintf(stdout, "   slice%u.%s%u: ", s, subslice_name, ss);
             if (intel_device_info_subslice_available(&devinfo, s, ss)) {
                n_ss++;
-               for (unsigned eu = 0; eu < devinfo.num_eu_per_subslice; eu++) {
+               for (unsigned eu = 0; eu < devinfo.max_eu_per_subslice; eu++) {
                   n_eus += intel_device_info_eu_available(&devinfo, s, ss, eu) ? 1 : 0;
                   fprintf(stdout, "%s", intel_device_info_eu_available(&devinfo, s, ss, eu) ? "1" : "0");
                }
             } else {
-               fprintf(stderr, "fused");
+               fprintf(stdout, "fused");
             }
             fprintf(stdout, "\n");
          }
diff --git a/src/intel/dev/intel_device_info.c b/src/intel/dev/intel_device_info.c
index 7bf021fc8e4..477d4da666a 100644
--- a/src/intel/dev/intel_device_info.c
+++ b/src/intel/dev/intel_device_info.c
@@ -1050,6 +1050,9 @@ update_from_topology(struct intel_device_info *devinfo,
    assert(sizeof(devinfo->slice_masks) >= DIV_ROUND_UP(topology->max_slices, 8));
    memcpy(&devinfo->slice_masks, topology->data, DIV_ROUND_UP(topology->max_slices, 8));
    devinfo->num_slices = __builtin_popcount(devinfo->slice_masks);
+   devinfo->max_slices = topology->max_slices;
+   devinfo->max_subslices_per_slice = topology->max_subslices;
+   devinfo->max_eu_per_subslice = topology->max_eus_per_subslice;
 
    uint32_t subslice_mask_len =
       topology->max_slices * topology->subslice_stride;
@@ -1691,7 +1694,7 @@ intel_get_device_info_from_fd(int fd, struct intel_device_info *devinfo)
    devinfo->has_tiling_uapi = has_get_tiling(fd);
 
    devinfo->subslice_total = 0;
-   for (uint32_t i = 0; i < devinfo->num_slices; i++)
+   for (uint32_t i = 0; i < devinfo->max_slices; i++)
       devinfo->subslice_total += __builtin_popcount(devinfo->subslice_masks[i]);
 
    /* Gfx7 and older do not support EU/Subslice info */
diff --git a/src/intel/dev/intel_device_info.h b/src/intel/dev/intel_device_info.h
index cc70a824e4f..499bf2a9ad4 100644
--- a/src/intel/dev/intel_device_info.h
+++ b/src/intel/dev/intel_device_info.h
@@ -137,11 +137,24 @@ struct intel_device_info
     */
    unsigned num_slices;
 
+   /**
+    * Maximum number of slices present on this device (can be more than
+    * num_slices if some slices are fused).
+    */
+   unsigned max_slices;
+
    /**
     * Number of subslices for each slice (used to be uniform until CNL).
     */
    unsigned num_subslices[INTEL_DEVICE_MAX_SUBSLICES];
 
+   /**
+    * Maximum number of subslices per slice present on this device (can be
+    * more than the maximum value in the num_subslices[] array if some
+    * subslices are fused).
+    */
+   unsigned max_subslices_per_slice;
+
    /**
     * Number of subslices on each pixel pipe (ICL).
     */
@@ -154,6 +167,12 @@ struct intel_device_info
     */
    unsigned num_eu_per_subslice;
 
+   /**
+    * Maximum number of EUs per subslice (can be more than num_eu_per_subslice
+    * if some EUs are fused off).
+    */
+   unsigned max_eu_per_subslice;
+
    /**
     * Number of threads per eu, varies between 4 and 8 between generations.
     */
@@ -356,6 +375,18 @@ intel_device_info_eu_available(const struct intel_device_info *devinfo,
    return (devinfo->eu_masks[subslice_offset + eu / 8] & (1U << eu % 8)) != 0;
 }
 
+static inline uint32_t
+intel_device_info_subslice_total(const struct intel_device_info *devinfo)
+{
+   uint32_t total = 0;
+
+   for (size_t i = 0; i < ARRAY_SIZE(devinfo->subslice_masks); i++) {
+      total += __builtin_popcount(devinfo->subslice_masks[i]);
+   }
+
+   return total;
+}
+
 static inline uint32_t
 intel_device_info_eu_total(const struct intel_device_info *devinfo)
 {
diff --git a/src/intel/dev/intel_device_info_test.c b/src/intel/dev/intel_device_info_test.c
index f3077bd6954..db5feae0744 100644
--- a/src/intel/dev/intel_device_info_test.c
+++ b/src/intel/dev/intel_device_info_test.c
@@ -31,6 +31,39 @@ main(int argc, char *argv[])
       assert(devinfo.cs_prefetch_size > 0);
 
       assert(devinfo.ver < 7 || devinfo.max_constant_urb_size_kb > 0);
+
+      assert(devinfo.num_slices <= ARRAY_SIZE(devinfo.subslice_masks));
+
+      assert(devinfo.num_slices <= devinfo.max_slices);
+      assert(intel_device_info_subslice_total(&devinfo) <=
+             (devinfo.max_slices * devinfo.max_subslices_per_slice));
+
+      for (uint32_t s = 0; s < ARRAY_SIZE(devinfo.num_subslices); s++)
+         assert(devinfo.num_subslices[s] <= devinfo.max_subslices_per_slice);
+
+      assert(__builtin_popcount(devinfo.slice_masks) <= devinfo.max_slices);
+
+      uint32_t total_subslices = 0;
+      for (size_t i = 0; i < ARRAY_SIZE(devinfo.subslice_masks); i++)
+         total_subslices += __builtin_popcount(devinfo.subslice_masks[i]);
+      assert(total_subslices <=
+             (devinfo.max_slices * devinfo.max_subslices_per_slice));
+
+      assert(intel_device_info_eu_total(&devinfo) > 0);
+      assert(intel_device_info_subslice_total(&devinfo) > 0);
+
+      total_subslices = 0;
+      for (uint32_t s = 0; s < devinfo.max_slices; s++)
+         for (uint32_t ss = 0; ss < devinfo.max_subslices_per_slice; ss++)
+            total_subslices += intel_device_info_subslice_available(&devinfo, s, ss);
+      assert(total_subslices == intel_device_info_subslice_total(&devinfo));
+
+      uint32_t total_eus = 0;
+      for (uint32_t s = 0; s < devinfo.max_slices; s++)
+         for (uint32_t ss = 0; ss < devinfo.max_subslices_per_slice; ss++)
+            for (uint32_t eu = 0; eu < devinfo.max_eu_per_subslice; eu++)
+               total_eus += intel_device_info_eu_available(&devinfo, s, ss, eu);
+      assert(total_eus == intel_device_info_eu_total(&devinfo));
    }
 
    return 0;



More information about the mesa-commit mailing list