Mesa (main): freedreno/ir3: support a4xx compute differences

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Sat Mar 5 08:51:11 UTC 2022


Module: Mesa
Branch: main
Commit: aac7028b583097eb67080d5dec59fd1b3693e843
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=aac7028b583097eb67080d5dec59fd1b3693e843

Author: Ilia Mirkin <imirkin at alum.mit.edu>
Date:   Sun Nov 14 13:05:07 2021 -0500

freedreno/ir3: support a4xx compute differences

Mainly the workgroup id comes injected via consts by the hardware (or
CP), and we must make room for it, otherwise the driver won't know where
to put it.

Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/14794>

---

 src/freedreno/ir3/ir3_compiler.c     |  1 +
 src/freedreno/ir3/ir3_compiler.h     |  5 +++++
 src/freedreno/ir3/ir3_compiler_nir.c | 17 ++++++++++++-----
 src/freedreno/ir3/ir3_nir.c          | 17 +++++++++++++++++
 src/freedreno/ir3/ir3_shader.h       |  5 ++++-
 5 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/src/freedreno/ir3/ir3_compiler.c b/src/freedreno/ir3/ir3_compiler.c
index 077320a26b9..c23d85e6f26 100644
--- a/src/freedreno/ir3/ir3_compiler.c
+++ b/src/freedreno/ir3/ir3_compiler.c
@@ -309,6 +309,7 @@ ir3_compiler_create(struct fd_device *dev, const struct fd_dev_id *dev_id,
    }
 
    compiler->bool_type = (compiler->gen >= 5) ? TYPE_U16 : TYPE_U32;
+   compiler->has_shared_regfile = compiler->gen >= 5;
 
    if (compiler->gen >= 6) {
       compiler->nir_options = options_a6xx;
diff --git a/src/freedreno/ir3/ir3_compiler.h b/src/freedreno/ir3/ir3_compiler.h
index 1767c646ac6..ddbd152ea7c 100644
--- a/src/freedreno/ir3/ir3_compiler.h
+++ b/src/freedreno/ir3/ir3_compiler.h
@@ -177,6 +177,11 @@ struct ir3_compiler {
 
    /* Type to use for 1b nir bools: */
    type_t bool_type;
+
+   /* Whether compute invocation params are passed in via shared regfile or
+    * constbuf. a5xx+ has the shared regfile.
+    */
+   bool has_shared_regfile;
 };
 
 void ir3_compiler_destroy(struct ir3_compiler *compiler);
diff --git a/src/freedreno/ir3/ir3_compiler_nir.c b/src/freedreno/ir3/ir3_compiler_nir.c
index 8337fb855d3..c6bd72c7aaa 100644
--- a/src/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/freedreno/ir3/ir3_compiler_nir.c
@@ -2183,12 +2183,19 @@ emit_intrinsic(struct ir3_context *ctx, nir_intrinsic_instr *intr)
       break;
    case nir_intrinsic_load_workgroup_id:
    case nir_intrinsic_load_workgroup_id_zero_base:
-      if (!ctx->work_group_id) {
-         ctx->work_group_id =
-            create_sysval_input(ctx, SYSTEM_VALUE_WORKGROUP_ID, 0x7);
-         ctx->work_group_id->dsts[0]->flags |= IR3_REG_SHARED;
+      if (ctx->compiler->has_shared_regfile) {
+         if (!ctx->work_group_id) {
+            ctx->work_group_id =
+               create_sysval_input(ctx, SYSTEM_VALUE_WORKGROUP_ID, 0x7);
+            ctx->work_group_id->dsts[0]->flags |= IR3_REG_SHARED;
+         }
+         ir3_split_dest(b, dst, ctx->work_group_id, 0, 3);
+      } else {
+         /* For a3xx/a4xx, this comes in via const injection by the hw */
+         for (int i = 0; i < dest_components; i++) {
+            dst[i] = create_driver_param(ctx, IR3_DP_WORKGROUP_ID_X + i);
+         }
       }
-      ir3_split_dest(b, dst, ctx->work_group_id, 0, 3);
       break;
    case nir_intrinsic_load_base_workgroup_id:
       for (int i = 0; i < dest_components; i++) {
diff --git a/src/freedreno/ir3/ir3_nir.c b/src/freedreno/ir3/ir3_nir.c
index c59cecac0dc..dcd5052eccd 100644
--- a/src/freedreno/ir3/ir3_nir.c
+++ b/src/freedreno/ir3/ir3_nir.c
@@ -756,6 +756,12 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
                layout->num_driver_params =
                   MAX2(layout->num_driver_params, IR3_DP_NUM_WORK_GROUPS_Z + 1);
                break;
+            case nir_intrinsic_load_workgroup_id:
+               if (!compiler->has_shared_regfile) {
+                  layout->num_driver_params =
+                     MAX2(layout->num_driver_params, IR3_DP_WORKGROUP_ID_Z + 1);
+               }
+               break;
             case nir_intrinsic_load_workgroup_size:
                layout->num_driver_params = MAX2(layout->num_driver_params,
                                                 IR3_DP_LOCAL_GROUP_SIZE_Z + 1);
@@ -783,6 +789,17 @@ ir3_nir_scan_driver_consts(struct ir3_compiler *compiler, nir_shader *shader, st
          }
       }
    }
+
+   /* TODO: Provide a spot somewhere to safely upload unwanted values, and a way
+    * to determine if they're wanted or not. For now we always make the whole
+    * driver param range available, since the driver will always instruct the
+    * hardware to upload these.
+    */
+   if (!compiler->has_shared_regfile &&
+         shader->info.stage == MESA_SHADER_COMPUTE) {
+      layout->num_driver_params =
+         MAX2(layout->num_driver_params, IR3_DP_WORKGROUP_ID_Z + 1);
+   }
 }
 
 /* Sets up the variant-dependent constant state for the ir3_shader.  Note
diff --git a/src/freedreno/ir3/ir3_shader.h b/src/freedreno/ir3/ir3_shader.h
index 64597ca8752..e764fa02044 100644
--- a/src/freedreno/ir3/ir3_shader.h
+++ b/src/freedreno/ir3/ir3_shader.h
@@ -54,12 +54,15 @@ enum ir3_driver_param {
    IR3_DP_LOCAL_GROUP_SIZE_Y = 9,
    IR3_DP_LOCAL_GROUP_SIZE_Z = 10,
    IR3_DP_SUBGROUP_ID_SHIFT = 11,
+   IR3_DP_WORKGROUP_ID_X = 12,
+   IR3_DP_WORKGROUP_ID_Y = 13,
+   IR3_DP_WORKGROUP_ID_Z = 14,
    /* NOTE: gl_NumWorkGroups should be vec4 aligned because
     * glDispatchComputeIndirect() needs to load these from
     * the info->indirect buffer.  Keep that in mind when/if
     * adding any addition CS driver params.
     */
-   IR3_DP_CS_COUNT = 12, /* must be aligned to vec4 */
+   IR3_DP_CS_COUNT = 16, /* must be aligned to vec4 */
 
    /* vertex shader driver params: */
    IR3_DP_DRAWID = 0,



More information about the mesa-commit mailing list