[PATCH 4/4] drm/xe/guc: Add test for G2G communications
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Fri Aug 1 00:33:10 UTC 2025
On 7/24/2025 5:21 PM, John.C.Harrison at Intel.com wrote:
> From: John Harrison <John.C.Harrison at Intel.com>
>
> Add a test for sending messages from every GuC to every other GuC to
> test G2G communications.
>
> Note that, being a debug only feature, the test interface only exists
> in pre-production builds of the GuC firmware.
>
> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
> ---
> drivers/gpu/drm/xe/abi/guc_actions_abi.h | 2 +
> drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c | 759 ++++++++++++++++++++
> drivers/gpu/drm/xe/tests/xe_live_test_mod.c | 2 +
> drivers/gpu/drm/xe/xe_device_types.h | 7 +
> drivers/gpu/drm/xe/xe_guc.c | 4 +
> drivers/gpu/drm/xe/xe_guc.h | 4 +
> drivers/gpu/drm/xe/xe_guc_ct.c | 5 +
> drivers/gpu/drm/xe/xe_guc_fwif.h | 1 +
> 8 files changed, 784 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>
> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> index 81eb046aeebf..0395998ca75c 100644
> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
> @@ -154,6 +154,8 @@ enum xe_guc_action {
> XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
> XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
> XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
> + XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
> + XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
> XE_GUC_ACTION_LIMIT
> };
>
> diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
> new file mode 100644
> index 000000000000..9bc8a43f7138
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
> @@ -0,0 +1,759 @@
> +// SPDX-License-Identifier: GPL-2.0 AND MIT
> +/*
> + * Copyright © 2024 Intel Corporation
Should the copyright year be 2025?
> + */
> +
> +#include <linux/delay.h>
> +
> +#include <kunit/test.h>
> +#include <kunit/visibility.h>
> +
> +#include "tests/xe_kunit_helpers.h"
> +#include "tests/xe_pci_test.h"
> +#include "tests/xe_test.h"
> +
> +#include "xe_bo.h"
> +#include "xe_device.h"
> +#include "xe_pm.h"
> +
> +/*
> + * Payload is opaque to GuC. So KMD can define any structure or size it wants.
> + */
> +struct g2g_test_payload {
> + u32 tx_dev;
> + u32 tx_tile;
> + u32 rx_dev;
> + u32 rx_tile;
> + u32 seqno;
> +};
> +
> +static void g2g_test_send(struct kunit *test, struct xe_guc *guc,
> + u32 far_tile, u32 far_dev,
> + struct g2g_test_payload *payload)
> +{
> + struct xe_device *xe = guc_to_xe(guc);
> + struct xe_gt *gt = guc_to_gt(guc);
> + u32 *action, total;
> + size_t payload_len;
> + int ret;
> +
> + payload_len = sizeof(*payload) / sizeof(u32);
> + KUNIT_ASSERT_EQ_MSG(test, sizeof(*payload), payload_len * sizeof(u32),
> + "G2G payload not u32 aligned\n");
Can't you just use a static assert on the size of the structure, instead
of asserting at runtime?
> +
> + total = 4 + payload_len;
> + action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL);
The size here might be clearer to understand as sizeof(u32). Not a blocker.
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action);
> +
> + action[0] = XE_GUC_ACTION_TEST_G2G_SEND;
> + action[1] = far_tile;
> + action[2] = far_dev;
> + action[3] = payload_len;
> + memcpy(action + 4, payload, payload_len * sizeof(u32));
Why not use sizeof(*payload) here instead of payload_len * sizeof(u32)?
> +
> + atomic_inc(&xe->g2g_test_count);
> +
> + /*
> + * Should specify the expected response notification here. Problem is that
> + * the response will be coming from a different GuC. By the end, it should
> + * all add up as long as an equal number of messages are sent from each GuC
> + * and to each GuC. However, in the middle negative reservation space errors
> + * and such like can occur. Rather than add intrusive changes to the CT layer
> + * it is simpler to just not bother counting it at all. The system should be
> + * idle when running the selftest, and the selftest's notification total size
> + * is well within the G2H allocation size. So there should be no issues with
> + * needing to block for space, which is all the tracking code is really for.
> + */
> + ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0);
> + kunit_kfree(test, action);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret,
> + gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev);
> +}
> +
> +/*
> + * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously
> + * from the G2H notification handler. Need that to actually complete rather than
> + * thread-abort in order to keep the rest of the driver alive!
> + */
> +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
> +{
> + struct xe_device *xe = guc_to_xe(guc);
> + struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL;
> + u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len;
> + struct g2g_test_payload *payload;
> + size_t payload_len;
> + int ret = 0, i;
> +
> + payload_len = sizeof(*payload) / sizeof(u32);
> +
> + if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) {
> + xe_gt_err(rx_gt, "G2G test notification invalid length %u", len);
> + ret = -EPROTO;
> + goto done;
> + }
> +
> + tx_tile = msg[0];
> + tx_dev = msg[1];
> + got_len = msg[2];
> + payload = (struct g2g_test_payload *)(msg + 3);
> +
> + rx_tile = gt_to_tile(rx_gt)->id;
> + rx_dev = G2G_DEV(rx_gt);
> +
> + if (got_len != payload_len) {
> + xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len);
> + ret = -EPROTO;
> + goto done;
> + }
> +
> + if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile ||
> + payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) {
> + xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n",
> + payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev,
> + tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno);
> + ret = -EPROTO;
> + goto done;
> + }
> +
> + if (!xe->g2g_test_array) {
> + xe_gt_err(rx_gt, "G2G: Missing test array!\n");
> + ret = -ENOMEM;
> + goto done;
> + }
> +
> + for_each_gt(test_gt, xe, i) {
> + if (gt_to_tile(test_gt)->id != tx_tile)
> + continue;
> +
> + if (G2G_DEV(test_gt) != tx_dev)
> + continue;
> +
> + if (tx_gt) {
> + xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n",
> + tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev);
> + ret = -EINVAL;
> + goto done;
> + }
> +
> + tx_gt = test_gt;
> + }
> + if (!tx_gt) {
> + xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev);
> + ret = -EINVAL;
> + goto done;
> + }
> +
> + idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id;
> +
> + if (xe->g2g_test_array[idx] != payload->seqno - 1) {
> + xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n",
> + xe->g2g_test_array[idx], payload->seqno - 1,
> + tx_tile, tx_dev, rx_tile, rx_dev);
> + ret = -EINVAL;
> + goto done;
> + }
> +
> + xe->g2g_test_array[idx] = payload->seqno;
> +
> +done:
> + atomic_dec(&xe->g2g_test_count);
> + return ret;
> +}
> +
> +/*
> + * Send the given seqno from all GuCs to all other GuCs in tile/GT order
> + */
> +static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno)
> +{
> + struct xe_gt *near_gt, *far_gt;
> + int i, j;
> +
> + for_each_gt(near_gt, xe, i) {
> + u32 near_tile = gt_to_tile(near_gt)->id;
> + u32 near_dev = G2G_DEV(near_gt);
> +
> + for_each_gt(far_gt, xe, j) {
> + u32 far_tile = gt_to_tile(far_gt)->id;
> + u32 far_dev = G2G_DEV(far_gt);
> + struct g2g_test_payload payload;
> +
> + if (far_gt->info.id == near_gt->info.id)
> + continue;
> +
> + payload.tx_dev = near_dev;
> + payload.tx_tile = near_tile;
> + payload.rx_dev = far_dev;
> + payload.rx_tile = far_tile;
> + payload.seqno = seqno;
> + g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload);
> + }
> + }
> +}
> +
> +#define WAIT_TIME_MS 100
> +#define WAIT_COUNT (1000 / WAIT_TIME_MS)
> +
> +static void g2g_wait_for_complete(void *_xe)
> +{
> + struct xe_device *xe = (struct xe_device *)_xe;
> + struct kunit *test = kunit_get_current_test();
> + int wait = 0;
> +
> + /* Wait for all G2H messages to be received */
> + while (atomic_read(&xe->g2g_test_count)) {
> + if (++wait > WAIT_COUNT)
> + break;
> +
> + msleep(WAIT_TIME_MS);
> + }
> +
> + KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count),
> + "Timed out waiting for notifications\n");
> + kunit_info(test, "Got all notifications back\n");
> +}
> +
> +#undef WAIT_TIME_MS
> +#undef WAIT_COUNT
> +
> +static void g2g_clean_array(void *_xe)
> +{
> + struct xe_device *xe = (struct xe_device *)_xe;
> +
> + xe->g2g_test_array = NULL;
> +}
> +
> +#define NUM_LOOPS 16
> +
> +static void g2g_run_test(struct kunit *test, struct xe_device *xe)
> +{
> + u32 seqno, max_array;
> + int ret, i, j;
> +
> + max_array = xe->info.gt_count * xe->info.gt_count;
> + xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL);
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array);
> +
> + ret = kunit_add_action_or_reset(test, g2g_clean_array, xe);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
> +
> + /*
> + * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order.
> + * Tile/GT order doesn't really mean anything to the hardware but it is going
> + * to be a fixed sequence every time.
> + *
> + * Verify that each one comes back having taken the correct route.
> + */
> + ret = kunit_add_action(test, g2g_wait_for_complete, xe);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
> + for (seqno = 1; seqno < NUM_LOOPS; seqno++)
> + g2g_test_in_order(test, xe, seqno);
> + seqno--;
> +
> + kunit_release_action(test, &g2g_wait_for_complete, xe);
> +
> + /* Check for the final seqno in each slot */
> + for (i = 0; i < xe->info.gt_count; i++) {
> + for (j = 0; j < xe->info.gt_count; j++) {
> + u32 idx = (j * xe->info.gt_count) + i;
> +
> + if (i == j)
> + KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx],
> + "identity seqno modified: %d for %dx%d!\n",
> + xe->g2g_test_array[idx], i, j);
> + else
> + KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx],
> + "invalid seqno: %d vs %d for %dx%d!\n",
> + xe->g2g_test_array[idx], seqno, i, j);
> + }
> + }
> +
> + kunit_kfree(test, xe->g2g_test_array);
> + kunit_release_action(test, &g2g_clean_array, xe);
> +
> + kunit_info(test, "Test passed\n");
> +}
> +
> +#undef NUM_LOOPS
> +
> +static void g2g_ct_stop(struct xe_guc *guc)
> +{
> + struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
> + struct xe_device *xe = gt_to_xe(gt);
> + int i, t;
> +
> + for_each_gt(remote_gt, xe, i) {
> + u32 tile, dev;
> +
> + if (remote_gt->info.id == gt->info.id)
> + continue;
> +
> + tile = gt_to_tile(remote_gt)->id;
> + dev = G2G_DEV(remote_gt);
> +
> + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
> + guc_g2g_deregister(guc, tile, dev, t);
> + }
> +}
> +
> +/* Size of a single allocation that contains all G2G CTBs across all GTs */
> +static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe)
> +{
> + unsigned int count = xe->info.gt_count;
> + u32 num_channels = (count * (count - 1)) / 2;
> +
> + kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n",
> + count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE,
> + num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE,
> + num_channels * XE_G2G_TYPE_LIMIT);
> +
> + return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
> +}
Since you're including this file from xe_guc.c, can't you call
guc_g2g_size() directly? We'll miss the log, but I'm not sure how helpful
that log is anyway.
> +
> +/*
> + * Use the driver's regular CTB allocation scheme.
> + */
> +static void g2g_alloc_default(struct kunit *test, struct xe_device *xe)
> +{
> + struct xe_gt *gt;
> + int i;
> +
> + kunit_info(test, "Default [tiles = %d, GTs = %d]\n",
> + xe->info.tile_count, xe->info.gt_count);
> +
> + for_each_gt(gt, xe, i) {
> + struct xe_guc *guc = &gt->uc.guc;
> + int ret;
> +
> + ret = guc_g2g_alloc(guc);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret));
> + continue;
> + }
> +}
> +
> +static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo)
> +{
> + struct xe_gt *root_gt, *gt;
> + int i;
> +
> + root_gt = xe_device_get_gt(xe, 0);
> + root_gt->uc.guc.g2g.bo = bo;
> + root_gt->uc.guc.g2g.owned = true;
> + kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo);
> +
> + for_each_gt(gt, xe, i) {
> + if (gt->info.id != 0) {
> + gt->uc.guc.g2g.owned = false;
> + gt->uc.guc.g2g.bo = xe_bo_get(bo);
> + kunit_info(test, "[%d.%d] Pinned 0x%p\n",
> + gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo);
> + }
> +
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo);
> + }
> +}
> +
> +/*
> + * Allocate a single blob on the host and split between all G2G CTBs.
Isn't this the same as the default on integrated platforms? I'm wondering
if it's worth skipping xe_live_guc_g2g_kunit_allmem entirely on integrated,
given that the default case is already covered by
xe_live_guc_g2g_kunit_default.
> + */
> +static void g2g_alloc_host(struct kunit *test, struct xe_device *xe)
> +{
> + struct xe_bo *bo;
> + u32 g2g_size;
> +
> + kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count);
> +
> + g2g_size = g2g_ctb_size(test, xe);
> + bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size,
> + XE_BO_FLAG_SYSTEM |
> + XE_BO_FLAG_GGTT |
> + XE_BO_FLAG_GGTT_ALL |
> + XE_BO_FLAG_GGTT_INVALIDATE);
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
> + kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo);
> +
> + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
> +
> + g2g_distribute(test, xe, bo);
> +}
> +
> +/*
> + * Allocate a single blob on the given tile and split between all G2G CTBs.
> + */
> +static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile)
> +{
> + struct xe_bo *bo;
> + u32 g2g_size;
> +
> + KUNIT_ASSERT_TRUE(test, IS_DGFX(xe));
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile);
> +
> + kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n",
> + tile->id, xe->info.tile_count, xe->info.gt_count);
> +
> + g2g_size = g2g_ctb_size(test, xe);
> + bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
> + XE_BO_FLAG_VRAM_IF_DGFX(tile) |
> + XE_BO_FLAG_GGTT |
> + XE_BO_FLAG_GGTT_ALL |
> + XE_BO_FLAG_GGTT_INVALIDATE);
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
> + kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo);
> +
> + xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
> +
> + g2g_distribute(test, xe, bo);
> +}
> +
> +static void g2g_free(struct kunit *test, struct xe_device *xe)
> +{
> + struct xe_gt *gt;
> + struct xe_bo *bo;
> + int i;
> +
> + for_each_gt(gt, xe, i) {
> + bo = gt->uc.guc.g2g.bo;
> + if (!bo)
> + continue;
> +
> + if (gt->uc.guc.g2g.owned) {
> + xe_managed_bo_unpin_map_no_vm(xe, bo);
> + kunit_info(test, "[%d.%d] Unmapped 0x%p\n",
> + gt_to_tile(gt)->id, gt->info.id, bo);
> + } else {
> + xe_bo_put(bo);
> + kunit_info(test, "[%d.%d] Unpinned 0x%p\n",
> + gt_to_tile(gt)->id, gt->info.id, bo);
> + }
> +
> + gt->uc.guc.g2g.bo = NULL;
> + }
> +}
> +
> +static void g2g_stop(struct kunit *test, struct xe_device *xe)
> +{
> + struct xe_gt *gt;
> + int i;
> +
> + for_each_gt(gt, xe, i) {
> + struct xe_guc *guc = &gt->uc.guc;
> +
> + if (!guc->g2g.bo)
> + continue;
> +
> + g2g_ct_stop(guc);
> + }
> +
> + g2g_free(test, xe);
> +}
> +
> +/*
> + * Generate a unique id for each bi-directional CTB for each pair of
> + * near and far tiles/devices. The id can then be used as an index into
> + * a single allocation that is sub-divided into multiple CTBs.
> + *
> + * For example, with two devices per tile and two tiles, the table should
> + * look like:
> + * Far <tile>.<dev>
> + * 0.0 0.1 1.0 1.1
> + * N 0.0 --/-- 00/01 02/03 04/05
> + * e 0.1 01/00 --/-- 06/07 08/09
> + * a 1.0 03/02 07/06 --/-- 10/11
> + * r 1.1 05/04 09/08 11/10 --/--
> + *
> + * Where each entry is Rx/Tx channel id.
> + *
> + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
> + * be reading from channel #11 and writing to channel #10. Whereas,
> + * GuC #2 talking to GuC #3 would be read on #10 and write to #11.
> + */
> +static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev,
> + u32 type, u32 max_inst, bool have_dev)
> +{
> + u32 near = near_tile, far = far_tile;
> + u32 idx = 0, x, y, direction;
> + int i;
> +
> + if (have_dev) {
> + near = (near << 1) | near_dev;
> + far = (far << 1) | far_dev;
> + }
> +
> + /* No need to send to one's self */
> + if (far == near)
> + return -1;
> +
> + if (far > near) {
> + /* Top right table half */
> + x = far;
> + y = near;
> +
> + /* T/R is 'forwards' direction */
> + direction = type;
> + } else {
> + /* Bottom left table half */
> + x = near;
> + y = far;
> +
> + /* B/L is 'backwards' direction */
> + direction = (1 - type);
> + }
> +
> + /* Count the rows prior to the target */
> + for (i = y; i > 0; i--)
> + idx += max_inst - i;
> +
> + /* Count this row up to the target */
> + idx += (x - 1 - y);
> +
> + /* Slots are in Rx/Tx pairs */
> + idx *= 2;
> +
> + /* Pick Rx/Tx direction */
> + idx += direction;
> +
> + return idx;
> +}
> +
> +static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev)
> +{
> + struct xe_gt *gt = guc_to_gt(guc);
> + struct xe_device *xe = gt_to_xe(gt);
> + u32 near_tile = gt_to_tile(gt)->id;
> + u32 near_dev = G2G_DEV(gt);
> + u32 max = xe->info.gt_count;
> + int idx;
> + u32 base, desc, buf;
> +
> + if (!guc->g2g.bo)
> + return -ENODEV;
> +
> + idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev);
> + xe_assert(xe, idx >= 0);
> +
> + base = guc_bo_ggtt_addr(guc, guc->g2g.bo);
> + desc = base + idx * G2G_DESC_SIZE;
> + buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
> +
> + xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
> + xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo));
> +
> + return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev,
> + desc, buf, G2G_BUFFER_SIZE);
> +}
Similar comment as above for those 2 _flat functions: can't you just
call the original?
> +
> +static void g2g_start(struct kunit *test, struct xe_guc *guc)
> +{
> + struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
> + struct xe_device *xe = gt_to_xe(gt);
> + unsigned int i;
> + int t, ret;
> + bool have_dev;
> +
> + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
> +
> + /* GuC interface will need extending if more GT device types are ever created. */
> + KUNIT_ASSERT_TRUE(test,
> + (gt->info.type == XE_GT_TYPE_MAIN) ||
> + (gt->info.type == XE_GT_TYPE_MEDIA));
> +
> + /* Channel numbering depends on whether there are multiple GTs per tile */
> + have_dev = xe->info.gt_count > xe->info.tile_count;
> +
> + for_each_gt(remote_gt, xe, i) {
> + u32 tile, dev;
> +
> + if (remote_gt->info.id == gt->info.id)
> + continue;
> +
> + tile = gt_to_tile(remote_gt)->id;
> + dev = G2G_DEV(remote_gt);
> +
> + for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
> + ret = g2g_register_flat(guc, tile, dev, t, have_dev);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret));
> + }
> + }
> +}
And this one is almost the same as guc_g2g_start(). Could just do:
static void g2g_start(struct kunit *test, struct xe_guc *guc)
{
struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
int ret;
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
KUNIT_ASSERT_TRUE(test,
(gt->info.type == XE_GT_TYPE_MAIN) ||
(gt->info.type == XE_GT_TYPE_MEDIA));
ret = guc_g2g_start(guc);
KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe",
ERR_PTR(ret));
}
> +
> +enum {
> + G2G_CTB_TYPE_DEFAULT,
> + G2G_CTB_TYPE_HOST,
> + G2G_CTB_TYPE_TILE,
> +};
> +
> +static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile)
> +{
> + struct xe_gt *gt;
> + int i, found = 0;
> +
> + g2g_stop(test, xe);
> +
> + for_each_gt(gt, xe, i) {
> + struct xe_guc *guc = &gt->uc.guc;
> +
> + KUNIT_ASSERT_NULL(test, guc->g2g.bo);
> + }
> +
> + switch (ctb_type) {
> + case G2G_CTB_TYPE_DEFAULT:
> + g2g_alloc_default(test, xe);
> + break;
> +
> + case G2G_CTB_TYPE_HOST:
> + g2g_alloc_host(test, xe);
> + break;
> +
> + case G2G_CTB_TYPE_TILE:
> + g2g_alloc_tile(test, xe, tile);
> + break;
> +
> + default:
> + KUNIT_ASSERT_TRUE(test, false);
> + }
> +
> + for_each_gt(gt, xe, i) {
> + struct xe_guc *guc = &gt->uc.guc;
> +
> + if (!guc->g2g.bo)
> + continue;
Do we actually have a case where the g2g buffer is enabled on some GTs
but not others?
Daniele
> +
> + g2g_start(test, guc);
> + found++;
> + }
> +
> + KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found);
> +
> + kunit_info(test, "Testing across %d GTs\n", found);
> +}
> +
> +static void g2g_recreate_ctb(void *_xe)
> +{
> + struct xe_device *xe = (struct xe_device *)_xe;
> + struct kunit *test = kunit_get_current_test();
> +
> + g2g_stop(test, xe);
> +
> + if (xe_guc_g2g_wanted(xe))
> + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
> +}
> +
> +static void g2g_pm_runtime_put(void *_xe)
> +{
> + struct xe_device *xe = (struct xe_device *)_xe;
> +
> + xe_pm_runtime_put(xe);
> +}
> +
> +static void g2g_pm_runtime_get(struct kunit *test)
> +{
> + struct xe_device *xe = test->priv;
> + int ret;
> +
> + xe_pm_runtime_get(xe);
> + ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n");
> +}
> +
> +static void g2g_check_skip(struct kunit *test)
> +{
> + struct xe_device *xe = test->priv;
> + struct xe_gt *gt;
> + int i;
> +
> + if (IS_SRIOV_VF(xe))
> + kunit_skip(test, "not supported from a VF");
> +
> + if (xe->info.gt_count <= 1)
> + kunit_skip(test, "not enough GTs");
> +
> + for_each_gt(gt, xe, i) {
> + struct xe_guc *guc = &gt->uc.guc;
> +
> + if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD)
> + kunit_skip(test,
> + "G2G test interface not available in production firmware builds\n");
> + }
> +}
> +
> +/*
> + * Simple test that does not try to recreate the CTBs.
> + * Requires that the platform already enables G2G comms
> + * but has no risk of leaving the system in a broken state
> + * afterwards.
> + */
> +static void xe_live_guc_g2g_kunit_default(struct kunit *test)
> +{
> + struct xe_device *xe = test->priv;
> +
> + if (!xe_guc_g2g_wanted(xe))
> + kunit_skip(test, "G2G not enabled");
> +
> + g2g_check_skip(test);
> +
> + g2g_pm_runtime_get(test);
> +
> + kunit_info(test, "Testing default CTBs\n");
> + g2g_run_test(test, xe);
> +
> + kunit_release_action(test, &g2g_pm_runtime_put, xe);
> +}
> +
> +/*
> + * More complex test that re-creates the CTBs in various location to
> + * test access to each location from each GuC. Can be run even on
> + * systems that do not enable G2G by default. On the other hand,
> + * because it recreates the CTBs, if something goes wrong it could
> + * leave the system with broken G2G comms.
> + */
> +static void xe_live_guc_g2g_kunit_allmem(struct kunit *test)
> +{
> + struct xe_device *xe = test->priv;
> + int ret;
> +
> + g2g_check_skip(test);
> +
> + g2g_pm_runtime_get(test);
> +
> + /* Make sure to leave the system as we found it */
> + ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe);
> + KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n");
> +
> + kunit_info(test, "Testing CTB type 'default'...\n");
> + g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
> + g2g_run_test(test, xe);
> +
> + kunit_info(test, "Testing CTB type 'host'...\n");
> + g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL);
> + g2g_run_test(test, xe);
> +
> + if (IS_DGFX(xe)) {
> + struct xe_tile *tile;
> + int id;
> +
> + for_each_tile(tile, xe, id) {
> + kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id);
> +
> + g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile);
> + g2g_run_test(test, xe);
> + }
> + } else {
> + kunit_info(test, "Skipping local memory on integrated platform\n");
> + }
> +
> + kunit_release_action(test, g2g_recreate_ctb, xe);
> + kunit_release_action(test, g2g_pm_runtime_put, xe);
> +}
> +
> +static struct kunit_case xe_guc_g2g_tests[] = {
> + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param),
> + KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param),
> + {}
> +};
> +
> +VISIBLE_IF_KUNIT
> +struct kunit_suite xe_guc_g2g_test_suite = {
> + .name = "xe_guc_g2g",
> + .test_cases = xe_guc_g2g_tests,
> + .init = xe_kunit_helper_xe_device_live_test_init,
> +};
> +EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
> diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
> index 81277c77016d..c55e46f1ae92 100644
> --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
> +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
> @@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite;
> extern struct kunit_suite xe_dma_buf_test_suite;
> extern struct kunit_suite xe_migrate_test_suite;
> extern struct kunit_suite xe_mocs_test_suite;
> +extern struct kunit_suite xe_guc_g2g_test_suite;
>
> kunit_test_suite(xe_bo_test_suite);
> kunit_test_suite(xe_bo_shrink_test_suite);
> kunit_test_suite(xe_dma_buf_test_suite);
> kunit_test_suite(xe_migrate_test_suite);
> kunit_test_suite(xe_mocs_test_suite);
> +kunit_test_suite(xe_guc_g2g_test_suite);
>
> MODULE_AUTHOR("Intel Corporation");
> MODULE_LICENSE("GPL");
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 38c8329b4d2c..0416b0eba3bf 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -576,6 +576,13 @@ struct xe_device {
> atomic64_t global_total_pages;
> #endif
>
> +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
> + /** @g2g_test_array: for testing G2G communications */
> + u32 *g2g_test_array;
> + /** @g2g_test_count: for testing G2G communications */
> + atomic_t g2g_test_count;
> +#endif
> +
> /* private: */
>
> #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
> index 1ca7f4f27e26..7c069236d1be 100644
> --- a/drivers/gpu/drm/xe/xe_guc.c
> +++ b/drivers/gpu/drm/xe/xe_guc.c
> @@ -1673,3 +1673,7 @@ void xe_guc_declare_wedged(struct xe_guc *guc)
> xe_guc_ct_stop(&guc->ct);
> xe_guc_submit_wedge(guc);
> }
> +
> +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
> +#include "tests/xe_guc_g2g_test.c"
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
> index 22cf019a11bf..1cca05967e62 100644
> --- a/drivers/gpu/drm/xe/xe_guc.h
> +++ b/drivers/gpu/drm/xe/xe_guc.h
> @@ -53,6 +53,10 @@ void xe_guc_stop(struct xe_guc *guc);
> int xe_guc_start(struct xe_guc *guc);
> void xe_guc_declare_wedged(struct xe_guc *guc);
>
> +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
> +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len);
> +#endif
> +
> static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
> {
> switch (class) {
> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
> index 3f4e6a46ff16..f44967f84d30 100644
> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
> @@ -1439,6 +1439,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
> case XE_GUC_ACTION_NOTIFY_EXCEPTION:
> ret = guc_crash_process_msg(ct, action);
> break;
> +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
> + case XE_GUC_ACTION_TEST_G2G_RECV:
> + ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
> + break;
> +#endif
> default:
> xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
> }
> diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
> index ca9f999d38d1..bc94f8d0f037 100644
> --- a/drivers/gpu/drm/xe/xe_guc_fwif.h
> +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
> @@ -15,6 +15,7 @@
> #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
> #define G2H_LEN_DW_DEREGISTER_CONTEXT 3
> #define G2H_LEN_DW_TLB_INVALIDATE 3
> +#define G2H_LEN_DW_G2G_NOTIFY_MIN 3
>
> #define GUC_ID_MAX 65535
> #define GUC_ID_UNKNOWN 0xffffffff
More information about the Intel-xe
mailing list