[PATCH 4/4] drm/xe/guc: Add test for G2G communications

Fri Aug 1 17:33:58 UTC 2025

On 7/31/2025 5:33 PM, Daniele Ceraolo Spurio wrote:
> On 7/24/2025 5:21 PM, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> Add a test for sending messages from every GuC to every other GuC to
>> test G2G communications.
>>
>> Note that, being a debug only feature, the test interface only exists
>> in pre-production builds of the GuC firmware.
>>
>> Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
>> ---
>>   drivers/gpu/drm/xe/abi/guc_actions_abi.h    |   2 +
>>   drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c  | 759 ++++++++++++++++++++
>>   drivers/gpu/drm/xe/tests/xe_live_test_mod.c |   2 +
>>   drivers/gpu/drm/xe/xe_device_types.h        |   7 +
>>   drivers/gpu/drm/xe/xe_guc.c                 |   4 +
>>   drivers/gpu/drm/xe/xe_guc.h                 |   4 +
>>   drivers/gpu/drm/xe/xe_guc_ct.c              |   5 +
>>   drivers/gpu/drm/xe/xe_guc_fwif.h            |   1 +
>>   8 files changed, 784 insertions(+)
>>   create mode 100644 drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>>
>> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h 
>> b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
>> index 81eb046aeebf..0395998ca75c 100644
>> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
>> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
>> @@ -154,6 +154,8 @@ enum xe_guc_action {
>>       XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
>>       XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
>>       XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
>> +    XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
>> +    XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
>>       XE_GUC_ACTION_LIMIT
>>   };
>>   diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c 
>> b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>> new file mode 100644
>> index 000000000000..9bc8a43f7138
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
>> @@ -0,0 +1,759 @@
>> +// SPDX-License-Identifier: GPL-2.0 AND MIT
>> +/*
>> + * Copyright © 2024 Intel Corporation
>
> 2025?
This has been in progress for a while!

>
>> + */
>> +
>> +#include <linux/delay.h>
>> +
>> +#include <kunit/test.h>
>> +#include <kunit/visibility.h>
>> +
>> +#include "tests/xe_kunit_helpers.h"
>> +#include "tests/xe_pci_test.h"
>> +#include "tests/xe_test.h"
>> +
>> +#include "xe_bo.h"
>> +#include "xe_device.h"
>> +#include "xe_pm.h"
>> +
>> +/*
>> + * Payload is opaque to GuC. So KMD can define any structure or size 
>> it wants.
>> + */
>> +struct g2g_test_payload  {
>> +    u32 tx_dev;
>> +    u32 tx_tile;
>> +    u32 rx_dev;
>> +    u32 rx_tile;
>> +    u32 seqno;
>> +};
>> +
>> +static void g2g_test_send(struct kunit *test, struct xe_guc *guc,
>> +              u32 far_tile, u32 far_dev,
>> +              struct g2g_test_payload *payload)
>> +{
>> +    struct xe_device *xe = guc_to_xe(guc);
>> +    struct xe_gt *gt = guc_to_gt(guc);
>> +    u32 *action, total;
>> +    size_t payload_len;
>> +    int ret;
>> +
>> +    payload_len = sizeof(*payload) / sizeof(u32);
>> +    KUNIT_ASSERT_EQ_MSG(test, sizeof(*payload), payload_len * 
>> sizeof(u32),
>> +                "G2G payload not u32 aligned\n");
>
> Can't you just use a static assert on the size of the structure, 
> instead of asserting at runtime?
Hmm. Probably.

>
>> +
>> +    total = 4 + payload_len;
>> +    action = kunit_kmalloc_array(test, total, sizeof(*action), 
>> GFP_KERNEL);
>
> The size here might be clearer to understand as sizeof(u32). Not a 
> blocker.
Not to me.

>
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action);
>> +
>> +    action[0] = XE_GUC_ACTION_TEST_G2G_SEND;
>> +    action[1] = far_tile;
>> +    action[2] = far_dev;
>> +    action[3] = payload_len;
>> +    memcpy(action + 4, payload, payload_len * sizeof(u32));
>
> sizeof(*payload), instead of payload_len * sizeof(u32) ?
The intent is that once the length has been calculated at the start, 
everything is based on that value and no assumptions are made about 
where it came from.

>
>> +
>> +    atomic_inc(&xe->g2g_test_count);
>> +
>> +    /*
>> +     * Should specify the expected response notification here. 
>> Problem is that
>> +     * the response will be coming from a different GuC. By the end, 
>> it should
>> +     * all add up as long as an equal number of messages are sent 
>> from each GuC
>> +     * and to each GuC. However, in the middle negative reservation 
>> space errors
>> +     * and such like can occur. Rather than add intrusive changes to 
>> the CT layer
>> +     * it is simpler to just not bother counting it at all. The 
>> system should be
>> +     * idle when running the selftest, and the selftest's 
>> notification total size
>> +     * is well within the G2H allocation size. So there should be no 
>> issues with
>> +     * needing to block for space, which is all the tracking code is 
>> really for.
>> +     */
>> +    ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0);
>> +    kunit_kfree(test, action);
>> +    KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> 
>> %d:%d]\n", ret,
>> +                gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev);
>> +}
>> +
>> +/*
>> + * NB: Can't use KUNIT_ASSERT and friends in here as this is called 
>> asynchronously
>> + * from the G2H notification handler. Need that to actually complete 
>> rather than
>> + * thread-abort in order to keep the rest of the driver alive!
>> + */
>> +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
>> +{
>> +    struct xe_device *xe = guc_to_xe(guc);
>> +    struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL;
>> +    u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len;
>> +    struct g2g_test_payload *payload;
>> +    size_t payload_len;
>> +    int ret = 0, i;
>> +
>> +    payload_len = sizeof(*payload) / sizeof(u32);
>> +
>> +    if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) {
>> +        xe_gt_err(rx_gt, "G2G test notification invalid length %u", 
>> len);
>> +        ret = -EPROTO;
>> +        goto done;
>> +    }
>> +
>> +    tx_tile = msg[0];
>> +    tx_dev = msg[1];
>> +    got_len = msg[2];
>> +    payload = (struct g2g_test_payload *)(msg + 3);
>> +
>> +    rx_tile = gt_to_tile(rx_gt)->id;
>> +    rx_dev = G2G_DEV(rx_gt);
>> +
>> +    if (got_len != payload_len) {
>> +        xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", 
>> got_len, payload_len);
>> +        ret = -EPROTO;
>> +        goto done;
>> +    }
>> +
>> +    if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile ||
>> +        payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) {
>> +        xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs 
>> %d:%d -> %d:%d! [%d]\n",
>> +              payload->tx_tile, payload->tx_dev, payload->rx_tile, 
>> payload->rx_dev,
>> +              tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno);
>> +        ret = -EPROTO;
>> +        goto done;
>> +    }
>> +
>> +    if (!xe->g2g_test_array) {
>> +        xe_gt_err(rx_gt, "G2G: Missing test array!\n");
>> +        ret = -ENOMEM;
>> +        goto done;
>> +    }
>> +
>> +    for_each_gt(test_gt, xe, i) {
>> +        if (gt_to_tile(test_gt)->id != tx_tile)
>> +            continue;
>> +
>> +        if (G2G_DEV(test_gt) != tx_dev)
>> +            continue;
>> +
>> +        if (tx_gt) {
>> +            xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d 
>> for %d:%d!\n",
>> +                  tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev);
>> +            ret = -EINVAL;
>> +            goto done;
>> +        }
>> +
>> +        tx_gt = test_gt;
>> +    }
>> +    if (!tx_gt) {
>> +        xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", 
>> tx_tile, tx_dev);
>> +        ret = -EINVAL;
>> +        goto done;
>> +    }
>> +
>> +    idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id;
>> +
>> +    if (xe->g2g_test_array[idx] != payload->seqno - 1) {
>> +        xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> 
>> %d:%d!\n",
>> +              xe->g2g_test_array[idx], payload->seqno - 1,
>> +              tx_tile, tx_dev, rx_tile, rx_dev);
>> +        ret = -EINVAL;
>> +        goto done;
>> +    }
>> +
>> +    xe->g2g_test_array[idx] = payload->seqno;
>> +
>> +done:
>> +    atomic_dec(&xe->g2g_test_count);
>> +    return ret;
>> +}
>> +
>> +/*
>> + * Send the given seqno from all GuCs to all other GuCs in tile/GT 
>> order
>> + */
>> +static void g2g_test_in_order(struct kunit *test, struct xe_device 
>> *xe, u32 seqno)
>> +{
>> +    struct xe_gt *near_gt, *far_gt;
>> +    int i, j;
>> +
>> +    for_each_gt(near_gt, xe, i) {
>> +        u32 near_tile = gt_to_tile(near_gt)->id;
>> +        u32 near_dev = G2G_DEV(near_gt);
>> +
>> +        for_each_gt(far_gt, xe, j) {
>> +            u32 far_tile = gt_to_tile(far_gt)->id;
>> +            u32 far_dev = G2G_DEV(far_gt);
>> +            struct g2g_test_payload payload;
>> +
>> +            if (far_gt->info.id == near_gt->info.id)
>> +                continue;
>> +
>> +            payload.tx_dev = near_dev;
>> +            payload.tx_tile = near_tile;
>> +            payload.rx_dev = far_dev;
>> +            payload.rx_tile = far_tile;
>> +            payload.seqno = seqno;
>> +            g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, 
>> &payload);
>> +        }
>> +    }
>> +}
>> +
>> +#define WAIT_TIME_MS    100
>> +#define WAIT_COUNT    (1000 / WAIT_TIME_MS)
>> +
>> +static void g2g_wait_for_complete(void *_xe)
>> +{
>> +    struct xe_device *xe = (struct xe_device *)_xe;
>> +    struct kunit *test = kunit_get_current_test();
>> +    int wait = 0;
>> +
>> +    /* Wait for all G2H messages to be received */
>> +    while (atomic_read(&xe->g2g_test_count)) {
>> +        if (++wait > WAIT_COUNT)
>> +            break;
>> +
>> +        msleep(WAIT_TIME_MS);
>> +    }
>> +
>> +    KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count),
>> +                "Timed out waiting for notifications\n");
>> +    kunit_info(test, "Got all notifications back\n");
>> +}
>> +
>> +#undef WAIT_TIME_MS
>> +#undef WAIT_COUNT
>> +
>> +static void g2g_clean_array(void *_xe)
>> +{
>> +    struct xe_device *xe = (struct xe_device *)_xe;
>> +
>> +    xe->g2g_test_array = NULL;
>> +}
>> +
>> +#define NUM_LOOPS    16
>> +
>> +static void g2g_run_test(struct kunit *test, struct xe_device *xe)
>> +{
>> +    u32 seqno, max_array;
>> +    int ret, i, j;
>> +
>> +    max_array = xe->info.gt_count * xe->info.gt_count;
>> +    xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), 
>> GFP_KERNEL);
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array);
>> +
>> +    ret = kunit_add_action_or_reset(test, g2g_clean_array, xe);
>> +    KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up 
>> action\n");
>> +
>> +    /*
>> +     * Send incrementing seqnos from all GuCs to all other GuCs in 
>> tile/GT order.
>> +     * Tile/GT order doesn't really mean anything to the hardware 
>> but it is going
>> +     * to be a fixed sequence every time.
>> +     *
>> +     * Verify that each one comes back having taken the correct route.
>> +     */
>> +    ret = kunit_add_action(test, g2g_wait_for_complete, xe);
>> +    KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up 
>> action\n");
>> +    for (seqno = 1; seqno < NUM_LOOPS; seqno++)
>> +        g2g_test_in_order(test, xe, seqno);
>> +    seqno--;
>> +
>> +    kunit_release_action(test, &g2g_wait_for_complete, xe);
>> +
>> +    /* Check for the final seqno in each slot */
>> +    for (i = 0; i < xe->info.gt_count; i++) {
>> +        for (j = 0; j < xe->info.gt_count; j++) {
>> +            u32 idx = (j * xe->info.gt_count) + i;
>> +
>> +            if (i == j)
>> +                KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx],
>> +                            "identity seqno modified: %d for %dx%d!\n",
>> +                            xe->g2g_test_array[idx], i, j);
>> +            else
>> +                KUNIT_ASSERT_EQ_MSG(test, seqno, 
>> xe->g2g_test_array[idx],
>> +                            "invalid seqno: %d vs %d for %dx%d!\n",
>> +                            xe->g2g_test_array[idx], seqno, i, j);
>> +        }
>> +    }
>> +
>> +    kunit_kfree(test, xe->g2g_test_array);
>> +    kunit_release_action(test, &g2g_clean_array, xe);
>> +
>> +    kunit_info(test, "Test passed\n");
>> +}
>> +
>> +#undef NUM_LOOPS
>> +
>> +static void g2g_ct_stop(struct xe_guc *guc)
>> +{
>> +    struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
>> +    struct xe_device *xe = gt_to_xe(gt);
>> +    int i, t;
>> +
>> +    for_each_gt(remote_gt, xe, i) {
>> +        u32 tile, dev;
>> +
>> +        if (remote_gt->info.id == gt->info.id)
>> +            continue;
>> +
>> +        tile = gt_to_tile(remote_gt)->id;
>> +        dev = G2G_DEV(remote_gt);
>> +
>> +        for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
>> +            guc_g2g_deregister(guc, tile, dev, t);
>> +    }
>> +}
>> +
>> +/* Size of a single allocation that contains all G2G CTBs across all 
>> GTs */
>> +static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe)
>> +{
>> +    unsigned int count = xe->info.gt_count;
>> +    u32 num_channels = (count * (count - 1)) / 2;
>> +
>> +    kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 
>> 0x%08X [%d]\n",
>> +           count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, 
>> G2G_DESC_AREA_SIZE,
>> +           num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + 
>> G2G_DESC_AREA_SIZE,
>> +           num_channels * XE_G2G_TYPE_LIMIT);
>> +
>> +    return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + 
>> G2G_DESC_AREA_SIZE;
>> +}
>
> Since you're including this file from xe_guc.c, can't you call 
> guc_g2g_size directly? we'll miss the log but I'm not sure how helpful 
> that log is anyway.
The intent is that the test does not rely on or make assumptions about 
any of the 'official' G2G support. The idea being that there are 
multiple ways of laying out these buffers and the test wants to test 
specific options. Whereas, the driver might change which option it uses 
according to the flavour of the day. Therefore, a bunch of code is 
duplicated here to guarantee that it does not change behind the back of 
the test.

>
>> +
>> +/*
>> + * Use the driver's regular CTB allocation scheme.
>> + */
>> +static void g2g_alloc_default(struct kunit *test, struct xe_device *xe)
>> +{
>> +    struct xe_gt *gt;
>> +    int i;
>> +
>> +    kunit_info(test, "Default [tiles = %d, GTs = %d]\n",
>> +           xe->info.tile_count, xe->info.gt_count);
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        struct xe_guc *guc = &gt->uc.guc;
>> +        int ret;
>> +
>> +        ret = guc_g2g_alloc(guc);
>> +        KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", 
>> ERR_PTR(ret));
>> +        continue;
>> +    }
>> +}
>> +
>> +static void g2g_distribute(struct kunit *test, struct xe_device *xe, 
>> struct xe_bo *bo)
>> +{
>> +    struct xe_gt *root_gt, *gt;
>> +    int i;
>> +
>> +    root_gt = xe_device_get_gt(xe, 0);
>> +    root_gt->uc.guc.g2g.bo = bo;
>> +    root_gt->uc.guc.g2g.owned = true;
>> +    kunit_info(test, "[%d.%d] Assigned 0x%p\n", 
>> gt_to_tile(root_gt)->id, root_gt->info.id, bo);
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        if (gt->info.id != 0) {
>> +            gt->uc.guc.g2g.owned = false;
>> +            gt->uc.guc.g2g.bo = xe_bo_get(bo);
>> +            kunit_info(test, "[%d.%d] Pinned 0x%p\n",
>> +                   gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo);
>> +        }
>> +
>> +        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo);
>> +    }
>> +}
>> +
>> +/*
>> + * Allocate a single blob on the host and split between all G2G CTBs.
>
> Isn't this the same as the default on integrated? I'm wondering if 
> it's worth skipping xe_live_guc_g2g_kunit_allmem entirely on 
> integrated, given that the default case is already covered by 
> xe_live_guc_g2g_kunit_default
Except that the default might be to not support G2G. Just because a 
platform has multiple GTs doesn't mean it actually uses G2G comms. And 
if G2G is disabled at the driver level then the 'default' test will 
skip. Whereas, we still want to run the test to make sure that the 
hardware works, just in case we do need to enable the feature later.

So again, the test can't make any assumptions about what the driver 
itself is doing. That leads to duplication of testing in some 
situations, but the test is pretty quick. IMHO, the duplication is worth 
the safety of guaranteeing that the test always does what it thinks it 
is doing.

>
>> + */
>> +static void g2g_alloc_host(struct kunit *test, struct xe_device *xe)
>> +{
>> +    struct xe_bo *bo;
>> +    u32 g2g_size;
>> +
>> +    kunit_info(test, "Host [tiles = %d, GTs = %d]\n", 
>> xe->info.tile_count, xe->info.gt_count);
>> +
>> +    g2g_size = g2g_ctb_size(test, xe);
>> +    bo = xe_managed_bo_create_pin_map(xe, 
>> xe_device_get_root_tile(xe), g2g_size,
>> +                      XE_BO_FLAG_SYSTEM |
>> +                      XE_BO_FLAG_GGTT |
>> +                      XE_BO_FLAG_GGTT_ALL |
>> +                      XE_BO_FLAG_GGTT_INVALIDATE);
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
>> +    kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo);
>> +
>> +    xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
>> +
>> +    g2g_distribute(test, xe, bo);
>> +}
>> +
>> +/*
>> + * Allocate a single blob on the given tile and split between all 
>> G2G CTBs.
>> + */
>> +static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, 
>> struct xe_tile *tile)
>> +{
>> +    struct xe_bo *bo;
>> +    u32 g2g_size;
>> +
>> +    KUNIT_ASSERT_TRUE(test, IS_DGFX(xe));
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile);
>> +
>> +    kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n",
>> +           tile->id, xe->info.tile_count, xe->info.gt_count);
>> +
>> +    g2g_size = g2g_ctb_size(test, xe);
>> +    bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
>> +                      XE_BO_FLAG_VRAM_IF_DGFX(tile) |
>> +                      XE_BO_FLAG_GGTT |
>> +                      XE_BO_FLAG_GGTT_ALL |
>> +                      XE_BO_FLAG_GGTT_INVALIDATE);
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
>> +    kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo);
>> +
>> +    xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
>> +
>> +    g2g_distribute(test, xe, bo);
>> +}
>> +
>> +static void g2g_free(struct kunit *test, struct xe_device *xe)
>> +{
>> +    struct xe_gt *gt;
>> +    struct xe_bo *bo;
>> +    int i;
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        bo = gt->uc.guc.g2g.bo;
>> +        if (!bo)
>> +            continue;
>> +
>> +        if (gt->uc.guc.g2g.owned) {
>> +            xe_managed_bo_unpin_map_no_vm(xe, bo);
>> +            kunit_info(test, "[%d.%d] Unmapped 0x%p\n",
>> +                   gt_to_tile(gt)->id, gt->info.id, bo);
>> +        } else {
>> +            xe_bo_put(bo);
>> +            kunit_info(test, "[%d.%d] Unpinned 0x%p\n",
>> +                   gt_to_tile(gt)->id, gt->info.id, bo);
>> +        }
>> +
>> +        gt->uc.guc.g2g.bo = NULL;
>> +    }
>> +}
>> +
>> +static void g2g_stop(struct kunit *test, struct xe_device *xe)
>> +{
>> +    struct xe_gt *gt;
>> +    int i;
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        struct xe_guc *guc = &gt->uc.guc;
>> +
>> +        if (!guc->g2g.bo)
>> +            continue;
>> +
>> +        g2g_ct_stop(guc);
>> +    }
>> +
>> +    g2g_free(test, xe);
>> +}
>> +
>> +/*
>> + * Generate a unique id for each bi-directional CTB for each pair of
>> + * near and far tiles/devices. The id can then be used as an index into
>> + * a single allocation that is sub-divided into multiple CTBs.
>> + *
>> + * For example, with two devices per tile and two tiles, the table 
>> should
>> + * look like:
>> + *           Far <tile>.<dev>
>> + *         0.0   0.1   1.0   1.1
>> + * N 0.0  --/-- 00/01 02/03 04/05
>> + * e 0.1  01/00 --/-- 06/07 08/09
>> + * a 1.0  03/02 07/06 --/-- 10/11
>> + * r 1.1  05/04 09/08 11/10 --/--
>> + *
>> + * Where each entry is Rx/Tx channel id.
>> + *
>> + * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
>> + * be reading from channel #11 and writing to channel #10. Whereas,
>> + * GuC #2 talking to GuC #3 would be read on #10 and write to #11.
>> + */
>> +static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, 
>> u32 far_dev,
>> +             u32 type, u32 max_inst, bool have_dev)
>> +{
>> +    u32 near = near_tile, far = far_tile;
>> +    u32 idx = 0, x, y, direction;
>> +    int i;
>> +
>> +    if (have_dev) {
>> +        near = (near << 1) | near_dev;
>> +        far = (far << 1) | far_dev;
>> +    }
>> +
>> +    /* No need to send to one's self */
>> +    if (far == near)
>> +        return -1;
>> +
>> +    if (far > near) {
>> +        /* Top right table half */
>> +        x = far;
>> +        y = near;
>> +
>> +        /* T/R is 'forwards' direction */
>> +        direction = type;
>> +    } else {
>> +        /* Bottom left table half */
>> +        x = near;
>> +        y = far;
>> +
>> +        /* B/L is 'backwards' direction */
>> +        direction = (1 - type);
>> +    }
>> +
>> +    /* Count the rows prior to the target */
>> +    for (i = y; i > 0; i--)
>> +        idx += max_inst - i;
>> +
>> +    /* Count this row up to the target */
>> +    idx += (x - 1 - y);
>> +
>> +    /* Slots are in Rx/Tx pairs */
>> +    idx *= 2;
>> +
>> +    /* Pick Rx/Tx direction */
>> +    idx += direction;
>> +
>> +    return idx;
>> +}
>> +
>> +static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 
>> far_dev, u32 type, bool have_dev)
>> +{
>> +    struct xe_gt *gt = guc_to_gt(guc);
>> +    struct xe_device *xe = gt_to_xe(gt);
>> +    u32 near_tile = gt_to_tile(gt)->id;
>> +    u32 near_dev = G2G_DEV(gt);
>> +    u32 max = xe->info.gt_count;
>> +    int idx;
>> +    u32 base, desc, buf;
>> +
>> +    if (!guc->g2g.bo)
>> +        return -ENODEV;
>> +
>> +    idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, 
>> type, max, have_dev);
>> +    xe_assert(xe, idx >= 0);
>> +
>> +    base = guc_bo_ggtt_addr(guc, guc->g2g.bo);
>> +    desc = base + idx * G2G_DESC_SIZE;
>> +    buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
>> +
>> +    xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
>> +    xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= 
>> xe_bo_size(guc->g2g.bo));
>> +
>> +    return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev,
>> +                          desc, buf, G2G_BUFFER_SIZE);
>> +}
>
> Similar comment as above for those 2 _flat functions: can't you just 
> call the original?
As above, the 'original' might change but the test does not want to.

>
>> +
>> +static void g2g_start(struct kunit *test, struct xe_guc *guc)
>> +{
>> +    struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
>> +    struct xe_device *xe = gt_to_xe(gt);
>> +    unsigned int i;
>> +    int t, ret;
>> +    bool have_dev;
>> +
>> +    KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
>> +
>> +    /* GuC interface will need extending if more GT device types are 
>> ever created. */
>> +    KUNIT_ASSERT_TRUE(test,
>> +              (gt->info.type == XE_GT_TYPE_MAIN) ||
>> +              (gt->info.type == XE_GT_TYPE_MEDIA));
>> +
>> +    /* Channel numbering depends on whether there are multiple GTs 
>> per tile */
>> +    have_dev = xe->info.gt_count > xe->info.tile_count;
>> +
>> +    for_each_gt(remote_gt, xe, i) {
>> +        u32 tile, dev;
>> +
>> +        if (remote_gt->info.id == gt->info.id)
>> +            continue;
>> +
>> +        tile = gt_to_tile(remote_gt)->id;
>> +        dev = G2G_DEV(remote_gt);
>> +
>> +        for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
>> +            ret = g2g_register_flat(guc, tile, dev, t, have_dev);
>> +            KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: 
>> %pe", ERR_PTR(ret));
>> +        }
>> +    }
>> +}
>
> And this one is almost the same as guc_g2g_start(). Could just do:
>
> static void g2g_start(struct kunit *test, struct xe_guc *guc)
> {
>         struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
>         int ret;
>
>         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
>
>         KUNIT_ASSERT_TRUE(test,
>                       (gt->info.type == XE_GT_TYPE_MAIN) ||
>                       (gt->info.type == XE_GT_TYPE_MEDIA));
>
>         ret = guc_g2g_start(guc);
>         KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", 
> ERR_PTR(ret));
> }
>
You are correct, but see above.

>
>> +
>> +enum {
>> +    G2G_CTB_TYPE_DEFAULT,
>> +    G2G_CTB_TYPE_HOST,
>> +    G2G_CTB_TYPE_TILE,
>> +};
>> +
>> +static void g2g_reinit(struct kunit *test, struct xe_device *xe, int 
>> ctb_type, struct xe_tile *tile)
>> +{
>> +    struct xe_gt *gt;
>> +    int i, found = 0;
>> +
>> +    g2g_stop(test, xe);
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        struct xe_guc *guc = &gt->uc.guc;
>> +
>> +        KUNIT_ASSERT_NULL(test, guc->g2g.bo);
>> +    }
>> +
>> +    switch (ctb_type) {
>> +    case G2G_CTB_TYPE_DEFAULT:
>> +        g2g_alloc_default(test, xe);
>> +        break;
>> +
>> +    case G2G_CTB_TYPE_HOST:
>> +        g2g_alloc_host(test, xe);
>> +        break;
>> +
>> +    case G2G_CTB_TYPE_TILE:
>> +        g2g_alloc_tile(test, xe, tile);
>> +        break;
>> +
>> +    default:
>> +        KUNIT_ASSERT_TRUE(test, false);
>> +    }
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        struct xe_guc *guc = &gt->uc.guc;
>> +
>> +        if (!guc->g2g.bo)
>> +            continue;
>
> Do we actually have a case where the g2g buffer is enabled on some GTs 
> but not others?
Not that I am aware of, but for the sake of a single if statement we can 
avoid a null pointer deref if that ever happens. E.g. if we had a 
multi-tile/multi-GT device and the multiple render GTs needed to chat 
but the media GTs did not.

There is also the option that each tile has its own memory allocation 
for the bo and one tile failed to allocate, for example.

John.

>
> Daniele
>
>> +
>> +        g2g_start(test, guc);
>> +        found++;
>> +    }
>> +
>> +    KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels 
>> running: %d", found);
>> +
>> +    kunit_info(test, "Testing across %d GTs\n", found);
>> +}
>> +
>> +static void g2g_recreate_ctb(void *_xe)
>> +{
>> +    struct xe_device *xe = (struct xe_device *)_xe;
>> +    struct kunit *test = kunit_get_current_test();
>> +
>> +    g2g_stop(test, xe);
>> +
>> +    if (xe_guc_g2g_wanted(xe))
>> +        g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
>> +}
>> +
>> +static void g2g_pm_runtime_put(void *_xe)
>> +{
>> +    struct xe_device *xe = (struct xe_device *)_xe;
>> +
>> +    xe_pm_runtime_put(xe);
>> +}
>> +
>> +static void g2g_pm_runtime_get(struct kunit *test)
>> +{
>> +    struct xe_device *xe = test->priv;
>> +    int ret;
>> +
>> +    xe_pm_runtime_get(xe);
>> +    ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe);
>> +    KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM 
>> action\n");
>> +}
>> +
>> +static void g2g_check_skip(struct kunit *test)
>> +{
>> +    struct xe_device *xe = test->priv;
>> +    struct xe_gt *gt;
>> +    int i;
>> +
>> +    if (IS_SRIOV_VF(xe))
>> +        kunit_skip(test, "not supported from a VF");
>> +
>> +    if (xe->info.gt_count <= 1)
>> +        kunit_skip(test, "not enough GTs");
>> +
>> +    for_each_gt(gt, xe, i) {
>> +        struct xe_guc *guc = &gt->uc.guc;
>> +
>> +        if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD)
>> +            kunit_skip(test,
>> +                   "G2G test interface not available in production 
>> firmware builds\n");
>> +    }
>> +}
>> +
>> +/*
>> + * Simple test that does not try to recreate the CTBs.
>> + * Requires that the platform already enables G2G comms
>> + * but has no risk of leaving the system in a broken state
>> + * afterwards.
>> + */
>> +static void xe_live_guc_g2g_kunit_default(struct kunit *test)
>> +{
>> +    struct xe_device *xe = test->priv;
>> +
>> +    if (!xe_guc_g2g_wanted(xe))
>> +        kunit_skip(test, "G2G not enabled");
>> +
>> +    g2g_check_skip(test);
>> +
>> +    g2g_pm_runtime_get(test);
>> +
>> +    kunit_info(test, "Testing default CTBs\n");
>> +    g2g_run_test(test, xe);
>> +
>> +    kunit_release_action(test, &g2g_pm_runtime_put, xe);
>> +}
>> +
>> +/*
>> + * More complex test that re-creates the CTBs in various locations to
>> + * test access to each location from each GuC. Can be run even on
>> + * systems that do not enable G2G by default. On the other hand,
>> + * because it recreates the CTBs, if something goes wrong it could
>> + * leave the system with broken G2G comms.
>> + */
>> +static void xe_live_guc_g2g_kunit_allmem(struct kunit *test)
>> +{
>> +    struct xe_device *xe = test->priv;
>> +    int ret;
>> +
>> +    g2g_check_skip(test);
>> +
>> +    g2g_pm_runtime_get(test);
>> +
>> +    /* Make sure to leave the system as we found it */
>> +    ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe);
>> +    KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB 
>> re-creation action\n");
>> +
>> +    kunit_info(test, "Testing CTB type 'default'...\n");
>> +    g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
>> +    g2g_run_test(test, xe);
>> +
>> +    kunit_info(test, "Testing CTB type 'host'...\n");
>> +    g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL);
>> +    g2g_run_test(test, xe);
>> +
>> +    if (IS_DGFX(xe)) {
>> +        struct xe_tile *tile;
>> +        int id;
>> +
>> +        for_each_tile(tile, xe, id) {
>> +            kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id);
>> +
>> +            g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile);
>> +            g2g_run_test(test, xe);
>> +        }
>> +    } else {
>> +        kunit_info(test, "Skipping local memory on integrated 
>> platform\n");
>> +    }
>> +
>> +    kunit_release_action(test, g2g_recreate_ctb, xe);
>> +    kunit_release_action(test, g2g_pm_runtime_put, xe);
>> +}
>> +
>> +static struct kunit_case xe_guc_g2g_tests[] = {
>> +    KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, 
>> xe_pci_live_device_gen_param),
>> +    KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, 
>> xe_pci_live_device_gen_param),
>> +    {}
>> +};
>> +
>> +VISIBLE_IF_KUNIT
>> +struct kunit_suite xe_guc_g2g_test_suite = {
>> +    .name = "xe_guc_g2g",
>> +    .test_cases = xe_guc_g2g_tests,
>> +    .init = xe_kunit_helper_xe_device_live_test_init,
>> +};
>> +EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
>> diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c 
>> b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
>> index 81277c77016d..c55e46f1ae92 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
>> @@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite;
>>   extern struct kunit_suite xe_dma_buf_test_suite;
>>   extern struct kunit_suite xe_migrate_test_suite;
>>   extern struct kunit_suite xe_mocs_test_suite;
>> +extern struct kunit_suite xe_guc_g2g_test_suite;
>>     kunit_test_suite(xe_bo_test_suite);
>>   kunit_test_suite(xe_bo_shrink_test_suite);
>>   kunit_test_suite(xe_dma_buf_test_suite);
>>   kunit_test_suite(xe_migrate_test_suite);
>>   kunit_test_suite(xe_mocs_test_suite);
>> +kunit_test_suite(xe_guc_g2g_test_suite);
>>     MODULE_AUTHOR("Intel Corporation");
>>   MODULE_LICENSE("GPL");
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h 
>> b/drivers/gpu/drm/xe/xe_device_types.h
>> index 38c8329b4d2c..0416b0eba3bf 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -576,6 +576,13 @@ struct xe_device {
>>       atomic64_t global_total_pages;
>>   #endif
>>   +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>> +    /** @g2g_test_array: for testing G2G communications */
>> +    u32 *g2g_test_array;
>> +    /** @g2g_test_count: for testing G2G communications */
>> +    atomic_t g2g_test_count;
>> +#endif
>> +
>>       /* private: */
>>     #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>> diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
>> index 1ca7f4f27e26..7c069236d1be 100644
>> --- a/drivers/gpu/drm/xe/xe_guc.c
>> +++ b/drivers/gpu/drm/xe/xe_guc.c
>> @@ -1673,3 +1673,7 @@ void xe_guc_declare_wedged(struct xe_guc *guc)
>>       xe_guc_ct_stop(&guc->ct);
>>       xe_guc_submit_wedge(guc);
>>   }
>> +
>> +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>> +#include "tests/xe_guc_g2g_test.c"
>> +#endif
>> diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
>> index 22cf019a11bf..1cca05967e62 100644
>> --- a/drivers/gpu/drm/xe/xe_guc.h
>> +++ b/drivers/gpu/drm/xe/xe_guc.h
>> @@ -53,6 +53,10 @@ void xe_guc_stop(struct xe_guc *guc);
>>   int xe_guc_start(struct xe_guc *guc);
>>   void xe_guc_declare_wedged(struct xe_guc *guc);
>>   +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>> +int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, 
>> u32 len);
>> +#endif
>> +
>>   static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class 
>> class)
>>   {
>>       switch (class) {
>> diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c 
>> b/drivers/gpu/drm/xe/xe_guc_ct.c
>> index 3f4e6a46ff16..f44967f84d30 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_ct.c
>> +++ b/drivers/gpu/drm/xe/xe_guc_ct.c
>> @@ -1439,6 +1439,11 @@ static int process_g2h_msg(struct xe_guc_ct 
>> *ct, u32 *msg, u32 len)
>>       case XE_GUC_ACTION_NOTIFY_EXCEPTION:
>>           ret = guc_crash_process_msg(ct, action);
>>           break;
>> +#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>> +    case XE_GUC_ACTION_TEST_G2G_RECV:
>> +        ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
>> +        break;
>> +#endif
>>       default:
>>           xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
>>       }
>> diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h 
>> b/drivers/gpu/drm/xe/xe_guc_fwif.h
>> index ca9f999d38d1..bc94f8d0f037 100644
>> --- a/drivers/gpu/drm/xe/xe_guc_fwif.h
>> +++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
>> @@ -15,6 +15,7 @@
>>   #define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET    4
>>   #define G2H_LEN_DW_DEREGISTER_CONTEXT        3
>>   #define G2H_LEN_DW_TLB_INVALIDATE        3
>> +#define G2H_LEN_DW_G2G_NOTIFY_MIN        3
>>     #define GUC_ID_MAX            65535
>>   #define GUC_ID_UNKNOWN            0xffffffff
>