[PATCH] drm/xe/guc: Add GuC based register capture for error capture

Dong, Zhanjun zhanjun.dong at intel.com
Tue Jan 2 23:13:32 UTC 2024


Please see my comments inline below.

Regards,
Zhanjun Dong

On 2024-01-01 11:13 a.m., Lucas De Marchi wrote:
> On Fri, Dec 29, 2023 at 02:03:45PM -0800, Zhanjun Dong wrote:
>> Port GuC based register capture for error capture from i915 to Xe.
>>
>> There are 3 parts in this commit:
>> . Prepare for capture registers
>>    There is a bo create at guc ads init time, that is very early
>>    and engine map is not ready, make it hard to calculate the
>>    capture buffer size, new function created for worst case size
>>    caluation. Other than that, this part basically follows the i915
>>    design.
>> . Process capture notification message
>>    Basically follows i915 design
>> . Sysfs command process.
>>    Xe switched to devcoredump, adopted command line process with
>>    captured list.
>>
>> Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
>> ---
>> drivers/gpu/drm/xe/Kconfig               |   11 +
>> drivers/gpu/drm/xe/Makefile              |    1 +
>> drivers/gpu/drm/xe/abi/guc_actions_abi.h |    7 +
>> drivers/gpu/drm/xe/regs/xe_engine_regs.h |   19 +
>> drivers/gpu/drm/xe/regs/xe_gt_regs.h     |   70 +
>> drivers/gpu/drm/xe/regs/xe_reg_defs.h    |    1 -
>> drivers/gpu/drm/xe/regs/xe_regs.h        |    4 +
>> drivers/gpu/drm/xe/xe_bo_types.h         |    2 +
>> drivers/gpu/drm/xe/xe_devcoredump.c      |    1 +
>> drivers/gpu/drm/xe/xe_gt_printk.h        |    3 +
>> drivers/gpu/drm/xe/xe_gt_topology.c      |    2 +
>> drivers/gpu/drm/xe/xe_gt_types.h         |    6 +
>> drivers/gpu/drm/xe/xe_guc.c              |    5 +
>> drivers/gpu/drm/xe/xe_guc_ads.c          |  255 +++-
>> drivers/gpu/drm/xe/xe_guc_ads_types.h    |    2 +
>> drivers/gpu/drm/xe/xe_guc_capture.c      | 1574 ++++++++++++++++++++++
> 
> mind the reviewer. This can't really possibly be reviewed. It must be
> split in sensible chunks (that still build and work)
> 
This feature is about ~3400 lines of patch, for comparasion, the i915 
version is about 4000 lines.
Yes, I will split into more chunks.
> I skimmed through this patch and think it has too much copy and paste
> from i915... it needs to be trimmed down and redesigned. Early feedback:
Sure, it will be trimmed down and split into more chunks.
Not so sure if I fully understand your word of "redesign", the feature 
has the similar functionality amoung i915 and Xe, the feature only being 
triggered on GPU hang, so the complixity of this feature could be sorted 
as O(1) and consider no performance impact, so reuse i915 code looks 
reasonable. Do you mean adopte to Xe framework?
> 
> - Any support fro graphics version < 12 to be removed
> - GEN* names are banned in xe, don't add them > - Each part of the driver you change as pre-requisite, should be a
>    separate patch. I will go through some examples below
Sure, I will check.
> 
>> drivers/gpu/drm/xe/xe_guc_capture.h      |   33 +
>> drivers/gpu/drm/xe/xe_guc_capture_fwif.h |  227 ++++
>> drivers/gpu/drm/xe/xe_guc_ct.c           |    2 +
>> drivers/gpu/drm/xe/xe_guc_fwif.h         |   69 +
>> drivers/gpu/drm/xe/xe_guc_log.c          |  179 +++
>> drivers/gpu/drm/xe/xe_guc_log.h          |   15 +
>> drivers/gpu/drm/xe/xe_guc_log_types.h    |   26 +
>> drivers/gpu/drm/xe/xe_guc_submit.c       |   22 +-
>> drivers/gpu/drm/xe/xe_guc_submit.h       |    3 +
>> drivers/gpu/drm/xe/xe_guc_types.h        |    2 +
>> drivers/gpu/drm/xe/xe_hw_engine.c        |   89 +-
>> drivers/gpu/drm/xe/xe_hw_engine_types.h  |  126 +-
>> drivers/gpu/drm/xe/xe_lrc.c              |    4 +-
>> drivers/gpu/drm/xe/xe_lrc_types.h        |    2 +
>> 30 files changed, 2632 insertions(+), 130 deletions(-)
>> create mode 100644 drivers/gpu/drm/xe/xe_guc_capture.c
>> create mode 100644 drivers/gpu/drm/xe/xe_guc_capture.h
>> create mode 100644 drivers/gpu/drm/xe/xe_guc_capture_fwif.h
>>
>> diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
>> index a53b0fdc15a7..337f6b613438 100644
>> --- a/drivers/gpu/drm/xe/Kconfig
>> +++ b/drivers/gpu/drm/xe/Kconfig
>> @@ -83,6 +83,17 @@ config DRM_XE_FORCE_PROBE
>>
>>       Use "!*" to block the probe of the driver for all known devices.
>>
>> +config DRM_XE_CAPTURE_ERROR
>> +    bool "Enable capturing GPU state following a hang"
>> +    depends on DRM_XE
>> +    default y
>> +    help
>> +      This option enables capturing the GPU state when a hang is 
>> detected.
>> +      This information is vital for triaging hangs and assists in 
>> debugging.
>> +      Please report any hang to your Intel representative to help 
>> with triaging.
>> +
>> +      If in doubt, say "Y".
> 
> do we really need a kconfig?
I think this is for users want to save every KB of memory or debug info 
no longer their major concerns.
> 
>> +
>> menu "drm/Xe Debugging"
>> depends on DRM_XE
>> depends on EXPERT
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index df8601d6a59f..5a97b2b74d1a 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -92,6 +92,7 @@ xe-y += xe_bb.o \
>>     xe_gt_topology.o \
>>     xe_guc.o \
>>     xe_guc_ads.o \
>> +    xe_guc_capture.o \
>>     xe_guc_ct.o \
>>     xe_guc_db_mgr.o \
>>     xe_guc_debugfs.o \
>> diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h 
>> b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
>> index 3062e0e0d467..dd57620258b2 100644
>> --- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
>> +++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
>> @@ -182,6 +182,13 @@ enum xe_guc_sleep_state_status {
>> #define GUC_LOG_CONTROL_VERBOSITY_MASK    (0xF << 
>> GUC_LOG_CONTROL_VERBOSITY_SHIFT)
>> #define GUC_LOG_CONTROL_DEFAULT_LOGGING    (1 << 8)
>>
>> +enum intel_guc_state_capture_event_status {
>> +    INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_SUCCESS = 0x0,
>> +    INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_NOSPACE = 0x1,
>> +};
>> +
>> +#define INTEL_GUC_STATE_CAPTURE_EVENT_STATUS_MASK      0x000000FF
>> +
>> #define XE_GUC_TLB_INVAL_TYPE_SHIFT 0
>> #define XE_GUC_TLB_INVAL_MODE_SHIFT 8
>> /* Flush PPC or SMRO caches along with TLB invalidation request */
>> diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h 
>> b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> index 0b1266c88a6a..060628dbac8e 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
>> @@ -64,10 +64,17 @@
>>
>> #define RING_ACTHD_UDW(base)            XE_REG((base) + 0x5c)
>> #define RING_DMA_FADD_UDW(base)            XE_REG((base) + 0x60)
>> +#define RING_IPEIR(base)            XE_REG((base) + 0x64)
>> #define RING_IPEHR(base)            XE_REG((base) + 0x68)
>> +#define RING_INSTDONE(base)            XE_REG((base) + 0x6c)
>> +#define RING_INSTPS(base)            XE_REG((base) + 0x70)
>> +
>> #define RING_ACTHD(base)            XE_REG((base) + 0x74)
>> #define RING_DMA_FADD(base)            XE_REG((base) + 0x78)
>> #define RING_HWS_PGA(base)            XE_REG((base) + 0x80)
>> +#define RING_CMD_BUF_CCTL(base)            XE_REG((base) + 0x84)
>> +#define IPEIR(base)                XE_REG((base) + 0x88)
>> +
>> #define RING_HWSTAM(base)            XE_REG((base) + 0x98)
>> #define RING_MI_MODE(base)            XE_REG((base) + 0x9c)
>> #define RING_NOPID(base)            XE_REG((base) + 0x94)
>> @@ -111,9 +118,18 @@
>> #define   FF_DOP_CLOCK_GATE_DISABLE        REG_BIT(1)
>> #define   REPLAY_MODE_GRANULARITY        REG_BIT(0)
>>
>> +#define RING_BBSTATE(base)            XE_REG((base) + 0x110)
>> #define RING_BBADDR(base)            XE_REG((base) + 0x140)
>> #define RING_BBADDR_UDW(base)            XE_REG((base) + 0x168)
>>
>> +#define CCID(base)                XE_REG((base) + 0x180)
>> +#define   CCID_EN                BIT(0)
>> +#define   CCID_EXTENDED_STATE_RESTORE        BIT(2)
>> +#define   CCID_EXTENDED_STATE_SAVE        BIT(3)
>> +#define RING_BB_PER_CTX_PTR(base)        XE_REG((base) + 0x1c0) /* 
>> gen8+ */
>> +#define RING_INDIRECT_CTX(base)            XE_REG((base) + 0x1c4) /* 
>> gen8+ */
>> +#define RING_INDIRECT_CTX_OFFSET(base)        XE_REG((base) + 0x1c8) 
>> /* gen8+ */
>> +
>> #define BCS_SWCTRL(base)            XE_REG((base) + 0x200, 
>> XE_REG_OPTION_MASKED)
>> #define   BCS_SWCTRL_DISABLE_256B        REG_BIT(2)
>>
>> @@ -129,6 +145,9 @@
>> #define      CTX_CTRL_INHIBIT_SYN_CTX_SWITCH    REG_BIT(3)
>> #define      CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT    REG_BIT(0)
>>
>> +#define GEN8_RING_PDP_UDW(base, n)        XE_REG((base) + 0x270 + (n) 
>> * 8 + 4)
>> +#define GEN8_RING_PDP_LDW(base, n)        XE_REG((base) + 0x270 + (n) 
>> * 8)
>> +
>> #define RING_MODE(base)                XE_REG((base) + 0x29c)
>> #define   GFX_DISABLE_LEGACY_MODE        REG_BIT(3)
>>
>> diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h 
>> b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> index 6aaaf1f63c72..2546d58a7130 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>> @@ -59,6 +59,38 @@
>>
>> #define XELP_GLOBAL_MOCS(i)            XE_REG(0x4000 + (i) * 4)
>> #define XEHP_GLOBAL_MOCS(i)            XE_REG_MCR(0x4000 + (i) * 4)
>> +
>> +#define ERROR_GEN6                XE_REG(0x40a0)
>> +
>> +#define DONE_REG                XE_REG(0x40b0)
>> +#define GEN8_PRIVATE_PAT_LO            XE_REG(0x40e0)
>> +#define GEN8_PRIVATE_PAT_HI            XE_REG(0x40e0 + 4)
>> +#define GEN10_PAT_INDEX(index)            XE_REG(0x40e0 + (index) * 4)
>> +#define BSD_HWS_PGA_GEN7            XE_REG(0x4180)
>> +
>> +#define GEN12_CCS_AUX_INV            XE_REG(0x4208)
>> +#define GEN12_VD0_AUX_INV            XE_REG(0x4218)
>> +#define GEN12_VE0_AUX_INV            XE_REG(0x4238)
>> +#define GEN12_BCS0_AUX_INV            XE_REG(0x4248)
>> +
>> +#define GEN8_RTCR                XE_REG(0x4260)
>> +#define GEN8_M1TCR                XE_REG(0x4264)
>> +#define GEN8_M2TCR                XE_REG(0x4268)
>> +#define GEN8_BTCR                XE_REG(0x426c)
>> +#define GEN8_VTCR                XE_REG(0x4270)
>> +
>> +#define BLT_HWS_PGA_GEN7            XE_REG(0x4280)
>> +
>> +#define GEN12_VD2_AUX_INV            XE_REG(0x4298)
>> +#define GEN12_CCS0_AUX_INV            XE_REG(0x42c8)
>> +#define   AUX_INV                REG_BIT(0)
>> +
>> +#define VEBOX_HWS_PGA_GEN7            XE_REG(0x4380)
>> +
>> +#define GEN12_AUX_ERR_DBG            XE_REG(0x43f4)
>> +
>> +#define GEN7_TLB_RD_ADDR            XE_REG(0x4700)
>> +
>> #define CCS_AUX_INV                XE_REG(0x4208)
>>
>> #define VD0_AUX_INV                XE_REG(0x4218)
>> @@ -73,6 +105,9 @@
>> #define WM_CHICKEN3                XE_REG_MCR(0x5588, 
>> XE_REG_OPTION_MASKED)
>> #define   HIZ_PLANE_COMPRESSION_DIS        REG_BIT(10)
>>
>> +#define GEN8_FAULT_TLB_DATA0            XE_REG(0x4b10)
>> +#define GEN8_FAULT_TLB_DATA1            XE_REG(0x4b14)
>> +
>> #define CHICKEN_RASTER_2            XE_REG_MCR(0x6208, 
>> XE_REG_OPTION_MASKED)
>> #define   TBIMR_FAST_CLIP            REG_BIT(5)
>>
>> @@ -94,6 +129,8 @@
>> #define   FF_MODE2_TDS_TIMER_MASK        REG_GENMASK(23, 16)
>> #define   FF_MODE2_TDS_TIMER_128        
>> REG_FIELD_PREP(FF_MODE2_TDS_TIMER_MASK, 4)
>>
>> +#define XEHPG_INSTDONE_GEOM_SVG            XE_REG_MCR(0x666c)
>> +
>> #define CACHE_MODE_1                XE_REG(0x7004, XE_REG_OPTION_MASKED)
>> #define   MSAA_OPTIMIZATION_REDUC_DISABLE    REG_BIT(11)
>>
>> @@ -110,6 +147,10 @@
>> #define   FLSH_IGNORES_PSD            REG_BIT(10)
>> #define   FD_END_COLLECT            REG_BIT(5)
>>
>> +#define GEN7_SC_INSTDONE            XE_REG(0x7100)
>> +#define GEN12_SC_INSTDONE_EXTRA            XE_REG(0x7104)
>> +#define GEN12_SC_INSTDONE_EXTRA2        XE_REG(0x7108)
>> +
>> #define COMMON_SLICE_CHICKEN4            XE_REG(0x7300, 
>> XE_REG_OPTION_MASKED)
>> #define   DISABLE_TDC_LOAD_BALANCING_CALC    REG_BIT(6)
>>
>> @@ -298,6 +339,22 @@
>>
>> #define XE2LPM_L3SQCREG5            XE_REG_MCR(0xb658)
>>
>> +#define GEN12_FAULT_TLB_DATA0            XE_REG(0xceb8)
>> +#define XEHP_FAULT_TLB_DATA0            XE_REG_MCR(0xceb8)
>> +#define GEN12_FAULT_TLB_DATA1            XE_REG(0xcebc)
>> +#define XEHP_FAULT_TLB_DATA1            XE_REG_MCR(0xcebc)
>> +#define   FAULT_VA_HIGH_BITS            (0xf << 0)
>> +#define   FAULT_GTT_SEL                BIT(4)
>> +
>> +#define GEN12_RING_FAULT_REG            XE_REG(0xcec4)
>> +#define XEHP_RING_FAULT_REG            XE_REG_MCR(0xcec4)
>> +#define XELPMP_RING_FAULT_REG            XE_REG(0xcec4)
>> +#define   GEN8_RING_FAULT_ENGINE_ID(x)        (((x) >> 12) & 0x7)
>> +#define   RING_FAULT_GTTSEL_MASK        BIT(11)
>> +#define   RING_FAULT_SRCID(x)            (((x) >> 3) & 0xff)
>> +#define   RING_FAULT_FAULT_TYPE(x)        (((x) >> 1) & 0x3)
>> +#define   RING_FAULT_VALID            BIT(0)
>> +
>> #define XEHP_MERT_MOD_CTRL            XE_REG_MCR(0xcf28)
>> #define RENDER_MOD_CTRL                XE_REG_MCR(0xcf2c)
>> #define COMP_MOD_CTRL                XE_REG_MCR(0xcf30)
>> @@ -316,6 +373,13 @@
>> #define   INVALIDATION_BROADCAST_MODE_DIS    REG_BIT(12)
>> #define   GLOBAL_INVALIDATION_MODE        REG_BIT(2)
>>
>> +#define GEN12_GAM_DONE                XE_REG(0xcf68)
>> +
>> +#define GEN7_SAMPLER_INSTDONE            XE_REG(0xe160)
>> +#define GEN8_SAMPLER_INSTDONE            XE_REG_MCR(0xe160)
>> +#define GEN7_ROW_INSTDONE            XE_REG(0xe164)
>> +#define GEN8_ROW_INSTDONE            XE_REG_MCR(0xe164)
>> +
>> #define HALF_SLICE_CHICKEN5            XE_REG_MCR(0xe188, 
>> XE_REG_OPTION_MASKED)
>> #define   DISABLE_SAMPLE_G_PERFORMANCE        REG_BIT(0)
>>
>> @@ -478,10 +542,16 @@
>> #define   GT_CS_MASTER_ERROR_INTERRUPT        REG_BIT(3)
>> #define   GT_RENDER_USER_INTERRUPT        REG_BIT(0)
>>
>> +#define GEN12_SFC_DONE(n)            XE_REG(0x1cc000 + (n) * 0x1000)
>> +
>> #define PVC_GT0_PACKAGE_ENERGY_STATUS        XE_REG(0x281004)
>> #define PVC_GT0_PACKAGE_RAPL_LIMIT        XE_REG(0x281008)
>> #define PVC_GT0_PACKAGE_POWER_SKU_UNIT        XE_REG(0x281068)
>> #define PVC_GT0_PLATFORM_ENERGY_STATUS        XE_REG(0x28106c)
>> #define PVC_GT0_PACKAGE_POWER_SKU        XE_REG(0x281080)
>>
> 
> - drop any GEN prefixes/suffixes
> - only add registers you're actually using
Thanks, I will trim it down in next version.
> 
> 
>> +/* 32 fences + sign bit for FENCE_REG_NONE */
>> +#define XE_MAX_NUM_FENCE_BITS 6
>> +#define XE_MAX_NUM_FENCES (2 ^ (XE_MAX_NUM_FENCE_BITS - 1))
>> +
>> #endif
>> diff --git a/drivers/gpu/drm/xe/regs/xe_reg_defs.h 
>> b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
>> index c50e7650c09a..15869c2e2a52 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_reg_defs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_reg_defs.h
>> @@ -57,7 +57,6 @@ struct xe_reg_mcr {
>>     struct xe_reg __reg;
>> };
>>
>> -
>> /**
>>  * XE_REG_OPTION_MASKED - Register is "masked", with upper 16 bits 
>> marking the
>>  * written bits on the lower 16 bits.
>> diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h 
>> b/drivers/gpu/drm/xe/regs/xe_regs.h
>> index 2c214bb9b671..574a5a78739c 100644
>> --- a/drivers/gpu/drm/xe/regs/xe_regs.h
>> +++ b/drivers/gpu/drm/xe/regs/xe_regs.h
>> @@ -7,6 +7,10 @@
>>
>> #include "regs/xe_reg_defs.h"
>>
>> +#define EIR                    XE_REG(0x20b0)
>> +
>> +#define HSW_GTT_CACHE_EN            XE_REG(0x4024)
>> +
>> #define TIMESTAMP_OVERRIDE                    XE_REG(0x44074)
>> #define   TIMESTAMP_OVERRIDE_US_COUNTER_DENOMINATOR_MASK    
>> REG_GENMASK(15, 12)
>> #define   TIMESTAMP_OVERRIDE_US_COUNTER_DIVIDER_MASK        
>> REG_GENMASK(9, 0)
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h 
>> b/drivers/gpu/drm/xe/xe_bo_types.h
>> index 64c2249a4e40..cdef4dd1ef06 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -44,6 +44,8 @@ struct xe_bo {
>>     struct iosys_map vmap;
>>     /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
>>     struct ttm_bo_kmap_obj kmap;
>> +    /** @ads_map: contents of the GuC ADS */
>> +    struct iosys_map ads_map;
> 
> 
> that would be very weird.... a map to the ADS inside each bo?? It sounds
> like a bad architecture of the feature, but grepping on this patch it
> seems this is not used at all
Thanks. This ads_map is not needed and to be removed in next version.
> 
>>     /** @pinned_link: link to present / evicted list of pinned BO */
>>     struct list_head pinned_link;
>> #ifdef CONFIG_PROC_FS
>> diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c 
>> b/drivers/gpu/drm/xe/xe_devcoredump.c
>> index 68abc0b195be..b032d6e2cc3a 100644
>> --- a/drivers/gpu/drm/xe/xe_devcoredump.c
>> +++ b/drivers/gpu/drm/xe/xe_devcoredump.c
>> @@ -14,6 +14,7 @@
>> #include "xe_force_wake.h"
>> #include "xe_gt.h"
>> #include "xe_guc_ct.h"
>> +#include "xe_guc_capture.h"
>> #include "xe_guc_submit.h"
>> #include "xe_hw_engine.h"
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h 
>> b/drivers/gpu/drm/xe/xe_gt_printk.h
>> index c2b004d3f48e..107360edfcd6 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_printk.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_printk.h
>> @@ -22,6 +22,9 @@
>> #define xe_gt_notice(_gt, _fmt, ...) \
>>     xe_gt_printk((_gt), notice, _fmt, ##__VA_ARGS__)
>>
>> +#define xe_gt_notice_ratelimited(_gt, _fmt, ...) \
>> +    xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
> 
> if this is really need, it should be a standalone patch.
Thanks, I will trim it down in next version.
> 
>> +
>> #define xe_gt_info(_gt, _fmt, ...) \
>>     xe_gt_printk((_gt), info, _fmt, ##__VA_ARGS__)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c 
>> b/drivers/gpu/drm/xe/xe_gt_topology.c
>> index a8d7f272c30a..696e41aa5596 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_topology.c
>> +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
>> @@ -96,11 +96,13 @@ xe_gt_topology_init(struct xe_gt *gt)
>>     drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
>>     drm_WARN_ON(&xe->drm, num_compute_regs > 3);
>>
>> +    gt->fuse_topo.g_dss_num_max = num_geometry_regs;
> 
> wat?  dss_num_max has nothing to do with how many registers we use.
> 
>>     load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
>>               num_geometry_regs,
>>               XELP_GT_GEOMETRY_DSS_ENABLE,
>>               XE2_GT_GEOMETRY_DSS_1,
>>               XE2_GT_GEOMETRY_DSS_2);
>> +    gt->fuse_topo.c_dss_num_max = num_compute_regs;
>>     load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
>>               XEHP_GT_COMPUTE_DSS_ENABLE,
>>               XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
>> diff --git a/drivers/gpu/drm/xe/xe_gt_types.h 
>> b/drivers/gpu/drm/xe/xe_gt_types.h
>> index f74684660475..4c0540abfecb 100644
>> --- a/drivers/gpu/drm/xe/xe_gt_types.h
>> +++ b/drivers/gpu/drm/xe/xe_gt_types.h
>> @@ -328,6 +328,12 @@ struct xe_gt {
>>
>>         /** @eu_mask_per_dss: EU mask per DSS*/
>>         xe_eu_mask_t eu_mask_per_dss;
>> +        /** @g_dss_num_max: count of dual-subslices usable by 
>> geometry */
>> +        u8 g_dss_num_max;
>> +
>> +        /** @c_dss_num_max: count of dual-subslices usable by compute */
>> +        u8 c_dss_num_max;
> 
> 
> not really needed. check calc_topo_query_size() and xe_query.c in
> general on how to derive the info you need.
> 
> Lucas De Marchi
Thanks, I will dig into details.
> 
>> diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c 
>> b/drivers/gpu/drm/xe/xe_guc_capture.c
>> new file mode 100644
>> index 000000000000..ace5115cae6c
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_guc_capture.c
>> @@ -0,0 +1,1574 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright © 2021-2022 Intel Corporation
>> + */
>> +
>> +#include <linux/types.h>
>> +
>> +#include <drm/drm_print.h>
>> +
>> +#include "abi/guc_actions_abi.h"
>> +#include "regs/xe_regs.h"
>> +#include "regs/xe_engine_regs.h"
>> +#include "regs/xe_gt_regs.h"
>> +#include "regs/xe_guc_regs.h"
>> +
>> +#include "xe_guc_capture_fwif.h"
>> +
>> +#include "xe_bo.h"
>> +#include "xe_device.h"
>> +#include "xe_exec_queue_types.h"
>> +#include "xe_hw_engine_types.h"
>> +#include "xe_gt.h"
>> +#include "xe_gt_printk.h"
>> +#include "xe_guc.h"
>> +#include "xe_guc_capture.h"
>> +#include "xe_guc_ct.h"
>> +
>> +#include "xe_guc_log.h"
>> +#include "xe_gt_mcr.h"
>> +#include "xe_guc_submit.h"
>> +#include "xe_macros.h"
>> +#include "xe_map.h"
> 
> 
> here you should add kernel-doc on what this feature is about, how it's 
> used etc >
> 
> I think this is enough feedback for now
> 
> 
> Lucas De Marchi


More information about the Intel-xe mailing list