[PATCH v10 3/8] drm/xe/eustall: Add support to init, enable and disable EU stall sampling

Dixit, Ashutosh ashutosh.dixit at intel.com
Wed Feb 19 15:27:24 UTC 2025


On Tue, 18 Feb 2025 11:53:53 -0800, Harish Chegondi wrote:
>
> +static void xe_eu_stall_stream_free(struct xe_eu_stall_data_stream *stream)
> +{
> +	struct xe_gt *gt = stream->gt;
> +
> +	gt->eu_stall->stream = NULL;
> +	kfree(stream);
> +}
> +
> +static void xe_eu_stall_data_buf_destroy(struct xe_eu_stall_data_stream *stream)
> +{
> +	xe_bo_unpin_map_no_vm(stream->bo);
> +	kfree(stream->xecore_buf);
> +}
> +
> +static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
> +				      u16 last_xecore)
> +{
> +	struct xe_tile *tile = stream->gt->tile;
> +	struct xe_bo *bo;
> +	u32 size;
> +
> +	size = stream->per_xecore_buf_size * last_xecore;
> +
> +	bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
> +					     size, ~0ull, ttm_bo_type_kernel,
> +					     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
> +	if (IS_ERR(bo))
> +		return PTR_ERR(bo);
> +
> +	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
> +	stream->bo = bo;
> +
> +	return 0;
> +}

/snip/

> +static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
> +				   struct eu_stall_open_properties *props)
> +{
> +	unsigned int max_wait_num_reports, xecore, last_xecore, num_xecores;
> +	struct per_xecore_buf *xecore_buf;
> +	struct xe_gt *gt = stream->gt;
> +	xe_dss_mask_t all_xecores;
> +	u16 group, instance;
> +	u32 vaddr_offset;
> +	int ret;
> +
> +	bitmap_or(all_xecores, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
> +		  XE_MAX_DSS_FUSE_BITS);
> +	num_xecores = bitmap_weight(all_xecores, XE_MAX_DSS_FUSE_BITS);
> +	last_xecore = xe_gt_topology_mask_last_dss(all_xecores) + 1;
> +
> +	max_wait_num_reports = num_data_rows(per_xecore_buf_size * num_xecores);
> +	if (props->wait_num_reports == 0 || props->wait_num_reports > max_wait_num_reports) {
> +		xe_gt_dbg(gt, "Invalid EU stall event report count %u\n",
> +			  props->wait_num_reports);
> +		xe_gt_dbg(gt, "Minimum event report count is 1, maximum is %u\n",
> +			  max_wait_num_reports);
> +		return -EINVAL;
> +	}
> +	stream->per_xecore_buf_size = per_xecore_buf_size;
> +	stream->sampling_rate_mult = props->sampling_rate_mult;
> +	stream->wait_num_reports = props->wait_num_reports;
> +	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
> +	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
> +	if (!stream->xecore_buf)
> +		return -ENOMEM;

How about moving this stream->xecore_buf allocation into
xe_eu_stall_data_buf_alloc()? Just move it there, before the stream->bo
allocation.

Because in xe_eu_stall_data_buf_destroy() we are freeing both stream->bo
and stream->xecore_buf. So if we move this stream->xecore_buf allocation
into xe_eu_stall_data_buf_alloc, xe_eu_stall_data_buf_alloc and
xe_eu_stall_data_buf_destroy will be completely symmetric.

By "data_buf" we can then understand stream->bo and stream->xecore_buf taken
together. There are other ways of doing this too, but I think this is the
simplest change.

After addressing the comment above, the rest LGTM, so this patch is
also:

Reviewed-by: Ashutosh Dixit <ashutosh.dixit at intel.com>


More information about the Intel-xe mailing list