[PATCH v10 4/8] drm/xe/eustall: Add support to read() and poll() EU stall data

Dixit, Ashutosh ashutosh.dixit at intel.com
Wed Feb 19 18:15:52 UTC 2025


On Tue, 18 Feb 2025 11:53:54 -0800, Harish Chegondi wrote:
>
> @@ -39,7 +40,9 @@ struct per_xecore_buf {
>  };
>
>  struct xe_eu_stall_data_stream {
> +	bool pollin;
>	bool enabled;
> +	wait_queue_head_t poll_wq;
>	size_t data_record_size;
>	size_t per_xecore_buf_size;
>	unsigned int wait_num_reports;
> @@ -47,7 +50,11 @@ struct xe_eu_stall_data_stream {
>
>	struct xe_gt *gt;
>	struct xe_bo *bo;
> +	/* Lock to protect xecore_buf */
> +	struct mutex buf_lock;

Why do we need this new lock? I thought we would just be able to use
gt->eu_stall->stream_lock? stream_lock is already taken for read(), so we
just need to take it for eu_stall_data_buf_poll()?

> @@ -357,16 +580,26 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
>			  max_wait_num_reports);
>		return -EINVAL;
>	}
> +
> +	init_waitqueue_head(&stream->poll_wq);
> +	INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
> +	mutex_init(&stream->buf_lock);
> +	stream->buf_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
> +	if (!stream->buf_poll_wq)
> +		return -ENOMEM;
>	stream->per_xecore_buf_size = per_xecore_buf_size;
>	stream->sampling_rate_mult = props->sampling_rate_mult;
>	stream->wait_num_reports = props->wait_num_reports;
>	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
>	stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
> -	if (!stream->xecore_buf)
> +	if (!stream->xecore_buf) {
> +		destroy_workqueue(stream->buf_poll_wq);
>		return -ENOMEM;
> +	}
>
>	ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
>	if (ret) {
> +		destroy_workqueue(stream->buf_poll_wq);
>		kfree(stream->xecore_buf);

OK, won't block on this, but error unwinding is cleaner with label's and
goto's.

Also, if stream->buf_lock is needed, mutex_destroy is also needed for error
unwinding and also during stream close.

> diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> index d5281de04d54..1cc6bfc34ccb 100644
> --- a/drivers/gpu/drm/xe/xe_trace.h
> +++ b/drivers/gpu/drm/xe/xe_trace.h
> @@ -427,6 +427,39 @@ DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl,
>	     TP_ARGS(xe, caller)
>  );
>
> +TRACE_EVENT(xe_eu_stall_data_read,
> +	    TP_PROTO(u8 slice, u8 subslice,
> +		     u32 read_ptr, u32 write_ptr,
> +		     u32 read_offset, u32 write_offset,
> +		     size_t total_size),
> +	    TP_ARGS(slice, subslice, read_ptr, write_ptr,
> +		    read_offset, write_offset, total_size),
> +
> +	    TP_STRUCT__entry(__field(u8, slice)
> +			     __field(u8, subslice)
> +			     __field(u32, read_ptr)
> +			     __field(u32, write_ptr)
> +			     __field(u32, read_offset)
> +			     __field(u32, write_offset)
> +			     __field(size_t, total_size)
> +			     ),
> +
> +	    TP_fast_assign(__entry->slice = slice;
> +			   __entry->subslice = subslice;
> +			   __entry->read_ptr = read_ptr;
> +			   __entry->write_ptr = write_ptr;
> +			   __entry->read_offset = read_offset;
> +			   __entry->write_offset = write_offset;

Keep it if we need it, but do we need both the read/write ptr's and
offset's? Since offset's are the same as ptr's, but without the ms bit.

> +			   __entry->total_size = total_size;
> +			   ),
> +
> +	    TP_printk("slice:%u subslice:%u readptr:0x%x writeptr:0x%x read off:%u write off:%u size:%zu ",
> +		      __entry->slice, __entry->subslice,
> +		      __entry->read_ptr, __entry->write_ptr,
> +		      __entry->read_offset, __entry->write_offset,
> +		      __entry->total_size)
> +);
> +
>  #endif
>
>  /* This part must be outside protection */
> --
> 2.48.1
>


More information about the Intel-xe mailing list