[PATCH v10 4/8] drm/xe/eustall: Add support to read() and poll() EU stall data
Dixit, Ashutosh
ashutosh.dixit at intel.com
Wed Feb 19 18:15:52 UTC 2025
On Tue, 18 Feb 2025 11:53:54 -0800, Harish Chegondi wrote:
>
> @@ -39,7 +40,9 @@ struct per_xecore_buf {
> };
>
> struct xe_eu_stall_data_stream {
> + bool pollin;
> bool enabled;
> + wait_queue_head_t poll_wq;
> size_t data_record_size;
> size_t per_xecore_buf_size;
> unsigned int wait_num_reports;
> @@ -47,7 +50,11 @@ struct xe_eu_stall_data_stream {
>
> struct xe_gt *gt;
> struct xe_bo *bo;
> + /* Lock to protect xecore_buf */
> + struct mutex buf_lock;
Why do we need this new lock? I thought we would just be able to use
gt->eu_stall->stream_lock? stream_lock is already taken for read(), so we
just need to take it for eu_stall_data_buf_poll()?
> @@ -357,16 +580,26 @@ static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
> max_wait_num_reports);
> return -EINVAL;
> }
> +
> + init_waitqueue_head(&stream->poll_wq);
> + INIT_DELAYED_WORK(&stream->buf_poll_work, eu_stall_data_buf_poll_work_fn);
> + mutex_init(&stream->buf_lock);
> + stream->buf_poll_wq = alloc_ordered_workqueue("xe_eu_stall", 0);
> + if (!stream->buf_poll_wq)
> + return -ENOMEM;
> stream->per_xecore_buf_size = per_xecore_buf_size;
> stream->sampling_rate_mult = props->sampling_rate_mult;
> stream->wait_num_reports = props->wait_num_reports;
> stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
> stream->xecore_buf = kcalloc(last_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
> - if (!stream->xecore_buf)
> + if (!stream->xecore_buf) {
> + destroy_workqueue(stream->buf_poll_wq);
> return -ENOMEM;
> + }
>
> ret = xe_eu_stall_data_buf_alloc(stream, last_xecore);
> if (ret) {
> + destroy_workqueue(stream->buf_poll_wq);
> kfree(stream->xecore_buf);
OK, won't block on this, but error unwinding is cleaner with label's and
goto's.
Also, if stream->buf_lock is needed, mutex_destroy is also needed for error
unwinding and also during stream close.
> diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
> index d5281de04d54..1cc6bfc34ccb 100644
> --- a/drivers/gpu/drm/xe/xe_trace.h
> +++ b/drivers/gpu/drm/xe/xe_trace.h
> @@ -427,6 +427,39 @@ DEFINE_EVENT(xe_pm_runtime, xe_pm_runtime_get_ioctl,
> TP_ARGS(xe, caller)
> );
>
> +TRACE_EVENT(xe_eu_stall_data_read,
> + TP_PROTO(u8 slice, u8 subslice,
> + u32 read_ptr, u32 write_ptr,
> + u32 read_offset, u32 write_offset,
> + size_t total_size),
> + TP_ARGS(slice, subslice, read_ptr, write_ptr,
> + read_offset, write_offset, total_size),
> +
> + TP_STRUCT__entry(__field(u8, slice)
> + __field(u8, subslice)
> + __field(u32, read_ptr)
> + __field(u32, write_ptr)
> + __field(u32, read_offset)
> + __field(u32, write_offset)
> + __field(size_t, total_size)
> + ),
> +
> + TP_fast_assign(__entry->slice = slice;
> + __entry->subslice = subslice;
> + __entry->read_ptr = read_ptr;
> + __entry->write_ptr = write_ptr;
> + __entry->read_offset = read_offset;
> + __entry->write_offset = write_offset;
Keep it if we need it, but do we need both the read/write ptr's and
offset's? Since offset's are the same as ptr's, but without the ms bit.
> + __entry->total_size = total_size;
> + ),
> +
> + TP_printk("slice:%u subslice:%u readptr:0x%x writeptr:0x%x read off:%u write off:%u size:%zu ",
> + __entry->slice, __entry->subslice,
> + __entry->read_ptr, __entry->write_ptr,
> + __entry->read_offset, __entry->write_offset,
> + __entry->total_size)
> +);
> +
> #endif
>
> /* This part must be outside protection */
> --
> 2.48.1
>
More information about the Intel-xe
mailing list