[PATCH v9 3/8] drm/xe/eustall: Add support to init, enable and disable EU stall sampling

Harish Chegondi harish.chegondi at intel.com
Tue Feb 11 22:02:36 UTC 2025


On Tue, Feb 11, 2025 at 09:33:55AM -0800, Dixit, Ashutosh wrote:
> On Mon, 10 Feb 2025 05:46:44 -0800, Harish Chegondi wrote:
> >
> 
Hi Ashutosh,
> Hi Harish,
> 
> > diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
> > index 0ceb3091f81e..12afa9720971 100644
> > --- a/drivers/gpu/drm/xe/xe_eu_stall.c
> > +++ b/drivers/gpu/drm/xe/xe_eu_stall.c
> > @@ -8,17 +8,57 @@
> >  #include <linux/poll.h>
> >  #include <linux/types.h>
> >
> > +#include <drm/drm_drv.h>
> >  #include <uapi/drm/xe_drm.h>
> >
> > +#include "xe_bo.h"
> >  #include "xe_device.h"
> >  #include "xe_eu_stall.h"
> > +#include "xe_force_wake.h"
> > +#include "xe_gt_mcr.h"
> >  #include "xe_gt_printk.h"
> >  #include "xe_gt_topology.h"
> >  #include "xe_macros.h"
> >  #include "xe_observation.h"
> > +#include "xe_pm.h"
> > +
> > +#include "regs/xe_eu_stall_regs.h"
> > +#include "regs/xe_gt_regs.h"
> > +
> > +#define POLL_PERIOD_MS	10
> >
> >  static size_t per_xecore_buf_size = SZ_512K;
> >
> > +struct per_xecore_buf {
> > +	/* Buffer vaddr */
> > +	u8 *vaddr;
> > +	/* Write pointer */
> > +	u32 write;
> > +	/* Read pointer */
> > +	u32 read;
> > +	/* lock to protect read and write pointers */
> > +	struct mutex ptr_lock;
> > +};
> > +
> > +struct xe_eu_stall_data_stream {
> > +	bool enabled;
> > +	size_t data_record_size;
> > +	size_t per_xecore_buf_size;
> > +	unsigned int wait_num_reports;
> > +	unsigned int sampling_rate_mult;
> > +
> > +	struct xe_gt *gt;
> > +	struct xe_bo *bo;
> > +	struct per_xecore_buf *xecore_buf;
> > +};
> > +
> > +struct xe_eu_stall_gt {
> > +	/* Lock to protect stream */
> > +	struct mutex stream_lock;
> > +	/* EU stall data stream */
> > +	struct xe_eu_stall_data_stream *stream;
> > +};
> > +
> >  /**
> >   * struct eu_stall_open_properties - EU stall sampling properties received
> >   *				     from user space at open.
> > @@ -33,6 +73,47 @@ struct eu_stall_open_properties {
> >	struct xe_gt *gt;
> >  };
> >
> > +/**
> > + * struct xe_eu_stall_data_pvc - EU stall data format for PVC
> > + *			Bits		Field
> > + * @ip_addr:		0  to 28	IP (addr)
> > + * @active_count:	29 to 36	active count
> > + * @other_count:	37 to 44	other count
> > + * @control_count:	45 to 52	control count
> > + * @pipestall_count:	53 to 60	pipestall count
> > + * @send_count:		61 to 68	send count
> > + * @dist_acc_count:	69 to 76	dist_acc count
> > + * @sbid_count:		77 to 84	sbid count
> > + * @sync_count:		85 to 92	sync count
> > + * @inst_fetch_count:	93 to 100	inst_fetch count
> > + * @unused_bits:	101 to 127	unused bits
> > + * @unused:		remaining	unused bytes
> > + */
> 
> I'd say delete this comment, it seems to be no zero extra information above
> what is already included in the struct below.
> 
> If you want to document the bit positions, add them on the same line as the
> struct field, e.g.:
> 
> 	__u64 active_count:8; /* bits 36:29 */

hooks scripts complain if I don't describe individual fields. I either
have to move the descriptions above each field or remove the description
of the structure above - "struct xe_eu_stall_data_pvc - EU stall data
format for PVC". Let me check if the hooks scripts complain if I
document the bit positions like you suggested.

> 
> > +struct xe_eu_stall_data_pvc {
> > +	__u64 ip_addr:29;
> > +	__u64 active_count:8;
> > +	__u64 other_count:8;
> > +	__u64 control_count:8;
> > +	__u64 pipestall_count:8;
> > +	__u64 send_count:8;
> > +	__u64 dist_acc_count:8;
> > +	__u64 sbid_count:8;
> > +	__u64 sync_count:8;
> > +	__u64 inst_fetch_count:8;
> > +	__u64 unused_bits:27;
> > +	__u64 unused[6];
> > +} __packed;
> > +
> > +static size_t xe_eu_stall_data_record_size(struct xe_device *xe)
> > +{
> > +	unsigned long record_size = 0;
> > +
> > +	if (xe->info.platform == XE_PVC)
> > +		record_size = sizeof(struct xe_eu_stall_data_pvc);
> > +
> > +	return record_size;
> > +}
> > +
> >  /**
> >   * num_data_rows - Return the number of EU stall data rows of 64B each
> >   *		   for a given data size.
> > @@ -44,6 +125,36 @@ static u32 num_data_rows(u32 data_size)
> >	return (data_size >> 6);
> >  }
> >
> > +/**
> > + * xe_eu_stall_init() - Allocate and initialize GT level EU stall data
> > + *			structure xe_eu_stall_gt within struct xe_gt.
> > + *
> > + * @gt: GT being initialized.
> > + *
> > + * Returns: zero on success or a negative error code.
> > + */
> > +int xe_eu_stall_init(struct xe_gt *gt)
> > +{
> > +	gt->eu_stall = kzalloc(sizeof(*gt->eu_stall), GFP_KERNEL);
> > +	if (!gt->eu_stall)
> > +		return -ENOMEM;
> > +
> > +	mutex_init(&gt->eu_stall->stream_lock);
> > +	return 0;
> > +}
> > +
> > +/**
> > + * xe_eu_stall_fini() - Clean up the GT level EU stall data
> > + *			structure xe_eu_stall_gt within struct xe_gt.
> > + *
> > + * @gt: GT being cleaned up.
> > + */
> > +void xe_eu_stall_fini(struct xe_gt *gt)
> > +{
> > +	mutex_destroy(&gt->eu_stall->stream_lock);
> > +	kfree(gt->eu_stall);
> > +}
> > +
> >  static int set_prop_eu_stall_sampling_rate(struct xe_device *xe, u64 value,
> >					   struct eu_stall_open_properties *props)
> >  {
> > @@ -166,6 +277,120 @@ static ssize_t xe_eu_stall_stream_read(struct file *file, char __user *buf,
> >	return ret;
> >  }
> >
> > +static void free_eu_stall_data_buf(struct xe_eu_stall_data_stream *stream)
> > +{
> > +	if (stream->bo)
> > +		xe_bo_unpin_map_no_vm(stream->bo);
> > +}
> > +
> > +static int alloc_eu_stall_data_buf(struct xe_eu_stall_data_stream *stream,
> > +				   u16 num_xecore)
> > +{
> > +	struct xe_tile *tile = stream->gt->tile;
> > +	struct xe_bo *bo;
> > +	u32 size;
> > +
> > +	size = stream->per_xecore_buf_size * num_xecore;
> > +
> > +	bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
> > +					     size, ~0ull, ttm_bo_type_kernel,
> > +					     XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
> > +	if (IS_ERR(bo))
> > +		return PTR_ERR(bo);
> > +
> > +	XE_WARN_ON(!IS_ALIGNED(xe_bo_ggtt_addr(bo), SZ_64));
> > +	stream->bo = bo;
> > +
> > +	return 0;
> > +}
> > +
> > +static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
> > +{
> > +	u32 write_ptr_reg, write_ptr, read_ptr_reg, reg_value;
> > +	struct per_xecore_buf *xecore_buf;
> > +	struct xe_gt *gt = stream->gt;
> > +	u16 group, instance;
> > +	unsigned int fw_ref;
> > +	int xecore;
> > +
> > +	/* Take runtime pm ref and forcewake to disable RC6 */
> > +	xe_pm_runtime_get(gt_to_xe(gt));
> > +	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
> > +	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
> > +		xe_gt_err(gt, "Failed to get RENDER forcewake\n");
> > +		xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
> 
> The above line should not be there.
I think you are correct. Not sure why there are several places in the xe
driver where xe_force_wake_put() is called if a forcewake ref doesn't
have a domain. It makes sense for XE_FORCEWAKE_ALL, as getting a
reference on XE_FORCEWAKE_ALL will only get reference on applicable
domains. Also I didn't use XE_FORCEWAKE_ALL as the all the EU stall
hardware is in the XE_FW_RENDER domain so don't need XE_FORCEWAKE_ALL.
> 
> > +		xe_pm_runtime_put(gt_to_xe(gt));
> > +		return -ETIMEDOUT;
> > +	}
> > +
> > +	for_each_dss_steering(xecore, gt, group, instance) {
> > +		write_ptr_reg = xe_gt_mcr_unicast_read(gt, XEHPC_EUSTALL_REPORT, group, instance);
> > +		write_ptr = REG_FIELD_GET(XEHPC_EUSTALL_REPORT_WRITE_PTR_MASK, write_ptr_reg);
> > +		read_ptr_reg = REG_FIELD_PREP(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, write_ptr);
> > +		read_ptr_reg = _MASKED_FIELD(XEHPC_EUSTALL_REPORT1_READ_PTR_MASK, read_ptr_reg);
> 
> My previous question regarding this is still unanswered: "Why do we need to
> do all this, read the write ptr reg and write to read ptr register? Can't
> we just set both read and write ptr to the start of the per xecore buf
> address (likely stream->bo ggtt_addr).

Hardware doesn't allow writes to the write pointer. I believe it may be
a security risk and therefore doesn't allow it. That's the reason I read
the write pointer and set the value to the read pointer.

> 
> > +		/* Initialize the read pointer to the write pointer */
> > +		xe_gt_mcr_unicast_write(gt, XEHPC_EUSTALL_REPORT1, read_ptr_reg, group, instance);
> > +		write_ptr <<= 6;
> > +		write_ptr &= (stream->per_xecore_buf_size << 1) - 1;
> > +		xecore_buf = &stream->xecore_buf[xecore];
> > +		xecore_buf->write = write_ptr;
> > +		xecore_buf->read = write_ptr;
> > +	}
> > +	reg_value = _MASKED_FIELD(EUSTALL_MOCS | EUSTALL_SAMPLE_RATE,
> > +				  REG_FIELD_PREP(EUSTALL_MOCS, gt->mocs.uc_index << 1) |
> 
> Please explain this mocs.uc_index business, as I mentioned in the prev version.
MOCS stands for Memory Object Control State is used to specify memory
attributes. MOCS is a set of registers with each register specifying a
certain combination of memory attributes. MOCS index is used to look up the set of
MOCS registers where the corresponding control parameters are specified.

Bits[3:9] in the control register indicate the MOCS programming that
should be applied to stall data eviction. As per the format, bit-0 is
reserved and bits 1 to 6 are for MOCS table index. Therefore the index
is shifted left by 1.

> 
> > +				  REG_FIELD_PREP(EUSTALL_SAMPLE_RATE,
> > +						 stream->sampling_rate_mult));
> > +	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_CTRL, reg_value);
> > +	/* GGTT addresses can never be > 32 bits */
> > +	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE_UPPER, 0);
> > +	reg_value = xe_bo_ggtt_addr(stream->bo);
> > +	reg_value |= REG_FIELD_PREP(XEHPC_EUSTALL_BASE_XECORE_BUF_SZ, 2);
> 
> This "2" should be derived from per_xecore_buf_size or
> stream->per_xecore_buf_size by dividing by 256, as I previously indicated.

Even though the hardware allows the user to configure the buffer size to
128K/256K/512K, for simplicity we decided to use the highest buffer size
512K for which the register setting is 2. If the future I plan to send
out a patch that allows the user to change the buffer size via debugfs
as the user may choose to use a smaller buffer size for debug purpose.
In that patch I will add as you suggested. But for now, I think it
is not required to divide by 256.

> 
> > +	reg_value |= XEHPC_EUSTALL_BASE_ENABLE_SAMPLING;
> > +	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, reg_value);
> > +
> > +	return 0;
> > +}
> > +
> > +static int xe_eu_stall_stream_init(struct xe_eu_stall_data_stream *stream,
> > +				   struct eu_stall_open_properties *props)
> > +{
> > +	struct per_xecore_buf *xecore_buf;
> > +	u16 num_xecore, group, instance;
> > +	struct xe_gt *gt = stream->gt;
> > +	xe_dss_mask_t all_xecore;
> > +	u32 vaddr_offset;
> 
> Should probably be size_t
> 
> > +	int ret, xecore;
> > +
> > +	stream->sampling_rate_mult = props->sampling_rate_mult;
> > +	stream->wait_num_reports = props->wait_num_reports;
> > +	stream->per_xecore_buf_size = per_xecore_buf_size;
> > +	stream->data_record_size = xe_eu_stall_data_record_size(gt_to_xe(gt));
> > +
> > +	bitmap_or(all_xecore, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
> > +		  XE_MAX_DSS_FUSE_BITS);
> > +	/*
> > +	 * Enabled subslices can be discontiguous. Find the maximum number of subslices
> > +	 * that are enabled.
> > +	 */
> > +	num_xecore = xe_gt_topology_mask_last_dss(all_xecore) + 1;
> > +
> > +	ret = alloc_eu_stall_data_buf(stream, num_xecore);
> > +	if (ret)
> > +		return ret;
> > +
> > +	stream->xecore_buf = kcalloc(num_xecore, sizeof(*stream->xecore_buf), GFP_KERNEL);
> > +	if (!stream->xecore_buf)
> > +		return -ENOMEM;
> 
> What about free_eu_stall_data_buf here?
> 
> The way to do this is: either the function succeeds, or if it fails, it
> frees all resources it allocates before returning.
Will fix it.
> 
> > +
> > +	for_each_dss_steering(xecore, gt, group, instance) {
> > +		xecore_buf = &stream->xecore_buf[xecore];
> > +		vaddr_offset = xecore * stream->per_xecore_buf_size;
> > +		xecore_buf->vaddr = stream->bo->vmap.vaddr + vaddr_offset;
> > +		mutex_init(&xecore_buf->ptr_lock);
> > +	}
> > +	return 0;
> > +}
> > +
> >  /**
> >   * xe_eu_stall_stream_poll - handles userspace poll() of a EU stall data stream fd.
> >   *
> > @@ -181,6 +406,49 @@ static __poll_t xe_eu_stall_stream_poll(struct file *file, poll_table *wait)
> >	return ret;
> >  }
> >
> > +static int xe_eu_stall_enable_locked(struct xe_eu_stall_data_stream *stream)
> > +{
> > +	int ret = 0;
> > +
> > +	if (stream->enabled)
> > +		return ret;
> > +
> > +	stream->enabled = true;
> > +
> > +	ret = xe_eu_stall_stream_enable(stream);
> > +	return ret;
> > +}
> > +
> > +static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
> > +{
> > +	struct xe_gt *gt = stream->gt;
> > +
> > +	if (!stream->enabled)
> > +		return 0;
> > +
> > +	stream->enabled = false;
> > +
> > +	xe_gt_mcr_multicast_write(gt, XEHPC_EUSTALL_BASE, 0);
> > +
> > +	xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
> > +	xe_pm_runtime_put(gt_to_xe(gt));
> > +
> > +	return 0;
> > +}
> > +
> > +static long xe_eu_stall_stream_ioctl_locked(struct xe_eu_stall_data_stream *stream,
> > +					    unsigned int cmd, unsigned long arg)
> > +{
> > +	switch (cmd) {
> > +	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
> > +		return xe_eu_stall_enable_locked(stream);
> > +	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
> > +		return xe_eu_stall_disable_locked(stream);
> > +	}
> > +
> > +	return -EINVAL;
> > +}
> > +
> >  /**
> >   * xe_eu_stall_stream_ioctl - support ioctl() of a xe EU stall data stream fd.
> >   *
> > @@ -195,14 +463,22 @@ static long xe_eu_stall_stream_ioctl(struct file *file,
> >				     unsigned int cmd,
> >				     unsigned long arg)
> >  {
> > -	switch (cmd) {
> > -	case DRM_XE_OBSERVATION_IOCTL_ENABLE:
> > -		return 0;
> > -	case DRM_XE_OBSERVATION_IOCTL_DISABLE:
> > -		return 0;
> > -	}
> > +	struct xe_eu_stall_data_stream *stream = file->private_data;
> > +	struct xe_gt *gt = stream->gt;
> > +	long ret;
> >
> > -	return -EINVAL;
> > +	mutex_lock(&gt->eu_stall->stream_lock);
> > +	ret = xe_eu_stall_stream_ioctl_locked(stream, cmd, arg);
> > +	mutex_unlock(&gt->eu_stall->stream_lock);
> > +
> > +	return ret;
> > +}
> > +
> > +static void
> > +xe_eu_stall_stream_close_locked(struct xe_eu_stall_data_stream *stream)
> > +{
> > +	xe_eu_stall_disable_locked(stream);
> > +	free_eu_stall_data_buf(stream);
> >  }
> 
> I don't see the point of this function since it is just called from one
> place xe_eu_stall_stream_close. You might as well put these 2 lines of code
> there.
Will remove this.
> 
> As suggested below we can have a xe_eu_stall_stream_destroy() which can be
> called from xe_eu_stall_stream_open_locked() and from
> xe_eu_stall_stream_close(). But optional.
> 
> >
> >  /**
> > @@ -215,6 +491,19 @@ static long xe_eu_stall_stream_ioctl(struct file *file,
> >   */
> >  static int xe_eu_stall_stream_close(struct inode *inode, struct file *file)
> >  {
> > +	struct xe_eu_stall_data_stream *stream = file->private_data;
> > +	struct xe_gt *gt = stream->gt;
> > +
> > +	mutex_lock(&gt->eu_stall->stream_lock);
> > +	xe_eu_stall_stream_close_locked(stream);
> > +	kfree(stream->xecore_buf);
> > +	kfree(stream);
> > +	gt->eu_stall->stream = NULL;
> 
> This sequence should generally be identical to the error unwind sequence in
> xe_eu_stall_stream_open_locked.
> 
> > +	mutex_unlock(&gt->eu_stall->stream_lock);
> > +
> > +	/* Release the reference the EU stall stream kept on the driver */
> > +	drm_dev_put(&gt->tile->xe->drm);
> > +
> >	return 0;
> >  }
> >
> > @@ -230,7 +519,67 @@ static const struct file_operations fops_eu_stall = {
> >
> >  static inline bool has_eu_stall_sampling_support(struct xe_device *xe)
> >  {
> > -	return false;
> > +	return ((xe->info.platform == XE_PVC) ? true : false);
> 
> return xe->info.platform == XE_PVC;
A subsequent patch adds another check for xe2. So, I used the
conditional operator.
> 
> > +}
> > +
> > +/**
> > + * xe_eu_stall_stream_open_locked - Open a EU stall data stream FD.
> > + * @dev: drm device instance
> > + * @props: individually validated u64 property value pairs
> > + * @file: drm file
> > + *
> > + * Returns: zero on success or a negative error code.
> > + */
> > +static int
> > +xe_eu_stall_stream_open_locked(struct drm_device *dev,
> > +			       struct eu_stall_open_properties *props,
> > +			       struct drm_file *file)
> > +{
> > +	struct xe_eu_stall_data_stream *stream;
> > +	struct xe_gt *gt = props->gt;
> > +	unsigned long f_flags = 0;
> > +	int ret, stream_fd;
> > +
> > +	/* Only one session can be active at any time */
> > +	if (gt->eu_stall->stream) {
> > +		xe_gt_dbg(gt, "EU stall sampling session already active\n");
> > +		return -EBUSY;
> > +	}
> > +
> > +	stream = kzalloc(sizeof(*stream), GFP_KERNEL);
> > +	if (!stream)
> > +		return -ENOMEM;
> > +
> > +	gt->eu_stall->stream = stream;
> > +	stream->gt = gt;
> > +
> > +	ret = xe_eu_stall_stream_init(stream, props);
> > +	if (ret) {
> > +		xe_gt_dbg(gt, "EU stall stream init failed : %d\n", ret);
> > +		goto err_alloc;
> 
> This is wrong as pointed below. If this fails we jump to 'kfree(stream)'.
When I wrote this code initially, I may have tried to reduce the goto
labels considering the fact that kfree() checks if the input ptr is NULL
and returns if so.
> 
> Also don't invent random label names. See
> xe_oa_stream_open_ioctl_locked(): err_destroy is for destroy, err_free for
> free etc.
> 
> So this should be goto err_free.
Will change it. I have seen code where the label is named err_alloc to
undo an alloc. I think each developer names the labels differently.
> 
> > +	}
> > +
> > +	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, stream, f_flags);
> > +	if (stream_fd < 0) {
> > +		ret = stream_fd;
> > +		xe_gt_dbg(gt, "EU stall inode get fd failed : %d\n", ret);
> > +		goto err_open;
> 
> goto err_destroy. see below.
> 
> > +	}
> > +
> > +	/* Take a reference on the driver that will be kept with stream_fd
> > +	 * until its release.
> > +	 */
> > +	drm_dev_get(&gt->tile->xe->drm);
> > +
> > +	return stream_fd;
> > +
> > +err_open:
> > +	free_eu_stall_data_buf(stream);
> > +err_alloc:
> > +	gt->eu_stall->stream = NULL;
> > +	kfree(stream->xecore_buf);
> 
> This is wrong, stream->xecore_buf is allocated in xe_eu_stall_stream_init
> and we are jumping here if xe_eu_stall_stream_init fails.
I will change it. But considering the fact that kfree() has a NULL
check, this is not a bug.
> 
> See xe_oa_stream_open_ioctl_locked() as an example for doing this unwinding
> correctly. We need a 'destroy' function which will free stuff allocated in
> xe_eu_stall_stream_init. And that function should be called from
> close/release too.
> 
> Ideally, we should not have to peek inside these functions to see what is
> being allocated there and then free them. The destroy function is the
> wrapper which should do all the free's.
> 
> Or maybe if you want to skip destroy() we can do that too, but at least the
> code here needs to be fixed.
> 
> > +	kfree(stream);
> 
> err_free label before this line. Should do kfree(stream) and
> 'gt->eu_stall->stream = NULL'.
> 
> > +	return ret;
> 
> So basically, without destroy(), this is something like:
> 
> 	err_destroy:
> 		kfree(stream->xecore_buf);
> 		free_eu_stall_data_buf(stream);
> 	err_free:
> 		kfree(stream);
> 		gt->eu_stall->stream = NULL;
> 		return ret;
> 
> Not what you have above. And the code under err_destroy can go into a
> destroy() function, but if you want we can skip that for now.
> 
> 
> >  }
> >
> >  /**
> > @@ -252,7 +601,7 @@ int xe_eu_stall_stream_open(struct drm_device *dev,
> >  {
> >	struct xe_device *xe = to_xe_device(dev);
> >	struct eu_stall_open_properties props = {};
> > -	int ret, stream_fd;
> > +	int ret;
> >
> >	if (xe_observation_paranoid && !perfmon_capable()) {
> >		xe_gt_dbg(props.gt, "Insufficient privileges for EU stall monitoring\n");
> > @@ -263,6 +612,10 @@ int xe_eu_stall_stream_open(struct drm_device *dev,
> >		return -EPERM;
> >	}
> >
> > +	/* Initialize and set default values */
> > +	props.wait_num_reports = 1;
> > +	props.sampling_rate_mult = 4;
> 
> What about props.gt? Can it also be initialized to gt[0], corresponding to
> default gt_id 0, basically default gt if not specified in uapi is gt0?

I had that initially, but there was a review comment in the first few
versions of this patch review, I think from Matt Roper that GT ID
should be mandatory from the user space.

> 
> > +
> >	ret = xe_eu_stall_user_extensions(xe, data, &props);
> >	if (ret)
> >		return ret;
> > @@ -272,9 +625,9 @@ int xe_eu_stall_stream_open(struct drm_device *dev,
> >		return -EINVAL;
> >	}
> >
> > -	stream_fd = anon_inode_getfd("[xe_eu_stall]", &fops_eu_stall, NULL, 0);
> > -	if (stream_fd < 0)
> > -		xe_gt_dbg(props.gt, "EU stall inode get fd failed : %d\n", stream_fd);
> > +	mutex_lock(&props.gt->eu_stall->stream_lock);
> > +	ret = xe_eu_stall_stream_open_locked(dev, &props, file);
> > +	mutex_unlock(&props.gt->eu_stall->stream_lock);
> >
> > -	return stream_fd;
> > +	return ret;
> >  }
> > diff --git a/drivers/gpu/drm/xe/xe_eu_stall.h b/drivers/gpu/drm/xe/xe_eu_stall.h
> > index d514e78341d5..e42250c1d294 100644
> > --- a/drivers/gpu/drm/xe/xe_eu_stall.h
> > +++ b/drivers/gpu/drm/xe/xe_eu_stall.h
> > @@ -6,9 +6,10 @@
> >  #ifndef __XE_EU_STALL_H__
> >  #define __XE_EU_STALL_H__
> >
> > -#include <drm/drm_device.h>
> > -#include <drm/drm_file.h>
> > -#include <linux/types.h>
> > +#include "xe_gt_types.h"
> > +
> > +int xe_eu_stall_init(struct xe_gt *gt);
> > +void xe_eu_stall_fini(struct xe_gt *gt);
> >
> >  int xe_eu_stall_stream_open(struct drm_device *dev,
> >			    u64 data,
> > diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> > index 9fb8f1e678dc..ee5a16727300 100644
> > --- a/drivers/gpu/drm/xe/xe_gt.c
> > +++ b/drivers/gpu/drm/xe/xe_gt.c
> > @@ -60,6 +60,7 @@
> >  #include "xe_vm.h"
> >  #include "xe_wa.h"
> >  #include "xe_wopcm.h"
> > +#include "xe_eu_stall.h"
> >
> >  static void gt_fini(struct drm_device *drm, void *arg)
> >  {
> > @@ -159,6 +160,7 @@ void xe_gt_remove(struct xe_gt *gt)
> >		xe_hw_fence_irq_finish(&gt->fence_irq[i]);
> >
> >	xe_gt_disable_host_l2_vram(gt);
> > +	xe_eu_stall_fini(gt);
> 
> Maybe move to the top of the function, so that fini happens in reverse
> order to init?
Okay.
> 
> >  }
> >
> >  static void gt_reset_worker(struct work_struct *w);
> > @@ -625,6 +627,10 @@ int xe_gt_init(struct xe_gt *gt)
> >
> >	xe_gt_record_user_engines(gt);
> >
> > +	err = xe_eu_stall_init(gt);
> > +	if (err)
> > +		return err;
> > +
> >	return 0;
> >  }
> >
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > index 6e66bf0e8b3f..833a1a67e9ae 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -430,6 +430,9 @@ struct xe_gt {
> >
> >	/** @oa: oa observation subsystem per gt info */
> >	struct xe_oa_gt oa;
> > +
> > +	/** @eu_stall: EU stall counters subsystem per gt info */
> > +	struct xe_eu_stall_gt *eu_stall;
> 
> This needs a forward declaration for 'struct xe_eu_stall_gt'.

I thought so initially, but the compiler didn't complain. I guess because
eu_stall is a pointer and not an instance of struct xe_eu_stall_gt ?
> 
> >  };
> >
> >  #endif
> > --
> > 2.48.1
> >

Thank You
Harish.


More information about the Intel-xe mailing list