[PATCH 1/1] drm/xe/oa: Combined diff of changes between v13 and v14

Ashutosh Dixit ashutosh.dixit at intel.com
Fri May 24 18:24:04 UTC 2024


Because Xe OA changes between successive revisions are distributed across
multiple patches, provide a combined diff of all changes in a single patch
to help with code review.

These changes pertain to: https://patchwork.freedesktop.org/series/121084/

This patch will not compile; its purpose is just to provide the diff.

* Change oa_units[] array to __u64 in query_oa_units to avoid MSVC compile
  error
* xe_device_mem_access_get/put -> xe_pm_runtime_get/put
* Hold runtime_pm references across OA buffer alloc/free
* Reorder #includes in xe_oa.c
* Fix platforms for which OA_TLB_INV_CR is valid
* Use -EIO to signal to userspace to read OASTATUS using
  DRM_XE_PERF_IOCTL_STATUS; change sites that previously returned -EIO to
  return -EINVAL
* Make drm_xe_oa_stream_status bits contiguous
* s/DRM_XE_OA_PROPERTY_OA_EXPONENT/DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT/
* Use read-modify-write (rmw) to clear only the relevant oa_status bits

Note: The last published consolidated diff was for v11. The only change
between v11 and v13 was the addition of the patch "Enable Xe2+ overrun mode".

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
 drivers/gpu/drm/xe/regs/xe_oa_regs.h |  4 ++
 drivers/gpu/drm/xe/xe_guc_pc.c       |  9 +--
 drivers/gpu/drm/xe/xe_oa.c           | 98 +++++++++++++++++++---------
 drivers/gpu/drm/xe/xe_oa_types.h     |  3 +
 include/uapi/drm/xe_drm.h            | 21 +++---
 5 files changed, 93 insertions(+), 42 deletions(-)

diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index 6ad3304c4d19..d9fcb08f500d 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -72,6 +72,10 @@
 #define  OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS	REG_BIT(1)
 
 #define OAG_OASTATUS			XE_REG(0xdafc)
+#define  OASTATUS_MMIO_TRG_Q_FULL	REG_BIT(6)
+#define  OASTATUS_COUNTER_OVERFLOW	REG_BIT(2)
+#define  OASTATUS_BUFFER_OVERFLOW	REG_BIT(1)
+#define  OASTATUS_REPORT_LOST		REG_BIT(0)
 #define OAG_MMIOTRIGGER			XE_REG(0xdb1c)
 /* OAC unit */
 #define OAC_OACONTROL			XE_REG(0x15114)
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 1c582d786c30..da701cf5325d 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -24,6 +24,7 @@
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_pcode.h"
+#include "xe_pm.h"
 
 #define MCHBAR_MIRROR_BASE_SNB	0x140000
 
@@ -805,9 +806,9 @@ int xe_guc_pc_override_gucrc_mode(struct xe_guc_pc *pc, enum slpc_gucrc_mode mod
 {
 	int ret;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
+	xe_pm_runtime_get(pc_to_xe(pc));
 	ret = pc_action_set_param(pc, SLPC_PARAM_PWRGATE_RC_MODE, mode);
-	xe_device_mem_access_put(pc_to_xe(pc));
+	xe_pm_runtime_put(pc_to_xe(pc));
 
 	return ret;
 }
@@ -822,9 +823,9 @@ int xe_guc_pc_unset_gucrc_mode(struct xe_guc_pc *pc)
 {
 	int ret;
 
-	xe_device_mem_access_get(pc_to_xe(pc));
+	xe_pm_runtime_get(pc_to_xe(pc));
 	ret = pc_action_unset_param(pc, SLPC_PARAM_PWRGATE_RC_MODE);
-	xe_device_mem_access_put(pc_to_xe(pc));
+	xe_pm_runtime_put(pc_to_xe(pc));
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index bde25e195577..38e6364a60b1 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/anon_inodes.h>
+#include <linux/delay.h>
 #include <linux/nospec.h>
 #include <linux/poll.h>
 
@@ -15,18 +16,22 @@
 #include "regs/xe_gt_regs.h"
 #include "regs/xe_lrc_layout.h"
 #include "regs/xe_oa_regs.h"
-#include "xe_device.h"
-#include "xe_exec_queue.h"
+#include "xe_assert.h"
 #include "xe_bb.h"
 #include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_force_wake.h"
 #include "xe_gt.h"
 #include "xe_gt_mcr.h"
 #include "xe_guc_pc.h"
 #include "xe_lrc.h"
+#include "xe_macros.h"
 #include "xe_mmio.h"
 #include "xe_oa.h"
-#include "xe_sched_job.h"
 #include "xe_perf.h"
+#include "xe_pm.h"
+#include "xe_sched_job.h"
 
 #define DEFAULT_POLL_FREQUENCY_HZ 200
 #define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
@@ -441,22 +446,44 @@ static void xe_oa_disable(struct xe_oa_stream *stream)
 		drm_err(&stream->oa->xe->drm,
 			"wait for OA to be disabled timed out\n");
 
-	xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
-	if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
-		drm_err(&stream->oa->xe->drm,
-			"wait for OA tlb invalidate timed out\n");
+	if (GRAPHICS_VERx100(stream->oa->xe) <= 1270 && GRAPHICS_VERx100(stream->oa->xe) != 1260) {
+		/* <= XE_METEORLAKE except XE_PVC */
+		xe_mmio_write32(stream->gt, OA_TLB_INV_CR, 1);
+		if (xe_mmio_wait32(stream->gt, OA_TLB_INV_CR, 1, 0, 50000, NULL, false))
+			drm_err(&stream->oa->xe->drm,
+				"wait for OA tlb invalidate timed out\n");
+	}
 }
 
 static int xe_oa_wait_unlocked(struct xe_oa_stream *stream)
 {
 	/* We might wait indefinitely if periodic sampling is not enabled */
 	if (!stream->periodic)
-		return -EIO;
+		return -EINVAL;
 
 	return wait_event_interruptible(stream->poll_wq,
 					xe_oa_buffer_check_unlocked(stream));
 }
 
+#define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \
+				OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST)
+
+static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf,
+			size_t count, size_t *offset)
+{
+	/* Only clear our bits to avoid side-effects */
+	stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status,
+					  OASTATUS_RELEVANT_BITS, 0);
+	/*
+	 * Signal to userspace that there is non-zero OA status to read via
+	 * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl
+	 */
+	if (stream->oa_status & OASTATUS_RELEVANT_BITS)
+		return -EIO;
+
+	return xe_oa_append_reports(stream, buf, count, offset);
+}
+
 static ssize_t xe_oa_read(struct file *file, char __user *buf,
 			  size_t count, loff_t *ppos)
 {
@@ -466,7 +493,7 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf,
 
 	/* Can't read from disabled streams */
 	if (!stream->enabled || !stream->sample)
-		return -EIO;
+		return -EINVAL;
 
 	if (!(file->f_flags & O_NONBLOCK)) {
 		do {
@@ -475,12 +502,12 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf,
 				return ret;
 
 			mutex_lock(&stream->stream_lock);
-			ret = xe_oa_append_reports(stream, buf, count, &offset);
+			ret = __xe_oa_read(stream, buf, count, &offset);
 			mutex_unlock(&stream->stream_lock);
 		} while (!offset && !ret);
 	} else {
 		mutex_lock(&stream->stream_lock);
-		ret = xe_oa_append_reports(stream, buf, count, &offset);
+		ret = __xe_oa_read(stream, buf, count, &offset);
 		mutex_unlock(&stream->stream_lock);
 	}
 
@@ -489,11 +516,14 @@ static ssize_t xe_oa_read(struct file *file, char __user *buf,
 	 * before unblocking. The exception to this is if __xe_oa_read returns -ENOSPC,
 	 * which means that more OA data is available than could fit in the user provided
 	 * buffer. In this case we want the next poll() call to not block.
+	 *
+	 * Also in case of -EIO, we have already waited for data before returning
+	 * -EIO, so no need to wait again
 	 */
-	if (ret != -ENOSPC)
+	if (ret != -ENOSPC && ret != -EIO)
 		stream->pollin = false;
 
-	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, ... */
+	/* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */
 	return offset ?: (ret ?: -EAGAIN);
 }
 
@@ -792,10 +822,11 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
 	xe_oa_disable_metric_set(stream);
 	xe_exec_queue_put(stream->k_exec_q);
 
+	xe_oa_free_oa_buffer(stream);
+
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	xe_device_mem_access_put(stream->oa->xe);
+	xe_pm_runtime_put(stream->oa->xe);
 
-	xe_oa_free_oa_buffer(stream);
 	/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
 	if (stream->override_gucrc)
 		XE_WARN_ON(xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
@@ -1037,12 +1068,19 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
 	struct drm_xe_oa_stream_status status = {};
 	void __user *uaddr = (void __user *)arg;
 
-	status.oa_status = xe_mmio_read32(stream->gt, __oa_regs(stream)->oa_status);
+	/* Map from register to uapi bits */
+	if (stream->oa_status & OASTATUS_REPORT_LOST)
+		status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST;
+	if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW)
+		status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW;
+	if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW)
+		status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW;
+	if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL)
+		status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL;
 
 	if (copy_to_user(uaddr, &status, sizeof(status)))
 		return -EFAULT;
 
-	xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_status, 0);
 	return 0;
 }
 
@@ -1201,7 +1239,7 @@ static bool xe_oa_find_reg_in_lri(u32 *state, u32 reg, u32 *offset, u32 end)
 static u32 xe_oa_context_image_offset(struct xe_oa_stream *stream, u32 reg)
 {
 	struct xe_lrc *lrc = &stream->exec_q->lrc[0];
-	u32 len = (xe_lrc_size(stream->oa->xe, stream->hwe->class) +
+	u32 len = (xe_gt_lrc_size(stream->gt, stream->hwe->class) +
 		   lrc->ring.size) / sizeof(u32);
 	u32 offset = xe_lrc_regs_offset(lrc) / sizeof(u32);
 	u32 *state = (u32 *)lrc->bo->vmap.vaddr;
@@ -1308,14 +1346,14 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
 		stream->override_gucrc = true;
 	}
 
-	ret = xe_oa_alloc_oa_buffer(stream);
-	if (ret)
-		goto err_unset_gucrc;
-
 	/* Take runtime pm ref and forcewake to disable RC6 */
-	xe_device_mem_access_get(stream->oa->xe);
+	xe_pm_runtime_get(stream->oa->xe);
 	XE_WARN_ON(xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL));
 
+	ret = xe_oa_alloc_oa_buffer(stream);
+	if (ret)
+		goto err_fw_put;
+
 	stream->k_exec_q = xe_exec_queue_create(stream->oa->xe, NULL,
 						BIT(stream->hwe->logical_instance), 1,
 						stream->hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
@@ -1323,7 +1361,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
 		ret = PTR_ERR(stream->k_exec_q);
 		drm_err(&stream->oa->xe->drm, "gt%d, hwe %s, xe_exec_queue_create failed=%d",
 			stream->gt->info.id, stream->hwe->name, ret);
-		goto err_fw_put;
+		goto err_free_oa_buf;
 	}
 
 	ret = xe_oa_enable_metric_set(stream);
@@ -1349,11 +1387,11 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
 err_put_k_exec_q:
 	xe_oa_disable_metric_set(stream);
 	xe_exec_queue_put(stream->k_exec_q);
+err_free_oa_buf:
+	xe_oa_free_oa_buffer(stream);
 err_fw_put:
 	XE_WARN_ON(xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL));
-	xe_device_mem_access_put(stream->oa->xe);
-	xe_oa_free_oa_buffer(stream);
-err_unset_gucrc:
+	xe_pm_runtime_put(stream->oa->xe);
 	if (stream->override_gucrc)
 		XE_WARN_ON(xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
 err_free_configs:
@@ -1426,9 +1464,9 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
 	case XE_DG2:
 	case XE_PVC:
 	case XE_METEORLAKE:
-		xe_device_mem_access_get(gt_to_xe(gt));
+		xe_pm_runtime_get(gt_to_xe(gt));
 		reg = xe_mmio_read32(gt, RPM_CONFIG0);
-		xe_device_mem_access_put(gt_to_xe(gt));
+		xe_pm_runtime_put(gt_to_xe(gt));
 
 		shift = REG_FIELD_GET(RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK, reg);
 		return gt->info.reference_clock << (3 - shift);
@@ -1598,7 +1636,7 @@ static const xe_oa_set_property_fn xe_oa_set_property_funcs[] = {
 	[DRM_XE_OA_PROPERTY_SAMPLE_OA] = xe_oa_set_prop_sample_oa,
 	[DRM_XE_OA_PROPERTY_OA_METRIC_SET] = xe_oa_set_prop_metric_set,
 	[DRM_XE_OA_PROPERTY_OA_FORMAT] = xe_oa_set_prop_oa_format,
-	[DRM_XE_OA_PROPERTY_OA_EXPONENT] = xe_oa_set_prop_oa_exponent,
+	[DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT] = xe_oa_set_prop_oa_exponent,
 	[DRM_XE_OA_PROPERTY_OA_DISABLED] = xe_oa_set_prop_disabled,
 	[DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID] = xe_oa_set_prop_exec_queue_id,
 	[DRM_XE_OA_PROPERTY_OA_ENGINE_INSTANCE] = xe_oa_set_prop_engine_instance,
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index d8d5c9d8c22e..c62811482934 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -225,5 +225,8 @@ struct xe_oa_stream {
 
 	/** @override_gucrc: GuC RC has been overridden for the OA stream */
 	bool override_gucrc;
+
+	/** @oa_status: temporary storage for oa_status register value */
+	u32 oa_status;
 };
 #endif
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index dc17e1fad025..8378e5a51d37 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -1508,8 +1508,12 @@ struct drm_xe_query_oa_units {
 	__u32 num_oa_units;
 	/** @pad: MBZ */
 	__u32 pad;
-	/** @oa_units: OA units returned for this device */
-	struct drm_xe_oa_unit oa_units[];
+	/**
+	 * @oa_units: struct @drm_xe_oa_unit array returned for this device.
+	 * Written below as a u64 array to avoid problems with nested flexible
+	 * arrays with some compilers
+	 */
+	__u64 oa_units[];
 };
 
 /** enum drm_xe_oa_format_type - OA format types */
@@ -1571,10 +1575,10 @@ enum drm_xe_oa_property_id {
 #define DRM_XE_OA_FORMAT_MASK_BC_REPORT		(0xff << 24)
 
 	/**
-	 * @DRM_XE_OA_PROPERTY_OA_EXPONENT: Requests periodic OA unit sampling
-	 * with sampling frequency proportional to 2^(period_exponent + 1)
+	 * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit
+	 * sampling with sampling frequency proportional to 2^(period_exponent + 1)
 	 */
-	DRM_XE_OA_PROPERTY_OA_EXPONENT,
+	DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT,
 
 	/**
 	 * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA
@@ -1624,15 +1628,16 @@ struct drm_xe_oa_config {
 
 /**
  * struct drm_xe_oa_stream_status - OA stream status returned from
- * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl
+ * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to
+ * query stream status in response to EIO errno from perf fd read().
  */
 struct drm_xe_oa_stream_status {
 	/** @extensions: Pointer to the first extension struct, if any */
 	__u64 extensions;
 
-	/** @oa_status: OA status register as specified in PRM/Bspec 46717/61226 */
+	/** @oa_status: OA stream status (see Bspec 46717/61226) */
 	__u64 oa_status;
-#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL		(1 << 6)
+#define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL		(1 << 3)
 #define DRM_XE_OASTATUS_COUNTER_OVERFLOW	(1 << 2)
 #define DRM_XE_OASTATUS_BUFFER_OVERFLOW		(1 << 1)
 #define DRM_XE_OASTATUS_REPORT_LOST		(1 << 0)
-- 
2.41.0



More information about the Intel-xe mailing list