[PATCH 13/14] drm/i915: Mechanism to forward clock monotonic raw time in perf samples

Sagar Arun Kamble sagar.a.kamble at intel.com
Wed Jul 12 09:57:01 UTC 2017


From: Sourab Gupta <sourab.gupta at intel.com>

Currently, we only have the ability to forward the GPU timestamps in the
samples (which are generated via OA reports or PIPE_CONTROL commands
inserted in the ring). This limits the ability to correlate these samples
with the system events. If we scale the GPU timestamps according to the
timestamp base/frequency info present in bspec, it is observed that the
timestamps drift really quickly from the system time.

An ability is therefore needed to report timestamps in different clock
domains, such as CLOCK_MONOTONIC (or _MONO_RAW), in the perf samples to
be of more practical use to the userspace. This ability becomes important
when we want to correlate/plot GPU events/samples with other system events
on the same timeline (e.g. vblank events, or timestamps when work was
submitted to kernel, etc.)

The patch here proposes a mechanism to achieve this. The correlation
between gpu time and system time is established using the cross timestamp
framework. For this purpose, the timestamp clock associated with the
command stream is abstracted as timecounter/cyclecounter, before utilizing
the cross timestamp framework to retrieve gpu/system time correlated values.
Successive pairs of such gpu/system time values are then used to detect and
correct the error in the published gpu timestamp clock frequency. The
userspace can request CLOCK_MONOTONIC_RAW timestamps in samples by requesting
the corresponding property while opening the stream.

v2: Added i915_driver_init_late() function to capture the new late init
phase for perf (Chris)

Signed-off-by: Sourab Gupta <sourab.gupta at intel.com>
Signed-off-by: Sagar Arun Kamble <sagar.a.kamble at intel.com>
---
 drivers/gpu/drm/i915/i915_drv.c  |  15 +++
 drivers/gpu/drm/i915/i915_drv.h  |  29 +++++
 drivers/gpu/drm/i915/i915_perf.c | 271 +++++++++++++++++++++++++++++++++++++--
 include/uapi/drm/i915_drm.h      |   9 +-
 4 files changed, 312 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index d310d82..8902f28 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -935,6 +935,19 @@ static void i915_driver_cleanup_early(struct drm_i915_private *dev_priv)
 	i915_engines_cleanup(dev_priv);
 }
 
+/**
+ * i915_driver_init_late - initialize late stage driver components
+ * @dev_priv: device private
+ *
+ * Set up the driver components that need to be initialized after the driver
+ * state has been registered and the device enabled (currently only perf).
+ */
+static void i915_driver_init_late(struct drm_i915_private *dev_priv)
+{
+	i915_perf_init_late(dev_priv);
+}
+
+
 static int i915_mmio_setup(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
@@ -1336,6 +1349,8 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	i915_driver_register(dev_priv);
 
+	i915_driver_init_late(dev_priv);
+
 	intel_runtime_pm_enable(dev_priv);
 
 	dev_priv->ipc_enabled = false;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 233a1eb..33528bc 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -43,6 +43,9 @@
 #include <linux/pm_qos.h>
 #include <linux/reservation.h>
 #include <linux/shmem_fs.h>
+#include <linux/timecounter.h>
+#include <linux/clocksource.h>
+#include <linux/timekeeping.h>
 
 #include <drm/drmP.h>
 #include <drm/intel-gtt.h>
@@ -2042,6 +2045,12 @@ struct i915_perf_stream {
 	struct reservation_object sampled_req_resv;
 
 	/**
+	 * @last_sample_ts: monotonic_raw clk timestamp (in ns) for last sample
+	 * belonging to this perf stream.
+	 */
+	u64 last_sample_ts;
+
+	/**
 	 * @ops: The callbacks providing the implementation of this specific
 	 * type of configured stream.
 	 */
@@ -2212,6 +2221,23 @@ struct i915_perf_cs_sample {
 	u32 tag;
 };
 
+/**
+ * struct i915_clock_info - describes i915 timestamp clock
+ *
+ */
+struct i915_clock_info {
+	struct cyclecounter cc; /* wraps the GT timestamp counter */
+	struct timecounter tc; /* gpu time in ns, built on top of cc */
+	struct system_device_crosststamp xtstamp; /* last gpu/system sample */
+	ktime_t clk_offset; /*
+			     * Offset (in ns) between monoraw clk
+			     * and gpu time.
+			     */
+	u32 timestamp_frequency; /* in Hz; adjusted by clk_sync_work */
+	u32 resync_period; /* in msecs */
+	struct delayed_work clk_sync_work; /* gpu/system clock resync */
+};
+
 struct intel_cdclk_state {
 	unsigned int cdclk, vco, ref;
 };
@@ -2533,6 +2559,8 @@ struct drm_i915_private {
 
 	struct i915_runtime_pm pm;
 
+	struct i915_clock_info ts_clk_info;
+
 	struct {
 		bool initialized;
 
@@ -3878,6 +3906,7 @@ int intel_engine_cmd_parser(struct intel_engine_cs *engine,
 
 /* i915_perf.c */
 extern void i915_perf_init(struct drm_i915_private *dev_priv);
+extern void i915_perf_init_late(struct drm_i915_private *dev_priv);
 extern void i915_perf_fini(struct drm_i915_private *dev_priv);
 extern void i915_perf_register(struct drm_i915_private *dev_priv);
 extern void i915_perf_unregister(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index d849b99..c5a3009 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -194,6 +194,7 @@
 #include <linux/anon_inodes.h>
 #include <linux/sizes.h>
 #include <linux/srcu.h>
+#include <linux/ktime.h>
 
 #include "i915_drv.h"
 #include "i915_oa_hsw.h"
@@ -265,6 +266,9 @@
 #define POLL_FREQUENCY 200
 #define POLL_PERIOD (NSEC_PER_SEC / POLL_FREQUENCY)
 
+#define MAX_CLK_SYNC_PERIOD (60*MSEC_PER_SEC)
+#define INIT_CLK_SYNC_PERIOD (20) /* in msecs */
+
 /* for sysctl proc_dointvec_minmax of dev.i915.perf_stream_paranoid */
 static int zero;
 static int one = 1;
@@ -293,13 +297,24 @@
 #define TS_ADDR_ALIGN 8
 #define I915_PERF_TS_SAMPLE_SIZE 8
 
+/* Published frequencies (in Hz) of the GT command stream timestamp clock */
+#define FREQUENCY_12_5_MHZ	(12500000)
+#define FREQUENCY_12_0_MHZ	(12000000)
+#define FREQUENCY_19_2_MHZ	(19200000)
+#define GT_CS_TIMESTAMP_FREQUENCY(dev_priv) (IS_GEN9(dev_priv) ? \
+				(IS_BROXTON(dev_priv) ? \
+				FREQUENCY_19_2_MHZ : \
+				FREQUENCY_12_0_MHZ) : \
+				FREQUENCY_12_5_MHZ)
+
 /*Data common to perf samples (periodic OA / CS based OA / Timestamps)*/
 struct i915_perf_sample_data {
 	u32 source;
 	u32 ctx_id;
 	u32 pid;
 	u32 tag;
-	u64 ts;
+	u64 gpu_ts;
+	u64 clk_monoraw;
 	const u8 *report;
 };
 
@@ -358,6 +373,7 @@ struct i915_perf_sample_data {
 #define SAMPLE_PID	      (1<<3)
 #define SAMPLE_TAG	      (1<<4)
 #define SAMPLE_TS	      (1<<5)
+#define SAMPLE_CLK_MONO_RAW   (1<<6)
 
 /**
  * struct perf_open_properties - for validated properties given to open a stream
@@ -406,6 +422,136 @@ static u32 gen7_oa_hw_tail_read(struct drm_i915_private *dev_priv)
 }
 
 /**
+ * i915_clk_cyclecounter_read - read the raw GT timestamp counter
+ * @cc: cyclecounter embedded in drm_i915_private.ts_clk_info
+ **/
+static u64 i915_clk_cyclecounter_read(
+				const struct cyclecounter *cc)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(cc, typeof(*dev_priv),
+			     ts_clk_info.cc);
+
+	return I915_READ64_2x32(GT_TIMESTAMP_COUNT,
+					GT_TIMESTAMP_COUNT_UDW);
+}
+
+static void i915_clk_update_mult_shift(struct cyclecounter *cc,
+					u32 frequency) /* in Hz */
+{
+	clocks_calc_mult_shift(&cc->mult, &cc->shift, /* cycles -> ns scaling */
+			frequency, NSEC_PER_SEC, 3600); /* from, to, maxsec (s) */
+}
+
+static void i915_clk_init_base_freq(struct drm_i915_private *dev_priv,
+				struct cyclecounter *cc)
+{
+	cc->read = i915_clk_cyclecounter_read;
+	cc->mask = CYCLECOUNTER_MASK(64); /* counter is read as 64 bits */
+	i915_clk_update_mult_shift(cc, /* scale from the current frequency */
+			dev_priv->ts_clk_info.timestamp_frequency);
+}
+
+/**
+ * i915_get_syncdevicetime - Callback given to timekeeping code to read
+ * device time and system clk counter value
+ * @device_time: current device time, read from the gpu timecounter
+ * @system: system counter value read synchronously with device time
+ * @ctx: context provided by timekeeping code (the drm_i915_private)
+ * Return: always 0.
+ **/
+static int i915_get_syncdevicetime(ktime_t *device_time,
+					 struct system_counterval_t *system,
+					 void *ctx)
+{
+	struct drm_i915_private *dev_priv = (struct drm_i915_private *)ctx;
+	struct timecounter *tc = &dev_priv->ts_clk_info.tc;
+	struct clocksource *curr_clksource;
+
+	*device_time = ns_to_ktime(timecounter_read(tc));
+
+	curr_clksource = get_current_clocksource();
+	system->cycles = curr_clksource->read(curr_clksource);
+	system->cs = curr_clksource;
+
+	return 0;
+}
+
+/* Delayed work: resample gpu/system time, correct the gpu timestamp frequency
+ * on drift, then rearm with a doubled period (below MAX_CLK_SYNC_PERIOD).
+ */
+static void i915_perf_clock_sync_work(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv = container_of(work,
+			typeof(*dev_priv), ts_clk_info.clk_sync_work.work);
+	struct i915_clock_info *clk = &dev_priv->ts_clk_info;
+	struct system_device_crosststamp *xtstamp = &clk->xtstamp;
+	ktime_t last_sys_time = xtstamp->sys_monoraw;
+	ktime_t last_gpu_time = xtstamp->device;
+	ktime_t clk_mono_offset, gpu_time_offset;
+	s64 delta, elapsed_ns, freq_delta;
+	u32 gpu_freq = clk->timestamp_frequency;
+
+	get_device_system_crosststamp(i915_get_syncdevicetime, dev_priv,
+					NULL, xtstamp);
+
+	clk_mono_offset = ktime_sub(xtstamp->sys_monoraw, last_sys_time);
+	gpu_time_offset = ktime_sub(xtstamp->device, last_gpu_time);
+	elapsed_ns = ktime_to_ns(clk_mono_offset);
+
+	/* delta time in ns */
+	delta = ktime_to_ns(ktime_sub(gpu_time_offset, clk_mono_offset));
+
+	/* If time delta < 1 us, we can assume gpu frequency is correct */
+	if (abs(delta) < NSEC_PER_USEC)
+		goto out;
+
+	/* The two clocks shouldn't deviate more than 1 second during the
+	 * resync period. If this is the case (which may happen due to
+	 * suspend/resume), then don't apply frequency correction, and
+	 * fast forward/rewind the clocks to resync immediately. Likewise
+	 * if no system time elapsed, as there is nothing to divide by.
+	 */
+	if (abs(delta) > NSEC_PER_SEC || elapsed_ns <= 0)
+		goto out;
+
+	/* Calculate frequency delta with 64-bit math: the product overflows
+	 * u32 and the divisor may too, so do_div() on a u32 is unusable.
+	 */
+	freq_delta = div64_u64((u64)abs(delta) * gpu_freq, elapsed_ns);
+
+	/* A correction as large as the frequency itself is not credible */
+	if (freq_delta == 0 || freq_delta >= gpu_freq)
+		goto out;
+
+	/* gpu time ran ahead (delta > 0): published frequency was too low */
+	clk->timestamp_frequency += delta < 0 ? -freq_delta : freq_delta;
+	i915_clk_update_mult_shift(&clk->cc, clk->timestamp_frequency);
+
+	/*
+	 * Get updated device/system times based on corrected frequency.
+	 * Note that this may cause jumps in device time depending on whether
+	 * frequency delta is positive or negative.
+	 * Note: Take care that monotonicity of sample timestamps is maintained
+	 * even with these jumps.
+	 */
+	get_device_system_crosststamp(i915_get_syncdevicetime, dev_priv,
+					NULL, xtstamp);
+
+out:
+	clk->clk_offset = ktime_sub(xtstamp->sys_monoraw, xtstamp->device);
+
+	/* We can schedule next synchronization at incrementally higher
+	 * durations, so that the accuracy of our calculated frequency
+	 * can improve over time.
+	 */
+	clk->resync_period *= 2;
+	if (clk->resync_period < MAX_CLK_SYNC_PERIOD)
+		schedule_delayed_work(&clk->clk_sync_work,
+			msecs_to_jiffies(clk->resync_period));
+}
+
+/**
  * i915_perf_emit_sample_capture - Insert the commands to capture metrics into
  * the command stream of a GPU engine.
  * @request: request in whose context the metrics are being collected.
@@ -487,7 +633,7 @@ static void insert_perf_sample(struct i915_perf_stream *stream,
 
 	if (stream->sample_flags & SAMPLE_OA_REPORT)
 		sample_size += dev_priv->perf.oa.oa_buffer.format_size;
-	else if (stream->sample_flags & SAMPLE_TS) {
+	else if (stream->sample_flags & (SAMPLE_TS | SAMPLE_CLK_MONO_RAW)) {
 		/*
 		 * XXX: Since TS data can anyways be derived from OA report, so
 		 * no need to capture it for RCS engine, if capture oa data is
@@ -737,7 +883,7 @@ static void i915_perf_stream_emit_sample_capture(
 						  sample->oa_offset);
 		if (ret)
 			goto err_unref;
-	} else if (stream->sample_flags & SAMPLE_TS) {
+	} else if (stream->sample_flags & (SAMPLE_TS | SAMPLE_CLK_MONO_RAW)) {
 		/*
 		 * XXX: Since TS data can anyways be derived from OA report, so
 		 * no need to capture it for RCS engine, if capture oa data is
@@ -1070,7 +1216,14 @@ static int append_perf_sample(struct i915_perf_stream *stream,
 	}
 
 	if (sample_flags & SAMPLE_TS) {
-		if (copy_to_user(buf, &data->ts, I915_PERF_TS_SAMPLE_SIZE))
+		if (copy_to_user(buf, &data->gpu_ts, I915_PERF_TS_SAMPLE_SIZE))
+			return -EFAULT;
+		buf += I915_PERF_TS_SAMPLE_SIZE;
+	}
+
+	if (sample_flags & SAMPLE_CLK_MONO_RAW) {
+		if (copy_to_user(buf, &data->clk_monoraw,
+					I915_PERF_TS_SAMPLE_SIZE))
 			return -EFAULT;
 		buf += I915_PERF_TS_SAMPLE_SIZE;
 	}
@@ -1087,6 +1240,34 @@ static int append_perf_sample(struct i915_perf_stream *stream,
 }
 
 /**
+ * get_clk_monoraw_from_gpu_ts - Retrieve _MONO_RAW system clock value from
+ * the raw gpu timestamp.
+ * @stream: An i915-perf stream opened for GPU metrics
+ * @gpu_ts: raw gpu timestamp value
+ * Return: monotonic raw time in ns, clamped so it never goes backwards.
+ */
+static u64 get_clk_monoraw_from_gpu_ts(struct i915_perf_stream *stream,
+					u64 gpu_ts)
+{
+	struct drm_i915_private *dev_priv = stream->dev_priv;
+	struct timecounter *tc = &dev_priv->ts_clk_info.tc;
+	u64 gpu_time, clk_monoraw;
+
+	/* Convert in the time base that clk_offset was computed against */
+	gpu_time = timecounter_cyc2time(tc, gpu_ts);
+	clk_monoraw = gpu_time + ktime_to_ns(dev_priv->ts_clk_info.clk_offset);
+
+	/* Ensure monotonicity by clamping the system time in case it goes
+	 * backwards.
+	 */
+	if (clk_monoraw < stream->last_sample_ts)
+		clk_monoraw = stream->last_sample_ts;
+
+	stream->last_sample_ts = clk_monoraw;
+	return clk_monoraw;
+}
+
+/**
  * get_gpu_ts_from_oa_report - Retrieve absolute gpu timestamp from OA report
  *
  * Note: We are assuming that we're updating last_gpu_ts frequently enough so
@@ -1139,7 +1320,14 @@ static int append_oa_buffer_sample(struct i915_perf_stream *stream,
 		data.tag = stream->last_tag;
 
 	if (sample_flags & SAMPLE_TS)
-		data.ts = get_gpu_ts_from_oa_report(dev_priv, report);
+		data.gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
+
+	if (sample_flags & SAMPLE_CLK_MONO_RAW) {
+		u64 gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
+
+		data.clk_monoraw = get_clk_monoraw_from_gpu_ts(stream, gpu_ts);
+	}
+
 
 	if (sample_flags & SAMPLE_OA_REPORT)
 		data.report = report;
@@ -1731,7 +1919,7 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
 		if (ret)
 			return ret;
 
-		if (sample_flags & SAMPLE_TS)
+		if (sample_flags & (SAMPLE_TS | SAMPLE_CLK_MONO_RAW))
 			gpu_ts = get_gpu_ts_from_oa_report(dev_priv, report);
 	}
 
@@ -1753,14 +1941,20 @@ static int append_cs_buffer_sample(struct i915_perf_stream *stream,
 		stream->last_tag = node->tag;
 	}
 
-	if (sample_flags & SAMPLE_TS) {
+	if (sample_flags & (SAMPLE_TS | SAMPLE_CLK_MONO_RAW)) {
 		/* If OA sampling is enabled, derive the ts from OA report.
 		 * Else, forward the timestamp collected via command stream.
 		 */
 		if (!(sample_flags & SAMPLE_OA_REPORT))
 			gpu_ts = *(u64 *) (stream->cs_buffer.vaddr +
 					   node->ts_offset);
-		data.ts = gpu_ts;
+
+		if (sample_flags & SAMPLE_TS)
+			data.gpu_ts = gpu_ts;
+		if (sample_flags & SAMPLE_CLK_MONO_RAW)
+			data.clk_monoraw =
+				get_clk_monoraw_from_gpu_ts(stream, gpu_ts);
+
 	}
 
 	return append_perf_sample(stream, buf, count, offset, &data);
@@ -2855,12 +3049,29 @@ static void i915_perf_stream_enable(struct i915_perf_stream *stream)
 	if (stream->sample_flags & SAMPLE_OA_REPORT) {
 		dev_priv->perf.oa.ops.oa_enable(dev_priv);
 
-		if (stream->sample_flags & SAMPLE_TS)
+		if (stream->sample_flags & (SAMPLE_TS|SAMPLE_CLK_MONO_RAW))
 			dev_priv->perf.oa.last_gpu_ts =
 				I915_READ64_2x32(GT_TIMESTAMP_COUNT,
 					GT_TIMESTAMP_COUNT_UDW);
 	}
 
+	if (stream->sample_flags & SAMPLE_CLK_MONO_RAW) {
+		struct system_device_crosststamp *xtstamp =
+			&dev_priv->ts_clk_info.xtstamp;
+
+		get_device_system_crosststamp(i915_get_syncdevicetime, dev_priv,
+					NULL, xtstamp);
+		dev_priv->ts_clk_info.clk_offset = ktime_sub(
+						xtstamp->sys_monoraw,
+						xtstamp->device);
+
+		if (dev_priv->ts_clk_info.resync_period < MAX_CLK_SYNC_PERIOD)
+			schedule_delayed_work(
+				&dev_priv->ts_clk_info.clk_sync_work,
+				msecs_to_jiffies(
+					dev_priv->ts_clk_info.resync_period));
+	}
+
 	if (stream->cs_mode || dev_priv->perf.oa.periodic)
 		hrtimer_start(&dev_priv->perf.poll_check_timer,
 			      ns_to_ktime(POLL_PERIOD),
@@ -2890,6 +3101,8 @@ static void i915_perf_stream_disable(struct i915_perf_stream *stream)
 	struct drm_i915_private *dev_priv = stream->dev_priv;
 	long ret;
 
+	cancel_delayed_work_sync(&dev_priv->ts_clk_info.clk_sync_work);
+
 	if (stream->cs_mode || dev_priv->perf.oa.periodic)
 		hrtimer_cancel(&dev_priv->perf.poll_check_timer);
 
@@ -2954,7 +3167,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 	bool require_cs_mode = props->sample_flags & (SAMPLE_PID |
 						      SAMPLE_TAG);
 	bool cs_sample_data = props->sample_flags & (SAMPLE_OA_REPORT |
-							SAMPLE_TS);
+							SAMPLE_TS |
+							SAMPLE_CLK_MONO_RAW);
 	struct i915_perf_stream *curr_stream;
 	struct intel_engine_cs *engine = NULL;
 	int idx;
@@ -3124,6 +3338,19 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 			require_cs_mode = true;
 	}
 
+	if (props->sample_flags & SAMPLE_CLK_MONO_RAW) {
+		stream->sample_flags |= SAMPLE_CLK_MONO_RAW;
+		stream->sample_size += I915_PERF_TS_SAMPLE_SIZE;
+
+		/*
+		 * it's meaningful to request SAMPLE_CLK_MONO_RAW with just CS
+		 * mode or periodic OA mode sampling but we don't allow
+		 * SAMPLE_CLK_MONO_RAW without either mode
+		 */
+		if (!require_oa_unit)
+			require_cs_mode = true;
+	}
+
 	if (require_cs_mode && !props->cs_mode) {
 		DRM_ERROR("PID/TAG/TS sampling requires engine to be specified");
 		ret = -EINVAL;
@@ -3143,7 +3370,8 @@ static int i915_perf_stream_init(struct i915_perf_stream *stream,
 		 * requested as they're usable with periodic OA or CS sampling.
 		 */
 		if (!require_cs_mode &&
-		    !(props->sample_flags & (SAMPLE_CTX_ID | SAMPLE_TS))) {
+		    !(props->sample_flags &
+			(SAMPLE_CTX_ID | SAMPLE_TS | SAMPLE_CLK_MONO_RAW))) {
 			DRM_ERROR("Stream engine given without requesting any CS specific property\n");
 			ret = -EINVAL;
 			goto err_enable;
@@ -3903,6 +4131,9 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		case DRM_I915_PERF_PROP_SAMPLE_TS:
 			props->sample_flags |= SAMPLE_TS;
 			break;
+		case DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW:
+			props->sample_flags |= SAMPLE_CLK_MONO_RAW;
+			break;
 		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
 			return -EINVAL;
@@ -4257,6 +4488,16 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 	}
 
 	if (dev_priv->perf.oa.n_builtin_sets) {
+		struct i915_clock_info *clk_info = &dev_priv->ts_clk_info;
+
+		clk_info->timestamp_frequency =
+					GT_CS_TIMESTAMP_FREQUENCY(dev_priv);
+		clk_info->resync_period = INIT_CLK_SYNC_PERIOD;
+		INIT_DELAYED_WORK(&clk_info->clk_sync_work,
+					i915_perf_clock_sync_work);
+
+		i915_clk_init_base_freq(dev_priv, &clk_info->cc);
+
 		hrtimer_init(&dev_priv->perf.poll_check_timer,
 				CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 		dev_priv->perf.poll_check_timer.function = poll_check_timer_cb;
@@ -4272,6 +4513,14 @@ void i915_perf_init(struct drm_i915_private *dev_priv)
 	}
 }
 
+void i915_perf_init_late(struct drm_i915_private *dev_priv)
+{
+	if (dev_priv->ts_clk_info.cc.read) /* NULL when OA is unsupported */
+		timecounter_init(&dev_priv->ts_clk_info.tc,
+				 &dev_priv->ts_clk_info.cc,
+				 ktime_to_ns(ktime_get_boottime()));
+}
+
 /**
  * i915_perf_fini - Counter part to i915_perf_init()
  * @dev_priv: i915 device instance
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 011737b..016ab09 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1431,6 +1431,12 @@ enum drm_i915_perf_property_id {
 	 */
 	DRM_I915_PERF_PROP_SAMPLE_TS,
 
+	/**
+	 * This property requests inclusion of CLOCK_MONOTONIC_RAW system time
+	 * (in ns) in the perf sample data.
+	 */
+	DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW,
+
 	DRM_I915_PERF_PROP_MAX /* non-ABI */
 };
 
@@ -1500,7 +1506,8 @@ enum drm_i915_perf_record_type {
 	 *     { u32 ctx_id; } && DRM_I915_PERF_PROP_SAMPLE_CTX_ID
 	 *     { u32 pid; } && DRM_I915_PERF_PROP_SAMPLE_PID
 	 *     { u32 tag; } && DRM_I915_PERF_PROP_SAMPLE_TAG
-	 *     { u64 timestamp; } && DRM_I915_PERF_PROP_SAMPLE_TS
+	 *     { u64 gpu_ts; } && DRM_I915_PERF_PROP_SAMPLE_TS
+	 *     { u64 clk_mono; } && DRM_I915_PERF_PROP_SAMPLE_CLOCK_MONOTONIC_RAW
 	 *     { u32 oa_report[]; } && DRM_I915_PERF_PROP_SAMPLE_OA
 	 * };
 	 */
-- 
1.9.1



More information about the Intel-gfx-trybot mailing list