[Intel-gfx] [PATCH v16 06/13] drm/i915/perf: move perf types to their own header

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Tue Sep 10 17:35:36 UTC 2019


Looks good. The struct perf inside drm_i915_private could also move to 
i915_perf_types.h. Irrespective of that,

Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>

Regards,
Umesh

On Mon, Sep 09, 2019 at 12:31:09PM +0300, Lionel Landwerlin wrote:
>Following a pattern used throughout the driver.
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>---
> drivers/gpu/drm/i915/i915_drv.h        | 300 +----------------------
> drivers/gpu/drm/i915/i915_perf.h       |   2 +
> drivers/gpu/drm/i915/i915_perf_types.h | 327 +++++++++++++++++++++++++
> 3 files changed, 330 insertions(+), 299 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_perf_types.h
>
>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>index 274a1193d4f0..f4145ae6ab6e 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -92,6 +92,7 @@
> #include "i915_gem_fence_reg.h"
> #include "i915_gem_gtt.h"
> #include "i915_gpu_error.h"
>+#include "i915_perf_types.h"
> #include "i915_request.h"
> #include "i915_scheduler.h"
> #include "gt/intel_timeline.h"
>@@ -979,305 +980,6 @@ struct intel_wm_config {
> 	bool sprites_scaled;
> };
>
>-struct i915_oa_format {
>-	u32 format;
>-	int size;
>-};
>-
>-struct i915_oa_reg {
>-	i915_reg_t addr;
>-	u32 value;
>-};
>-
>-struct i915_oa_config {
>-	char uuid[UUID_STRING_LEN + 1];
>-	int id;
>-
>-	const struct i915_oa_reg *mux_regs;
>-	u32 mux_regs_len;
>-	const struct i915_oa_reg *b_counter_regs;
>-	u32 b_counter_regs_len;
>-	const struct i915_oa_reg *flex_regs;
>-	u32 flex_regs_len;
>-
>-	struct attribute_group sysfs_metric;
>-	struct attribute *attrs[2];
>-	struct device_attribute sysfs_metric_id;
>-
>-	atomic_t ref_count;
>-};
>-
>-struct i915_perf_stream;
>-
>-/**
>- * struct i915_perf_stream_ops - the OPs to support a specific stream type
>- */
>-struct i915_perf_stream_ops {
>-	/**
>-	 * @enable: Enables the collection of HW samples, either in response to
>-	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
>-	 * without `I915_PERF_FLAG_DISABLED`.
>-	 */
>-	void (*enable)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @disable: Disables the collection of HW samples, either in response
>-	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
>-	 * the stream.
>-	 */
>-	void (*disable)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
>-	 * once there is something ready to read() for the stream
>-	 */
>-	void (*poll_wait)(struct i915_perf_stream *stream,
>-			  struct file *file,
>-			  poll_table *wait);
>-
>-	/**
>-	 * @wait_unlocked: For handling a blocking read, wait until there is
>-	 * something to ready to read() for the stream. E.g. wait on the same
>-	 * wait queue that would be passed to poll_wait().
>-	 */
>-	int (*wait_unlocked)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @read: Copy buffered metrics as records to userspace
>-	 * **buf**: the userspace, destination buffer
>-	 * **count**: the number of bytes to copy, requested by userspace
>-	 * **offset**: zero at the start of the read, updated as the read
>-	 * proceeds, it represents how many bytes have been copied so far and
>-	 * the buffer offset for copying the next record.
>-	 *
>-	 * Copy as many buffered i915 perf samples and records for this stream
>-	 * to userspace as will fit in the given buffer.
>-	 *
>-	 * Only write complete records; returning -%ENOSPC if there isn't room
>-	 * for a complete record.
>-	 *
>-	 * Return any error condition that results in a short read such as
>-	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
>-	 * returning to userspace.
>-	 */
>-	int (*read)(struct i915_perf_stream *stream,
>-		    char __user *buf,
>-		    size_t count,
>-		    size_t *offset);
>-
>-	/**
>-	 * @destroy: Cleanup any stream specific resources.
>-	 *
>-	 * The stream will always be disabled before this is called.
>-	 */
>-	void (*destroy)(struct i915_perf_stream *stream);
>-};
>-
>-/**
>- * struct i915_perf_stream - state for a single open stream FD
>- */
>-struct i915_perf_stream {
>-	/**
>-	 * @dev_priv: i915 drm device
>-	 */
>-	struct drm_i915_private *dev_priv;
>-
>-	/**
>-	 * @wakeref: As we keep the device awake while the perf stream is
>-	 * active, we track our runtime pm reference for later release.
>-	 */
>-	intel_wakeref_t wakeref;
>-
>-	/**
>-	 * @engine: Engine associated with this performance stream.
>-	 */
>-	struct intel_engine_cs *engine;
>-
>-	/**
>-	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
>-	 * properties given when opening a stream, representing the contents
>-	 * of a single sample as read() by userspace.
>-	 */
>-	u32 sample_flags;
>-
>-	/**
>-	 * @sample_size: Considering the configured contents of a sample
>-	 * combined with the required header size, this is the total size
>-	 * of a single sample record.
>-	 */
>-	int sample_size;
>-
>-	/**
>-	 * @ctx: %NULL if measuring system-wide across all contexts or a
>-	 * specific context that is being monitored.
>-	 */
>-	struct i915_gem_context *ctx;
>-
>-	/**
>-	 * @enabled: Whether the stream is currently enabled, considering
>-	 * whether the stream was opened in a disabled state and based
>-	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
>-	 */
>-	bool enabled;
>-
>-	/**
>-	 * @ops: The callbacks providing the implementation of this specific
>-	 * type of configured stream.
>-	 */
>-	const struct i915_perf_stream_ops *ops;
>-
>-	/**
>-	 * @oa_config: The OA configuration used by the stream.
>-	 */
>-	struct i915_oa_config *oa_config;
>-
>-	/**
>-	 * The OA context specific information.
>-	 */
>-	struct intel_context *pinned_ctx;
>-	u32 specific_ctx_id;
>-	u32 specific_ctx_id_mask;
>-
>-	struct hrtimer poll_check_timer;
>-	wait_queue_head_t poll_wq;
>-	bool pollin;
>-
>-	bool periodic;
>-	int period_exponent;
>-
>-	/**
>-	 * State of the OA buffer.
>-	 */
>-	struct {
>-		struct i915_vma *vma;
>-		u8 *vaddr;
>-		u32 last_ctx_id;
>-		int format;
>-		int format_size;
>-		int size_exponent;
>-
>-		/**
>-		 * Locks reads and writes to all head/tail state
>-		 *
>-		 * Consider: the head and tail pointer state needs to be read
>-		 * consistently from a hrtimer callback (atomic context) and
>-		 * read() fop (user context) with tail pointer updates happening
>-		 * in atomic context and head updates in user context and the
>-		 * (unlikely) possibility of read() errors needing to reset all
>-		 * head/tail state.
>-		 *
>-		 * Note: Contention/performance aren't currently a significant
>-		 * concern here considering the relatively low frequency of
>-		 * hrtimer callbacks (5ms period) and that reads typically only
>-		 * happen in response to a hrtimer event and likely complete
>-		 * before the next callback.
>-		 *
>-		 * Note: This lock is not held *while* reading and copying data
>-		 * to userspace so the value of head observed in htrimer
>-		 * callbacks won't represent any partial consumption of data.
>-		 */
>-		spinlock_t ptr_lock;
>-
>-		/**
>-		 * One 'aging' tail pointer and one 'aged' tail pointer ready to
>-		 * used for reading.
>-		 *
>-		 * Initial values of 0xffffffff are invalid and imply that an
>-		 * update is required (and should be ignored by an attempted
>-		 * read)
>-		 */
>-		struct {
>-			u32 offset;
>-		} tails[2];
>-
>-		/**
>-		 * Index for the aged tail ready to read() data up to.
>-		 */
>-		unsigned int aged_tail_idx;
>-
>-		/**
>-		 * A monotonic timestamp for when the current aging tail pointer
>-		 * was read; used to determine when it is old enough to trust.
>-		 */
>-		u64 aging_timestamp;
>-
>-		/**
>-		 * Although we can always read back the head pointer register,
>-		 * we prefer to avoid trusting the HW state, just to avoid any
>-		 * risk that some hardware condition could * somehow bump the
>-		 * head pointer unpredictably and cause us to forward the wrong
>-		 * OA buffer data to userspace.
>-		 */
>-		u32 head;
>-	} oa_buffer;
>-};
>-
>-/**
>- * struct i915_oa_ops - Gen specific implementation of an OA unit stream
>- */
>-struct i915_oa_ops {
>-	/**
>-	 * @is_valid_b_counter_reg: Validates register's address for
>-	 * programming boolean counters for a particular platform.
>-	 */
>-	bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
>-				       u32 addr);
>-
>-	/**
>-	 * @is_valid_mux_reg: Validates register's address for programming mux
>-	 * for a particular platform.
>-	 */
>-	bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
>-
>-	/**
>-	 * @is_valid_flex_reg: Validates register's address for programming
>-	 * flex EU filtering for a particular platform.
>-	 */
>-	bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
>-
>-	/**
>-	 * @enable_metric_set: Selects and applies any MUX configuration to set
>-	 * up the Boolean and Custom (B/C) counters that are part of the
>-	 * counter reports being sampled. May apply system constraints such as
>-	 * disabling EU clock gating as required.
>-	 */
>-	int (*enable_metric_set)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @disable_metric_set: Remove system constraints associated with using
>-	 * the OA unit.
>-	 */
>-	void (*disable_metric_set)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @oa_enable: Enable periodic sampling
>-	 */
>-	void (*oa_enable)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @oa_disable: Disable periodic sampling
>-	 */
>-	void (*oa_disable)(struct i915_perf_stream *stream);
>-
>-	/**
>-	 * @read: Copy data from the circular OA buffer into a given userspace
>-	 * buffer.
>-	 */
>-	int (*read)(struct i915_perf_stream *stream,
>-		    char __user *buf,
>-		    size_t count,
>-		    size_t *offset);
>-
>-	/**
>-	 * @oa_hw_tail_read: read the OA tail pointer register
>-	 *
>-	 * In particular this enables us to share all the fiddly code for
>-	 * handling the OA unit tail pointer race that affects multiple
>-	 * generations.
>-	 */
>-	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
>-};
>-
> struct intel_cdclk_state {
> 	unsigned int cdclk, vco, ref, bypass;
> 	u8 voltage_level;
>diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
>index 95549de65212..270193526b56 100644
>--- a/drivers/gpu/drm/i915/i915_perf.h
>+++ b/drivers/gpu/drm/i915/i915_perf.h
>@@ -8,6 +8,8 @@
>
> #include <linux/types.h>
>
>+#include "i915_perf_types.h"
>+
> struct drm_device;
> struct drm_file;
> struct drm_i915_private;
>diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
>new file mode 100644
>index 000000000000..c6673b67d694
>--- /dev/null
>+++ b/drivers/gpu/drm/i915/i915_perf_types.h
>@@ -0,0 +1,327 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright © 2019 Intel Corporation
>+ */
>+
>+#ifndef _I915_PERF_TYPES_H_
>+#define _I915_PERF_TYPES_H_
>+
>+#include <linux/atomic.h>
>+#include <linux/device.h>
>+#include <linux/hrtimer.h>
>+#include <linux/list.h>
>+#include <linux/poll.h>
>+#include <linux/sysfs.h>
>+#include <linux/uuid.h>
>+#include <linux/wait.h>
>+
>+#include "i915_reg.h"
>+#include "intel_wakeref.h"
>+
>+struct drm_i915_private;
>+struct file;
>+struct i915_gem_context;
>+struct i915_vma;
>+struct intel_context;
>+struct intel_engine_cs;
>+
>+struct i915_oa_format {
>+	u32 format;
>+	int size;
>+};
>+
>+struct i915_oa_reg {
>+	i915_reg_t addr;
>+	u32 value;
>+};
>+
>+struct i915_oa_config {
>+	char uuid[UUID_STRING_LEN + 1];
>+	int id;
>+
>+	const struct i915_oa_reg *mux_regs;
>+	u32 mux_regs_len;
>+	const struct i915_oa_reg *b_counter_regs;
>+	u32 b_counter_regs_len;
>+	const struct i915_oa_reg *flex_regs;
>+	u32 flex_regs_len;
>+
>+	struct attribute_group sysfs_metric;
>+	struct attribute *attrs[2];
>+	struct device_attribute sysfs_metric_id;
>+
>+	atomic_t ref_count;
>+};
>+
>+struct i915_perf_stream;
>+
>+/**
>+ * struct i915_perf_stream_ops - the OPs to support a specific stream type
>+ */
>+struct i915_perf_stream_ops {
>+	/**
>+	 * @enable: Enables the collection of HW samples, either in response to
>+	 * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
>+	 * without `I915_PERF_FLAG_DISABLED`.
>+	 */
>+	void (*enable)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @disable: Disables the collection of HW samples, either in response
>+	 * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
>+	 * the stream.
>+	 */
>+	void (*disable)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @poll_wait: Call poll_wait, passing a wait queue that will be woken
>+	 * once there is something ready to read() for the stream
>+	 */
>+	void (*poll_wait)(struct i915_perf_stream *stream,
>+			  struct file *file,
>+			  poll_table *wait);
>+
>+	/**
>+	 * @wait_unlocked: For handling a blocking read, wait until there is
>+	 * something to ready to read() for the stream. E.g. wait on the same
>+	 * wait queue that would be passed to poll_wait().
>+	 */
>+	int (*wait_unlocked)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @read: Copy buffered metrics as records to userspace
>+	 * **buf**: the userspace, destination buffer
>+	 * **count**: the number of bytes to copy, requested by userspace
>+	 * **offset**: zero at the start of the read, updated as the read
>+	 * proceeds, it represents how many bytes have been copied so far and
>+	 * the buffer offset for copying the next record.
>+	 *
>+	 * Copy as many buffered i915 perf samples and records for this stream
>+	 * to userspace as will fit in the given buffer.
>+	 *
>+	 * Only write complete records; returning -%ENOSPC if there isn't room
>+	 * for a complete record.
>+	 *
>+	 * Return any error condition that results in a short read such as
>+	 * -%ENOSPC or -%EFAULT, even though these may be squashed before
>+	 * returning to userspace.
>+	 */
>+	int (*read)(struct i915_perf_stream *stream,
>+		    char __user *buf,
>+		    size_t count,
>+		    size_t *offset);
>+
>+	/**
>+	 * @destroy: Cleanup any stream specific resources.
>+	 *
>+	 * The stream will always be disabled before this is called.
>+	 */
>+	void (*destroy)(struct i915_perf_stream *stream);
>+};
>+
>+/**
>+ * struct i915_perf_stream - state for a single open stream FD
>+ */
>+struct i915_perf_stream {
>+	/**
>+	 * @dev_priv: i915 drm device
>+	 */
>+	struct drm_i915_private *dev_priv;
>+
>+	/**
>+	 * @wakeref: As we keep the device awake while the perf stream is
>+	 * active, we track our runtime pm reference for later release.
>+	 */
>+	intel_wakeref_t wakeref;
>+
>+	/**
>+	 * @engine: Engine associated with this performance stream.
>+	 */
>+	struct intel_engine_cs *engine;
>+
>+	/**
>+	 * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
>+	 * properties given when opening a stream, representing the contents
>+	 * of a single sample as read() by userspace.
>+	 */
>+	u32 sample_flags;
>+
>+	/**
>+	 * @sample_size: Considering the configured contents of a sample
>+	 * combined with the required header size, this is the total size
>+	 * of a single sample record.
>+	 */
>+	int sample_size;
>+
>+	/**
>+	 * @ctx: %NULL if measuring system-wide across all contexts or a
>+	 * specific context that is being monitored.
>+	 */
>+	struct i915_gem_context *ctx;
>+
>+	/**
>+	 * @enabled: Whether the stream is currently enabled, considering
>+	 * whether the stream was opened in a disabled state and based
>+	 * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
>+	 */
>+	bool enabled;
>+
>+	/**
>+	 * @ops: The callbacks providing the implementation of this specific
>+	 * type of configured stream.
>+	 */
>+	const struct i915_perf_stream_ops *ops;
>+
>+	/**
>+	 * @oa_config: The OA configuration used by the stream.
>+	 */
>+	struct i915_oa_config *oa_config;
>+
>+	/**
>+	 * The OA context specific information.
>+	 */
>+	struct intel_context *pinned_ctx;
>+	u32 specific_ctx_id;
>+	u32 specific_ctx_id_mask;
>+
>+	struct hrtimer poll_check_timer;
>+	wait_queue_head_t poll_wq;
>+	bool pollin;
>+
>+	bool periodic;
>+	int period_exponent;
>+
>+	/**
>+	 * State of the OA buffer.
>+	 */
>+	struct {
>+		struct i915_vma *vma;
>+		u8 *vaddr;
>+		u32 last_ctx_id;
>+		int format;
>+		int format_size;
>+		int size_exponent;
>+
>+		/**
>+		 * Locks reads and writes to all head/tail state
>+		 *
>+		 * Consider: the head and tail pointer state needs to be read
>+		 * consistently from a hrtimer callback (atomic context) and
>+		 * read() fop (user context) with tail pointer updates happening
>+		 * in atomic context and head updates in user context and the
>+		 * (unlikely) possibility of read() errors needing to reset all
>+		 * head/tail state.
>+		 *
>+		 * Note: Contention/performance aren't currently a significant
>+		 * concern here considering the relatively low frequency of
>+		 * hrtimer callbacks (5ms period) and that reads typically only
>+		 * happen in response to a hrtimer event and likely complete
>+		 * before the next callback.
>+		 *
>+		 * Note: This lock is not held *while* reading and copying data
>+		 * to userspace so the value of head observed in htrimer
>+		 * callbacks won't represent any partial consumption of data.
>+		 */
>+		spinlock_t ptr_lock;
>+
>+		/**
>+		 * One 'aging' tail pointer and one 'aged' tail pointer ready to
>+		 * used for reading.
>+		 *
>+		 * Initial values of 0xffffffff are invalid and imply that an
>+		 * update is required (and should be ignored by an attempted
>+		 * read)
>+		 */
>+		struct {
>+			u32 offset;
>+		} tails[2];
>+
>+		/**
>+		 * Index for the aged tail ready to read() data up to.
>+		 */
>+		unsigned int aged_tail_idx;
>+
>+		/**
>+		 * A monotonic timestamp for when the current aging tail pointer
>+		 * was read; used to determine when it is old enough to trust.
>+		 */
>+		u64 aging_timestamp;
>+
>+		/**
>+		 * Although we can always read back the head pointer register,
>+		 * we prefer to avoid trusting the HW state, just to avoid any
>+		 * risk that some hardware condition could * somehow bump the
>+		 * head pointer unpredictably and cause us to forward the wrong
>+		 * OA buffer data to userspace.
>+		 */
>+		u32 head;
>+	} oa_buffer;
>+};
>+
>+/**
>+ * struct i915_oa_ops - Gen specific implementation of an OA unit stream
>+ */
>+struct i915_oa_ops {
>+	/**
>+	 * @is_valid_b_counter_reg: Validates register's address for
>+	 * programming boolean counters for a particular platform.
>+	 */
>+	bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
>+				       u32 addr);
>+
>+	/**
>+	 * @is_valid_mux_reg: Validates register's address for programming mux
>+	 * for a particular platform.
>+	 */
>+	bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
>+
>+	/**
>+	 * @is_valid_flex_reg: Validates register's address for programming
>+	 * flex EU filtering for a particular platform.
>+	 */
>+	bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
>+
>+	/**
>+	 * @enable_metric_set: Selects and applies any MUX configuration to set
>+	 * up the Boolean and Custom (B/C) counters that are part of the
>+	 * counter reports being sampled. May apply system constraints such as
>+	 * disabling EU clock gating as required.
>+	 */
>+	int (*enable_metric_set)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @disable_metric_set: Remove system constraints associated with using
>+	 * the OA unit.
>+	 */
>+	void (*disable_metric_set)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @oa_enable: Enable periodic sampling
>+	 */
>+	void (*oa_enable)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @oa_disable: Disable periodic sampling
>+	 */
>+	void (*oa_disable)(struct i915_perf_stream *stream);
>+
>+	/**
>+	 * @read: Copy data from the circular OA buffer into a given userspace
>+	 * buffer.
>+	 */
>+	int (*read)(struct i915_perf_stream *stream,
>+		    char __user *buf,
>+		    size_t count,
>+		    size_t *offset);
>+
>+	/**
>+	 * @oa_hw_tail_read: read the OA tail pointer register
>+	 *
>+	 * In particular this enables us to share all the fiddly code for
>+	 * handling the OA unit tail pointer race that affects multiple
>+	 * generations.
>+	 */
>+	u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
>+};
>+
>+#endif /* _I915_PERF_TYPES_H_ */
>-- 
>2.23.0
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx at lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx


More information about the Intel-gfx mailing list