[Intel-gfx] [PATCH v16 06/13] drm/i915/perf: move perf types to their own header
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Tue Sep 10 17:35:36 UTC 2019
Looks good. The struct perf inside drm_i915_private could also move to
i915_perf_types.h. Irrespective of that,
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Regards,
Umesh
On Mon, Sep 09, 2019 at 12:31:09PM +0300, Lionel Landwerlin wrote:
>Following a pattern used throughout the driver.
>
>Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>---
> drivers/gpu/drm/i915/i915_drv.h | 300 +----------------------
> drivers/gpu/drm/i915/i915_perf.h | 2 +
> drivers/gpu/drm/i915/i915_perf_types.h | 327 +++++++++++++++++++++++++
> 3 files changed, 330 insertions(+), 299 deletions(-)
> create mode 100644 drivers/gpu/drm/i915/i915_perf_types.h
>
>diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>index 274a1193d4f0..f4145ae6ab6e 100644
>--- a/drivers/gpu/drm/i915/i915_drv.h
>+++ b/drivers/gpu/drm/i915/i915_drv.h
>@@ -92,6 +92,7 @@
> #include "i915_gem_fence_reg.h"
> #include "i915_gem_gtt.h"
> #include "i915_gpu_error.h"
>+#include "i915_perf_types.h"
> #include "i915_request.h"
> #include "i915_scheduler.h"
> #include "gt/intel_timeline.h"
>@@ -979,305 +980,6 @@ struct intel_wm_config {
> bool sprites_scaled;
> };
>
>-struct i915_oa_format {
>- u32 format;
>- int size;
>-};
>-
>-struct i915_oa_reg {
>- i915_reg_t addr;
>- u32 value;
>-};
>-
>-struct i915_oa_config {
>- char uuid[UUID_STRING_LEN + 1];
>- int id;
>-
>- const struct i915_oa_reg *mux_regs;
>- u32 mux_regs_len;
>- const struct i915_oa_reg *b_counter_regs;
>- u32 b_counter_regs_len;
>- const struct i915_oa_reg *flex_regs;
>- u32 flex_regs_len;
>-
>- struct attribute_group sysfs_metric;
>- struct attribute *attrs[2];
>- struct device_attribute sysfs_metric_id;
>-
>- atomic_t ref_count;
>-};
>-
>-struct i915_perf_stream;
>-
>-/**
>- * struct i915_perf_stream_ops - the OPs to support a specific stream type
>- */
>-struct i915_perf_stream_ops {
>- /**
>- * @enable: Enables the collection of HW samples, either in response to
>- * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
>- * without `I915_PERF_FLAG_DISABLED`.
>- */
>- void (*enable)(struct i915_perf_stream *stream);
>-
>- /**
>- * @disable: Disables the collection of HW samples, either in response
>- * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
>- * the stream.
>- */
>- void (*disable)(struct i915_perf_stream *stream);
>-
>- /**
>- * @poll_wait: Call poll_wait, passing a wait queue that will be woken
>- * once there is something ready to read() for the stream
>- */
>- void (*poll_wait)(struct i915_perf_stream *stream,
>- struct file *file,
>- poll_table *wait);
>-
>- /**
>- * @wait_unlocked: For handling a blocking read, wait until there is
>- * something to ready to read() for the stream. E.g. wait on the same
>- * wait queue that would be passed to poll_wait().
>- */
>- int (*wait_unlocked)(struct i915_perf_stream *stream);
>-
>- /**
>- * @read: Copy buffered metrics as records to userspace
>- * **buf**: the userspace, destination buffer
>- * **count**: the number of bytes to copy, requested by userspace
>- * **offset**: zero at the start of the read, updated as the read
>- * proceeds, it represents how many bytes have been copied so far and
>- * the buffer offset for copying the next record.
>- *
>- * Copy as many buffered i915 perf samples and records for this stream
>- * to userspace as will fit in the given buffer.
>- *
>- * Only write complete records; returning -%ENOSPC if there isn't room
>- * for a complete record.
>- *
>- * Return any error condition that results in a short read such as
>- * -%ENOSPC or -%EFAULT, even though these may be squashed before
>- * returning to userspace.
>- */
>- int (*read)(struct i915_perf_stream *stream,
>- char __user *buf,
>- size_t count,
>- size_t *offset);
>-
>- /**
>- * @destroy: Cleanup any stream specific resources.
>- *
>- * The stream will always be disabled before this is called.
>- */
>- void (*destroy)(struct i915_perf_stream *stream);
>-};
>-
>-/**
>- * struct i915_perf_stream - state for a single open stream FD
>- */
>-struct i915_perf_stream {
>- /**
>- * @dev_priv: i915 drm device
>- */
>- struct drm_i915_private *dev_priv;
>-
>- /**
>- * @wakeref: As we keep the device awake while the perf stream is
>- * active, we track our runtime pm reference for later release.
>- */
>- intel_wakeref_t wakeref;
>-
>- /**
>- * @engine: Engine associated with this performance stream.
>- */
>- struct intel_engine_cs *engine;
>-
>- /**
>- * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
>- * properties given when opening a stream, representing the contents
>- * of a single sample as read() by userspace.
>- */
>- u32 sample_flags;
>-
>- /**
>- * @sample_size: Considering the configured contents of a sample
>- * combined with the required header size, this is the total size
>- * of a single sample record.
>- */
>- int sample_size;
>-
>- /**
>- * @ctx: %NULL if measuring system-wide across all contexts or a
>- * specific context that is being monitored.
>- */
>- struct i915_gem_context *ctx;
>-
>- /**
>- * @enabled: Whether the stream is currently enabled, considering
>- * whether the stream was opened in a disabled state and based
>- * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
>- */
>- bool enabled;
>-
>- /**
>- * @ops: The callbacks providing the implementation of this specific
>- * type of configured stream.
>- */
>- const struct i915_perf_stream_ops *ops;
>-
>- /**
>- * @oa_config: The OA configuration used by the stream.
>- */
>- struct i915_oa_config *oa_config;
>-
>- /**
>- * The OA context specific information.
>- */
>- struct intel_context *pinned_ctx;
>- u32 specific_ctx_id;
>- u32 specific_ctx_id_mask;
>-
>- struct hrtimer poll_check_timer;
>- wait_queue_head_t poll_wq;
>- bool pollin;
>-
>- bool periodic;
>- int period_exponent;
>-
>- /**
>- * State of the OA buffer.
>- */
>- struct {
>- struct i915_vma *vma;
>- u8 *vaddr;
>- u32 last_ctx_id;
>- int format;
>- int format_size;
>- int size_exponent;
>-
>- /**
>- * Locks reads and writes to all head/tail state
>- *
>- * Consider: the head and tail pointer state needs to be read
>- * consistently from a hrtimer callback (atomic context) and
>- * read() fop (user context) with tail pointer updates happening
>- * in atomic context and head updates in user context and the
>- * (unlikely) possibility of read() errors needing to reset all
>- * head/tail state.
>- *
>- * Note: Contention/performance aren't currently a significant
>- * concern here considering the relatively low frequency of
>- * hrtimer callbacks (5ms period) and that reads typically only
>- * happen in response to a hrtimer event and likely complete
>- * before the next callback.
>- *
>- * Note: This lock is not held *while* reading and copying data
>- * to userspace so the value of head observed in htrimer
>- * callbacks won't represent any partial consumption of data.
>- */
>- spinlock_t ptr_lock;
>-
>- /**
>- * One 'aging' tail pointer and one 'aged' tail pointer ready to
>- * used for reading.
>- *
>- * Initial values of 0xffffffff are invalid and imply that an
>- * update is required (and should be ignored by an attempted
>- * read)
>- */
>- struct {
>- u32 offset;
>- } tails[2];
>-
>- /**
>- * Index for the aged tail ready to read() data up to.
>- */
>- unsigned int aged_tail_idx;
>-
>- /**
>- * A monotonic timestamp for when the current aging tail pointer
>- * was read; used to determine when it is old enough to trust.
>- */
>- u64 aging_timestamp;
>-
>- /**
>- * Although we can always read back the head pointer register,
>- * we prefer to avoid trusting the HW state, just to avoid any
>- * risk that some hardware condition could * somehow bump the
>- * head pointer unpredictably and cause us to forward the wrong
>- * OA buffer data to userspace.
>- */
>- u32 head;
>- } oa_buffer;
>-};
>-
>-/**
>- * struct i915_oa_ops - Gen specific implementation of an OA unit stream
>- */
>-struct i915_oa_ops {
>- /**
>- * @is_valid_b_counter_reg: Validates register's address for
>- * programming boolean counters for a particular platform.
>- */
>- bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
>- u32 addr);
>-
>- /**
>- * @is_valid_mux_reg: Validates register's address for programming mux
>- * for a particular platform.
>- */
>- bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
>-
>- /**
>- * @is_valid_flex_reg: Validates register's address for programming
>- * flex EU filtering for a particular platform.
>- */
>- bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
>-
>- /**
>- * @enable_metric_set: Selects and applies any MUX configuration to set
>- * up the Boolean and Custom (B/C) counters that are part of the
>- * counter reports being sampled. May apply system constraints such as
>- * disabling EU clock gating as required.
>- */
>- int (*enable_metric_set)(struct i915_perf_stream *stream);
>-
>- /**
>- * @disable_metric_set: Remove system constraints associated with using
>- * the OA unit.
>- */
>- void (*disable_metric_set)(struct i915_perf_stream *stream);
>-
>- /**
>- * @oa_enable: Enable periodic sampling
>- */
>- void (*oa_enable)(struct i915_perf_stream *stream);
>-
>- /**
>- * @oa_disable: Disable periodic sampling
>- */
>- void (*oa_disable)(struct i915_perf_stream *stream);
>-
>- /**
>- * @read: Copy data from the circular OA buffer into a given userspace
>- * buffer.
>- */
>- int (*read)(struct i915_perf_stream *stream,
>- char __user *buf,
>- size_t count,
>- size_t *offset);
>-
>- /**
>- * @oa_hw_tail_read: read the OA tail pointer register
>- *
>- * In particular this enables us to share all the fiddly code for
>- * handling the OA unit tail pointer race that affects multiple
>- * generations.
>- */
>- u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
>-};
>-
> struct intel_cdclk_state {
> unsigned int cdclk, vco, ref, bypass;
> u8 voltage_level;
>diff --git a/drivers/gpu/drm/i915/i915_perf.h b/drivers/gpu/drm/i915/i915_perf.h
>index 95549de65212..270193526b56 100644
>--- a/drivers/gpu/drm/i915/i915_perf.h
>+++ b/drivers/gpu/drm/i915/i915_perf.h
>@@ -8,6 +8,8 @@
>
> #include <linux/types.h>
>
>+#include "i915_perf_types.h"
>+
> struct drm_device;
> struct drm_file;
> struct drm_i915_private;
>diff --git a/drivers/gpu/drm/i915/i915_perf_types.h b/drivers/gpu/drm/i915/i915_perf_types.h
>new file mode 100644
>index 000000000000..c6673b67d694
>--- /dev/null
>+++ b/drivers/gpu/drm/i915/i915_perf_types.h
>@@ -0,0 +1,327 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright © 2019 Intel Corporation
>+ */
>+
>+#ifndef _I915_PERF_TYPES_H_
>+#define _I915_PERF_TYPES_H_
>+
>+#include <linux/atomic.h>
>+#include <linux/device.h>
>+#include <linux/hrtimer.h>
>+#include <linux/list.h>
>+#include <linux/poll.h>
>+#include <linux/sysfs.h>
>+#include <linux/uuid.h>
>+#include <linux/wait.h>
>+
>+#include "i915_reg.h"
>+#include "intel_wakeref.h"
>+
>+struct drm_i915_private;
>+struct file;
>+struct i915_gem_context;
>+struct i915_vma;
>+struct intel_context;
>+struct intel_engine_cs;
>+
>+struct i915_oa_format {
>+ u32 format;
>+ int size;
>+};
>+
>+struct i915_oa_reg {
>+ i915_reg_t addr;
>+ u32 value;
>+};
>+
>+struct i915_oa_config {
>+ char uuid[UUID_STRING_LEN + 1];
>+ int id;
>+
>+ const struct i915_oa_reg *mux_regs;
>+ u32 mux_regs_len;
>+ const struct i915_oa_reg *b_counter_regs;
>+ u32 b_counter_regs_len;
>+ const struct i915_oa_reg *flex_regs;
>+ u32 flex_regs_len;
>+
>+ struct attribute_group sysfs_metric;
>+ struct attribute *attrs[2];
>+ struct device_attribute sysfs_metric_id;
>+
>+ atomic_t ref_count;
>+};
>+
>+struct i915_perf_stream;
>+
>+/**
>+ * struct i915_perf_stream_ops - the OPs to support a specific stream type
>+ */
>+struct i915_perf_stream_ops {
>+ /**
>+ * @enable: Enables the collection of HW samples, either in response to
>+ * `I915_PERF_IOCTL_ENABLE` or implicitly called when stream is opened
>+ * without `I915_PERF_FLAG_DISABLED`.
>+ */
>+ void (*enable)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @disable: Disables the collection of HW samples, either in response
>+ * to `I915_PERF_IOCTL_DISABLE` or implicitly called before destroying
>+ * the stream.
>+ */
>+ void (*disable)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @poll_wait: Call poll_wait, passing a wait queue that will be woken
>+ * once there is something ready to read() for the stream
>+ */
>+ void (*poll_wait)(struct i915_perf_stream *stream,
>+ struct file *file,
>+ poll_table *wait);
>+
>+ /**
>+ * @wait_unlocked: For handling a blocking read, wait until there is
>+ * something to ready to read() for the stream. E.g. wait on the same
>+ * wait queue that would be passed to poll_wait().
>+ */
>+ int (*wait_unlocked)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @read: Copy buffered metrics as records to userspace
>+ * **buf**: the userspace, destination buffer
>+ * **count**: the number of bytes to copy, requested by userspace
>+ * **offset**: zero at the start of the read, updated as the read
>+ * proceeds, it represents how many bytes have been copied so far and
>+ * the buffer offset for copying the next record.
>+ *
>+ * Copy as many buffered i915 perf samples and records for this stream
>+ * to userspace as will fit in the given buffer.
>+ *
>+ * Only write complete records; returning -%ENOSPC if there isn't room
>+ * for a complete record.
>+ *
>+ * Return any error condition that results in a short read such as
>+ * -%ENOSPC or -%EFAULT, even though these may be squashed before
>+ * returning to userspace.
>+ */
>+ int (*read)(struct i915_perf_stream *stream,
>+ char __user *buf,
>+ size_t count,
>+ size_t *offset);
>+
>+ /**
>+ * @destroy: Cleanup any stream specific resources.
>+ *
>+ * The stream will always be disabled before this is called.
>+ */
>+ void (*destroy)(struct i915_perf_stream *stream);
>+};
>+
>+/**
>+ * struct i915_perf_stream - state for a single open stream FD
>+ */
>+struct i915_perf_stream {
>+ /**
>+ * @dev_priv: i915 drm device
>+ */
>+ struct drm_i915_private *dev_priv;
>+
>+ /**
>+ * @wakeref: As we keep the device awake while the perf stream is
>+ * active, we track our runtime pm reference for later release.
>+ */
>+ intel_wakeref_t wakeref;
>+
>+ /**
>+ * @engine: Engine associated with this performance stream.
>+ */
>+ struct intel_engine_cs *engine;
>+
>+ /**
>+ * @sample_flags: Flags representing the `DRM_I915_PERF_PROP_SAMPLE_*`
>+ * properties given when opening a stream, representing the contents
>+ * of a single sample as read() by userspace.
>+ */
>+ u32 sample_flags;
>+
>+ /**
>+ * @sample_size: Considering the configured contents of a sample
>+ * combined with the required header size, this is the total size
>+ * of a single sample record.
>+ */
>+ int sample_size;
>+
>+ /**
>+ * @ctx: %NULL if measuring system-wide across all contexts or a
>+ * specific context that is being monitored.
>+ */
>+ struct i915_gem_context *ctx;
>+
>+ /**
>+ * @enabled: Whether the stream is currently enabled, considering
>+ * whether the stream was opened in a disabled state and based
>+ * on `I915_PERF_IOCTL_ENABLE` and `I915_PERF_IOCTL_DISABLE` calls.
>+ */
>+ bool enabled;
>+
>+ /**
>+ * @ops: The callbacks providing the implementation of this specific
>+ * type of configured stream.
>+ */
>+ const struct i915_perf_stream_ops *ops;
>+
>+ /**
>+ * @oa_config: The OA configuration used by the stream.
>+ */
>+ struct i915_oa_config *oa_config;
>+
>+ /**
>+ * The OA context specific information.
>+ */
>+ struct intel_context *pinned_ctx;
>+ u32 specific_ctx_id;
>+ u32 specific_ctx_id_mask;
>+
>+ struct hrtimer poll_check_timer;
>+ wait_queue_head_t poll_wq;
>+ bool pollin;
>+
>+ bool periodic;
>+ int period_exponent;
>+
>+ /**
>+ * State of the OA buffer.
>+ */
>+ struct {
>+ struct i915_vma *vma;
>+ u8 *vaddr;
>+ u32 last_ctx_id;
>+ int format;
>+ int format_size;
>+ int size_exponent;
>+
>+ /**
>+ * Locks reads and writes to all head/tail state
>+ *
>+ * Consider: the head and tail pointer state needs to be read
>+ * consistently from a hrtimer callback (atomic context) and
>+ * read() fop (user context) with tail pointer updates happening
>+ * in atomic context and head updates in user context and the
>+ * (unlikely) possibility of read() errors needing to reset all
>+ * head/tail state.
>+ *
>+ * Note: Contention/performance aren't currently a significant
>+ * concern here considering the relatively low frequency of
>+ * hrtimer callbacks (5ms period) and that reads typically only
>+ * happen in response to a hrtimer event and likely complete
>+ * before the next callback.
>+ *
>+ * Note: This lock is not held *while* reading and copying data
>+ * to userspace so the value of head observed in htrimer
>+ * callbacks won't represent any partial consumption of data.
>+ */
>+ spinlock_t ptr_lock;
>+
>+ /**
>+ * One 'aging' tail pointer and one 'aged' tail pointer ready to
>+ * used for reading.
>+ *
>+ * Initial values of 0xffffffff are invalid and imply that an
>+ * update is required (and should be ignored by an attempted
>+ * read)
>+ */
>+ struct {
>+ u32 offset;
>+ } tails[2];
>+
>+ /**
>+ * Index for the aged tail ready to read() data up to.
>+ */
>+ unsigned int aged_tail_idx;
>+
>+ /**
>+ * A monotonic timestamp for when the current aging tail pointer
>+ * was read; used to determine when it is old enough to trust.
>+ */
>+ u64 aging_timestamp;
>+
>+ /**
>+ * Although we can always read back the head pointer register,
>+ * we prefer to avoid trusting the HW state, just to avoid any
>+ * risk that some hardware condition could * somehow bump the
>+ * head pointer unpredictably and cause us to forward the wrong
>+ * OA buffer data to userspace.
>+ */
>+ u32 head;
>+ } oa_buffer;
>+};
>+
>+/**
>+ * struct i915_oa_ops - Gen specific implementation of an OA unit stream
>+ */
>+struct i915_oa_ops {
>+ /**
>+ * @is_valid_b_counter_reg: Validates register's address for
>+ * programming boolean counters for a particular platform.
>+ */
>+ bool (*is_valid_b_counter_reg)(struct drm_i915_private *dev_priv,
>+ u32 addr);
>+
>+ /**
>+ * @is_valid_mux_reg: Validates register's address for programming mux
>+ * for a particular platform.
>+ */
>+ bool (*is_valid_mux_reg)(struct drm_i915_private *dev_priv, u32 addr);
>+
>+ /**
>+ * @is_valid_flex_reg: Validates register's address for programming
>+ * flex EU filtering for a particular platform.
>+ */
>+ bool (*is_valid_flex_reg)(struct drm_i915_private *dev_priv, u32 addr);
>+
>+ /**
>+ * @enable_metric_set: Selects and applies any MUX configuration to set
>+ * up the Boolean and Custom (B/C) counters that are part of the
>+ * counter reports being sampled. May apply system constraints such as
>+ * disabling EU clock gating as required.
>+ */
>+ int (*enable_metric_set)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @disable_metric_set: Remove system constraints associated with using
>+ * the OA unit.
>+ */
>+ void (*disable_metric_set)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @oa_enable: Enable periodic sampling
>+ */
>+ void (*oa_enable)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @oa_disable: Disable periodic sampling
>+ */
>+ void (*oa_disable)(struct i915_perf_stream *stream);
>+
>+ /**
>+ * @read: Copy data from the circular OA buffer into a given userspace
>+ * buffer.
>+ */
>+ int (*read)(struct i915_perf_stream *stream,
>+ char __user *buf,
>+ size_t count,
>+ size_t *offset);
>+
>+ /**
>+ * @oa_hw_tail_read: read the OA tail pointer register
>+ *
>+ * In particular this enables us to share all the fiddly code for
>+ * handling the OA unit tail pointer race that affects multiple
>+ * generations.
>+ */
>+ u32 (*oa_hw_tail_read)(struct i915_perf_stream *stream);
>+};
>+
>+#endif /* _I915_PERF_TYPES_H_ */
>--
>2.23.0
>
>_______________________________________________
>Intel-gfx mailing list
>Intel-gfx at lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list