[Intel-gfx] [PATCH 3/3] drm/i915: move gpu error sysfs to i915_gpu_error.c
John Harrison
john.c.harrison at intel.com
Wed Oct 11 17:25:44 UTC 2023
On 10/11/2023 09:38, Jani Nikula wrote:
> Hide gpu error specifics in i915_gpu_error.c. This is also cleaner wrt
> conditional compilation, as i915_gpu_error.c is only built with
> DRM_I915_CAPTURE_ERROR=y.
>
> With this, we can also make i915_first_error_state() static.
>
> Signed-off-by: Jani Nikula <jani.nikula at intel.com>
> ---
> drivers/gpu/drm/i915/i915_gpu_error.c | 75 ++++++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_gpu_error.h | 17 +++---
> drivers/gpu/drm/i915/i915_sysfs.c | 79 +--------------------------
> 3 files changed, 86 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index b4c8459deb7b..f9e750217f18 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -57,6 +57,7 @@
> #include "i915_memcpy.h"
> #include "i915_reg.h"
> #include "i915_scatterlist.h"
> +#include "i915_sysfs.h"
> #include "i915_utils.h"
>
> #define ALLOW_FAIL (__GFP_KSWAPD_RECLAIM | __GFP_RETRY_MAYFAIL | __GFP_NOWARN)
> @@ -2208,7 +2209,7 @@ void i915_capture_error_state(struct intel_gt *gt,
> i915_gpu_coredump_put(error);
> }
>
> -struct i915_gpu_coredump *
> +static struct i915_gpu_coredump *
> i915_first_error_state(struct drm_i915_private *i915)
> {
> struct i915_gpu_coredump *error;
> @@ -2484,3 +2485,75 @@ void i915_gpu_error_debugfs_register(struct drm_i915_private *i915)
> debugfs_create_file("i915_gpu_info", 0644, minor->debugfs_root, i915,
> &i915_gpu_info_fops);
> }
> +
> +static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
> + struct bin_attribute *attr, char *buf,
> + loff_t off, size_t count)
> +{
> +
> + struct device *kdev = kobj_to_dev(kobj);
> + struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
> + struct i915_gpu_coredump *gpu;
> + ssize_t ret = 0;
> +
> + /*
> + * FIXME: Concurrent clients triggering resets and reading + clearing
> + * dumps can cause inconsistent sysfs reads when a user calls in with a
> + * non-zero offset to complete a prior partial read but the
> + * gpu_coredump has been cleared or replaced.
> + */
> +
> + gpu = i915_first_error_state(i915);
> + if (IS_ERR(gpu)) {
> + ret = PTR_ERR(gpu);
> + } else if (gpu) {
> + ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
> + i915_gpu_coredump_put(gpu);
> + } else {
> + const char *str = "No error state collected\n";
> + size_t len = strlen(str);
> +
> + if (off < len) {
> + ret = min_t(size_t, count, len - off);
> + memcpy(buf, str + off, ret);
> + }
> + }
Can this and the debugfs equivalent not share common code? The two
implementations seem conceptually the same, even though the code
currently looks quite different.

John.
> +
> + return ret;
> +}
> +
> +static ssize_t error_state_write(struct file *file, struct kobject *kobj,
> + struct bin_attribute *attr, char *buf,
> + loff_t off, size_t count)
> +{
> + struct device *kdev = kobj_to_dev(kobj);
> + struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> +
> + drm_dbg(&dev_priv->drm, "Resetting error state\n");
> + i915_reset_error_state(dev_priv);
> +
> + return count;
> +}
> +
> +static const struct bin_attribute error_state_attr = {
> + .attr.name = "error",
> + .attr.mode = S_IRUSR | S_IWUSR,
> + .size = 0,
> + .read = error_state_read,
> + .write = error_state_write,
> +};
> +
> +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
> +{
> + struct device *kdev = i915->drm.primary->kdev;
> +
> + if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
> + drm_err(&i915->drm, "error_state sysfs setup failed\n");
> +}
> +
> +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
> +{
> + struct device *kdev = i915->drm.primary->kdev;
> +
> + sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
> +}
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h b/drivers/gpu/drm/i915/i915_gpu_error.h
> index a6f2a7518cf0..68c964d6720a 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.h
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
> @@ -323,11 +323,12 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
> kref_put(&gpu->ref, __i915_gpu_coredump_free);
> }
>
> -struct i915_gpu_coredump *i915_first_error_state(struct drm_i915_private *i915);
> void i915_reset_error_state(struct drm_i915_private *i915);
> void i915_disable_error_state(struct drm_i915_private *i915, int err);
>
> void i915_gpu_error_debugfs_register(struct drm_i915_private *i915);
> +void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915);
> +void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915);
>
> #else
>
> @@ -396,12 +397,6 @@ static inline void i915_gpu_coredump_put(struct i915_gpu_coredump *gpu)
> {
> }
>
> -static inline struct i915_gpu_coredump *
> -i915_first_error_state(struct drm_i915_private *i915)
> -{
> - return ERR_PTR(-ENODEV);
> -}
> -
> static inline void i915_reset_error_state(struct drm_i915_private *i915)
> {
> }
> @@ -415,6 +410,14 @@ static inline void i915_gpu_error_debugfs_register(struct drm_i915_private *i915
> {
> }
>
> +static inline void i915_gpu_error_sysfs_setup(struct drm_i915_private *i915)
> +{
> +}
> +
> +static inline void i915_gpu_error_sysfs_teardown(struct drm_i915_private *i915)
> +{
> +}
> +
> #endif /* IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) */
>
> #endif /* _I915_GPU_ERROR_H_ */
> diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
> index e88bb4f04305..613decd47760 100644
> --- a/drivers/gpu/drm/i915/i915_sysfs.c
> +++ b/drivers/gpu/drm/i915/i915_sysfs.c
> @@ -155,81 +155,6 @@ static const struct bin_attribute dpf_attrs_1 = {
> .private = (void *)1
> };
>
> -#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
> -
> -static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
> - struct bin_attribute *attr, char *buf,
> - loff_t off, size_t count)
> -{
> -
> - struct device *kdev = kobj_to_dev(kobj);
> - struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
> - struct i915_gpu_coredump *gpu;
> - ssize_t ret = 0;
> -
> - /*
> - * FIXME: Concurrent clients triggering resets and reading + clearing
> - * dumps can cause inconsistent sysfs reads when a user calls in with a
> - * non-zero offset to complete a prior partial read but the
> - * gpu_coredump has been cleared or replaced.
> - */
> -
> - gpu = i915_first_error_state(i915);
> - if (IS_ERR(gpu)) {
> - ret = PTR_ERR(gpu);
> - } else if (gpu) {
> - ret = i915_gpu_coredump_copy_to_buffer(gpu, buf, off, count);
> - i915_gpu_coredump_put(gpu);
> - } else {
> - const char *str = "No error state collected\n";
> - size_t len = strlen(str);
> -
> - if (off < len) {
> - ret = min_t(size_t, count, len - off);
> - memcpy(buf, str + off, ret);
> - }
> - }
> -
> - return ret;
> -}
> -
> -static ssize_t error_state_write(struct file *file, struct kobject *kobj,
> - struct bin_attribute *attr, char *buf,
> - loff_t off, size_t count)
> -{
> - struct device *kdev = kobj_to_dev(kobj);
> - struct drm_i915_private *dev_priv = kdev_minor_to_i915(kdev);
> -
> - drm_dbg(&dev_priv->drm, "Resetting error state\n");
> - i915_reset_error_state(dev_priv);
> -
> - return count;
> -}
> -
> -static const struct bin_attribute error_state_attr = {
> - .attr.name = "error",
> - .attr.mode = S_IRUSR | S_IWUSR,
> - .size = 0,
> - .read = error_state_read,
> - .write = error_state_write,
> -};
> -
> -static void i915_setup_error_capture(struct device *kdev)
> -{
> - if (sysfs_create_bin_file(&kdev->kobj, &error_state_attr))
> - drm_err(&kdev_minor_to_i915(kdev)->drm,
> - "error_state sysfs setup failed\n");
> -}
> -
> -static void i915_teardown_error_capture(struct device *kdev)
> -{
> - sysfs_remove_bin_file(&kdev->kobj, &error_state_attr);
> -}
> -#else
> -static void i915_setup_error_capture(struct device *kdev) {}
> -static void i915_teardown_error_capture(struct device *kdev) {}
> -#endif
> -
> void i915_setup_sysfs(struct drm_i915_private *dev_priv)
> {
> struct device *kdev = dev_priv->drm.primary->kdev;
> @@ -255,7 +180,7 @@ void i915_setup_sysfs(struct drm_i915_private *dev_priv)
> drm_warn(&dev_priv->drm,
> "failed to register GT sysfs directory\n");
>
> - i915_setup_error_capture(kdev);
> + i915_gpu_error_sysfs_setup(dev_priv);
>
> intel_engines_add_sysfs(dev_priv);
> }
> @@ -264,7 +189,7 @@ void i915_teardown_sysfs(struct drm_i915_private *dev_priv)
> {
> struct device *kdev = dev_priv->drm.primary->kdev;
>
> - i915_teardown_error_capture(kdev);
> + i915_gpu_error_sysfs_teardown(dev_priv);
>
> device_remove_bin_file(kdev, &dpf_attrs_1);
> device_remove_bin_file(kdev, &dpf_attrs);
More information about the Intel-gfx mailing list