[Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output

Daniel Vetter daniel at ffwll.ch
Tue Mar 12 10:58:10 UTC 2019


On Mon, Mar 11, 2019 at 11:33:15PM +0100, Noralf Trønnes wrote:
> 
> 
> Den 11.03.2019 20.23, skrev Daniel Vetter:
> > On Mon, Mar 11, 2019 at 06:42:16PM +0100, Noralf Trønnes wrote:
> >> This adds support for outputting kernel messages on panic().
> >> A kernel message dumper is used to dump the log. The dumper iterates
> >> over each DRM device and it's crtc's to find suitable framebuffers.
> >>
> >> All the other dumpers are run before this one except mtdoops.
> >> Only atomic drivers are supported.
> >>
> >> Signed-off-by: Noralf Trønnes <noralf at tronnes.org>
> > 
> > Bunch of comments/ideas for you or Darwish below, whoever picks this up.
> 
> Actually it would ne nice if Darwish could pick it up since he will do
> it on i915 which will be useful to a much broader audience.
> If not I'll respin when I'm done with the drm_fb_helper refactoring.
> 
> <snip>
> 
> >> diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c
> >> new file mode 100644
> >> index 000000000000..4bca7f0bc369
> >> --- /dev/null
> >> +++ b/drivers/gpu/drm/drm_panic.c
> >> @@ -0,0 +1,363 @@
> >> +// SPDX-License-Identifier: GPL-2.0
> >> +// Copyright 2018 Noralf Trønnes
> >> +
> >> +#include <linux/debugfs.h>
> >> +#include <linux/font.h>
> >> +#include <linux/kernel.h>
> >> +#include <linux/kmsg_dump.h>
> >> +#include <linux/seq_file.h>
> >> +#include <linux/slab.h>
> >> +#include <linux/uaccess.h>
> >> +
> >> +#include <drm/drm_crtc.h>
> >> +#include <drm/drm_device.h>
> >> +#include <drm/drm_drv.h>
> >> +#include <drm/drm_file.h>
> >> +#include <drm/drm_framebuffer.h>
> >> +#include <drm/drm_fb_helper.h>
> >> +#include <drm/drm_plane.h>
> >> +#include <drm/drm_print.h>
> >> +#include <drm/drmP.h>
> >> +
> >> +#include "drm_internal.h"
> >> +
> >> +/*
> >> + * The log lines in an ARM stack dump are 92 characters long
> >> + * and 120 is a nice multiple for HD and 4K.
> >> + */
> >> +#define DRM_PANIC_COLUMN_WIDTH	120
> > 
> > I think we should compute this dynamically from the actual fb width and
> > font witdth.
> 
> The font is picked based on whether it can fit 50 lines of text.
> 
> The next rule is to decide when to add another column. I decided to use
> a number that gets most log lines in one line on the screen. If most
> lines get broken into two lines, there's not much use in an extra column.
> 
> Do you have another idea on how to do this?
> 
> (There's even a 16x32 font now that wasn't available when I did this.)

Oh I had no idea that this is for multi-column oops printing. That's
indeed neat for 4k screens.
> 
> > 
> >> +
> >> +struct drm_panic_ctx {
> >> +	struct drm_framebuffer *fb;
> >> +	unsigned int width;
> >> +	unsigned int height;
> >> +	void *vmap;
> >> +
> >> +	const struct font_desc *font;
> >> +	unsigned int col_width;
> >> +	unsigned int bottom_y;
> >> +	size_t max_line_length;
> >> +
> >> +	unsigned int x;
> >> +	unsigned int y;
> >> +};
> >> +
> >> +static const struct font_desc *drm_panic_font8x8, *drm_panic_font8x16;
> >> +
> >> +static void drm_panic_draw_xy(struct drm_framebuffer *fb, void *vmap,
> >> +			      int x, int y, bool fg)
> >> +{
> >> +	if (fb->funcs->panic_draw_xy)
> >> +		fb->funcs->panic_draw_xy(fb, vmap, x, y, fg);
> >> +	else
> >> +		drm_framebuffer_panic_draw_xy(fb, vmap, x, y, fg);
> >> +}
> >> +
> >> +static void drm_panic_render_char(struct drm_panic_ctx *ctx,
> >> +				  unsigned int offset, char c)
> >> +{
> >> +	unsigned int h, w, x, y;
> >> +	u8 fontline;
> >> +
> >> +	for (h = 0; h < ctx->font->height; h++) {
> >> +		fontline = *(u8 *)(ctx->font->data + c * ctx->font->height + h);
> >> +
> >> +		for (w = 0; w < ctx->font->width; w++) {
> >> +			x = ctx->x + (offset * ctx->font->width) + w;
> >> +			y = ctx->y + h;
> >> +			drm_panic_draw_xy(ctx->fb, ctx->vmap, x, y,
> >> +					  fontline & BIT(7 - w));
> >> +		}
> >> +	}
> >> +}
> >> +
> >> +static void drm_panic_render_line(struct drm_panic_ctx *ctx,
> >> +				  const char *line, size_t len)
> >> +{
> >> +	unsigned int i;
> >> +
> >> +	for (i = 0; i < len; i++)
> >> +		drm_panic_render_char(ctx, i, line[i]);
> >> +
> >> +	/* Clear out the rest of the line */
> >> +	for (i = len; i < ctx->max_line_length; i++)
> >> +		drm_panic_render_char(ctx, i, ' ');
> >> +}
> >> +
> >> +static bool drm_panic_newline(struct drm_panic_ctx *ctx)
> >> +{
> >> +	if (ctx->x == 0 && ctx->y == 0)
> >> +		return false;
> >> +	if (ctx->y == 0) {
> >> +		ctx->x -= ctx->col_width;
> >> +		ctx->y = ctx->bottom_y;
> >> +	} else {
> >> +		ctx->y -= ctx->font->height;
> >> +	}
> >> +
> >> +	return true;
> >> +}
> >> +
> >> +/* Render from bottom right most column */
> >> +static bool drm_panic_render_lines(struct drm_panic_ctx *ctx,
> >> +				   const char *str, size_t len)
> >> +{
> >> +	size_t l, line_length;
> >> +	const char *pos;
> >> +	int i;
> >> +
> >> +	while (len) {
> >> +		pos = str + len - 1;
> >> +
> >> +		if (*pos == '\n') {
> >> +			len--;
> >> +			if (!drm_panic_newline(ctx))
> >> +				return false;
> >> +			continue;
> >> +		}
> >> +
> >> +		while (pos > str && *(pos - 1) != '\n')
> >> +			pos--;
> >> +
> >> +		line_length = len - (pos - str);
> >> +
> >> +		if (!line_length || len < line_length) {
> >> +			pr_err("%s: Bug! line_length=%zu len=%zu\n",
> >> +			       __func__, line_length, len);
> >> +			return false;
> >> +		}
> >> +
> >> +		len -= line_length;
> >> +
> >> +		for (i = DIV_ROUND_UP(line_length, ctx->max_line_length) - 1; i >= 0; i--) {
> >> +			l = min(ctx->max_line_length, line_length - i * ctx->max_line_length);
> >> +			drm_panic_render_line(ctx, pos + (i * ctx->max_line_length), l);
> >> +			if (i && !drm_panic_newline(ctx))
> >> +				return false;
> >> +		}
> >> +	}
> >> +
> >> +	return true;
> >> +}
> >> +
> >> +static void drm_panic_kmsg_render_screen(struct drm_plane *plane,
> >> +					 struct kmsg_dumper *dumper)
> >> +{
> >> +	struct drm_framebuffer *fb = plane->state->fb;
> >> +	bool first_iteration = true;
> >> +	struct drm_panic_ctx ctx;
> >> +	static char text[1024];
> >> +	size_t len;
> >> +
> >> +	ctx.vmap = fb->funcs->panic_vmap(fb);
> >> +
> >> +	/* Print some info when testing */
> >> +	if (dumper->max_reason == KMSG_DUMP_OOPS) {
> >> +		struct drm_format_name_buf format_name;
> >> +
> >> +		pr_info("%s: [FB:%d] %ux%u format=%s vmap=%p\n",
> >> +			__func__, fb->base.id, fb->width, fb->height,
> >> +			drm_get_format_name(fb->format->format, &format_name),
> >> +			ctx.vmap);
> >> +	}
> >> +
> >> +	if (!ctx.vmap)
> >> +		return;
> > 
> > For some framebuffers it might be useful to not require vmap, but instead
> > have some page-by-page kind of access helper. Since preemptively setting
> > up a vmap for all the non-continous buffers is a bit much, and we really
> > can't set up the vmap in the panic handler.
> > 
> > Maybe we should call this panic_prepare/panic_finish and
> > s/vmap/cookie/, to make it entirely opaque?
> > 
> > But a bit overengineering perhaps :-)
> > 
> 
> I guess i915 wants something else than vmap, but I have no idea how a
> page-by-page solution is to be implemented.

i915 should be mostly fine, since we have a GTT for remapping and can make
it look continuous. It might not be in the part of the GTT accessible by
the cpu though.

Wrt implementation: My idea would be to extract the pixel writing function
and export it to drivers. The driver would then implement a
->panic_draw_xy function which looks up the right page, computes it
address, computes the x/y offset within (taking into account tiling and
stuff like that), and then uses the helper function to draw the right
pixel value for the format at the given address.

That's why I suggested that drivers need to either implement
->panic_prepare or ->panic_draw_xy. Since for this approach you might not
need a ->panic_prepare/cleanup implementation, since it's all done in
->panic_draw_xy on a pixel-by-pixel basis.

> When we get bootsplash, we will at least have a kernel address during
> that phase for all drivers supporting it.

Hm, not following what you mean here. Why is bootsplash special?

> >> +
> >> +	ctx.fb = fb;
> >> +	ctx.width = drm_rect_width(&plane->state->src) >> 16;
> >> +	ctx.height = drm_rect_height(&plane->state->src) >> 16;
> >> +
> >> +	/*
> >> +	 * TODO:
> >> +	 * Find which part of the fb that is visible.
> >> +	 * Width and height are zero on vc4
> >> +	 */
> >> +	if (!ctx.width || !ctx.height) {
> >> +		ctx.width = fb->width;
> >> +		ctx.height = fb->height;
> >> +	}
> >> +
> >> +	/* Try to fit 50 lines */
> >> +	if (ctx.height < 50 * 16 && drm_panic_font8x8)
> >> +		ctx.font = drm_panic_font8x8;
> >> +	else
> >> +		ctx.font = drm_panic_font8x16;
> >> +
> >> +	ctx.col_width = DRM_PANIC_COLUMN_WIDTH * ctx.font->width;
> >> +	ctx.bottom_y = ((ctx.height / ctx.font->height) - 1) * ctx.font->height;
> >> +
> >> +	if (ctx.width < 2 * ctx.col_width)
> >> +		ctx.max_line_length = ctx.width / ctx.font->width;
> >> +	else
> >> +		ctx.max_line_length = DRM_PANIC_COLUMN_WIDTH - 2; /* border=2 */
> >> +
> >> +	ctx.x = 0;
> >> +	ctx.y = ctx.bottom_y;
> >> +	if (ctx.width > ctx.col_width)
> >> +		ctx.x = ((ctx.width / ctx.col_width) - 1) * ctx.col_width;
> >> +
> >> +	pr_debug("%s: font=%s %ux%u max_line_length=%zu (%u,%u)\n",
> >> +		 __func__, ctx.font->name, ctx.width, ctx.height,
> >> +		 ctx.max_line_length, ctx.x, ctx.y);
> >> +
> >> +	kmsg_dump_rewind(dumper);
> >> +
> >> +	/* The latest messages are fetched first */
> >> +	while (kmsg_dump_get_buffer(dumper, false, text, sizeof(text), &len)) {
> >> +		/* Strip off the very last newline so we start at the bottom */
> >> +		if (first_iteration) {
> >> +			len--;
> >> +			first_iteration = false;
> >> +		}
> >> +
> >> +		if (!drm_panic_render_lines(&ctx, text, len))
> >> +			break;
> >> +	}
> >> +
> >> +	if (fb->funcs->panic_vunmap)
> >> +		fb->funcs->panic_vunmap(fb, vmap);
> >> +}
> >> +
> >> +static void drm_panic_try_dev(struct drm_device *dev, struct kmsg_dumper *dumper)
> >> +{
> >> +	struct drm_framebuffer *fb;
> >> +	struct drm_plane *plane;
> >> +	struct drm_crtc *crtc;
> >> +
> >> +	if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
> >> +		return;
> >> +
> >> +	if (dumper->max_reason == KMSG_DUMP_OOPS)
> >> +		pr_info("%s: %s on minor %d\n", __func__, dev->driver->name,
> >> +			dev->primary->index);
> >> +
> >> +	drm_for_each_crtc(crtc, dev) {
> >> +		if (!ww_mutex_trylock(&crtc->mutex.mutex))
> >> +			continue;
> >> +
> >> +		if (!crtc->enabled || !crtc->primary)
> >> +			goto crtc_unlock;
> >> +
> >> +		if (!crtc->state || !crtc->state->active)
> >> +			goto crtc_unlock;
> >> +
> >> +		plane = crtc->primary;
> >> +		if (!ww_mutex_trylock(&plane->mutex.mutex))
> >> +			goto crtc_unlock;
> >> +
> >> +		/*
> >> +		 * TODO: Should we check plane_state->visible?
> >> +		 * It is not set on vc4
> > 
> > I think we should. 
> 
> I need to recheck vc4, because when I did this a year ago ->visible was
> not set on vc4.

I think if you look at plane_state->src (i.e. the clipped src rect) then
if that is non-zero I think you can assume ->visible is also set. Becuase
that's both computed by the same helpers.

And if that's not the case, fall back to ->src_x/y/h/w and just assume the
plane is visible (usually those are the simpler drivers anyway).

> > We should also check whether that primary is connected
> > to the crtc (some hw you can reassign planes freely between crtc, and the
> > crtc->primary pointer is only used for compat with legacy ioctl).
> 
> Like this?
> 		/* Check that the plane has not been reassigned */
> 		if (plane->state->crtc != crtc)
> 			goto plane_unlock;

Yup.

> >> +		if (!plane->state || !plane->state->visible)
> >> +		 */
> >> +		if (!plane->state)
> >> +			goto plane_unlock;
> >> +
> >> +		fb = plane->state->fb;
> >> +		if (!fb || !fb->funcs->panic_vmap)
> > 
> > Docs says this is optional, doesn't seem to be all that optional. I'd
> > check for this or a driver-specific ->panic_draw_xy instead.
> 
> ->panic_vmap determines whether panic is supported or not, so in that
> sense it's optional. ->panic_draw_xy is optional with a default fallback
> for the linear case.
> 
> >> +			goto plane_unlock;
> >> +
> >> +		/*
> >> +		 * fbcon puts out panic messages so stay away to avoid jumbled
> >> +		 * output. If vc->vc_mode != KD_TEXT fbcon won't put out
> >> +		 * messages (see vt_console_print).
> >> +		 */
> >> +		if (dev->fb_helper && dev->fb_helper->fb == fb)
> >> +			goto plane_unlock;
> >> +
> >> +		drm_panic_kmsg_render_screen(plane, dumper);
> >> +plane_unlock:
> >> +		ww_mutex_unlock(&plane->mutex.mutex);
> >> +crtc_unlock:
> >> +		ww_mutex_unlock(&crtc->mutex.mutex);
> >> +	}
> >> +}
> >> +
> >> +static int drm_panic_dev_iter(struct device *dev, void *data)
> >> +{
> >> +	struct drm_minor *minor = dev_get_drvdata(dev);
> >> +
> >> +	if (minor && minor->type == DRM_MINOR_PRIMARY)
> >> +		drm_panic_try_dev(minor->dev, data);
> >> +
> >> +	return 0;
> >> +}
> >> +
> >> +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
> >> +				enum kmsg_dump_reason reason)
> >> +{
> >> +	class_for_each_device(drm_class, NULL, dumper, drm_panic_dev_iter);
> > 
> > class_for_each_device uses klist, which only uses an irqsave spinlock. I
> > think that's good enough. Comment to that effect would be good e.g.
> > 
> > 	/* based on klist, which uses only a spin_lock_irqsave, which we
> > 	 * assume still works */
> > 
> > If we aim for perfect this should be a trylock still, maybe using our own
> > device list.
> > 
> >> +}
> >> +
> >> +static struct kmsg_dumper drm_panic_kmsg_dumper = {
> >> +	.dump = drm_panic_kmsg_dump,
> >> +	.max_reason = KMSG_DUMP_PANIC,
> >> +};
> >> +
> >> +static ssize_t drm_panic_file_panic_write(struct file *file,
> >> +					  const char __user *user_buf,
> >> +					  size_t count, loff_t *ppos)
> >> +{
> >> +	unsigned long long val;
> >> +	char buf[24];
> >> +	size_t size;
> >> +	ssize_t ret;
> >> +
> >> +	size = min(sizeof(buf) - 1, count);
> >> +	if (copy_from_user(buf, user_buf, size))
> >> +		return -EFAULT;
> >> +
> >> +	buf[size] = '\0';
> >> +	ret = kstrtoull(buf, 0, &val);
> >> +	if (ret)
> >> +		return ret;
> >> +
> >> +	drm_panic_kmsg_dumper.max_reason = KMSG_DUMP_OOPS;
> >> +	wmb();
> >> +
> >> +	/* Do a real test with: echo c > /proc/sysrq-trigger */
> >> +
> >> +	if (val == 0) {
> >> +		pr_info("Test panic screen using kmsg_dump(OOPS)\n");
> >> +		kmsg_dump(KMSG_DUMP_OOPS);
> >> +	} else if (val == 1) {
> >> +		char *null_pointer = NULL;
> >> +
> >> +		pr_info("Test panic screen using NULL pointer dereference\n");
> >> +		*null_pointer = 1;
> >> +	} else {
> >> +		return -EINVAL;
> >> +	}
> > 
> > This isn't quite what I had in mind, since it still kills the kernel (like
> > sysrq-trigger). 
> 
> If val == 0, it doesn't kill the kernel, it only dumps the kernel log.
> And it doesn't taint the kernel either.

Ah I didn't realize that. Sounds like a good option to keep.

> > Instead what I had in mind is to recreate the worst
> > possible panic context as much as feasible (disabling interrupts should be
> > a good start, maybe we can even do an nmi callback), and then call our
> > panic implementation. That way we can test the panic handler in a
> > non-destructive way (i.e. aside from last dmesg lines printed to the
> > screen nothing bad happens to the kernel: No real panic, no oops, no
> > tainting).
> 
> The interrupt case I can do, nmi I have no idea.

I just read the printk nmi code again and it looks like there's now even
more special handling for issues happening in nmi context, so we should
never see an oops from nmi context. So local_irq_disable() wrapping should
be good enough for testing.
-Daniel

> 
> Noralf.
> 
> > 
> > And igt testcase could then exercise this, and CI run it.
> > 
> >> +
> >> +	return count;
> >> +}
> >> +
> >> +static const struct file_operations drm_panic_panic_ops = {
> >> +	.write =        drm_panic_file_panic_write,
> >> +	.open =         simple_open,
> >> +	.llseek =       default_llseek,
> >> +};
> >> +
> >> +static struct dentry *drm_panic_d_panic;
> >> +
> >> +void __init drm_panic_init(struct dentry *debugfs_root)
> >> +{
> >> +	drm_panic_font8x8 = find_font("VGA8x8");
> >> +	drm_panic_font8x16 = find_font("VGA8x16");
> >> +	if (!drm_panic_font8x16) {
> >> +		DRM_WARN("Couldn't find font, panic screen disabled\n");
> >> +		return;
> >> +	}
> >> +
> >> +	drm_panic_d_panic = debugfs_create_file("panic-test", 0200,
> >> +						debugfs_root, NULL,
> >> +						&drm_panic_panic_ops);
> >> +	kmsg_dump_register(&drm_panic_kmsg_dumper);
> >> +}
> >> +
> >> +void drm_panic_exit(struct dentry *debugfs_root)
> >> +{
> >> +	kmsg_dump_unregister(&drm_panic_kmsg_dumper);
> >> +	debugfs_remove(drm_panic_d_panic);
> >> +}
> >> diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
> >> index f0b34c977ec5..f3274798ecfe 100644
> >> --- a/include/drm/drm_framebuffer.h
> >> +++ b/include/drm/drm_framebuffer.h
> >> @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
> >>  		     struct drm_file *file_priv, unsigned flags,
> >>  		     unsigned color, struct drm_clip_rect *clips,
> >>  		     unsigned num_clips);
> >> +
> >> +	/**
> >> +	 * @panic_vmap:
> >> +	 *
> >> +	 * Optional callback for panic handling.
> >> +	 *
> >> +	 * For vmapping the selected framebuffer in a panic context. Must
> >> +	 * be super careful about locking (only trylocking allowed).
> >> +	 *
> >> +	 * RETURNS:
> >> +	 *
> >> +	 * NULL if it didn't work out, otherwise an opaque cookie which is
> >> +	 * passed to @panic_draw_xy. It can be anything: vmap area, structure
> >> +	 * with more details, just a few flags, ...
> >> +	 */
> >> +	void *(*panic_vmap)(struct drm_framebuffer *fb);
> >> +
> >> +	/**
> >> +	 * @panic_vunmap:
> >> +	 *
> >> +	 * Optional callback for cleaning up after panic testing.
> >> +	 *
> >> +	 * Crtc and plane locks are released after this callback has run.
> >> +	 * vmap is the cookie returned by @panic_vmap.
> >> +	 */
> >> +	void (*panic_vunmap)(struct drm_framebuffer *fb, void *vmap);
> >> +
> >> +	/**
> >> +	 * @panic_draw_xy:
> >> +	 *
> >> +	 * Optional callback for drawing pixels during panic.
> >> +	 *
> >> +	 * For drawing pixels onto a framebuffer prepared with @panic_vmap.
> >> +	 * vmap is the cookie returned by @panic_vmap.
> >> +	 * If it's not set, drm_framebuffer_panic_draw_xy() is used.
> >> +	 */
> >> +	void (*panic_draw_xy)(struct drm_framebuffer *fb, void *vmap,
> >> +			      int x, int y, bool foreground);
> >>  };
> >>  
> >>  /**
> >> @@ -293,4 +331,7 @@ int drm_framebuffer_plane_width(int width,
> >>  int drm_framebuffer_plane_height(int height,
> >>  				 const struct drm_framebuffer *fb, int plane);
> >>  
> >> +void drm_framebuffer_panic_draw_xy(struct drm_framebuffer *fb, void *vmap,
> >> +				   int x, int y, bool foreground);
> >> +
> >>  #endif
> >> -- 
> >> 2.20.1
> >>
> > 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch


More information about the Intel-gfx mailing list