[Intel-gfx] [PATCH v2 1/3] drm: Add support for panic message output
Noralf Trønnes
noralf at tronnes.org
Mon Mar 11 22:33:15 UTC 2019
Den 11.03.2019 20.23, skrev Daniel Vetter:
> On Mon, Mar 11, 2019 at 06:42:16PM +0100, Noralf Trønnes wrote:
>> This adds support for outputting kernel messages on panic().
>> A kernel message dumper is used to dump the log. The dumper iterates
>> over each DRM device and it's crtc's to find suitable framebuffers.
>>
>> All the other dumpers are run before this one except mtdoops.
>> Only atomic drivers are supported.
>>
>> Signed-off-by: Noralf Trønnes <noralf at tronnes.org>
>
> Bunch of comments/ideas for you or Darwish below, whoever picks this up.
Actually it would ne nice if Darwish could pick it up since he will do
it on i915 which will be useful to a much broader audience.
If not I'll respin when I'm done with the drm_fb_helper refactoring.
<snip>
>> diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c
>> new file mode 100644
>> index 000000000000..4bca7f0bc369
>> --- /dev/null
>> +++ b/drivers/gpu/drm/drm_panic.c
>> @@ -0,0 +1,363 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +// Copyright 2018 Noralf Trønnes
>> +
>> +#include <linux/debugfs.h>
>> +#include <linux/font.h>
>> +#include <linux/kernel.h>
>> +#include <linux/kmsg_dump.h>
>> +#include <linux/seq_file.h>
>> +#include <linux/slab.h>
>> +#include <linux/uaccess.h>
>> +
>> +#include <drm/drm_crtc.h>
>> +#include <drm/drm_device.h>
>> +#include <drm/drm_drv.h>
>> +#include <drm/drm_file.h>
>> +#include <drm/drm_framebuffer.h>
>> +#include <drm/drm_fb_helper.h>
>> +#include <drm/drm_plane.h>
>> +#include <drm/drm_print.h>
>> +#include <drm/drmP.h>
>> +
>> +#include "drm_internal.h"
>> +
>> +/*
>> + * The log lines in an ARM stack dump are 92 characters long
>> + * and 120 is a nice multiple for HD and 4K.
>> + */
>> +#define DRM_PANIC_COLUMN_WIDTH 120
>
> I think we should compute this dynamically from the actual fb width and
> font witdth.
The font is picked based on whether it can fit 50 lines of text.
The next rule is to decide when to add another column. I decided to use
a number that gets most log lines in one line on the screen. If most
lines get broken into two lines, there's not much use in an extra column.
Do you have another idea on how to do this?
(There's even a 16x32 font now that wasn't available when I did this.)
>
>> +
>> +struct drm_panic_ctx {
>> + struct drm_framebuffer *fb;
>> + unsigned int width;
>> + unsigned int height;
>> + void *vmap;
>> +
>> + const struct font_desc *font;
>> + unsigned int col_width;
>> + unsigned int bottom_y;
>> + size_t max_line_length;
>> +
>> + unsigned int x;
>> + unsigned int y;
>> +};
>> +
>> +static const struct font_desc *drm_panic_font8x8, *drm_panic_font8x16;
>> +
>> +static void drm_panic_draw_xy(struct drm_framebuffer *fb, void *vmap,
>> + int x, int y, bool fg)
>> +{
>> + if (fb->funcs->panic_draw_xy)
>> + fb->funcs->panic_draw_xy(fb, vmap, x, y, fg);
>> + else
>> + drm_framebuffer_panic_draw_xy(fb, vmap, x, y, fg);
>> +}
>> +
>> +static void drm_panic_render_char(struct drm_panic_ctx *ctx,
>> + unsigned int offset, char c)
>> +{
>> + unsigned int h, w, x, y;
>> + u8 fontline;
>> +
>> + for (h = 0; h < ctx->font->height; h++) {
>> + fontline = *(u8 *)(ctx->font->data + c * ctx->font->height + h);
>> +
>> + for (w = 0; w < ctx->font->width; w++) {
>> + x = ctx->x + (offset * ctx->font->width) + w;
>> + y = ctx->y + h;
>> + drm_panic_draw_xy(ctx->fb, ctx->vmap, x, y,
>> + fontline & BIT(7 - w));
>> + }
>> + }
>> +}
>> +
>> +static void drm_panic_render_line(struct drm_panic_ctx *ctx,
>> + const char *line, size_t len)
>> +{
>> + unsigned int i;
>> +
>> + for (i = 0; i < len; i++)
>> + drm_panic_render_char(ctx, i, line[i]);
>> +
>> + /* Clear out the rest of the line */
>> + for (i = len; i < ctx->max_line_length; i++)
>> + drm_panic_render_char(ctx, i, ' ');
>> +}
>> +
>> +static bool drm_panic_newline(struct drm_panic_ctx *ctx)
>> +{
>> + if (ctx->x == 0 && ctx->y == 0)
>> + return false;
>> + if (ctx->y == 0) {
>> + ctx->x -= ctx->col_width;
>> + ctx->y = ctx->bottom_y;
>> + } else {
>> + ctx->y -= ctx->font->height;
>> + }
>> +
>> + return true;
>> +}
>> +
>> +/* Render from bottom right most column */
>> +static bool drm_panic_render_lines(struct drm_panic_ctx *ctx,
>> + const char *str, size_t len)
>> +{
>> + size_t l, line_length;
>> + const char *pos;
>> + int i;
>> +
>> + while (len) {
>> + pos = str + len - 1;
>> +
>> + if (*pos == '\n') {
>> + len--;
>> + if (!drm_panic_newline(ctx))
>> + return false;
>> + continue;
>> + }
>> +
>> + while (pos > str && *(pos - 1) != '\n')
>> + pos--;
>> +
>> + line_length = len - (pos - str);
>> +
>> + if (!line_length || len < line_length) {
>> + pr_err("%s: Bug! line_length=%zu len=%zu\n",
>> + __func__, line_length, len);
>> + return false;
>> + }
>> +
>> + len -= line_length;
>> +
>> + for (i = DIV_ROUND_UP(line_length, ctx->max_line_length) - 1; i >= 0; i--) {
>> + l = min(ctx->max_line_length, line_length - i * ctx->max_line_length);
>> + drm_panic_render_line(ctx, pos + (i * ctx->max_line_length), l);
>> + if (i && !drm_panic_newline(ctx))
>> + return false;
>> + }
>> + }
>> +
>> + return true;
>> +}
>> +
>> +static void drm_panic_kmsg_render_screen(struct drm_plane *plane,
>> + struct kmsg_dumper *dumper)
>> +{
>> + struct drm_framebuffer *fb = plane->state->fb;
>> + bool first_iteration = true;
>> + struct drm_panic_ctx ctx;
>> + static char text[1024];
>> + size_t len;
>> +
>> + ctx.vmap = fb->funcs->panic_vmap(fb);
>> +
>> + /* Print some info when testing */
>> + if (dumper->max_reason == KMSG_DUMP_OOPS) {
>> + struct drm_format_name_buf format_name;
>> +
>> + pr_info("%s: [FB:%d] %ux%u format=%s vmap=%p\n",
>> + __func__, fb->base.id, fb->width, fb->height,
>> + drm_get_format_name(fb->format->format, &format_name),
>> + ctx.vmap);
>> + }
>> +
>> + if (!ctx.vmap)
>> + return;
>
> For some framebuffers it might be useful to not require vmap, but instead
> have some page-by-page kind of access helper. Since preemptively setting
> up a vmap for all the non-continous buffers is a bit much, and we really
> can't set up the vmap in the panic handler.
>
> Maybe we should call this panic_prepare/panic_finish and
> s/vmap/cookie/, to make it entirely opaque?
>
> But a bit overengineering perhaps :-)
>
I guess i915 wants something else than vmap, but I have no idea how a
page-by-page solution is to be implemented.
When we get bootsplash, we will at least have a kernel address during
that phase for all drivers supporting it.
>> +
>> + ctx.fb = fb;
>> + ctx.width = drm_rect_width(&plane->state->src) >> 16;
>> + ctx.height = drm_rect_height(&plane->state->src) >> 16;
>> +
>> + /*
>> + * TODO:
>> + * Find which part of the fb that is visible.
>> + * Width and height are zero on vc4
>> + */
>> + if (!ctx.width || !ctx.height) {
>> + ctx.width = fb->width;
>> + ctx.height = fb->height;
>> + }
>> +
>> + /* Try to fit 50 lines */
>> + if (ctx.height < 50 * 16 && drm_panic_font8x8)
>> + ctx.font = drm_panic_font8x8;
>> + else
>> + ctx.font = drm_panic_font8x16;
>> +
>> + ctx.col_width = DRM_PANIC_COLUMN_WIDTH * ctx.font->width;
>> + ctx.bottom_y = ((ctx.height / ctx.font->height) - 1) * ctx.font->height;
>> +
>> + if (ctx.width < 2 * ctx.col_width)
>> + ctx.max_line_length = ctx.width / ctx.font->width;
>> + else
>> + ctx.max_line_length = DRM_PANIC_COLUMN_WIDTH - 2; /* border=2 */
>> +
>> + ctx.x = 0;
>> + ctx.y = ctx.bottom_y;
>> + if (ctx.width > ctx.col_width)
>> + ctx.x = ((ctx.width / ctx.col_width) - 1) * ctx.col_width;
>> +
>> + pr_debug("%s: font=%s %ux%u max_line_length=%zu (%u,%u)\n",
>> + __func__, ctx.font->name, ctx.width, ctx.height,
>> + ctx.max_line_length, ctx.x, ctx.y);
>> +
>> + kmsg_dump_rewind(dumper);
>> +
>> + /* The latest messages are fetched first */
>> + while (kmsg_dump_get_buffer(dumper, false, text, sizeof(text), &len)) {
>> + /* Strip off the very last newline so we start at the bottom */
>> + if (first_iteration) {
>> + len--;
>> + first_iteration = false;
>> + }
>> +
>> + if (!drm_panic_render_lines(&ctx, text, len))
>> + break;
>> + }
>> +
>> + if (fb->funcs->panic_vunmap)
>> + fb->funcs->panic_vunmap(fb, vmap);
>> +}
>> +
>> +static void drm_panic_try_dev(struct drm_device *dev, struct kmsg_dumper *dumper)
>> +{
>> + struct drm_framebuffer *fb;
>> + struct drm_plane *plane;
>> + struct drm_crtc *crtc;
>> +
>> + if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
>> + return;
>> +
>> + if (dumper->max_reason == KMSG_DUMP_OOPS)
>> + pr_info("%s: %s on minor %d\n", __func__, dev->driver->name,
>> + dev->primary->index);
>> +
>> + drm_for_each_crtc(crtc, dev) {
>> + if (!ww_mutex_trylock(&crtc->mutex.mutex))
>> + continue;
>> +
>> + if (!crtc->enabled || !crtc->primary)
>> + goto crtc_unlock;
>> +
>> + if (!crtc->state || !crtc->state->active)
>> + goto crtc_unlock;
>> +
>> + plane = crtc->primary;
>> + if (!ww_mutex_trylock(&plane->mutex.mutex))
>> + goto crtc_unlock;
>> +
>> + /*
>> + * TODO: Should we check plane_state->visible?
>> + * It is not set on vc4
>
> I think we should.
I need to recheck vc4, because when I did this a year ago ->visible was
not set on vc4.
> We should also check whether that primary is connected
> to the crtc (some hw you can reassign planes freely between crtc, and the
> crtc->primary pointer is only used for compat with legacy ioctl).
Like this?
/* Check that the plane has not been reassigned */
if (plane->state->crtc != crtc)
goto plane_unlock;
>
>> + if (!plane->state || !plane->state->visible)
>> + */
>> + if (!plane->state)
>> + goto plane_unlock;
>> +
>> + fb = plane->state->fb;
>> + if (!fb || !fb->funcs->panic_vmap)
>
> Docs says this is optional, doesn't seem to be all that optional. I'd
> check for this or a driver-specific ->panic_draw_xy instead.
->panic_vmap determines whether panic is supported or not, so in that
sense it's optional. ->panic_draw_xy is optional with a default fallback
for the linear case.
>> + goto plane_unlock;
>> +
>> + /*
>> + * fbcon puts out panic messages so stay away to avoid jumbled
>> + * output. If vc->vc_mode != KD_TEXT fbcon won't put out
>> + * messages (see vt_console_print).
>> + */
>> + if (dev->fb_helper && dev->fb_helper->fb == fb)
>> + goto plane_unlock;
>> +
>> + drm_panic_kmsg_render_screen(plane, dumper);
>> +plane_unlock:
>> + ww_mutex_unlock(&plane->mutex.mutex);
>> +crtc_unlock:
>> + ww_mutex_unlock(&crtc->mutex.mutex);
>> + }
>> +}
>> +
>> +static int drm_panic_dev_iter(struct device *dev, void *data)
>> +{
>> + struct drm_minor *minor = dev_get_drvdata(dev);
>> +
>> + if (minor && minor->type == DRM_MINOR_PRIMARY)
>> + drm_panic_try_dev(minor->dev, data);
>> +
>> + return 0;
>> +}
>> +
>> +static void drm_panic_kmsg_dump(struct kmsg_dumper *dumper,
>> + enum kmsg_dump_reason reason)
>> +{
>> + class_for_each_device(drm_class, NULL, dumper, drm_panic_dev_iter);
>
> class_for_each_device uses klist, which only uses an irqsave spinlock. I
> think that's good enough. Comment to that effect would be good e.g.
>
> /* based on klist, which uses only a spin_lock_irqsave, which we
> * assume still works */
>
> If we aim for perfect this should be a trylock still, maybe using our own
> device list.
>
>> +}
>> +
>> +static struct kmsg_dumper drm_panic_kmsg_dumper = {
>> + .dump = drm_panic_kmsg_dump,
>> + .max_reason = KMSG_DUMP_PANIC,
>> +};
>> +
>> +static ssize_t drm_panic_file_panic_write(struct file *file,
>> + const char __user *user_buf,
>> + size_t count, loff_t *ppos)
>> +{
>> + unsigned long long val;
>> + char buf[24];
>> + size_t size;
>> + ssize_t ret;
>> +
>> + size = min(sizeof(buf) - 1, count);
>> + if (copy_from_user(buf, user_buf, size))
>> + return -EFAULT;
>> +
>> + buf[size] = '\0';
>> + ret = kstrtoull(buf, 0, &val);
>> + if (ret)
>> + return ret;
>> +
>> + drm_panic_kmsg_dumper.max_reason = KMSG_DUMP_OOPS;
>> + wmb();
>> +
>> + /* Do a real test with: echo c > /proc/sysrq-trigger */
>> +
>> + if (val == 0) {
>> + pr_info("Test panic screen using kmsg_dump(OOPS)\n");
>> + kmsg_dump(KMSG_DUMP_OOPS);
>> + } else if (val == 1) {
>> + char *null_pointer = NULL;
>> +
>> + pr_info("Test panic screen using NULL pointer dereference\n");
>> + *null_pointer = 1;
>> + } else {
>> + return -EINVAL;
>> + }
>
> This isn't quite what I had in mind, since it still kills the kernel (like
> sysrq-trigger).
If val == 0, it doesn't kill the kernel, it only dumps the kernel log.
And it doesn't taint the kernel either.
> Instead what I had in mind is to recreate the worst
> possible panic context as much as feasible (disabling interrupts should be
> a good start, maybe we can even do an nmi callback), and then call our
> panic implementation. That way we can test the panic handler in a
> non-destructive way (i.e. aside from last dmesg lines printed to the
> screen nothing bad happens to the kernel: No real panic, no oops, no
> tainting).
The interrupt case I can do, nmi I have no idea.
Noralf.
>
> And igt testcase could then exercise this, and CI run it.
>
>> +
>> + return count;
>> +}
>> +
>> +static const struct file_operations drm_panic_panic_ops = {
>> + .write = drm_panic_file_panic_write,
>> + .open = simple_open,
>> + .llseek = default_llseek,
>> +};
>> +
>> +static struct dentry *drm_panic_d_panic;
>> +
>> +void __init drm_panic_init(struct dentry *debugfs_root)
>> +{
>> + drm_panic_font8x8 = find_font("VGA8x8");
>> + drm_panic_font8x16 = find_font("VGA8x16");
>> + if (!drm_panic_font8x16) {
>> + DRM_WARN("Couldn't find font, panic screen disabled\n");
>> + return;
>> + }
>> +
>> + drm_panic_d_panic = debugfs_create_file("panic-test", 0200,
>> + debugfs_root, NULL,
>> + &drm_panic_panic_ops);
>> + kmsg_dump_register(&drm_panic_kmsg_dumper);
>> +}
>> +
>> +void drm_panic_exit(struct dentry *debugfs_root)
>> +{
>> + kmsg_dump_unregister(&drm_panic_kmsg_dumper);
>> + debugfs_remove(drm_panic_d_panic);
>> +}
>> diff --git a/include/drm/drm_framebuffer.h b/include/drm/drm_framebuffer.h
>> index f0b34c977ec5..f3274798ecfe 100644
>> --- a/include/drm/drm_framebuffer.h
>> +++ b/include/drm/drm_framebuffer.h
>> @@ -94,6 +94,44 @@ struct drm_framebuffer_funcs {
>> struct drm_file *file_priv, unsigned flags,
>> unsigned color, struct drm_clip_rect *clips,
>> unsigned num_clips);
>> +
>> + /**
>> + * @panic_vmap:
>> + *
>> + * Optional callback for panic handling.
>> + *
>> + * For vmapping the selected framebuffer in a panic context. Must
>> + * be super careful about locking (only trylocking allowed).
>> + *
>> + * RETURNS:
>> + *
>> + * NULL if it didn't work out, otherwise an opaque cookie which is
>> + * passed to @panic_draw_xy. It can be anything: vmap area, structure
>> + * with more details, just a few flags, ...
>> + */
>> + void *(*panic_vmap)(struct drm_framebuffer *fb);
>> +
>> + /**
>> + * @panic_vunmap:
>> + *
>> + * Optional callback for cleaning up after panic testing.
>> + *
>> + * Crtc and plane locks are released after this callback has run.
>> + * vmap is the cookie returned by @panic_vmap.
>> + */
>> + void (*panic_vunmap)(struct drm_framebuffer *fb, void *vmap);
>> +
>> + /**
>> + * @panic_draw_xy:
>> + *
>> + * Optional callback for drawing pixels during panic.
>> + *
>> + * For drawing pixels onto a framebuffer prepared with @panic_vmap.
>> + * vmap is the cookie returned by @panic_vmap.
>> + * If it's not set, drm_framebuffer_panic_draw_xy() is used.
>> + */
>> + void (*panic_draw_xy)(struct drm_framebuffer *fb, void *vmap,
>> + int x, int y, bool foreground);
>> };
>>
>> /**
>> @@ -293,4 +331,7 @@ int drm_framebuffer_plane_width(int width,
>> int drm_framebuffer_plane_height(int height,
>> const struct drm_framebuffer *fb, int plane);
>>
>> +void drm_framebuffer_panic_draw_xy(struct drm_framebuffer *fb, void *vmap,
>> + int x, int y, bool foreground);
>> +
>> #endif
>> --
>> 2.20.1
>>
>
More information about the Intel-gfx
mailing list