[PATCH v2] drm/print: Introduce drm_line_printer
Michal Wajdeczko
michal.wajdeczko at intel.com
Thu May 30 21:27:24 UTC 2024
On 30.05.2024 20:47, John Harrison wrote:
> On 5/30/2024 00:49, Jani Nikula wrote:
>> On Wed, 29 May 2024, John Harrison <john.c.harrison at intel.com> wrote:
>>> On 5/28/2024 06:06, Michal Wajdeczko wrote:
>>>> This drm printer wrapper can be used to increase the robustness of
>>>> the captured output generated by any other drm_printer to make sure
>>>> we didn't lost any intermediate lines of the output by adding line
>>>> numbers to each output line. Helpful for capturing some crash data.
>>>>
>>>> Signed-off-by: Michal Wajdeczko <michal.wajdeczko at intel.com>
>>>> Cc: Jani Nikula <jani.nikula at intel.com>
>>>> Cc: John Harrison <John.C.Harrison at Intel.com>
>>>> ---
>>>> v2: don't abuse prefix, use union instead (Jani)
>>>> don't use 'dp' as name, prefer 'p' (Jani)
>>>> add support for unique series identifier (John)
>>>> ---
>>>> drivers/gpu/drm/drm_print.c | 14 ++++++++
>>>> include/drm/drm_print.h | 68
>>>> ++++++++++++++++++++++++++++++++++++-
>>>> 2 files changed, 81 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c
>>>> index cf2efb44722c..be9cbebff5b3 100644
>>>> --- a/drivers/gpu/drm/drm_print.c
>>>> +++ b/drivers/gpu/drm/drm_print.c
>>>> @@ -214,6 +214,20 @@ void __drm_printfn_err(struct drm_printer *p,
>>>> struct va_format *vaf)
>>>> }
>>>> EXPORT_SYMBOL(__drm_printfn_err);
>>>> +void __drm_printfn_line(struct drm_printer *p, struct va_format
>>>> *vaf)
>>>> +{
>>>> + unsigned int counter = ++p->line.counter;
>>> Wrong units, but see below anyway...
>>>
>>>> + const char *prefix = p->prefix ?: "";
>>>> + const char *pad = p->prefix ? " " : "";
>>>> +
>>>> + if (p->line.series)
>>>> + drm_printf(p->arg, "%s%s%u.%u: %pV",
>>>> + prefix, pad, p->line.series, counter, vaf);
>>>> + else
>>>> + drm_printf(p->arg, "%s%s%u: %pV", prefix, pad, counter, vaf);
>>>> +}
>>>> +EXPORT_SYMBOL(__drm_printfn_line);
>>>> +
>>>> /**
>>>> * drm_puts - print a const string to a &drm_printer stream
>>>> * @p: the &drm printer
>>>> diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h
>>>> index 089950ad8681..f4d9b98d7909 100644
>>>> --- a/include/drm/drm_print.h
>>>> +++ b/include/drm/drm_print.h
>>>> @@ -176,7 +176,13 @@ struct drm_printer {
>>>> void (*puts)(struct drm_printer *p, const char *str);
>>>> void *arg;
>>>> const char *prefix;
>>>> - enum drm_debug_category category;
>>>> + union {
>>>> + enum drm_debug_category category;
>>>> + struct {
>>>> + unsigned short series;
>>>> + unsigned short counter;
>>> Any particular reason for using 'short' rather than 'int'? Short is only
>>> 16bits right? That might seem huge but a GuC log buffer with 16MB debug
>>> log (and minimum sizes for the other sections) when dumped via the
>>> original debugfs hexdump mechanism is 1,245,444 lines. That line count
>>> goes down a lot when you start using longer lines for the dump, but it
>>> is still in the tens of thousands of lines. So limiting to 16 bits is
>>> an overflow just waiting to happen.
>>>
>>>> + } line;
>>>> + };
>>>> };
>>>> void __drm_printfn_coredump(struct drm_printer *p, struct
>>>> va_format *vaf);
>>>> @@ -186,6 +192,7 @@ void __drm_puts_seq_file(struct drm_printer *p,
>>>> const char *str);
>>>> void __drm_printfn_info(struct drm_printer *p, struct va_format
>>>> *vaf);
>>>> void __drm_printfn_dbg(struct drm_printer *p, struct va_format
>>>> *vaf);
>>>> void __drm_printfn_err(struct drm_printer *p, struct va_format
>>>> *vaf);
>>>> +void __drm_printfn_line(struct drm_printer *p, struct va_format *vaf);
>>>> __printf(2, 3)
>>>> void drm_printf(struct drm_printer *p, const char *f, ...);
>>>> @@ -357,6 +364,65 @@ static inline struct drm_printer
>>>> drm_err_printer(struct drm_device *drm,
>>>> return p;
>>>> }
>>>> +/**
>>>> + * drm_line_printer - construct a &drm_printer that prefixes
>>>> outputs with line numbers
>>>> + * @p: the &struct drm_printer which actually generates the output
>>>> + * @prefix: optional output prefix, or NULL for no prefix
>>>> + * @series: optional unique series identifier, or 0 to omit
>>>> identifier in the output
>>>> + *
>>>> + * This printer can be used to increase the robustness of the
>>>> captured output
>>>> + * to make sure we didn't lost any intermediate lines of the
>>>> output. Helpful
>>>> + * while capturing some crash data.
>>>> + *
>>>> + * Example 1::
>>>> + *
>>>> + * void crash_dump(struct drm_device *drm)
>>>> + * {
>>>> + * static unsigned short id;
>>>> + * struct drm_printer p = drm_err_printer(drm, "crash");
>>>> + * struct drm_printer lp = drm_line_printer(&p, "dump", ++id);
>>> Is there any benefit or other reason to split the prefix across two
>>> separate printers? It seems like a source of confusion. As in, the code
>>> will allow a double prefix, there is not much you can do about that
>>> because losing the prefix from drm_line_printer would mean no prefix at
>>> all when not using drm_err_printer underneath. But why explicitly split
>>> the message across both printers in the usage example? This is saying
>>> that this is the recommended way to use the interface, but with no
>>> explanation of why the split is required or how the split should be
>>> done.
>> You could have a printer, and then add two separate line counted blocks.
>>
>> struct drm_printer p = drm_err_printer(drm, "parent");
>> struct drm_printer lp1 = drm_line_printer(&p, "child 1", 0);
>>
>> ...
>>
>> struct drm_printer lp2 = drm_line_printer(&p, "child 2", 0);
>>
>> ...
>>
>> p could be defined elsewhere and passed into separate functions which
>> each have the line printing. The two prefixes can be useful.
> Except you can't have a multi-level prefix if using the info level
> printer as that does not take a prefix. And I'm really not seeing a
but it's up to you which printer you choose as 'primary' printer that
will render final output
> reason why you would want the line count to restart in the middle of a
> single atomic dump operation.
>
>>
>>> Also, there is really no specific connection to crashes. The purpose of
>>> this is for allowing the dumping of multi-line blocks of data. One use
>>> is for debugging crashes. But there are many debug operations that
>>> require the same dump and do not involve a crash. And this support is
>>> certainly not intended to be used on general end-user GPU hangs. For
>>> those, everything should be part of the devcoredump core file that is
>>> produced and accessible via sysfs. We absolutely should not be dumping
>>> huge data blocks in customer release drivers except in very extreme
>>> circumstances.
>> A devcoredump implementation could use a drm_printer too?
> You mean for reading the devcoredump file from sysfs? Except that the
> whole reason this was started was because Michal absolutely refuses to
> allow line counted output in a sysfs/debugfs file because "it is
> unnecessary and breaks the decoding tools".
>
>>
>> Is this only about bikeshedding the example? I'm not sure I get the
>> point here.
> I would call it getting accurate and understandable documentation.
>
> The existing example is splitting what should be an atomic name "crash
> dump" across two separate line printer objects. That is something that
> is so unrealistic that it implies there is a required reason to break
> the prefix up. Documentation that is ambiguous and confusing is
> potentially worse than no documentation at all.
>
all below setups will render the same output.
which one, in your opinion, shows all potential of the drm_line_printer?
struct drm_printer p = drm_err_printer(drm, "AAA BBB");
struct drm_printer lp = drm_line_printer(&p, NULL, id);
struct drm_printer p = drm_err_printer(drm, NULL);
struct drm_printer lp = drm_line_printer(&p, "AAA BBB", id);
struct drm_printer p = drm_err_printer(drm, "AAA");
struct drm_printer lp = drm_line_printer(&p, "BBB", ++id);
>
>>
>>>> + *
>>>> + * drm_printf(&lp, "foo");
>>>> + * drm_printf(&lp, "bar");
>>>> + * }
>>>> + *
>>>> + * Above code will print into the dmesg something like::
>>>> + *
>>>> + * [ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.1: foo
>>>> + * [ ] 0000:00:00.0: [drm] *ERROR* crash dump 1.2: bar
>>>> + *
>>>> + * Example 2::
>>>> + *
>>>> + * void line_dump(struct device *dev)
>>>> + * {
>>>> + * struct drm_printer p = drm_info_printer(dev);
>>>> + * struct drm_printer lp = drm_line_printer(&p, NULL, 0);
>>>> + *
>>>> + * drm_printf(&lp, "foo");
>>>> + * drm_printf(&lp, "bar");
>>>> + * }
>>>> + *
>>>> + * Above code will print::
>>>> + *
>>>> + * [ ] 0000:00:00.0: [drm] 1: foo
>>>> + * [ ] 0000:00:00.0: [drm] 2: bar
>>> Not really seeing the point of having two examples listed. The first
>>> includes all the optional extras, the second is just repeating with no
>>> new information.
>> You see how the "series" param behaves?
> The second example doesn't have a series parameter. If the only purpose
> is to say "the print of the series value is suppressed if zero" then why
> not just have that one line? Documentation should also be concise.
it was already documented in that way:
@series: optional unique series identifier, or 0 to omit identifier in
the output
>
> John.
>
>>
>> BR,
>> Jani.
>>
>>> John.
>>>
>>>> + *
>>>> + * RETURNS:
>>>> + * The &drm_printer object
>>>> + */
>>>> +static inline struct drm_printer drm_line_printer(struct
>>>> drm_printer *p,
>>>> + const char *prefix,
>>>> + unsigned short series)
>>>> +{
>>>> + struct drm_printer lp = {
>>>> + .printfn = __drm_printfn_line,
>>>> + .arg = p,
>>>> + .prefix = prefix,
>>>> + .line = { .series = series, },
>>>> + };
>>>> + return lp;
>>>> +}
>>>> +
>>>> /*
>>>> * struct device based logging
>>>> *
>
More information about the dri-devel
mailing list