<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    On 5/16/2023 12:17, Belgaumkar, Vinay wrote:<br>
    <blockquote type="cite" cite="mid:696ba7f2-4353-e154-af0b-83604dda2546@intel.com">
      On 4/18/2023 11:17 AM, <a class="moz-txt-link-abbreviated" href="mailto:John.C.Harrison@Intel.com">John.C.Harrison@Intel.com</a> wrote:
      <br>
      <blockquote type="cite">From: John Harrison
        <a class="moz-txt-link-rfc2396E" href="mailto:John.C.Harrison@Intel.com">&lt;John.C.Harrison@Intel.com&gt;</a>
        <br>
        <br>
        This is useful for getting debug information out in certain
        <br>
        situations, such as failing kernel selftests and CI runs that
        don't
        <br>
        log error captures. It is especially useful for things like
        retrieving
        <br>
        GuC logs as GuC operation can't be tracked by adding printk or
        ftrace
        <br>
        entries.
        <br>
        <br>
        v2: Add CONFIG_DRM_I915_DEBUG_GEM wrapper (review feedback by
        Rodrigo).
        <br>
      </blockquote>
      <br>
      Do the CI sparse warnings hold water? With that looked at,
      <br>
    </blockquote>
    You mean this one totally fatal, absolutely-must-be-fixed error?<br>
    <blockquote>
      <pre class="moz-quote-pre" wrap="">Fast mode used, each commit won't be checked separately.</pre>
    </blockquote>
    <br>
    Does anyone even know what that means or why it (apparently totally
    randomly) hits some patch sets and not others?<br>
    <br>
    If you mean the checkpatch warnings: one is about not reporting
    out-of-memory errors (because you are supposed to return -ENOMEM and
    let the user handle it instead), but that doesn't apply to an internal,
    kernel-only thing which is already just a debug print. The other is
    about macro argument re-use, which is not an issue in this case and
    not worth re-writing the code to avoid.<br>
    <br>
    John.<br>
    <br>
    <br>
    <blockquote type="cite" cite="mid:696ba7f2-4353-e154-af0b-83604dda2546@intel.com">
      <br>
      LGTM,
      <br>
      <br>
      Reviewed-by: Vinay Belgaumkar <a class="moz-txt-link-rfc2396E" href="mailto:vinay.belgaumkar@intel.com">&lt;vinay.belgaumkar@intel.com&gt;</a>
      <br>
      <br>
      <blockquote type="cite">
        <br>
        Signed-off-by: John Harrison <a class="moz-txt-link-rfc2396E" href="mailto:John.C.Harrison@Intel.com">&lt;John.C.Harrison@Intel.com&gt;</a>
        <br>
        ---
        <br>
          drivers/gpu/drm/i915/i915_gpu_error.c | 132
        ++++++++++++++++++++++++++
        <br>
          drivers/gpu/drm/i915/i915_gpu_error.h |  10 ++
        <br>
          2 files changed, 142 insertions(+)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c
        b/drivers/gpu/drm/i915/i915_gpu_error.c
        <br>
        index f020c0086fbcd..03d62c250c465 100644
        <br>
        --- a/drivers/gpu/drm/i915/i915_gpu_error.c
        <br>
        +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
        <br>
        @@ -2219,3 +2219,135 @@ void i915_disable_error_state(struct
        drm_i915_private *i915, int err)
        <br>
                  i915->gpu_error.first_error = ERR_PTR(err);
        <br>
              spin_unlock_irq(&i915->gpu_error.lock);
        <br>
          }
        <br>
        +
        <br>
        +#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
        <br>
        +void intel_klog_error_capture(struct intel_gt *gt,
        <br>
        +                  intel_engine_mask_t engine_mask)
        <br>
        +{
        <br>
        +    static int g_count;
        <br>
        +    struct drm_i915_private *i915 = gt->i915;
        <br>
        +    struct i915_gpu_coredump *error;
        <br>
        +    intel_wakeref_t wakeref;
        <br>
        +    size_t buf_size = PAGE_SIZE * 128;
        <br>
        +    size_t pos_err;
        <br>
        +    char *buf, *ptr, *next;
        <br>
        +    int l_count = g_count++;
        <br>
        +    int line = 0;
        <br>
        +
        <br>
        +    /* Can't allocate memory during a reset */
        <br>
        +    if (test_bit(I915_RESET_BACKOFF, &amp;gt->reset.flags))
        {
        <br>
        +        drm_err(&amp;gt->i915->drm, "[Capture/%d.%d]
        Inside GT reset, skipping error capture :(\n",
        <br>
        +            l_count, line++);
        <br>
        +        return;
        <br>
        +    }
        <br>
        +
        <br>
        +    error = READ_ONCE(i915->gpu_error.first_error);
        <br>
        +    if (error) {
        <br>
        +        drm_err(&i915->drm, "[Capture/%d.%d] Clearing
        existing error capture first...\n",
        <br>
        +            l_count, line++);
        <br>
        +        i915_reset_error_state(i915);
        <br>
        +    }
        <br>
        +
        <br>
        +    with_intel_runtime_pm(&i915->runtime_pm, wakeref)
        <br>
        +        error = i915_gpu_coredump(gt, engine_mask,
        CORE_DUMP_FLAG_NONE);
        <br>
        +
        <br>
        +    if (IS_ERR(error)) {
        <br>
        +        drm_err(&i915->drm, "[Capture/%d.%d] Failed to
        capture error capture: %ld!\n",
        <br>
        +            l_count, line++, PTR_ERR(error));
        <br>
        +        return;
        <br>
        +    }
        <br>
        +
        <br>
        +    buf = kvmalloc(buf_size, GFP_KERNEL);
        <br>
        +    if (!buf) {
        <br>
        +        drm_err(&i915->drm, "[Capture/%d.%d] Failed to
        allocate buffer for error capture!\n",
        <br>
        +            l_count, line++);
        <br>
        +        i915_gpu_coredump_put(error);
        <br>
        +        return;
        <br>
        +    }
        <br>
        +
        <br>
        +    drm_info(&i915->drm, "[Capture/%d.%d] Dumping i915
        error capture for %ps...\n",
        <br>
        +         l_count, line++, __builtin_return_address(0));
        <br>
        +
        <br>
        +    /* Largest string length safe to print via dmesg */
        <br>
        +#    define MAX_CHUNK    800
        <br>
        +
        <br>
        +    pos_err = 0;
        <br>
        +    while (1) {
        <br>
        +        ssize_t got = i915_gpu_coredump_copy_to_buffer(error,
        buf, pos_err, buf_size - 1);
        <br>
        +
        <br>
        +        if (got <= 0)
        <br>
        +            break;
        <br>
        +
        <br>
        +        buf[got] = 0;
        <br>
        +        pos_err += got;
        <br>
        +
        <br>
        +        ptr = buf;
        <br>
        +        while (got > 0) {
        <br>
        +            size_t count;
        <br>
        +            char tag[2];
        <br>
        +
        <br>
        +            next = strnchr(ptr, got, '\n');
        <br>
        +            if (next) {
        <br>
        +                count = next - ptr;
        <br>
        +                *next = 0;
        <br>
        +                tag[0] = '>';
        <br>
        +                tag[1] = '<';
        <br>
        +            } else {
        <br>
        +                count = got;
        <br>
        +                tag[0] = '}';
        <br>
        +                tag[1] = '{';
        <br>
        +            }
        <br>
        +
        <br>
        +            if (count > MAX_CHUNK) {
        <br>
        +                size_t pos;
        <br>
        +                char *ptr2 = ptr;
        <br>
        +
        <br>
        +                for (pos = MAX_CHUNK; pos < count; pos +=
        MAX_CHUNK) {
        <br>
        +                    char chr = ptr[pos];
        <br>
        +
        <br>
        +                    ptr[pos] = 0;
        <br>
        +                    drm_info(&i915->drm,
        "[Capture/%d.%d] }%s{\n",
        <br>
        +                         l_count, line++, ptr2);
        <br>
        +                    ptr[pos] = chr;
        <br>
        +                    ptr2 = ptr + pos;
        <br>
        +
        <br>
        +                    /*
        <br>
        +                     * If spewing large amounts of data via a
        serial console,
        <br>
        +                     * this can be a very slow process. So be
        friendly and try
        <br>
        +                     * not to cause 'softlockup on CPU'
        problems.
        <br>
        +                     */
        <br>
        +                    cond_resched();
        <br>
        +                }
        <br>
        +
        <br>
        +                if (ptr2 < (ptr + count))
        <br>
        +                    drm_info(&i915->drm,
        "[Capture/%d.%d] %c%s%c\n",
        <br>
        +                         l_count, line++, tag[0], ptr2,
        tag[1]);
        <br>
        +                else if (tag[0] == '>')
        <br>
        +                    drm_info(&i915->drm,
        "[Capture/%d.%d] ><\n",
        <br>
        +                         l_count, line++);
        <br>
        +            } else {
        <br>
        +                drm_info(&i915->drm, "[Capture/%d.%d]
        %c%s%c\n",
        <br>
        +                     l_count, line++, tag[0], ptr, tag[1]);
        <br>
        +            }
        <br>
        +
        <br>
        +            ptr = next;
        <br>
        +            got -= count;
        <br>
        +            if (next) {
        <br>
        +                ptr++;
        <br>
        +                got--;
        <br>
        +            }
        <br>
        +
        <br>
        +            /* As above. */
        <br>
        +            cond_resched();
        <br>
        +        }
        <br>
        +
        <br>
        +        if (got)
        <br>
        +            drm_info(&i915->drm, "[Capture/%d.%d] Got
        %zd bytes remaining!\n",
        <br>
        +                 l_count, line++, got);
        <br>
        +    }
        <br>
        +
        <br>
        +    kvfree(buf);
        <br>
        +
        <br>
        +    drm_info(&i915->drm, "[Capture/%d.%d] Dumped %zd
        bytes\n", l_count, line++, pos_err);
        <br>
        +}
        <br>
        +#endif
        <br>
        diff --git a/drivers/gpu/drm/i915/i915_gpu_error.h
        b/drivers/gpu/drm/i915/i915_gpu_error.h
        <br>
        index a91932cc65317..a78c061ce26fb 100644
        <br>
        --- a/drivers/gpu/drm/i915/i915_gpu_error.h
        <br>
        +++ b/drivers/gpu/drm/i915/i915_gpu_error.h
        <br>
        @@ -258,6 +258,16 @@ static inline u32
        i915_reset_engine_count(struct i915_gpu_error *error,
        <br>
          #define CORE_DUMP_FLAG_NONE           0x0
        <br>
          #define CORE_DUMP_FLAG_IS_GUC_CAPTURE BIT(0)
        <br>
          +#if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR) &&
        IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
        <br>
        +void intel_klog_error_capture(struct intel_gt *gt,
        <br>
        +                  intel_engine_mask_t engine_mask);
        <br>
        +#else
        <br>
        +static inline void intel_klog_error_capture(struct intel_gt
        *gt,
        <br>
        +                        intel_engine_mask_t engine_mask)
        <br>
        +{
        <br>
        +}
        <br>
        +#endif
        <br>
        +
        <br>
          #if IS_ENABLED(CONFIG_DRM_I915_CAPTURE_ERROR)
        <br>
            __printf(2, 3)
        <br>
      </blockquote>
    </blockquote>
    <br>
  </body>
</html>