<html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 2021-11-24 4:37 a.m., Christian
      König wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:3d17fd21-751d-6f87-95b9-d120db7f4a0e@gmail.com">Am
      23.11.21 um 20:22 schrieb Philip Yang:
      <br>
      <blockquote type="cite">IH ring1 is used to process GPU retry
        fault, overflow is enabled to
        <br>
        drain retry fault because we want receive other interrupts while
        <br>
        handling retry fault to recover range. There is no overflow flag
        set
        <br>
        when wptr pass rptr. Use timestamp of rptr and wptr to handle
        overflow
        <br>
        and drain retry fault.
        <br>
        <br>
        Add helper function amdgpu_ih_decode_iv_ts to get 48bit
        timestamp from
        <br>
        IV entry. drain retry fault is done if processed_timestamp is
        <br>
        equal to or larger than checkpoint timestamp.
        <br>
        <br>
        Add function amdgpu_ih_process1 to process IH ring1 until
        timestamp of
        <br>
        rptr is larger then timestamp of next entry.
        <br>
        <br>
        Helper amdgpu_ih_ts_after to compare time stamps with 48bit wrap
        around.
        <br>
        <br>
        Signed-off-by: Philip Yang <a class="moz-txt-link-rfc2396E" href="mailto:Philip.Yang@amd.com"><Philip.Yang@amd.com></a>
        <br>
        ---
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c  | 107
        ++++++++++++++++++------
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h  |  13 ++-
        <br>
          drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c |   2 +-
        <br>
          drivers/gpu/drm/amd/amdgpu/navi10_ih.c  |   1 +
        <br>
          drivers/gpu/drm/amd/amdgpu/vega10_ih.c  |   1 +
        <br>
          drivers/gpu/drm/amd/amdgpu/vega20_ih.c  |   1 +
        <br>
          drivers/gpu/drm/amd/amdkfd/kfd_svm.c    |   2 +-
        <br>
          7 files changed, 99 insertions(+), 28 deletions(-)
        <br>
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
        <br>
        index 0c7963dfacad..30b4e0e01444 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c
        <br>
        @@ -164,52 +164,45 @@ void amdgpu_ih_ring_write(struct
        amdgpu_ih_ring *ih, const uint32_t *iv,
        <br>
              }
        <br>
          }
        <br>
          +/* return true if time stamp t2 is after t1 with 48bit wrap
        around */
        <br>
        +static inline bool amdgpu_ih_ts_after(uint64_t t1, uint64_t t2)
        <br>
        +{
        <br>
        +    return ((int64_t)(t2 << 16) - (int64_t)(t1 <<
        16)) > 0LL;
        <br>
        +}
        <br>
        +
        <br>
          /* Waiter helper that checks current rptr matches or passes
        checkpoint wptr */
        <br>
        -static bool amdgpu_ih_has_checkpoint_processed(struct
        amdgpu_device *adev,
        <br>
        +static bool amdgpu_ih_has_checkpoint_processed_ts(struct
        amdgpu_device *adev,
        <br>
                              struct amdgpu_ih_ring *ih,
        <br>
        -                    uint32_t checkpoint_wptr,
        <br>
        -                    uint32_t *prev_rptr)
        <br>
        +                    uint64_t checkpoint_ts)
        <br>
          {
        <br>
        -    uint32_t cur_rptr = ih->rptr | (*prev_rptr &
        ~ih->ptr_mask);
        <br>
        -
        <br>
        -    /* rptr has wrapped. */
        <br>
        -    if (cur_rptr < *prev_rptr)
        <br>
        -        cur_rptr += ih->ptr_mask + 1;
        <br>
        -    *prev_rptr = cur_rptr;
        <br>
        -
        <br>
        -    /* check ring is empty to workaround missing wptr overflow
        flag */
        <br>
        -    return cur_rptr >= checkpoint_wptr ||
        <br>
        -           (cur_rptr & ih->ptr_mask) ==
        amdgpu_ih_get_wptr(adev, ih);
        <br>
        +    return !amdgpu_ih_ts_after(ih->processed_timestamp,
        checkpoint_ts);
        <br>
      </blockquote>
      <br>
      I don't see much of a reason to keep this function, it only
      consists of a call to amdgpu_ih_ts_after() and is used only once.
      <br>
    </blockquote>
    Will remove this function.<br>
    <blockquote type="cite" cite="mid:3d17fd21-751d-6f87-95b9-d120db7f4a0e@gmail.com">
      <br>
      <blockquote type="cite">  }
        <br>
            /**
        <br>
        - * amdgpu_ih_wait_on_checkpoint_process - wait to process IVs
        up to checkpoint
        <br>
        + * amdgpu_ih_wait_on_checkpoint_process_ts - wait to process
        IVs up to checkpoint
        <br>
           *
        <br>
           * @adev: amdgpu_device pointer
        <br>
           * @ih: ih ring to process
        <br>
           *
        <br>
           * Used to ensure ring has processed IVs up to the checkpoint
        write pointer.
        <br>
           */
        <br>
        -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
        *adev,
        <br>
        +int amdgpu_ih_wait_on_checkpoint_process_ts(struct
        amdgpu_device *adev,
        <br>
                              struct amdgpu_ih_ring *ih)
        <br>
          {
        <br>
        -    uint32_t checkpoint_wptr, rptr;
        <br>
        +    uint32_t checkpoint_wptr;
        <br>
        +    uint64_t checkpoint_ts;
        <br>
                if (!ih->enabled || adev->shutdown)
        <br>
                  return -ENODEV;
        <br>
                checkpoint_wptr = amdgpu_ih_get_wptr(adev, ih);
        <br>
        -    /* Order wptr with rptr. */
        <br>
        +    /* Order wptr with ring data. */
        <br>
              rmb();
        <br>
        -    rptr = READ_ONCE(ih->rptr);
        <br>
        -
        <br>
        -    /* wptr has wrapped. */
        <br>
        -    if (rptr > checkpoint_wptr)
        <br>
        -        checkpoint_wptr += ih->ptr_mask + 1;
        <br>
        +    checkpoint_ts = amdgpu_ih_decode_iv_ts(adev, ih,
        checkpoint_wptr, -1);
        <br>
                return wait_event_interruptible(ih->wait_process,
        <br>
      </blockquote>
    </blockquote>
    I will change this to wait_event_interruptible_timeout, found wait
    event may never returns if gpu reset, rptr=wptr=0,
    ih->processed_timestamp is not updated. Timeout value 1 second is
    long enough to drain fault.<br>
    <blockquote type="cite" cite="mid:3d17fd21-751d-6f87-95b9-d120db7f4a0e@gmail.com">
      <blockquote type="cite">-               
        amdgpu_ih_has_checkpoint_processed(adev, ih,
        <br>
        -                        checkpoint_wptr, &rptr));
        <br>
        +                amdgpu_ih_has_checkpoint_processed_ts(adev, ih,
        <br>
        +                        checkpoint_ts));
        <br>
          }
        <br>
            /**
        <br>
        @@ -254,6 +247,56 @@ int amdgpu_ih_process(struct amdgpu_device
        *adev, struct amdgpu_ih_ring *ih)
        <br>
              return IRQ_HANDLED;
        <br>
          }
        <br>
          +/**
        <br>
        + * amdgpu_ih_process1 - interrupt handler work for IH ring1
        <br>
        + *
        <br>
        + * @adev: amdgpu_device pointer
        <br>
        + * @ih: ih ring to process
        <br>
        + *
        <br>
        + * Interrupt handler of IH ring1, walk the IH ring1.
        <br>
        + * Returns irq process return code.
        <br>
        + */
        <br>
        +int amdgpu_ih_process1(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih)
        <br>
      </blockquote>
      <br>
      I don't think we need this new function any more.
      <br>
      <br>
      The check if the timestamp goes backwards can now be done inside
      the page fault handler.
      <br>
    </blockquote>
    Do you mean to merge this into the original ring0 interrupt handler?
    Then we need add parameter (ih->overflow_enabled) and process two
    different cases in ring0 interrupt handler, I think that is not good
    to maintain later so I want to separate them.<br>
    <blockquote type="cite" cite="mid:3d17fd21-751d-6f87-95b9-d120db7f4a0e@gmail.com">
      <br>
      <blockquote type="cite">+{
        <br>
        +    uint64_t ts, ts_next;
        <br>
        +    unsigned int count;
        <br>
        +    u32 wptr;
        <br>
        +
        <br>
        +    if (!ih->enabled || adev->shutdown)
        <br>
        +        return IRQ_NONE;
        <br>
        +
        <br>
        +    wptr = amdgpu_ih_get_wptr(adev, ih);
        <br>
        +    if (ih->rptr == wptr)
        <br>
        +        return 0;
        <br>
        +
        <br>
        +restart_ih:
        <br>
        +    count = AMDGPU_IH_MAX_NUM_IVS;
        <br>
        +
        <br>
        +    ts = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr, 0);
        <br>
        +    ts_next = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr, 1);
        <br>
        +    while (amdgpu_ih_ts_after(ts, ts_next) && --count)
        {
        <br>
        +        amdgpu_irq_dispatch(adev, ih);
        <br>
        +        ih->rptr &= ih->ptr_mask;
        <br>
        +        ts = ts_next;
        <br>
        +        ts_next = amdgpu_ih_decode_iv_ts(adev, ih, ih->rptr,
        1);
        <br>
        +    }
        <br>
        +    /*
        <br>
        +     * Process the last timestamp updated entry or one more
        entry
        <br>
        +     * if count = 0, ts is timestamp of the entry.
        <br>
        +     */
        <br>
        +    amdgpu_irq_dispatch(adev, ih);
        <br>
        +    amdgpu_ih_set_rptr(adev, ih);
        <br>
        +    wake_up_all(&ih->wait_process);
        <br>
        +
        <br>
        +    wptr = amdgpu_ih_get_wptr(adev, ih);
        <br>
        +    /* Order reading of wptr vs. reading of IH ring data */
        <br>
        +    rmb();
        <br>
        +    if (amdgpu_ih_ts_after(ts, amdgpu_ih_decode_iv_ts(adev, ih,
        wptr, -1)))
        <br>
        +        goto restart_ih;
        <br>
        +
        <br>
        +    return IRQ_HANDLED;
        <br>
        +}
        <br>
        +
        <br>
          /**
        <br>
           * amdgpu_ih_decode_iv_helper - decode an interrupt vector
        <br>
           *
        <br>
        @@ -298,4 +341,20 @@ void amdgpu_ih_decode_iv_helper(struct
        amdgpu_device *adev,
        <br>
                /* wptr/rptr are in bytes! */
        <br>
              ih->rptr += 32;
        <br>
        +    ih->processed_timestamp = entry->timestamp;
        <br>
        +}
        <br>
        +
        <br>
        +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring
        *ih, u32 rptr,
        <br>
        +                       signed offset)
        <br>
        +{
        <br>
        +    uint32_t iv_size = 32;
        <br>
        +    uint32_t dw1, dw2;
        <br>
        +    uint32_t index;
        <br>
        +
        <br>
        +    rptr += iv_size * offset;
        <br>
        +    index = (rptr & ih->ptr_mask) >> 2;
        <br>
        +
        <br>
        +    dw1 = le32_to_cpu(ih->ring[index + 1]);
        <br>
        +    dw2 = le32_to_cpu(ih->ring[index + 2]);
        <br>
        +    return dw1 | ((u64)(dw2 & 0xffff) << 32);
        <br>
          }
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
        <br>
        index 0649b59830a5..d7e1ffeca38f 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
        <br>
        @@ -68,6 +68,7 @@ struct amdgpu_ih_ring {
        <br>
                /* For waiting on IH processing at checkpoint. */
        <br>
              wait_queue_head_t wait_process;
        <br>
        +    uint64_t        processed_timestamp;
        <br>
          };
        <br>
            /* provided by the ih block */
        <br>
        @@ -76,12 +77,17 @@ struct amdgpu_ih_funcs {
        <br>
              u32 (*get_wptr)(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih);
        <br>
              void (*decode_iv)(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih,
        <br>
                        struct amdgpu_iv_entry *entry);
        <br>
        +    uint64_t (*decode_iv_ts)(struct amdgpu_ih_ring *ih, u32
        rptr,
        <br>
        +                 signed offset);
        <br>
              void (*set_rptr)(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih);
        <br>
          };
        <br>
            #define amdgpu_ih_get_wptr(adev, ih)
        (adev)->irq.ih_funcs->get_wptr((adev), (ih))
        <br>
          #define amdgpu_ih_decode_iv(adev, iv) \
        <br>
              (adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
        <br>
        +#define amdgpu_ih_decode_iv_ts(adev, ih, rptr, offset) \
        <br>
        +    (WARN_ON_ONCE(!(adev)->irq.ih_funcs->decode_iv_ts) ?
        0 : \
        <br>
      </blockquote>
      <br>
      Please drop that WARN_ON_ONCE here.
      <br>
      <br>
    </blockquote>
    <p>Agree, will drop it.<br>
    </p>
    <p>Regards,</p>
    <p>Philip<br>
    </p>
    <blockquote type="cite" cite="mid:3d17fd21-751d-6f87-95b9-d120db7f4a0e@gmail.com">Regards,
      <br>
      Christian.
      <br>
      <br>
      <blockquote type="cite">+   
        (adev)->irq.ih_funcs->decode_iv_ts((ih), (rptr),
        (offset)))
        <br>
          #define amdgpu_ih_set_rptr(adev, ih)
        (adev)->irq.ih_funcs->set_rptr((adev), (ih))
        <br>
            int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih,
        <br>
        @@ -89,10 +95,13 @@ int amdgpu_ih_ring_init(struct amdgpu_device
        *adev, struct amdgpu_ih_ring *ih,
        <br>
          void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih);
        <br>
          void amdgpu_ih_ring_write(struct amdgpu_ih_ring *ih, const
        uint32_t *iv,
        <br>
                        unsigned int num_dw);
        <br>
        -int amdgpu_ih_wait_on_checkpoint_process(struct amdgpu_device
        *adev,
        <br>
        -                    struct amdgpu_ih_ring *ih);
        <br>
        +int amdgpu_ih_wait_on_checkpoint_process_ts(struct
        amdgpu_device *adev,
        <br>
        +                        struct amdgpu_ih_ring *ih);
        <br>
          int amdgpu_ih_process(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih);
        <br>
        +int amdgpu_ih_process1(struct amdgpu_device *adev, struct
        amdgpu_ih_ring *ih);
        <br>
          void amdgpu_ih_decode_iv_helper(struct amdgpu_device *adev,
        <br>
                          struct amdgpu_ih_ring *ih,
        <br>
                          struct amdgpu_iv_entry *entry);
        <br>
        +uint64_t amdgpu_ih_decode_iv_ts_helper(struct amdgpu_ih_ring
        *ih, u32 rptr,
        <br>
        +                       signed offset);
        <br>
          #endif
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
        b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
        <br>
        index e9023687dc9a..891486cca94b 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
        <br>
        @@ -224,7 +224,7 @@ static void amdgpu_irq_handle_ih1(struct
        work_struct *work)
        <br>
              struct amdgpu_device *adev = container_of(work, struct
        amdgpu_device,
        <br>
                                    irq.ih1_work);
        <br>
          -    amdgpu_ih_process(adev, &adev->irq.ih1);
        <br>
        +    amdgpu_ih_process1(adev, &adev->irq.ih1);
        <br>
          }
        <br>
            /**
        <br>
        diff --git a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
        b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
        <br>
        index 38241cf0e1f1..8ce5b8ca1fd7 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/navi10_ih.c
        <br>
        @@ -716,6 +716,7 @@ static const struct amd_ip_funcs
        navi10_ih_ip_funcs = {
        <br>
          static const struct amdgpu_ih_funcs navi10_ih_funcs = {
        <br>
              .get_wptr = navi10_ih_get_wptr,
        <br>
              .decode_iv = amdgpu_ih_decode_iv_helper,
        <br>
        +    .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
        <br>
              .set_rptr = navi10_ih_set_rptr
        <br>
          };
        <br>
          diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
        b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
        <br>
        index a9ca6988009e..3070466f54e1 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
        <br>
        @@ -640,6 +640,7 @@ const struct amd_ip_funcs vega10_ih_ip_funcs
        = {
        <br>
          static const struct amdgpu_ih_funcs vega10_ih_funcs = {
        <br>
              .get_wptr = vega10_ih_get_wptr,
        <br>
              .decode_iv = amdgpu_ih_decode_iv_helper,
        <br>
        +    .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
        <br>
              .set_rptr = vega10_ih_set_rptr
        <br>
          };
        <br>
          diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
        b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
        <br>
        index f51dfc38ac65..3b4eb8285943 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdgpu/vega20_ih.c
        <br>
        @@ -688,6 +688,7 @@ const struct amd_ip_funcs vega20_ih_ip_funcs
        = {
        <br>
          static const struct amdgpu_ih_funcs vega20_ih_funcs = {
        <br>
              .get_wptr = vega20_ih_get_wptr,
        <br>
              .decode_iv = amdgpu_ih_decode_iv_helper,
        <br>
        +    .decode_iv_ts = amdgpu_ih_decode_iv_ts_helper,
        <br>
              .set_rptr = vega20_ih_set_rptr
        <br>
          };
        <br>
          diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        index 10868d5b549f..663489ae56d7 100644
        <br>
        --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
        <br>
        @@ -1974,7 +1974,7 @@ static void
        svm_range_drain_retry_fault(struct svm_range_list *svms)
        <br>
                    pr_debug("drain retry fault gpu %d svms %p\n", i,
        svms);
        <br>
          -       
        amdgpu_ih_wait_on_checkpoint_process(pdd->dev->adev,
        <br>
        +       
        amdgpu_ih_wait_on_checkpoint_process_ts(pdd->dev->adev,
        <br>
                                      
        &pdd->dev->adev->irq.ih1);
        <br>
                  pr_debug("drain retry fault gpu %d svms 0x%p done\n",
        i, svms);
        <br>
              }
        <br>
      </blockquote>
      <br>
    </blockquote>
  </body>
</html>