[Freedreno] [PATCH 2/2] drm/msm: make msm_disp_state transient data struct

abhinavk at codeaurora.org abhinavk at codeaurora.org
Mon Apr 26 22:03:42 UTC 2021


On 2021-04-26 14:23, Dmitry Baryshkov wrote:
> On 26/04/2021 23:50, abhinavk at codeaurora.org wrote:
>> On 2021-04-25 09:08, Dmitry Baryshkov wrote:
>>> Instead of allocating snapshotting structure at the driver probe time
>>> and later handling concurrent access, actual state, etc, make
>>> msm_disp_state transient struct. Allocate one when snapshotting 
>>> happens
>>> and free it after coredump data is read by userspace.
>> 
>> the purpose of the mutex is that when there are two coredumps being 
>> triggered
>> by two consecutive errors, we want to make sure that till one coredump 
>> is completely
>> read and/or processed, we do not allow triggering of another one as we 
>> want to capture
>> the accurate hardware/software state.
>> 
>> So moving disp_state out of kms might be okay and just allocated it 
>> when actually capturing the state
>> but we will need the mutex and some sort of pending flag in my 
>> opinion.
> 
> I'll return the mutex to the kms struct, so that we won't start
> another snapshot untill previous one is complete.

I think mutex should remain as part of snapshot so that they go 
together. Since this mutex is not used
by any other module, I thought its better to keep it there.

> 
> Concerning the pending flag, I think it is also handled by the
> coredump code: dev_coredumpm() will not create another device if there
> is one already associated with the device being dumped. I should
> probably mention this in the commit message.

Thats a good point, I checked that now as well but I still think we need 
this flag because
it also makes sure devcoredump is taken and read together within our 
driver itself instead of relying
on the devcoredump. Its not a strong preference.

But if you would like to go ahead with this, might have to retest its 
robustness.
With the older logic how i verified this was that I relaxed the underrun 
cnt check in this patch
( https://patchwork.freedesktop.org/patch/429112/?series=89181&rev=1 ) 
here and simulated an error:

@@ -1336,6 +1337,11 @@  static void dpu_encoder_underrun_callback(struct 
drm_encoder *drm_enc,

  	DPU_ATRACE_BEGIN("encoder_underrun_callback");
  	atomic_inc(&phy_enc->underrun_cnt);
+
+	/* trigger dump only on the first underrun */
+	if (atomic_read(&phy_enc->underrun_cnt) == 1)
+		msm_disp_snapshot_state(drm_enc->dev);
+

And verified that across various underrun interrupts, the devcoredump is 
stable.

> 
> If you agree with this, I'll send v2 then (also adding plls dumping).
Looking fwd to seeing the pll dumping, that will be a great addition to 
this.
> 
>> 
>>> 
>>> Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov at linaro.org>
>>> ---
>>>  drivers/gpu/drm/msm/disp/msm_disp_snapshot.c  | 87 
>>> ++++---------------
>>>  drivers/gpu/drm/msm/disp/msm_disp_snapshot.h  | 13 +--
>>>  .../gpu/drm/msm/disp/msm_disp_snapshot_util.c |  5 +-
>>>  drivers/gpu/drm/msm/msm_kms.h                 |  5 +-
>>>  4 files changed, 28 insertions(+), 82 deletions(-)
>>> 
>>> diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> index 70fd5a1fe13e..371358893716 100644
>>> --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> @@ -7,8 +7,7 @@
>>> 
>>>  #include "msm_disp_snapshot.h"
>>> 
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> -static ssize_t disp_devcoredump_read(char *buffer, loff_t offset,
>>> +static ssize_t __maybe_unused disp_devcoredump_read(char *buffer,
>>> loff_t offset,
>>>          size_t count, void *data, size_t datalen)
>>>  {
>>>      struct drm_print_iterator iter;
>>> @@ -29,24 +28,21 @@ static ssize_t disp_devcoredump_read(char 
>>> *buffer,
>>> loff_t offset,
>>>      return count - iter.remain;
>>>  }
>>> 
>>> -static void disp_devcoredump_free(void *data)
>>> +static void _msm_disp_snapshot_work(struct kthread_work *work)
>>>  {
>>> +    struct msm_kms *msm_kms = container_of(work, struct msm_kms, 
>>> dump_work);
>>> +    struct drm_device *drm_dev = msm_kms->dev;
>>>      struct msm_disp_state *disp_state;
>>> +    struct drm_printer p;
>>> 
>>> -    disp_state = data;
>>> -
>>> -    msm_disp_state_free(disp_state);
>>> -
>>> -    disp_state->coredump_pending = false;
>>> -}
>>> -#endif /* CONFIG_DEV_COREDUMP */
>>> +    disp_state = kzalloc(sizeof(struct msm_disp_state), GFP_KERNEL);
>>> +    if (!disp_state)
>>> +        return;
>>> 
>>> -static void _msm_disp_snapshot_work(struct kthread_work *work)
>>> -{
>>> -    struct msm_disp_state *disp_state = container_of(work, struct
>>> msm_disp_state, dump_work);
>>> -    struct drm_printer p;
>>> +    disp_state->dev = drm_dev->dev;
>>> +    disp_state->drm_dev = drm_dev;
>>> 
>>> -    mutex_lock(&disp_state->mutex);
>>> +    INIT_LIST_HEAD(&disp_state->blocks);
>>> 
>>>      msm_disp_snapshot_capture_state(disp_state);
>>> 
>>> @@ -55,26 +51,15 @@ static void _msm_disp_snapshot_work(struct
>>> kthread_work *work)
>>>          msm_disp_state_print(disp_state, &p);
>>>      }
>>> 
>>> -    /*
>>> -     * if devcoredump is not defined free the state immediately
>>> -     * otherwise it will be freed in the free handler.
>>> -     */
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> +    /* If COREDUMP is disabled, the stub will call the free 
>>> function. */
>> This is a good cleanup, I just checked that the free() is called even 
>> if the config is not enabled
>>>      dev_coredumpm(disp_state->dev, THIS_MODULE, disp_state, 0, 
>>> GFP_KERNEL,
>>> -            disp_devcoredump_read, disp_devcoredump_free);
>>> -    disp_state->coredump_pending = true;
>>> -#else
>>> -    msm_disp_state_free(disp_state);
>>> -#endif
>>> -
>>> -    mutex_unlock(&disp_state->mutex);
>>> +            disp_devcoredump_read, msm_disp_state_free);
>>>  }
>>> 
>>>  void msm_disp_snapshot_state(struct drm_device *drm_dev)
>>>  {
>>>      struct msm_drm_private *priv;
>>>      struct msm_kms *kms;
>>> -    struct msm_disp_state *disp_state;
>>> 
>>>      if (!drm_dev) {
>>>          DRM_ERROR("invalid params\n");
>>> @@ -83,30 +68,13 @@ void msm_disp_snapshot_state(struct drm_device 
>>> *drm_dev)
>>> 
>>>      priv = drm_dev->dev_private;
>>>      kms = priv->kms;
>>> -    disp_state = kms->disp_state;
>>> -
>>> -    if (!disp_state) {
>>> -        DRM_ERROR("invalid params\n");
>>> -        return;
>>> -    }
>>> -
>>> -    /*
>>> -     * if there is a coredump pending return immediately till dump
>>> -     * if read by userspace or timeout happens
>>> -     */
>>> -    if (disp_state->coredump_pending) {
>>> -        DRM_DEBUG("coredump is pending read\n");
>>> -        return;
>>> -    }
>>> 
>>> -    kthread_queue_work(disp_state->dump_worker,
>>> -            &disp_state->dump_work);
>>> +    kthread_queue_work(kms->dump_worker, &kms->dump_work);
>>>  }
>>> 
>>>  int msm_disp_snapshot_init(struct drm_device *drm_dev)
>>>  {
>>>      struct msm_drm_private *priv;
>>> -    struct msm_disp_state *disp_state;
>>>      struct msm_kms *kms;
>>> 
>>>      if (!drm_dev) {
>>> @@ -117,22 +85,11 @@ int msm_disp_snapshot_init(struct drm_device 
>>> *drm_dev)
>>>      priv = drm_dev->dev_private;
>>>      kms = priv->kms;
>>> 
>>> -    disp_state = devm_kzalloc(drm_dev->dev, sizeof(struct
>>> msm_disp_state), GFP_KERNEL);
>>> -
>>> -    mutex_init(&disp_state->mutex);
>>> -
>>> -    disp_state->dev = drm_dev->dev;
>>> -    disp_state->drm_dev = drm_dev;
>>> -
>>> -    INIT_LIST_HEAD(&disp_state->blocks);
>>> -
>>> -    disp_state->dump_worker = kthread_create_worker(0, "%s", 
>>> "disp_snapshot");
>>> -    if (IS_ERR(disp_state->dump_worker))
>>> +    kms->dump_worker = kthread_create_worker(0, "%s", 
>>> "disp_snapshot");
>>> +    if (IS_ERR(kms->dump_worker))
>>>          DRM_ERROR("failed to create disp state task\n");
>>> 
>>> -    kthread_init_work(&disp_state->dump_work, 
>>> _msm_disp_snapshot_work);
>>> -
>>> -    kms->disp_state = disp_state;
>>> +    kthread_init_work(&kms->dump_work, _msm_disp_snapshot_work);
>>> 
>>>      return 0;
>>>  }
>>> @@ -141,7 +98,6 @@ void msm_disp_snapshot_destroy(struct drm_device 
>>> *drm_dev)
>>>  {
>>>      struct msm_kms *kms;
>>>      struct msm_drm_private *priv;
>>> -    struct msm_disp_state *disp_state;
>>> 
>>>      if (!drm_dev) {
>>>          DRM_ERROR("invalid params\n");
>>> @@ -150,12 +106,7 @@ void msm_disp_snapshot_destroy(struct drm_device 
>>> *drm_dev)
>>> 
>>>      priv = drm_dev->dev_private;
>>>      kms = priv->kms;
>>> -    disp_state = kms->disp_state;
>>> -
>>> -    if (disp_state->dump_worker)
>>> -        kthread_destroy_worker(disp_state->dump_worker);
>>> -
>>> -    list_del(&disp_state->blocks);
>>> 
>>> -    mutex_destroy(&disp_state->mutex);
>>> +    if (kms->dump_worker)
>>> +        kthread_destroy_worker(kms->dump_worker);
>>>  }
>>> diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> index 32f52799a1ba..c6174a366095 100644
>>> --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> @@ -41,26 +41,17 @@
>>>   * struct msm_disp_state - structure to store current dpu state
>>>   * @dev: device pointer
>>>   * @drm_dev: drm device pointer
>>> - * @mutex: mutex to serialize access to serialze dumps, debugfs 
>>> access
>>> - * @coredump_pending: coredump is pending read from userspace
>>>   * @atomic_state: atomic state duplicated at the time of the error
>>> - * @dump_worker: kworker thread which runs the dump work
>>> - * @dump_work: kwork which dumps the registers and drm state
>>>   * @timestamp: timestamp at which the coredump was captured
>>>   */
>>>  struct msm_disp_state {
>>>      struct device *dev;
>>>      struct drm_device *drm_dev;
>>> -    struct mutex mutex;
>>> -
>>> -    bool coredump_pending;
>>> 
>>>      struct list_head blocks;
>>> 
>>>      struct drm_atomic_state *atomic_state;
>>> 
>>> -    struct kthread_worker *dump_worker;
>>> -    struct kthread_work dump_work;
>>>      ktime_t timestamp;
>>>  };
>>> 
>>> @@ -123,11 +114,11 @@ void msm_disp_snapshot_capture_state(struct
>>> msm_disp_state *disp_state);
>>> 
>>>  /**
>>>   * msm_disp_state_free - free the memory after the coredump has been 
>>> read
>>> - * @disp_state:        handle to struct msm_disp_state
>>> + * @data:        handle to struct msm_disp_state
>>> 
>>>   * Returns: none
>>>   */
>>> -void msm_disp_state_free(struct msm_disp_state *disp_state);
>>> +void msm_disp_state_free(void *data);
>>> 
>>>  /**
>>>   * msm_disp_snapshot_add_block - add a hardware block with its 
>>> register dump
>>> diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> index ca6632550337..cabe15190ec1 100644
>>> --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> @@ -142,8 +142,9 @@ void msm_disp_snapshot_capture_state(struct
>>> msm_disp_state *disp_state)
>>>      msm_disp_capture_atomic_state(disp_state);
>>>  }
>>> 
>>> -void msm_disp_state_free(struct msm_disp_state *disp_state)
>>> +void msm_disp_state_free(void *data)
>>>  {
>>> +    struct msm_disp_state *disp_state = data;
>>>      struct msm_disp_state_block *block, *tmp;
>>> 
>>>      if (disp_state->atomic_state) {
>>> @@ -156,6 +157,8 @@ void msm_disp_state_free(struct msm_disp_state 
>>> *disp_state)
>>>          kfree(block->state);
>>>          kfree(block);
>>>      }
>>> +
>>> +    kfree(disp_state);
>>>  }
>>> 
>>>  void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state, 
>>> u32 len,
>>> diff --git a/drivers/gpu/drm/msm/msm_kms.h 
>>> b/drivers/gpu/drm/msm/msm_kms.h
>>> index 146dcab123f4..529b9dacf7ca 100644
>>> --- a/drivers/gpu/drm/msm/msm_kms.h
>>> +++ b/drivers/gpu/drm/msm/msm_kms.h
>>> @@ -156,8 +156,9 @@ struct msm_kms {
>>>      /* mapper-id used to request GEM buffer mapped for scanout: */
>>>      struct msm_gem_address_space *aspace;
>>> 
>>> -    /* handle to disp snapshot state */
>>> -    struct msm_disp_state *disp_state;
>>> +    /* disp snapshot support */
>>> +    struct kthread_worker *dump_worker;
>>> +    struct kthread_work dump_work;
>>> 
>>>      /*
>>>       * For async commit, where ->flush_commit() and later happens


More information about the dri-devel mailing list