[PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for vcn start

Christian König christian.koenig at amd.com
Wed Mar 11 11:38:47 UTC 2020


Am 11.03.20 um 12:30 schrieb Zhu, James:
>
> [AMD Official Use Only - Internal Distribution Only]
>
>
> ping
>
> ------------------------------------------------------------------------
> *From:* Zhu, James <James.Zhu at amd.com>
> *Sent:* Monday, March 9, 2020 12:57 PM
> *To:* amd-gfx at lists.freedesktop.org <amd-gfx at lists.freedesktop.org>
> *Cc:* Zhu, James <James.Zhu at amd.com>; Koenig, Christian 
> <Christian.Koenig at amd.com>
> *Subject:* [PATCH v3 1/4] drm/amdgpu/vcn: fix race condition issue for 
> vcn start
> Fix race condition issue when multiple vcn starts are called.
>
> v2: Removed checking the return value of cancel_delayed_work_sync()
> to prevent possible races here.
>
> v3: Add total_submission_cnt to avoid gating power unexpectedly.
>
> Signed-off-by: James Zhu <James.Zhu at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c | 22 +++++++++++++++-------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h |  2 ++
>  2 files changed, 17 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> index a41272f..6aafda1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
> @@ -63,6 +63,8 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
>          int i, r;
>
> INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
> +       mutex_init(&adev->vcn.vcn_pg_lock);
> + atomic_set(&adev->vcn.total_submission_cnt, 0);
>
>          switch (adev->asic_type) {
>          case CHIP_RAVEN:
> @@ -210,6 +212,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
>          }
>
>          release_firmware(adev->vcn.fw);
> + mutex_destroy(&adev->vcn.vcn_pg_lock);
>
>          return 0;
>  }
> @@ -307,7 +310,8 @@ static void amdgpu_vcn_idle_work_handler(struct 
> work_struct *work)
>                  fences += fence[j];
>          }
>
> -       if (fences == 0) {
> +       if (fences == 0 &&
> + likely(atomic_read(&adev->vcn.total_submission_cnt) == 0)) {
>                  amdgpu_gfx_off_ctrl(adev, true);
> amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
>                         AMD_PG_STATE_GATE);
> @@ -319,13 +323,14 @@ static void amdgpu_vcn_idle_work_handler(struct 
> work_struct *work)
>  void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
>  {
>          struct amdgpu_device *adev = ring->adev;
> -       bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
>
> -       if (set_clocks) {
> -               amdgpu_gfx_off_ctrl(adev, false);
> - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> -                      AMD_PG_STATE_UNGATE);
> -       }
> + atomic_inc(&adev->vcn.total_submission_cnt);
> + cancel_delayed_work_sync(&adev->vcn.idle_work);
> +
> +       mutex_lock(&adev->vcn.vcn_pg_lock);
> +       amdgpu_gfx_off_ctrl(adev, false);
> + amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
> +              AMD_PG_STATE_UNGATE);
>
>          if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)    {
>                  struct dpg_pause_state new_state;
> @@ -345,11 +350,14 @@ void amdgpu_vcn_ring_begin_use(struct 
> amdgpu_ring *ring)
>
>                  adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
>          }
> + mutex_unlock(&adev->vcn.vcn_pg_lock);
>  }
>
>  void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
>  {
> schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
> +       if 
> (unlikely(atomic_dec_return(&ring->adev->vcn.total_submission_cnt) < 0))
> + atomic_set(&ring->adev->vcn.total_submission_cnt, 0);

You need to decrement first and then call schedule_delayed_work();
otherwise the work could run with the wrong counter.

And the extra check for an underrun should be superfluous.

Regards,
Christian.

>  }
>
>  int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> index 6fe0573..111c4cc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h
> @@ -200,6 +200,8 @@ struct amdgpu_vcn {
>          struct drm_gpu_scheduler 
> *vcn_dec_sched[AMDGPU_MAX_VCN_INSTANCES];
>          uint32_t num_vcn_enc_sched;
>          uint32_t num_vcn_dec_sched;
> +       struct mutex             vcn_pg_lock;
> +       atomic_t total_submission_cnt;
>
>          unsigned        harvest_config;
>          int (*pause_dpg_mode)(struct amdgpu_device *adev,
> -- 
> 2.7.4
>

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20200311/3c3475e5/attachment.htm>


More information about the amd-gfx mailing list