[PATCH 1/3] drm/amdgpu: fix xgmi perfmon a-b-a problem

Kim, Jonathan Jonathan.Kim at amd.com
Tue Sep 15 21:59:56 UTC 2020


[AMD Official Use Only - Internal Distribution Only]

> -----Original Message-----
> From: Kim, Jonathan <jonathan.kim at amd.com>
> Sent: Tuesday, September 15, 2020 5:51 PM
> To: amd-gfx at lists.ffreedesktop.org
> Cc: Kasiviswanathan, Harish <Harish.Kasiviswanathan at amd.com>; Kim,
> Jonathan <Jonathan.Kim at amd.com>; Kim, Jonathan
> <Jonathan.Kim at amd.com>
> Subject: [PATCH 1/3] drm/amdgpu: fix xgmi perfmon a-b-a problem
>
> Mapping hw counters per event config will cause ABA problems so map per
> event instead.
>
> v2: Discontinue starting perf counters if add fails.  Make it clear what's
> happening with pmc_start.
>
> Signed-off-by: Jonathan Kim <Jonathan.Kim at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_df.h  |   6 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c |  42 ++++++----
>  drivers/gpu/drm/amd/amdgpu/df_v3_6.c    | 105 +++++++++++-------------
>  3 files changed, 78 insertions(+), 75 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
> index 373cdebe0e2f..52488bb45112 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_df.h
> @@ -44,11 +44,11 @@ struct amdgpu_df_funcs {
>  void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device
> *adev,
>      bool enable);
>  int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
> - int is_add);
> + int counter_idx, int is_add);
>  int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
> - int is_remove);
> + int counter_idx, int is_remove);
>  void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
> - uint64_t *count);
> + int counter_idx, uint64_t *count);
>  uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t
> ficaa_val);
>  void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
>   uint32_t ficadl_val, uint32_t ficadh_val); diff --git
> a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> index 69af462db34d..1b0ec715c8ba 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> @@ -64,6 +64,7 @@ static void amdgpu_perf_start(struct perf_event
> *event, int flags)
>  struct amdgpu_pmu_entry *pe = container_of(event->pmu,
>    struct amdgpu_pmu_entry,
>    pmu);
> +int target_cntr = 0;
>
>  if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED)))
>  return;
> @@ -73,17 +74,24 @@ static void amdgpu_perf_start(struct perf_event
> *event, int flags)
>
>  switch (pe->pmu_perf_type) {
>  case PERF_TYPE_AMDGPU_DF:
> -if (!(flags & PERF_EF_RELOAD))
> -pe->adev->df.funcs->pmc_start(pe->adev, hwc-
> >config, 1);
> +if (!(flags & PERF_EF_RELOAD)) {
> +target_cntr = pe->adev->df.funcs->pmc_start(pe-
> >adev,
> +hwc->config, 0 /* unused */,
> +1 /* add counter */);
> +if (target_cntr < 0)
> +break;
> +
> +hwc->idx = target_cntr;
> +}
>
> -pe->adev->df.funcs->pmc_start(pe->adev, hwc->config, 0);
> +pe->adev->df.funcs->pmc_start(pe->adev, hwc->config,
> +hwc->idx, 0);
>  break;
>  default:
>  break;
>  }
>
>  perf_event_update_userpage(event);
> -
>  }
>
>  /* read perf counter */
> @@ -101,8 +109,8 @@ static void amdgpu_perf_read(struct perf_event
> *event)
>
>  switch (pe->pmu_perf_type) {
>  case PERF_TYPE_AMDGPU_DF:
> -pe->adev->df.funcs->pmc_get_count(pe->adev, hwc-
> >config,
> -  &count);
> +pe->adev->df.funcs->pmc_get_count(pe->adev,
> +hwc->config, hwc->idx,
> &count);
>  break;
>  default:
>  count = 0;
> @@ -126,7 +134,8 @@ static void amdgpu_perf_stop(struct perf_event
> *event, int flags)
>
>  switch (pe->pmu_perf_type) {
>  case PERF_TYPE_AMDGPU_DF:
> -pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 0);
> +pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc-
> >idx,
> +0);
>  break;
>  default:
>  break;
> @@ -142,12 +151,11 @@ static void amdgpu_perf_stop(struct perf_event
> *event, int flags)
>  hwc->state |= PERF_HES_UPTODATE;
>  }
>
> -/* add perf counter  */
> +/* add perf counter */
>  static int amdgpu_perf_add(struct perf_event *event, int flags)  {
>  struct hw_perf_event *hwc = &event->hw;
> -int retval;
> -
> +int retval = 0, target_cntr;
>  struct amdgpu_pmu_entry *pe = container_of(event->pmu,
>    struct amdgpu_pmu_entry,
>    pmu);
> @@ -156,8 +164,14 @@ static int amdgpu_perf_add(struct perf_event
> *event, int flags)
>
>  switch (pe->pmu_perf_type) {
>  case PERF_TYPE_AMDGPU_DF:
> -retval = pe->adev->df.funcs->pmc_start(pe->adev,
> -       hwc->config, 1);
> +target_cntr = pe->adev->df.funcs->pmc_start(pe->adev,
> +hwc->config, 0 /* unused */,
> +1 /* add counter */);
> +if (target_cntr < 0)
> +retval = target_cntr;
> +else
> +hwc->idx = target_cntr;
> +
>  break;
>  default:
>  return 0;
> @@ -170,7 +184,6 @@ static int amdgpu_perf_add(struct perf_event
> *event, int flags)
>  amdgpu_perf_start(event, PERF_EF_RELOAD);
>
>  return retval;
> -
>  }
>
>  /* delete perf counter  */
> @@ -185,7 +198,8 @@ static void amdgpu_perf_del(struct perf_event
> *event, int flags)
>
>  switch (pe->pmu_perf_type) {
>  case PERF_TYPE_AMDGPU_DF:
> -pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, 1);
> +pe->adev->df.funcs->pmc_stop(pe->adev, hwc->config, hwc-
> >idx,
> +1);
>  break;
>  default:
>  break;
> diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> index 7b89fd2aa44a..0ca6e176acb0 100644
> --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> @@ -391,33 +391,28 @@ static void df_v3_6_get_clockgating_state(struct
> amdgpu_device *adev,  }
>
>  /* get assigned df perfmon ctr as int */ -static int
> df_v3_6_pmc_config_2_cntr(struct amdgpu_device *adev,
> -      uint64_t config)
> +static bool df_v3_6_pmc_has_counter(struct amdgpu_device *adev,
> +      uint64_t config,
> +      int counter_idx)
>  {
> -int i;
>
> -for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
> -if ((config & 0x0FFFFFFUL) ==
> -adev-
> >df_perfmon_config_assign_mask[i])
> -return i;
> -}
> +return ((config & 0x0FFFFFFUL) ==
> +adev-
> >df_perfmon_config_assign_mask[counter_idx]);
>
> -return -EINVAL;
>  }
>
>  /* get address based on counter assignment */  static void
> df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
>   uint64_t config,
> + int counter_idx,
>   int is_ctrl,
>   uint32_t *lo_base_addr,
>   uint32_t *hi_base_addr)
>  {
> -int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
> -
> -if (target_cntr < 0)
> +if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
>  return;
>
> -switch (target_cntr) {
> +switch (counter_idx) {
>
>  case 0:
>  *lo_base_addr = is_ctrl ? smnPerfMonCtlLo4 :
> smnPerfMonCtrLo4; @@ -443,15 +438,18 @@ static void
> df_v3_6_pmc_get_addr(struct amdgpu_device *adev,
>  /* get read counter address */
>  static void df_v3_6_pmc_get_read_settings(struct amdgpu_device *adev,
>    uint64_t config,
> +  int counter_idx,
>    uint32_t *lo_base_addr,
>    uint32_t *hi_base_addr)
>  {
> -df_v3_6_pmc_get_addr(adev, config, 0, lo_base_addr,
> hi_base_addr);
> +df_v3_6_pmc_get_addr(adev, config, counter_idx, 0, lo_base_addr,
> +
> hi_base_addr);
>  }
>
>  /* get control counter settings i.e. address and values to set */  static int
> df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
>    uint64_t config,
> +  int counter_idx,
>    uint32_t *lo_base_addr,
>    uint32_t *hi_base_addr,
>    uint32_t *lo_val,
> @@ -462,7 +460,8 @@ static int df_v3_6_pmc_get_ctrl_settings(struct
> amdgpu_device *adev,
>  uint32_t eventsel, instance, unitmask;
>  uint32_t instance_10, instance_5432, instance_76;
>
> -df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr,
> hi_base_addr);
> +df_v3_6_pmc_get_addr(adev, config, counter_idx, 1, lo_base_addr,
> +hi_base_addr);
>
>  if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) {
>  DRM_ERROR("[DF PMC] addressing not retrieved! Lo: %x, Hi:
> %x", @@ -492,18 +491,13 @@ static int
> df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,  static int
> df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
>     uint64_t config)
>  {
> -int i, target_cntr;
> -
> -target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
> -
> -if (target_cntr >= 0)
> -return 0;
> +int i;
>
>  for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
>  if (adev->df_perfmon_config_assign_mask[i] == 0U) {
>  adev->df_perfmon_config_assign_mask[i] =
>  config &
> 0x0FFFFFFUL;
> -return 0;
> +return i;
>  }
>  }
>
> @@ -512,59 +506,50 @@ static int df_v3_6_pmc_add_cntr(struct
> amdgpu_device *adev,
>
>  #define DEFERRED_ARM_MASK(1 << 31)
>  static int df_v3_6_pmc_set_deferred(struct amdgpu_device *adev,
> -    uint64_t config, bool is_deferred)
> +    int counter_idx, uint64_t config,
> +    bool is_deferred)
>  {
> -int target_cntr;
> -
> -target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
>
> -if (target_cntr < 0)
> +if (!df_v3_6_pmc_has_counter(adev, config, counter_idx))
>  return -EINVAL;
>
>  if (is_deferred)
> -adev->df_perfmon_config_assign_mask[target_cntr] |=
> +adev->df_perfmon_config_assign_mask[counter_idx] |=
>
> DEFERRED_ARM_MASK;
>  else
> -adev->df_perfmon_config_assign_mask[target_cntr] &=
> +adev->df_perfmon_config_assign_mask[counter_idx] &=
>
> ~DEFERRED_ARM_MASK;
>
>  return 0;
>  }
>
>  static bool df_v3_6_pmc_is_deferred(struct amdgpu_device *adev,
> +    int counter_idx,
>      uint64_t config)
>  {
> -int target_cntr;
> -
> -target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
> -
> -/*
> - * we never get target_cntr < 0 since this funciton is only called in
> - * pmc_count for now but we should check anyways.
> - */
> -return (target_cntr >= 0 &&
> -(adev->df_perfmon_config_assign_mask[target_cntr]
> -& DEFERRED_ARM_MASK));
> +return(df_v3_6_pmc_has_counter(adev, config, counter_idx) &&
> +(adev-
> >df_perfmon_config_assign_mask[counter_idx]
> +& DEFERRED_ARM_MASK));
>
>  }
>
>  /* release performance counter */
>  static void df_v3_6_pmc_release_cntr(struct amdgpu_device *adev,
> -     uint64_t config)
> +     uint64_t config,
> +     int counter_idx)
>  {
> -int target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
> -
> -if (target_cntr >= 0)
> -adev->df_perfmon_config_assign_mask[target_cntr] = 0ULL;
> +if (df_v3_6_pmc_has_counter(adev, config, counter_idx))
> +adev->df_perfmon_config_assign_mask[counter_idx] = 0ULL;
>  }
>
>
>  static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
> - uint64_t config)
> + uint64_t config,
> + int counter_idx)
>  {
>  uint32_t lo_base_addr = 0, hi_base_addr = 0;
>
> -df_v3_6_pmc_get_read_settings(adev, config, &lo_base_addr,
> +df_v3_6_pmc_get_read_settings(adev, config, counter_idx,
> +&lo_base_addr,
>        &hi_base_addr);
>
>  if ((lo_base_addr == 0) || (hi_base_addr == 0)) @@ -573,8 +558,9
> @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
>  df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);  }
>
> +/* return available counter if is_add == 1 otherwise return error
> +status. */
>  static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
> -     int is_add)
> +     int counter_idx, int is_add)
>  {
>  uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
>  int err = 0, ret = 0;
> @@ -584,10 +570,9 @@ static int df_v3_6_pmc_start(struct amdgpu_device
> *adev, uint64_t config,
>  if (is_add)
>  return df_v3_6_pmc_add_cntr(adev, config);
>
> -df_v3_6_reset_perfmon_cntr(adev, config);
> -
>  ret = df_v3_6_pmc_get_ctrl_settings(adev,
>  config,
> +counter_idx,
>  &lo_base_addr,
>  &hi_base_addr,
>  &lo_val,
> @@ -604,7 +589,8 @@ static int df_v3_6_pmc_start(struct amdgpu_device
> *adev, uint64_t config,
>       hi_val);
>
>  if (err)
> -ret = df_v3_6_pmc_set_deferred(adev, config, true);
> +ret = df_v3_6_pmc_set_deferred(adev, config,
> +counter_idx, true);
>
>  break;
>  default:
> @@ -615,7 +601,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device
> *adev, uint64_t config,  }
>
>  static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
> -    int is_remove)
> +    int counter_idx, int is_remove)
>  {
>  uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
>  int ret = 0;
> @@ -624,6 +610,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device
> *adev, uint64_t config,
>  case CHIP_VEGA20:
>  ret = df_v3_6_pmc_get_ctrl_settings(adev,
>  config,
> +counter_idx,
>  &lo_base_addr,
>  &hi_base_addr,
>  &lo_val,
> @@ -635,8 +622,8 @@ static int df_v3_6_pmc_stop(struct amdgpu_device
> *adev, uint64_t config,
>
>
>  if (is_remove) {
> -df_v3_6_reset_perfmon_cntr(adev, config);
> -df_v3_6_pmc_release_cntr(adev, config);
> +df_v3_6_reset_perfmon_cntr(adev, config,
> counter_idx);
> +df_v3_6_pmc_release_cntr(adev, config,
> counter_idx);
>  }
>
>  break;
> @@ -649,6 +636,7 @@ static int df_v3_6_pmc_stop(struct amdgpu_device
> *adev, uint64_t config,
>
>  static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
>    uint64_t config,
> +  int counter_idx,
>    uint64_t *count)
>  {
>  uint32_t lo_base_addr = 0, hi_base_addr = 0, lo_val = 0, hi_val = 0;
> @@ -656,14 +644,14 @@ static void df_v3_6_pmc_get_count(struct
> amdgpu_device *adev,
>
>  switch (adev->asic_type) {
>  case CHIP_VEGA20:
> -df_v3_6_pmc_get_read_settings(adev, config,
> &lo_base_addr,
> -      &hi_base_addr);
> +df_v3_6_pmc_get_read_settings(adev, config, counter_idx,
> +&lo_base_addr,
> &hi_base_addr);
>
>  if ((lo_base_addr == 0) || (hi_base_addr == 0))
>  return;
>
>  /* rearm the counter or throw away count value on failure */
> -if (df_v3_6_pmc_is_deferred(adev, config)) {
> +if (df_v3_6_pmc_is_deferred(adev, config, counter_idx)) {
>  int rearm_err =
> df_v3_6_perfmon_arm_with_status(adev,
>  lo_base_addr, lo_val,
>  hi_base_addr,
> hi_val);
> @@ -671,7 +659,8 @@ static void df_v3_6_pmc_get_count(struct
> amdgpu_device *adev,
>  if (rearm_err)
>  return;
>
> -df_v3_6_pmc_set_deferred(adev, config, false);
> +df_v3_6_pmc_set_deferred(adev, config,
> counter_idx,
> +
> false);
>  }
>
>  df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
> --
> 2.17.1



More information about the amd-gfx mailing list