[PATCH 1/2] drm/amdgpu: add perfmon and fica atomics for df

Kim, Jonathan Jonathan.Kim at amd.com
Fri Jul 12 18:52:46 UTC 2019


PCIE macros breaks data fabric state machine.
Subsequent reads after write calls to address/data offsets in macros causes unexpected behavior.

Thanks,

Jon


-----Original Message-----
From: Alex Deucher <alexdeucher at gmail.com> 
Sent: Friday, July 12, 2019 2:45 PM
To: Kim, Jonathan <Jonathan.Kim at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 1/2] drm/amdgpu: add perfmon and fica atomics for df

[CAUTION: External Email]

On Fri, Jul 12, 2019 at 2:39 PM Kim, Jonathan <Jonathan.Kim at amd.com> wrote:
>
> adding perfmon and fica atomic operations to adhere to data fabrics 
> finite state machine requirements for indirect register access.
>
> Change-Id: I2ab17fd59d566b4251c9a9d0e67b897b8c221249
> Signed-off-by: Jonathan Kim <Jonathan.Kim at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h  |   3 +
>  drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 202 
> +++++++++++++++++----------
>  2 files changed, 128 insertions(+), 77 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e6469e23b76f..198aa60c43bd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -707,6 +707,9 @@ struct amdgpu_df_funcs {
>                                          int is_disable);
>         void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
>                                          uint64_t *count);
> +       uint64_t (*get_fica)(struct amdgpu_device *adev, uint32_t ficaa_val);
> +       void (*set_fica)(struct amdgpu_device *adev, uint32_t ficaa_val,
> +                        uint32_t ficadl_val, uint32_t ficadh_val);
>  };
>  /* Define the HW IP blocks will be used in driver , add more if 
> necessary */  enum amd_hw_ip_block_type { diff --git 
> a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c 
> b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> index ef6e91f9f51c..dcc7be0c9d30 100644
> --- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> +++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
> @@ -93,6 +93,96 @@ const struct attribute_group *df_v3_6_attr_groups[] = {
>                 NULL
>  };
>
> +static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
> +                                uint32_t ficaa_val) {
> +       unsigned long flags, address, data;
> +       uint32_t ficadl_val, ficadh_val;
> +
> +       address = adev->nbio_funcs->get_pcie_index_offset(adev);
> +       data = adev->nbio_funcs->get_pcie_data_offset(adev);
> +
> +       spin_lock_irqsave(&adev->pcie_idx_lock, flags);
> +       WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAdress5);
> +       WREG32(data, ficaa_val);
> +
> +       WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo5);
> +       ficadl_val = RREG32(data);
> +
> +       WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi5);
> +       ficadh_val = RREG32(data);
> +
> +       spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);

Any reason to open code the transactions rather than using the RREG32_PCIE/WREG32_PCIE macros?

Alex

> +
> +       return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | 
> +ficadl_val); }
> +
> +static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
> +                            uint32_t ficadl_val, uint32_t ficadh_val) 
> +{
> +       unsigned long flags, address, data;
> +
> +       address = adev->nbio_funcs->get_pcie_index_offset(adev);
> +       data = adev->nbio_funcs->get_pcie_data_offset(adev);
> +
> +       spin_lock_irqsave(&adev->pcie_idx_lock, flags);
> +       WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAdress5);
> +       WREG32(data, ficaa_val);
> +
> +       WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataLo5);
> +       WREG32(data, ficadl_val);
> +
> +       WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi5);
> +       WREG32(data, ficadh_val);
> +
> +       spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); }
> +
> +/*
> + * df_v3_6_perfmon_rreg - read perfmon lo and hi
> + *
> + * required to be atomic.  no mmio method provided so subsequent 
> +reads for lo
> + * and hi require to preserve df finite state machine  */ static void 
> +df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
> +                           uint32_t lo_addr, uint32_t *lo_val,
> +                           uint32_t hi_addr, uint32_t *hi_val) {
> +       unsigned long flags, address, data;
> +
> +       address = adev->nbio_funcs->get_pcie_index_offset(adev);
> +       data = adev->nbio_funcs->get_pcie_data_offset(adev);
> +
> +       spin_lock_irqsave(&adev->pcie_idx_lock, flags);
> +       WREG32(address, lo_addr);
> +       *lo_val = RREG32(data);
> +       WREG32(address, hi_addr);
> +       *hi_val = RREG32(data);
> +       spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); }
> +
> +/*
> + * df_v3_6_perfmon_wreg - write to perfmon lo and hi
> + *
> + * required to be atomic.  no mmio method provided so subsequent 
> +reads after
> + * data writes cannot occur to preserve data fabrics finite state machine.
> + */
> +static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
> +                           uint32_t lo_val, uint32_t hi_addr, 
> +uint32_t hi_val) {
> +       unsigned long flags, address, data;
> +
> +       address = adev->nbio_funcs->get_pcie_index_offset(adev);
> +       data = adev->nbio_funcs->get_pcie_data_offset(adev);
> +
> +       spin_lock_irqsave(&adev->pcie_idx_lock, flags);
> +       WREG32(address, lo_addr);
> +       WREG32(data, lo_val);
> +       WREG32(address, hi_addr);
> +       WREG32(data, hi_val);
> +       spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); }
> +
>  /* get the number of df counters available */  static ssize_t 
> df_v3_6_get_df_cntr_avail(struct device *dev,
>                 struct device_attribute *attr, @@ -268,6 +358,10 @@ 
> static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
>                                           uint32_t *lo_val,
>                                           uint32_t *hi_val)  {
> +
> +       uint32_t eventsel, instance, unitmask;
> +       uint32_t instance_10, instance_5432, instance_76;
> +
>         df_v3_6_pmc_get_addr(adev, config, 1, lo_base_addr, 
> hi_base_addr);
>
>         if ((*lo_base_addr == 0) || (*hi_base_addr == 0)) { @@ -276,40 
> +370,33 @@ static int df_v3_6_pmc_get_ctrl_settings(struct amdgpu_device *adev,
>                 return -ENXIO;
>         }
>
> -       if (lo_val && hi_val) {
> -               uint32_t eventsel, instance, unitmask;
> -               uint32_t instance_10, instance_5432, instance_76;
> +       eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
> +       unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
> +       instance = DF_V3_6_GET_INSTANCE(config);
>
> -               eventsel = DF_V3_6_GET_EVENT(config) & 0x3f;
> -               unitmask = DF_V3_6_GET_UNITMASK(config) & 0xf;
> -               instance = DF_V3_6_GET_INSTANCE(config);
> +       instance_10 = instance & 0x3;
> +       instance_5432 = (instance >> 2) & 0xf;
> +       instance_76 = (instance >> 6) & 0x3;
>
> -               instance_10 = instance & 0x3;
> -               instance_5432 = (instance >> 2) & 0xf;
> -               instance_76 = (instance >> 6) & 0x3;
> +       *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel | (1 << 22);
> +       *hi_val = (instance_76 << 29) | instance_5432;
>
> -               *lo_val = (unitmask << 8) | (instance_10 << 6) | eventsel;
> -               *hi_val = (instance_76 << 29) | instance_5432;
> -       }
> +       DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
> +               config, lo_base_addr, hi_base_addr, lo_val, hi_val);
>
>         return 0;
>  }
>
> -/* assign df performance counters for read */ -static int 
> df_v3_6_pmc_assign_cntr(struct amdgpu_device *adev,
> -                                  uint64_t config,
> -                                  int *is_assigned)
> +/* add df performance counters for read */ static int 
> +df_v3_6_pmc_add_cntr(struct amdgpu_device *adev,
> +                                  uint64_t config)
>  {
>         int i, target_cntr;
>
> -       *is_assigned = 0;
> -
>         target_cntr = df_v3_6_pmc_config_2_cntr(adev, config);
>
> -       if (target_cntr >= 0) {
> -               *is_assigned = 1;
> +       if (target_cntr >= 0)
>                 return 0;
> -       }
>
>         for (i = 0; i < DF_V3_6_MAX_COUNTERS; i++) {
>                 if (adev->df_perfmon_config_assign_mask[i] == 0U) { @@ 
> -344,45 +431,13 @@ static void df_v3_6_reset_perfmon_cntr(struct amdgpu_device *adev,
>         if ((lo_base_addr == 0) || (hi_base_addr == 0))
>                 return;
>
> -       WREG32_PCIE(lo_base_addr, 0UL);
> -       WREG32_PCIE(hi_base_addr, 0UL);
> -}
> -
> -
> -static int df_v3_6_add_perfmon_cntr(struct amdgpu_device *adev,
> -                                     uint64_t config)
> -{
> -       uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
> -       int ret, is_assigned;
> -
> -       ret = df_v3_6_pmc_assign_cntr(adev, config, &is_assigned);
> -
> -       if (ret || is_assigned)
> -               return ret;
> -
> -       ret = df_v3_6_pmc_get_ctrl_settings(adev,
> -                       config,
> -                       &lo_base_addr,
> -                       &hi_base_addr,
> -                       &lo_val,
> -                       &hi_val);
> -
> -       if (ret)
> -               return ret;
> -
> -       DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
> -                       config, lo_base_addr, hi_base_addr, lo_val, hi_val);
> -
> -       WREG32_PCIE(lo_base_addr, lo_val);
> -       WREG32_PCIE(hi_base_addr, hi_val);
> -
> -       return ret;
> +       df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, hi_base_addr, 0);
>  }
>
>  static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
>                              int is_enable)  {
> -       uint32_t lo_base_addr, hi_base_addr, lo_val;
> +       uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
>         int ret = 0;
>
>         switch (adev->asic_type) {
> @@ -391,24 +446,20 @@ static int df_v3_6_pmc_start(struct amdgpu_device *adev, uint64_t config,
>                 df_v3_6_reset_perfmon_cntr(adev, config);
>
>                 if (is_enable) {
> -                       ret = df_v3_6_add_perfmon_cntr(adev, config);
> +                       ret = df_v3_6_pmc_add_cntr(adev, config);
>                 } else {
>                         ret = df_v3_6_pmc_get_ctrl_settings(adev,
>                                         config,
>                                         &lo_base_addr,
>                                         &hi_base_addr,
> -                                       NULL,
> -                                       NULL);
> +                                       &lo_val,
> +                                       &hi_val);
>
>                         if (ret)
>                                 return ret;
>
> -                       lo_val = RREG32_PCIE(lo_base_addr);
> -
> -                       DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
> -                               config, lo_base_addr, hi_base_addr, lo_val);
> -
> -                       WREG32_PCIE(lo_base_addr, lo_val | (1ULL << 22));
> +                       df_v3_6_perfmon_wreg(adev, lo_base_addr, lo_val,
> +                                       hi_base_addr, hi_val);
>                 }
>
>                 break;
> @@ -422,7 +473,7 @@ static int df_v3_6_pmc_start(struct amdgpu_device 
> *adev, uint64_t config,  static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
>                             int is_disable)  {
> -       uint32_t lo_base_addr, hi_base_addr, lo_val;
> +       uint32_t lo_base_addr, hi_base_addr, lo_val, hi_val;
>         int ret = 0;
>
>         switch (adev->asic_type) {
> @@ -431,18 +482,13 @@ static int df_v3_6_pmc_stop(struct amdgpu_device *adev, uint64_t config,
>                         config,
>                         &lo_base_addr,
>                         &hi_base_addr,
> -                       NULL,
> -                       NULL);
> +                       &lo_val,
> +                       &hi_val);
>
>                 if (ret)
>                         return ret;
>
> -               lo_val = RREG32_PCIE(lo_base_addr);
> -
> -               DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x",
> -                               config, lo_base_addr, hi_base_addr, lo_val);
> -
> -               WREG32_PCIE(lo_base_addr, lo_val & ~(1ULL << 22));
> +               df_v3_6_perfmon_wreg(adev, lo_base_addr, 0, 
> + hi_base_addr, 0);
>
>                 if (is_disable)
>                         df_v3_6_pmc_release_cntr(adev, config); @@ 
> -471,8 +517,8 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
>                 if ((lo_base_addr == 0) || (hi_base_addr == 0))
>                         return;
>
> -               lo_val = RREG32_PCIE(lo_base_addr);
> -               hi_val = RREG32_PCIE(hi_base_addr);
> +               df_v3_6_perfmon_rreg(adev, lo_base_addr, &lo_val,
> +                               hi_base_addr, &hi_val);
>
>                 *count  = ((hi_val | 0ULL) << 32) | (lo_val | 0ULL);
>
> @@ -480,7 +526,7 @@ static void df_v3_6_pmc_get_count(struct amdgpu_device *adev,
>                         *count = 0;
>
>                 DRM_DEBUG_DRIVER("config=%llx addr=%08x:%08x val=%08x:%08x",
> -                       config, lo_base_addr, hi_base_addr, lo_val, hi_val);
> +                        config, lo_base_addr, hi_base_addr, lo_val, 
> + hi_val);
>
>                 break;
>
> @@ -499,5 +545,7 @@ const struct amdgpu_df_funcs df_v3_6_funcs = {
>         .get_clockgating_state = df_v3_6_get_clockgating_state,
>         .pmc_start = df_v3_6_pmc_start,
>         .pmc_stop = df_v3_6_pmc_stop,
> -       .pmc_get_count = df_v3_6_pmc_get_count
> +       .pmc_get_count = df_v3_6_pmc_get_count,
> +       .get_fica = df_v3_6_get_fica,
> +       .set_fica = df_v3_6_set_fica
>  };
> --
> 2.17.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list