[PATCH 2/2] drm/amdgpu: Add sysfs file for PCIe usage
Russell, Kent
Kent.Russell at amd.com
Mon Jan 7 11:53:20 UTC 2019
Did you mean on APU+dGPU, or just straight APU? The file currently returns 0s on my Carrizo. The event values are the same on CZ as they are for the dGPU ASICs, so it might just be that it's not supported since it's not going through the physical PCIe bus on the board. I have tested it on Fiji/Vega10/Vega20 successfully, and they all give reasonable values, at least. But I don't have access to Raven to see if it just returns 0s like CZ does. Would you prefer that the sysfs file only be added for dGPUs, to save confusion and wasted sysfs files that return no useful information?
Kent
-----Original Message-----
From: Alex Deucher <alexdeucher at gmail.com>
Sent: Friday, January 04, 2019 11:22 AM
To: Russell, Kent <Kent.Russell at amd.com>
Cc: amd-gfx at lists.freedesktop.org
Subject: Re: [PATCH 2/2] drm/amdgpu: Add sysfs file for PCIe usage
On Fri, Jan 4, 2019 at 7:35 AM Russell, Kent <Kent.Russell at amd.com> wrote:
>
> Add a sysfs file that reports the number of bytes transmitted and
> received in the last second. This can be used to approximate the PCIe
> bandwidth usage over the last second.
>
> Change-Id: I6c82fe1cb17726825736512990fa64f5c1489861
> Signed-off-by: Kent Russell <kent.russell at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 4 ++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 31 ++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/cik.c | 41 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/si.c | 41 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/soc15.c | 44 ++++++++++++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/vi.c | 41 +++++++++++++++++++++++++++++++
> 6 files changed, 202 insertions(+)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index e1b2c64..512b124 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -542,6 +542,9 @@ struct amdgpu_asic_funcs {
> bool (*need_full_reset)(struct amdgpu_device *adev);
> /* initialize doorbell layout for specific asic*/
> void (*init_doorbell_index)(struct amdgpu_device *adev);
> + /* PCIe bandwidth usage */
> + void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
> + uint64_t *count1);
> };
>
> /*
> @@ -1045,6 +1048,7 @@ int emu_soc_asic_init(struct amdgpu_device
> *adev); #define amdgpu_asic_invalidate_hdp(adev, r)
> (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define
> amdgpu_asic_need_full_reset(adev)
> (adev)->asic_funcs->need_full_reset((adev))
> #define amdgpu_asic_init_doorbell_index(adev)
> (adev)->asic_funcs->init_doorbell_index((adev))
> +#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1)
> +((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
>
> /* Common functions */
> bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> index 1f61ed9..051307e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
> @@ -990,6 +990,30 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
> return snprintf(buf, PAGE_SIZE, "%d\n", value); }
>
> +/**
> + * DOC: pcie_bw
> + *
> + * The amdgpu driver provides a sysfs API for reading how much data
> + * has been sent and received by the GPU in the last second through PCIe.
> + * The file pcie_bw is used for this.
> + * The Perf counters calculate this and return the number of sent and
> +received
> + * messages, which we multiply by the size of our PCIe packets (mps).
> + */
> +static ssize_t amdgpu_get_pcie_bw(struct device *dev,
> + struct device_attribute *attr,
> + char *buf)
> +{
> + struct drm_device *ddev = dev_get_drvdata(dev);
> + struct amdgpu_device *adev = ddev->dev_private;
> + uint64_t mps = pcie_get_mps(adev->pdev);
> + uint64_t count0, count1;
> +
> + amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
> + return snprintf(buf, PAGE_SIZE,
> + "Bytes received: %llu\nBytes sent: %llu\n",
> + count0 * mps, count1 * mps); }
> +
> static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR,
> amdgpu_get_dpm_state, amdgpu_set_dpm_state); static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
> amdgpu_get_dpm_forced_performance_level,
> @@ -1025,6 +1049,7 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
> amdgpu_set_pp_od_clk_voltage); static
> DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
> amdgpu_get_busy_percent, NULL);
> +static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
>
> static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
> struct device_attribute *attr,
> @@ -2105,6 +2130,11 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
> "gpu_busy_level\n");
> return ret;
> }
> + ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
> + if (ret) {
> + DRM_ERROR("failed to create device file pcie_bw\n");
> + return ret;
> + }
Should this file be added for APUs too or just dGPUs?
> ret = amdgpu_debugfs_pm_init(adev);
> if (ret) {
> DRM_ERROR("Failed to register debugfs file for
> dpm!\n"); @@ -2141,6 +2171,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
> device_remove_file(adev->dev,
> &dev_attr_pp_od_clk_voltage);
> device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
> + device_remove_file(adev->dev, &dev_attr_pcie_bw);
> }
>
> void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) diff --git
> a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
> index 71c50d8..8681744 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1741,6 +1741,46 @@ static bool cik_need_full_reset(struct amdgpu_device *adev)
> return true;
> }
>
> +static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
> + uint64_t *count1) {
> + uint32_t perfctr = 0;
> + uint64_t cnt0_of, cnt1_of;
> + int tmp;
> +
> + /* Set the 2 events that we wish to watch, defined above */
> + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
> + EVENT1_SEL, 104);
> +
> + /* Write to enable desired perf counters */
> + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
> + /* Zero out and enable the perf counters
> + * Write 0x5:
> + * Bit 0 = Start all counters(1)
> + * Bit 2 = Global counter reset enable(1)
> + */
> + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
> +
> + msleep(1000);
> +
> + /* Load the shadow and disable the perf counters
> + * Write 0x2:
> + * Bit 0 = Stop counters(0)
> + * Bit 1 = Load the shadow counters(1)
> + */
> + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
> +
> + /* Read register values to get any >32bit overflow */
> + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
> + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
> + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK,
> + COUNTER1_UPPER);
> +
> + /* Get the values and add the overflow */
> + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
> + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of <<
> +32); }
Does this work properly on APUs as well or just dGPUs? Same comment for VI and SOC15.
Thanks,
Alex
> +
> static const struct amdgpu_asic_funcs cik_asic_funcs = {
> .read_disabled_bios = &cik_read_disabled_bios, @@ -1756,6
> +1796,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
> .invalidate_hdp = &cik_invalidate_hdp,
> .need_full_reset = &cik_need_full_reset,
> .init_doorbell_index = &legacy_doorbell_index_init,
> + .get_pcie_usage = &cik_get_pcie_usage,
> };
>
> static int cik_common_early_init(void *handle) diff --git
> a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index f8408f8..2b3eb0e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1323,6 +1323,46 @@ static void si_set_pcie_lanes(struct amdgpu_device *adev, int lanes)
> WREG32_PCIE_PORT(PCIE_LC_LINK_WIDTH_CNTL, link_width_cntl); }
>
> +static void si_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
> + uint64_t *count1) {
> + uint32_t perfctr = 0;
> + uint64_t cnt0_of, cnt1_of;
> + int tmp;
> +
> + /* Set the 2 events that we wish to watch, defined above */
> + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
> + EVENT1_SEL, 104);
> +
> + /* Write to enable desired perf counters */
> + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
> + /* Zero out and enable the perf counters
> + * Write 0x5:
> + * Bit 0 = Start all counters(1)
> + * Bit 2 = Global counter reset enable(1)
> + */
> + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
> +
> + msleep(1000);
> +
> + /* Load the shadow and disable the perf counters
> + * Write 0x2:
> + * Bit 0 = Stop counters(0)
> + * Bit 1 = Load the shadow counters(1)
> + */
> + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
> +
> + /* Read register values to get any >32bit overflow */
> + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
> + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
> + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK,
> + COUNTER1_UPPER);
> +
> + /* Get the values and add the overflow */
> + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
> + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of <<
> +32); }
> +
> static const struct amdgpu_asic_funcs si_asic_funcs = {
> .read_disabled_bios = &si_read_disabled_bios, @@ -1339,6
> +1379,7 @@ static const struct amdgpu_asic_funcs si_asic_funcs =
> .flush_hdp = &si_flush_hdp,
> .invalidate_hdp = &si_invalidate_hdp,
> .need_full_reset = &si_need_full_reset,
> + .get_pcie_usage = &si_get_pcie_usage,
> };
>
> static uint32_t si_get_rev_id(struct amdgpu_device *adev) diff --git
> a/drivers/gpu/drm/amd/amdgpu/soc15.c
> b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index 8849b74..afd8391 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -43,6 +43,9 @@
> #include "hdp/hdp_4_0_sh_mask.h"
> #include "smuio/smuio_9_0_offset.h"
> #include "smuio/smuio_9_0_sh_mask.h"
> +#include "nbio/nbio_7_0_default.h"
> +#include "nbio/nbio_7_0_sh_mask.h"
> +#include "nbio/nbio_7_0_smn.h"
>
> #include "soc15.h"
> #include "soc15_common.h"
> @@ -601,6 +604,45 @@ static bool soc15_need_full_reset(struct amdgpu_device *adev)
> /* change this when we implement soft reset */
> return true;
> }
> +static void soc15_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
> + uint64_t *count1) {
> + uint32_t perfctr = 0;
> + uint64_t cnt0_of, cnt1_of;
> + int tmp;
> +
> + /* Set the 2 events that we wish to watch, defined above */
> + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
> + EVENT1_SEL, 104);
> +
> + /* Write to enable desired perf counters */
> + WREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK, perfctr);
> + /* Zero out and enable the perf counters
> + * Write 0x5:
> + * Bit 0 = Start all counters(1)
> + * Bit 2 = Global counter reset enable(1)
> + */
> + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000005);
> +
> + msleep(1000);
> +
> + /* Load the shadow and disable the perf counters
> + * Write 0x2:
> + * Bit 0 = Stop counters(0)
> + * Bit 1 = Load the shadow counters(1)
> + */
> + WREG32_PCIE(smnPCIE_PERF_COUNT_CNTL, 0x00000002);
> +
> + /* Read register values to get any >32bit overflow */
> + tmp = RREG32_PCIE(smnPCIE_PERF_CNTL_TXCLK);
> + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
> + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK,
> + COUNTER1_UPPER);
> +
> + /* Get the values and add the overflow */
> + *count0 = RREG32_PCIE(smnPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
> + *count1 = RREG32_PCIE(smnPCIE_PERF_COUNT1_TXCLK) | (cnt1_of <<
> +32); }
>
> static const struct amdgpu_asic_funcs soc15_asic_funcs = { @@ -617,6
> +659,7 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs =
> .invalidate_hdp = &soc15_invalidate_hdp,
> .need_full_reset = &soc15_need_full_reset,
> .init_doorbell_index = &vega10_doorbell_index_init,
> + .get_pcie_usage = &soc15_get_pcie_usage,
> };
>
> static const struct amdgpu_asic_funcs vega20_asic_funcs = @@ -634,6
> +677,7 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
> .invalidate_hdp = &soc15_invalidate_hdp,
> .need_full_reset = &soc15_need_full_reset,
> .init_doorbell_index = &vega20_doorbell_index_init,
> + .get_pcie_usage = &soc15_get_pcie_usage,
> };
>
> static int soc15_common_early_init(void *handle) diff --git
> a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 03e7be5..f785565 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -941,6 +941,46 @@ static bool vi_need_full_reset(struct amdgpu_device *adev)
> }
> }
>
> +static void vi_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
> + uint64_t *count1) {
> + uint32_t perfctr = 0;
> + uint64_t cnt0_of, cnt1_of;
> + int tmp;
> +
> + /* Set the 2 events that we wish to watch, defined above */
> + /* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
> + perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK,
> + EVENT1_SEL, 104);
> +
> + /* Write to enable desired perf counters */
> + WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
> + /* Zero out and enable the perf counters
> + * Write 0x5:
> + * Bit 0 = Start all counters(1)
> + * Bit 2 = Global counter reset enable(1)
> + */
> + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
> +
> + msleep(1000);
> +
> + /* Load the shadow and disable the perf counters
> + * Write 0x2:
> + * Bit 0 = Stop counters(0)
> + * Bit 1 = Load the shadow counters(1)
> + */
> + WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
> +
> + /* Read register values to get any >32bit overflow */
> + tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
> + cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
> + cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK,
> + COUNTER1_UPPER);
> +
> + /* Get the values and add the overflow */
> + *count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
> + *count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of <<
> +32); }
> +
> static const struct amdgpu_asic_funcs vi_asic_funcs = {
> .read_disabled_bios = &vi_read_disabled_bios, @@ -956,6 +996,7
> @@ static const struct amdgpu_asic_funcs vi_asic_funcs =
> .invalidate_hdp = &vi_invalidate_hdp,
> .need_full_reset = &vi_need_full_reset,
> .init_doorbell_index = &legacy_doorbell_index_init,
> + .get_pcie_usage = &vi_get_pcie_usage,
> };
>
> #define CZ_REV_BRISTOL(rev) \
> --
> 2.7.4
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list