[PATCH v2 05/11] drm/msm: adreno: add plumbing to generate bandwidth vote table for GMU
Neil Armstrong
neil.armstrong at linaro.org
Wed Nov 20 12:37:16 UTC 2024
On 20/11/2024 12:42, Dmitry Baryshkov wrote:
> On Tue, Nov 19, 2024 at 06:56:40PM +0100, Neil Armstrong wrote:
>> The Adreno GPU Management Unit (GMU) can also scale DDR Bandwidth along
>> the Frequency and Power Domain level, but by default we let the
>> OPP core scale the interconnect DDR path.
>>
>> In order to calculate vote values used by the GPU Management
>> Unit (GMU), we need to parse all the possible OPP Bandwidths and
>> create a vote value to be sent to the appropriate Bus Control
>> Modules (BCMs) declared in the GPU info struct.
>>
>> The vote array will then be used to dynamically generate the GMU
>> bw_table sent during the GMU power-up.
>>
>> Signed-off-by: Neil Armstrong <neil.armstrong at linaro.org>
>
> LGTM, two minor nits below.
>
>> ---
>> drivers/gpu/drm/msm/adreno/a6xx_gmu.c | 153 ++++++++++++++++++++++++++++++++++
>> drivers/gpu/drm/msm/adreno/a6xx_gmu.h | 14 ++++
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 +
>> 3 files changed, 168 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> index 14db7376c712d19446b38152e480bd5a1e0a5198..f6814d92a4edb29ba8a34a34aabb8b2324e9c6a4 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
>> @@ -9,6 +9,7 @@
>> #include <linux/pm_domain.h>
>> #include <linux/pm_opp.h>
>> #include <soc/qcom/cmd-db.h>
>> +#include <soc/qcom/tcs.h>
>> #include <drm/drm_gem.h>
>>
>> #include "a6xx_gpu.h"
>> @@ -1287,6 +1288,109 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
>> return 0;
>> }
>>
>> +/**
>> + * struct bcm_db - Auxiliary data pertaining to each Bus Clock Manager (BCM)
>> + * @unit: divisor used to convert bytes/sec bw value to an RPMh msg
>> + * @width: multiplier used to convert bytes/sec bw value to an RPMh msg
>> + * @vcd: virtual clock domain that this bcm belongs to
>> + * @reserved: reserved field
>> + */
>> +struct bcm_db {
>> + __le32 unit;
>> + __le16 width;
>> + u8 vcd;
>> + u8 reserved;
>> +};
>> +
>> +static u64 bcm_div(u64 num, u32 base)
>> +{
>> + /* Ensure that small votes aren't lost. */
>> + if (num && num < base)
>> + return 1;
>> +
>> + do_div(num, base);
>> +
>> + return num;
>> +}
>> +
>> +static int a6xx_gmu_rpmh_bw_votes_init(const struct a6xx_info *info,
>> + struct a6xx_gmu *gmu)
>> +{
>> + const struct bcm_db *bcm_data[GMU_MAX_BCMS] = { 0 };
>> + unsigned int bcm_index, bw_index;
>> +
>> + /* Retrieve BCM data from cmd-db */
>> + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
>> + size_t count;
>> +
>> + /* Skip unconfigured BCM */
>> + if (!info->bcm[bcm_index].name)
>> + continue;
>> +
>> + bcm_data[bcm_index] = cmd_db_read_aux_data(
>> + info->bcm[bcm_index].name,
>> + &count);
>> + if (IS_ERR(bcm_data[bcm_index]))
>> + return PTR_ERR(bcm_data[bcm_index]);
>> +
>> + if (!count)
>> + return -EINVAL;
>> + }
>> +
>> + /* Generate BCM votes values for each bandwidth & BCM */
>> + for (bw_index = 0; bw_index < gmu->nr_gpu_bws; bw_index++) {
>> + u32 *data = gmu->gpu_bw_votes[bw_index];
>> + u32 bw = gmu->gpu_bw_table[bw_index];
>> +
>> + /* Calculations loosely copied from bcm_aggregate() & tcs_cmd_gen() */
>> + for (bcm_index = 0; bcm_index < GMU_MAX_BCMS; bcm_index++) {
>> + bool commit = false;
>> + u64 peak, vote;
>> + u16 width;
>> + u32 unit;
>> +
>> + /* Skip unconfigured BCM */
>> + if (!info->bcm[bcm_index].name || !bcm_data[bcm_index])
>> + continue;
>
> Nit: you don't care about the .name anymore, the first check can be
> dropped.
Right
>
>> +
>> + if (bcm_index == GMU_MAX_BCMS - 1 ||
>> + (bcm_data[bcm_index + 1] &&
>> + bcm_data[bcm_index]->vcd != bcm_data[bcm_index + 1]->vcd))
>> + commit = true;
>> +
>> + if (!bw) {
>> + data[bcm_index] = BCM_TCS_CMD(commit, false, 0, 0);
>> + continue;
>> + }
>> +
>> + if (info->bcm[bcm_index].fixed) {
>> + u32 perfmode = 0;
>> +
>> + if (bw >= info->bcm[bcm_index].perfmode_bw)
>> + perfmode = info->bcm[bcm_index].perfmode;
>> +
>> + data[bcm_index] = BCM_TCS_CMD(commit, true, 0, perfmode);
>> + continue;
>> + }
>> +
>> + /* Multiply the bandwidth by the width of the connection */
>> + width = le16_to_cpu(bcm_data[bcm_index]->width);
>> + peak = bcm_div((u64)bw * width, info->bcm[bcm_index].buswidth);
>> +
>> + /* Input bandwidth value is in KBps, scale the value to BCM unit */
>> + unit = le32_to_cpu(bcm_data[bcm_index]->unit);
>> + vote = bcm_div(peak * 1000ULL, unit);
>> +
>> + if (vote > BCM_TCS_CMD_VOTE_MASK)
>> + vote = BCM_TCS_CMD_VOTE_MASK;
>> +
>> + data[bcm_index] = BCM_TCS_CMD(commit, true, vote, vote);
>> + }
>> + }
>> +
>> + return 0;
>> +}
>> +
>> /* Return the 'arc-level' for the given frequency */
>> static unsigned int a6xx_gmu_get_arc_level(struct device *dev,
>> unsigned long freq)
>> @@ -1390,12 +1494,15 @@ static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes,
>> * The GMU votes with the RPMh for itself and on behalf of the GPU but we need
>> * to construct the list of votes on the CPU and send it over. Query the RPMh
>> * voltage levels and build the votes
>> + * The GMU can also vote for DDR interconnects, use the OPP bandwidth entries
>> + * and BCM parameters to build the votes.
>> */
>>
>> static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
>> {
>> struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
>> struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> + const struct a6xx_info *info = adreno_gpu->info->a6xx;
>> struct msm_gpu *gpu = &adreno_gpu->base;
>> int ret;
>>
>> @@ -1407,6 +1514,10 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
>> ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
>> gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl");
>>
>> + /* Build the interconnect votes */
>> + if (adreno_gpu->info->features & ADRENO_FEAT_GMU_BW_VOTE)
>> + ret |= a6xx_gmu_rpmh_bw_votes_init(info, gmu);
>> +
>> return ret;
>> }
>>
>> @@ -1442,6 +1553,38 @@ static int a6xx_gmu_build_freq_table(struct device *dev, unsigned long *freqs,
>> return index;
>> }
>>
>> +static int a6xx_gmu_build_bw_table(struct device *dev, unsigned long *bandwidths,
>> + u32 size)
>> +{
>> + int count = dev_pm_opp_get_opp_count(dev);
>> + struct dev_pm_opp *opp;
>> + int i, index = 0;
>> + unsigned int bandwidth = 1;
>> +
>> + /*
>> + * The OPP table doesn't contain the "off" bandwidth level so we need to
>> + * add 1 to the table size to account for it
>> + */
>> +
>> + if (WARN(count + 1 > size,
>> + "The GMU bandwidth table is being truncated\n"))
>> + count = size - 1;
>> +
>> + /* Set the "off" bandwidth */
>> + bandwidths[index++] = 0;
>> +
>> + for (i = 0; i < count; i++) {
>> + opp = dev_pm_opp_find_bw_ceil(dev, &bandwidth, 0);
>> + if (IS_ERR(opp))
>> + break;
>> +
>> + dev_pm_opp_put(opp);
>> + bandwidths[index++] = bandwidth++;
>> + }
>> +
>> + return index;
>> +}
>> +
>> static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
>> {
>> struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
>> @@ -1472,6 +1615,16 @@ static int a6xx_gmu_pwrlevels_probe(struct a6xx_gmu *gmu)
>>
>> gmu->current_perf_index = gmu->nr_gpu_freqs - 1;
>>
>> + /*
>> + * The GMU also handles GPU Interconnect Votes so build a list
>> + * of DDR bandwidths from the GPU OPP table
>> + */
>> + if (adreno_gpu->info->features & ADRENO_FEAT_GMU_BW_VOTE)
>> + gmu->nr_gpu_bws = a6xx_gmu_build_bw_table(&gpu->pdev->dev,
>> + gmu->gpu_bw_table, ARRAY_SIZE(gmu->gpu_bw_table));
>> +
>> + gmu->current_perf_index = gmu->nr_gpu_freqs - 1;
>> +
>> /* Build the list of RPMh votes that we'll send to the GMU */
>> return a6xx_gmu_rpmh_votes_init(gmu);
>> }
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
>> index b4a79f88ccf45cfe651c86d2a9da39541c5772b3..03603eadc0f9ed866899c95e99f333a511ebc3c1 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.h
>> @@ -19,6 +19,16 @@ struct a6xx_gmu_bo {
>> u64 iova;
>> };
>>
>> +#define GMU_MAX_BCMS 3
>> +
>> +struct a6xx_bcm {
>> + char *name;
>> + unsigned int buswidth;
>> + bool fixed;
>> + unsigned int perfmode;
>> + unsigned int perfmode_bw;
>> +};
>> +
>> /*
>> * These define the different GMU wake up options - these define how both the
>> * CPU and the GMU bring up the hardware
>> @@ -82,6 +92,10 @@ struct a6xx_gmu {
>> unsigned long gpu_freqs[16];
>> u32 gx_arc_votes[16];
>>
>> + int nr_gpu_bws;
>> + unsigned long gpu_bw_table[16];
>> + u32 gpu_bw_votes[16][GMU_MAX_BCMS];
>
> We still have magic 16 here. GPU_MAX_FREQUENCIES? GPU_FREQ_TABLE_SIZE?
Yeah it would be max frequencies.
I tried to replace this 16, but it's used as ARRAY_SIZE(gmu->gpu_bw_table)
like the other gx_arc_votes & gpu_freqs.
So I don't know what to define and where, and how — a separate patch?
The define would only be used in the struct, not in the code, or should I
drop the ARRAY_SIZE and use this define?
>
>> +
>> int nr_gmu_freqs;
>> unsigned long gmu_freqs[4];
>> u32 cx_arc_votes[4];
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> index 4aceffb6aae89c781facc2a6e4a82b20b341b6cb..5b80919e595fa1ba0a3afcca55feb89e60870cb1 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> @@ -44,6 +44,7 @@ struct a6xx_info {
>> u32 gmu_chipid;
>> u32 gmu_cgc_mode;
>> u32 prim_fifo_threshold;
>> + const struct a6xx_bcm bcm[GMU_MAX_BCMS];
>> };
>>
>> struct a6xx_gpu {
>>
>> --
>> 2.34.1
>>
>
Thanks,
Neil
More information about the dri-devel
mailing list