[PATCH 04/12] drm/amdgpu: Make SDMA phase quantum configurable
Alex Deucher
alexdeucher at gmail.com
Thu Jul 6 21:00:06 UTC 2017
On Mon, Jul 3, 2017 at 5:11 PM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> Set a configurable SDMA phase quantum when enabling SDMA context
> switching. Compared to the initial HW setting, the default value
> significantly reduces SDMA latency in page table updates when
> user-mode SDMA queues have concurrent activity.
>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
Acked-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 4 ++++
> drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 32 ++++++++++++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 32 ++++++++++++++++++++++++++++++-
> drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c | 34 ++++++++++++++++++++++++++++++++-
> 5 files changed, 100 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 810796a..2129fbb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -106,6 +106,7 @@
> extern unsigned amdgpu_pcie_lane_cap;
> extern unsigned amdgpu_cg_mask;
> extern unsigned amdgpu_pg_mask;
> +extern unsigned amdgpu_sdma_phase_quantum;
> extern char *amdgpu_disable_cu;
> extern char *amdgpu_virtual_display;
> extern unsigned amdgpu_pp_feature_mask;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 4bf4a80..02cf24e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -107,6 +107,7 @@
> unsigned amdgpu_pcie_lane_cap = 0;
> unsigned amdgpu_cg_mask = 0xffffffff;
> unsigned amdgpu_pg_mask = 0xffffffff;
> +unsigned amdgpu_sdma_phase_quantum = 32;
> char *amdgpu_disable_cu = NULL;
> char *amdgpu_virtual_display = NULL;
> unsigned amdgpu_pp_feature_mask = 0xffffffff;
> @@ -223,6 +224,9 @@
> MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
> module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
>
> +MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
> +module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
> +
> MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
> module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index 4a9cea0..f508f4d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -351,14 +351,44 @@ static void cik_sdma_rlc_stop(struct amdgpu_device *adev)
> */
> static void cik_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
> {
> - u32 f32_cntl;
> + u32 f32_cntl, phase_quantum = 0;
> int i;
>
> + if (amdgpu_sdma_phase_quantum) {
> + unsigned value = amdgpu_sdma_phase_quantum;
> + unsigned unit = 0;
> +
> + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
> + value = (value + 1) >> 1;
> + unit++;
> + }
> + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
> + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
> + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
> + WARN_ONCE(1,
> + "clamping sdma_phase_quantum to %uK clock cycles\n",
> + value << unit);
> + }
> + phase_quantum =
> + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
> + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
> + }
> +
> for (i = 0; i < adev->sdma.num_instances; i++) {
> f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
> if (enable) {
> f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> AUTO_CTXSW_ENABLE, 1);
> + if (amdgpu_sdma_phase_quantum) {
> + WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
> + phase_quantum);
> + WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
> + phase_quantum);
> + }
> } else {
> f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> AUTO_CTXSW_ENABLE, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 67a29fb..b1de44f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -551,9 +551,33 @@ static void sdma_v3_0_rlc_stop(struct amdgpu_device *adev)
> */
> static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
> {
> - u32 f32_cntl;
> + u32 f32_cntl, phase_quantum = 0;
> int i;
>
> + if (amdgpu_sdma_phase_quantum) {
> + unsigned value = amdgpu_sdma_phase_quantum;
> + unsigned unit = 0;
> +
> + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
> + value = (value + 1) >> 1;
> + unit++;
> + }
> + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
> + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
> + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
> + WARN_ONCE(1,
> + "clamping sdma_phase_quantum to %uK clock cycles\n",
> + value << unit);
> + }
> + phase_quantum =
> + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
> + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
> + }
> +
> for (i = 0; i < adev->sdma.num_instances; i++) {
> f32_cntl = RREG32(mmSDMA0_CNTL + sdma_offsets[i]);
> if (enable) {
> @@ -561,6 +585,12 @@ static void sdma_v3_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
> AUTO_CTXSW_ENABLE, 1);
> f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> ATC_L1_ENABLE, 1);
> + if (amdgpu_sdma_phase_quantum) {
> + WREG32(mmSDMA0_PHASE0_QUANTUM + sdma_offsets[i],
> + phase_quantum);
> + WREG32(mmSDMA0_PHASE1_QUANTUM + sdma_offsets[i],
> + phase_quantum);
> + }
> } else {
> f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> AUTO_CTXSW_ENABLE, 0);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index 4a65697..591f3e7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -493,13 +493,45 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev)
> */
> static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
> {
> - u32 f32_cntl;
> + u32 f32_cntl, phase_quantum = 0;
> int i;
>
> + if (amdgpu_sdma_phase_quantum) {
> + unsigned value = amdgpu_sdma_phase_quantum;
> + unsigned unit = 0;
> +
> + while (value > (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT)) {
> + value = (value + 1) >> 1;
> + unit++;
> + }
> + if (unit > (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT)) {
> + value = (SDMA0_PHASE0_QUANTUM__VALUE_MASK >>
> + SDMA0_PHASE0_QUANTUM__VALUE__SHIFT);
> + unit = (SDMA0_PHASE0_QUANTUM__UNIT_MASK >>
> + SDMA0_PHASE0_QUANTUM__UNIT__SHIFT);
> + WARN_ONCE(1,
> + "clamping sdma_phase_quantum to %uK clock cycles\n",
> + value << unit);
> + }
> + phase_quantum =
> + value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
> + unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
> + }
> +
> for (i = 0; i < adev->sdma.num_instances; i++) {
> f32_cntl = RREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL));
> f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
> AUTO_CTXSW_ENABLE, enable ? 1 : 0);
> + if (enable && amdgpu_sdma_phase_quantum) {
> + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE0_QUANTUM),
> + phase_quantum);
> + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE1_QUANTUM),
> + phase_quantum);
> + WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_PHASE2_QUANTUM),
> + phase_quantum);
> + }
> WREG32(sdma_v4_0_get_reg_offset(i, mmSDMA0_CNTL), f32_cntl);
> }
>
> --
> 1.9.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx mailing list