[PATCH 1/2] drm/amdgpu: Remap hdp coherency registers
Kuehling, Felix
Felix.Kuehling at amd.com
Tue Apr 23 19:59:35 UTC 2019
See inline.
On 2019-04-23 3:23 p.m., Zeng, Oak wrote:
> Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL
> to an empty page in mmio space. We will later map this page to process
> space so application can flush hdp. This can't be done properly at
> those registers' original location because it will expose more than
> desired registers to process space.
>
> v2: Use explicit register hole location
> v3: Moved remapped hdp registers into adev struct
> v4: Use more generic name for remapped page
> Expose register offset in kfd_ioctl.h
> v5: Move hdp register remap function to nbio ip function
>
> Change-Id: Ia8d27c0c9a082711d16bbf55602bf5712a47b6d6
> Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 7 +++++++
> drivers/gpu/drm/amd/amdgpu/cik.c | 1 +
> drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 15 ++++++++++++---
> drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 15 ++++++++++++---
> drivers/gpu/drm/amd/amdgpu/si.c | 1 +
> drivers/gpu/drm/amd/amdgpu/soc15.c | 11 +++++++++++
> drivers/gpu/drm/amd/amdgpu/vi.c | 1 +
> include/uapi/linux/kfd_ioctl.h | 7 +++++++
> 8 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index bc96ec4..e16dcee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -642,6 +642,11 @@ struct nbio_hdp_flush_reg {
> u32 ref_and_mask_sdma1;
> };
>
> +struct amdgpu_mmio_remap {
> + u32 reg_offset;
> + resource_size_t bus_addr;
> +};
> +
> struct amdgpu_nbio_funcs {
> const struct nbio_hdp_flush_reg *hdp_flush_reg;
> u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
> @@ -669,6 +674,7 @@ struct amdgpu_nbio_funcs {
> void (*ih_control)(struct amdgpu_device *adev);
> void (*init_registers)(struct amdgpu_device *adev);
> void (*detect_hw_virt)(struct amdgpu_device *adev);
> + void (*remap_hdp_registers)(struct amdgpu_device *adev);
> };
>
> struct amdgpu_df_funcs {
> @@ -767,6 +773,7 @@ struct amdgpu_device {
> void __iomem *rmmio;
> /* protects concurrent MM_INDEX/DATA based register access */
> spinlock_t mmio_idx_lock;
> + struct amdgpu_mmio_remap rmmio_remap;
> /* protects concurrent SMC based register access */
> spinlock_t smc_idx_lock;
> amdgpu_rreg_t smc_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
> index 07c1f23..3f7ec6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1827,6 +1827,7 @@ static int cik_common_early_init(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> + adev->rmmio_remap.bus_addr = ULLONG_MAX;
> adev->smc_rreg = &cik_smc_rreg;
> adev->smc_wreg = &cik_smc_wreg;
> adev->pcie_rreg = &cik_pcie_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> index 1cdb98a..83f1f75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> @@ -29,9 +29,18 @@
> #include "nbio/nbio_7_0_sh_mask.h"
> #include "nbio/nbio_7_0_smn.h"
> #include "vega10_enum.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a05c
>
> +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> + adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL);
I don't think this does what you intend. I think + binds stronger than
<<, so you should write this as
(adev->rmmio_remap.reg_offset << 2) + HDP_MEM_FLUSH_CNTL
> + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> + adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL);
Same as above.
> +}
> +
> static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
> {
> u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
> struct amdgpu_ring *ring)
> {
> if (!ring || !ring->funcs->emit_wreg)
> - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> + WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);
Are you sure this is correct? As I understand it from the above,
adev->rmmio_remap.reg_offset is in dwords, HDP_MEM_FLUSH_CNTL is in
bytes. Something will need to be shifted.
> else
> - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> + amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);
Same as above.
> }
>
> static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
> @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
> .ih_control = nbio_v7_0_ih_control,
> .init_registers = nbio_v7_0_init_registers,
> .detect_hw_virt = nbio_v7_0_detect_hw_virt,
> + .remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> index c69d515..fa67772 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> @@ -27,9 +27,18 @@
> #include "nbio/nbio_7_4_offset.h"
> #include "nbio/nbio_7_4_sh_mask.h"
> #include "nbio/nbio_7_4_0_smn.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> #define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c
>
> +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> + adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL);
> + WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> + adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL);
Operator precedence. See above.
> +}
> +
> static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
> {
> u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
> struct amdgpu_ring *ring)
> {
> if (!ring || !ring->funcs->emit_wreg)
> - WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> + WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);
Are adev->rmmio_remap.reg_offset and HDP_MEM_FLUSH_CNTL in the same units?
> else
> - amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> - NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> + amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);
> }
>
> static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
> @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
> .ih_control = nbio_v7_4_ih_control,
> .init_registers = nbio_v7_4_init_registers,
> .detect_hw_virt = nbio_v7_4_detect_hw_virt,
> + .remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index 9d8df68..c6b89c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1405,6 +1405,7 @@ static int si_common_early_init(void *handle)
> {
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> + adev->rmmio_remap.bus_addr = ULLONG_MAX;
> adev->smc_rreg = &si_smc_rreg;
> adev->smc_wreg = &si_smc_wreg;
> adev->pcie_rreg = &si_pcie_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index bdb5ad9..3685944 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -44,6 +44,7 @@
> #include "smuio/smuio_9_0_offset.h"
> #include "smuio/smuio_9_0_sh_mask.h"
> #include "nbio/nbio_7_0_default.h"
> +#include "nbio/nbio_7_0_offset.h"
> #include "nbio/nbio_7_0_sh_mask.h"
> #include "nbio/nbio_7_0_smn.h"
> #include "mp/mp_9_0_offset.h"
> @@ -64,6 +65,7 @@
> #include "dce_virtual.h"
> #include "mxgpu_ai.h"
> #include "amdgpu_smu.h"
> +#include <uapi/linux/kfd_ioctl.h>
>
> #define mmMP0_MISC_CGTT_CTRL0 0x01b9
> #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX 0
> @@ -777,8 +779,11 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
>
> static int soc15_common_early_init(void *handle)
> {
> +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
> struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>
> + adev->rmmio_remap.reg_offset = (MMIO_REG_HOLE_OFFSET) >> 2;
Why is this shifted? I think this creates some of the confusion above
where things have different units.
> + adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
> adev->smc_rreg = NULL;
> adev->smc_wreg = NULL;
> adev->pcie_rreg = &soc15_pcie_rreg;
> @@ -1007,6 +1012,12 @@ static int soc15_common_hw_init(void *handle)
> soc15_program_aspm(adev);
> /* setup nbio registers */
> adev->nbio_funcs->init_registers(adev);
> + /* remap HDP registers to a hole in mmio space,
> + * for the purpose of expose those registers
> + * to process space
> + */
> + if (adev->nbio_funcs->remap_hdp_registers)
> + adev->nbio_funcs->remap_hdp_registers(adev);
> /* enable the doorbell aperture */
> soc15_enable_doorbell_aperture(adev, true);
> /* HW doorbell routing policy: doorbell writing not
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 5e5b42a..44565b23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -1037,6 +1037,7 @@ static int vi_common_early_init(void *handle)
> adev->smc_rreg = &vi_smc_rreg;
> adev->smc_wreg = &vi_smc_wreg;
> }
> + adev->rmmio_remap.bus_addr = ULLONG_MAX;
> adev->pcie_rreg = &vi_pcie_rreg;
> adev->pcie_wreg = &vi_pcie_wreg;
> adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg;
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index dc067ed..7524e6e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args {
> __u32 dmabuf_fd; /* to KFD */
> };
>
> +/* Register offset inside the remapped mmio page
> + */
> +enum kfd_mmio_remap {
> + HDP_MEM_FLUSH_CNTL = 0,
This needs some prefix to disambiguate. Something to make it clear that
this is not the MMIO register address, but an offset in a remapped
address range. This is a bit verbose, but it's the best I can come up with:
KFD_MMIO_REMAP_HPD_MEM_FLUSH_CNTL = 0,
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,
Regards,
Felix
> + HDP_REG_FLUSH_CNTL = 4,
> +};
> +
> #define AMDKFD_IOCTL_BASE 'K'
> #define AMDKFD_IO(nr) _IO(AMDKFD_IOCTL_BASE, nr)
> #define AMDKFD_IOR(nr, type) _IOR(AMDKFD_IOCTL_BASE, nr, type)
More information about the amd-gfx
mailing list