[PATCH 1/2] drm/amdgpu: Remap hdp coherency registers

Kuehling, Felix Felix.Kuehling at amd.com
Tue Apr 23 19:59:35 UTC 2019


See inline.

On 2019-04-23 3:23 p.m., Zeng, Oak wrote:
> Remap HDP_MEM_COHERENCY_FLUSH_CNTL and HDP_REG_COHERENCY_FLUSH_CNTL
> to an empty page in mmio space. We will later map this page to process
> space so application can flush hdp. This can't be done properly at
> those registers' original location because it will expose more than
> desired registers to process space.
>
> v2: Use explicit register hole location
> v3: Moved remapped hdp registers into adev struct
> v4: Use more generic name for remapped page
>      Expose register offset in kfd_ioctl.h
> v5: Move hdp register remap function to nbio ip function
>
> Change-Id: Ia8d27c0c9a082711d16bbf55602bf5712a47b6d6
> Signed-off-by: Oak Zeng <Oak.Zeng at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  7 +++++++
>   drivers/gpu/drm/amd/amdgpu/cik.c       |  1 +
>   drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c | 15 ++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c | 15 ++++++++++++---
>   drivers/gpu/drm/amd/amdgpu/si.c        |  1 +
>   drivers/gpu/drm/amd/amdgpu/soc15.c     | 11 +++++++++++
>   drivers/gpu/drm/amd/amdgpu/vi.c        |  1 +
>   include/uapi/linux/kfd_ioctl.h         |  7 +++++++
>   8 files changed, 52 insertions(+), 6 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index bc96ec4..e16dcee 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -642,6 +642,11 @@ struct nbio_hdp_flush_reg {
>   	u32 ref_and_mask_sdma1;
>   };
>   
> +struct amdgpu_mmio_remap {
> +	u32 reg_offset;
> +	resource_size_t bus_addr;
> +};
> +
>   struct amdgpu_nbio_funcs {
>   	const struct nbio_hdp_flush_reg *hdp_flush_reg;
>   	u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
> @@ -669,6 +674,7 @@ struct amdgpu_nbio_funcs {
>   	void (*ih_control)(struct amdgpu_device *adev);
>   	void (*init_registers)(struct amdgpu_device *adev);
>   	void (*detect_hw_virt)(struct amdgpu_device *adev);
> +	void (*remap_hdp_registers)(struct amdgpu_device *adev);
>   };
>   
>   struct amdgpu_df_funcs {
> @@ -767,6 +773,7 @@ struct amdgpu_device {
>   	void __iomem			*rmmio;
>   	/* protects concurrent MM_INDEX/DATA based register access */
>   	spinlock_t mmio_idx_lock;
> +	struct amdgpu_mmio_remap        rmmio_remap;
>   	/* protects concurrent SMC based register access */
>   	spinlock_t smc_idx_lock;
>   	amdgpu_rreg_t			smc_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
> index 07c1f23..3f7ec6a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik.c
> @@ -1827,6 +1827,7 @@ static int cik_common_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	adev->rmmio_remap.bus_addr = ULLONG_MAX;
>   	adev->smc_rreg = &cik_smc_rreg;
>   	adev->smc_wreg = &cik_smc_wreg;
>   	adev->pcie_rreg = &cik_pcie_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> index 1cdb98a..83f1f75 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c
> @@ -29,9 +29,18 @@
>   #include "nbio/nbio_7_0_sh_mask.h"
>   #include "nbio/nbio_7_0_smn.h"
>   #include "vega10_enum.h"
> +#include <uapi/linux/kfd_ioctl.h>
>   
>   #define smnNBIF_MGCG_CTRL_LCLK	0x1013a05c
>   
> +static void nbio_v7_0_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> +		adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL);

I don't think this does what you intend. I think + binds stronger than 
<<, so you should write this as

     (adev->rmmio_remap.reg_offset << 2) + HDP_MEM_FLUSH_CNTL


> +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> +		adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL);

Same as above.


> +}
> +
>   static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev)
>   {
>           u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -55,10 +64,9 @@ static void nbio_v7_0_hdp_flush(struct amdgpu_device *adev,
>   				struct amdgpu_ring *ring)
>   {
>   	if (!ring || !ring->funcs->emit_wreg)
> -		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> +		WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);

Are you sure this is correct? As I understand it from the above, 
adev->rmmio_remap.reg_offset is in dwords, HDP_MEM_FLUSH_CNTL is in 
bytes. Something will need to be shifted.


>   	else
> -		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> -			NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> +		amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);

Same as above.


>   }
>   
>   static u32 nbio_v7_0_get_memsize(struct amdgpu_device *adev)
> @@ -283,4 +291,5 @@ const struct amdgpu_nbio_funcs nbio_v7_0_funcs = {
>   	.ih_control = nbio_v7_0_ih_control,
>   	.init_registers = nbio_v7_0_init_registers,
>   	.detect_hw_virt = nbio_v7_0_detect_hw_virt,
> +	.remap_hdp_registers = nbio_v7_0_remap_hdp_registers,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> index c69d515..fa67772 100644
> --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
> @@ -27,9 +27,18 @@
>   #include "nbio/nbio_7_4_offset.h"
>   #include "nbio/nbio_7_4_sh_mask.h"
>   #include "nbio/nbio_7_4_0_smn.h"
> +#include <uapi/linux/kfd_ioctl.h>
>   
>   #define smnNBIF_MGCG_CTRL_LCLK	0x1013a21c
>   
> +static void nbio_v7_4_remap_hdp_registers(struct amdgpu_device *adev)
> +{
> +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_MEM_FLUSH_CNTL,
> +		adev->rmmio_remap.reg_offset << 2 + HDP_MEM_FLUSH_CNTL);
> +	WREG32_SOC15(NBIO, 0, mmREMAP_HDP_REG_FLUSH_CNTL,
> +		adev->rmmio_remap.reg_offset << 2 + HDP_REG_FLUSH_CNTL);

Operator precedence. See above.


> +}
> +
>   static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
>   {
>   	u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0);
> @@ -53,10 +62,9 @@ static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev,
>   				struct amdgpu_ring *ring)
>   {
>   	if (!ring || !ring->funcs->emit_wreg)
> -		WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0);
> +		WREG32_NO_KIQ(adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);
Are adev->rmmio_remap.reg_offset and HDP_MEM_FLUSH_CNTL in the same units?
>   	else
> -		amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
> -			NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0);
> +		amdgpu_ring_emit_wreg(ring, adev->rmmio_remap.reg_offset + HDP_MEM_FLUSH_CNTL, 0);
>   }
>   
>   static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev)
> @@ -262,4 +270,5 @@ const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
>   	.ih_control = nbio_v7_4_ih_control,
>   	.init_registers = nbio_v7_4_init_registers,
>   	.detect_hw_virt = nbio_v7_4_detect_hw_virt,
> +	.remap_hdp_registers = nbio_v7_4_remap_hdp_registers,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
> index 9d8df68..c6b89c2 100644
> --- a/drivers/gpu/drm/amd/amdgpu/si.c
> +++ b/drivers/gpu/drm/amd/amdgpu/si.c
> @@ -1405,6 +1405,7 @@ static int si_common_early_init(void *handle)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	adev->rmmio_remap.bus_addr = ULLONG_MAX;
>   	adev->smc_rreg = &si_smc_rreg;
>   	adev->smc_wreg = &si_smc_wreg;
>   	adev->pcie_rreg = &si_pcie_rreg;
> diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
> index bdb5ad9..3685944 100644
> --- a/drivers/gpu/drm/amd/amdgpu/soc15.c
> +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
> @@ -44,6 +44,7 @@
>   #include "smuio/smuio_9_0_offset.h"
>   #include "smuio/smuio_9_0_sh_mask.h"
>   #include "nbio/nbio_7_0_default.h"
> +#include "nbio/nbio_7_0_offset.h"
>   #include "nbio/nbio_7_0_sh_mask.h"
>   #include "nbio/nbio_7_0_smn.h"
>   #include "mp/mp_9_0_offset.h"
> @@ -64,6 +65,7 @@
>   #include "dce_virtual.h"
>   #include "mxgpu_ai.h"
>   #include "amdgpu_smu.h"
> +#include <uapi/linux/kfd_ioctl.h>
>   
>   #define mmMP0_MISC_CGTT_CTRL0                                                                   0x01b9
>   #define mmMP0_MISC_CGTT_CTRL0_BASE_IDX                                                          0
> @@ -777,8 +779,11 @@ static const struct amdgpu_asic_funcs vega20_asic_funcs =
>   
>   static int soc15_common_early_init(void *handle)
>   {
> +#define MMIO_REG_HOLE_OFFSET (0x80000 - PAGE_SIZE)
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
>   
> +	adev->rmmio_remap.reg_offset = (MMIO_REG_HOLE_OFFSET) >> 2;

Why is this shifted? I think this creates some of the confusion above 
where things have different units.


> +	adev->rmmio_remap.bus_addr = adev->rmmio_base + MMIO_REG_HOLE_OFFSET;
>   	adev->smc_rreg = NULL;
>   	adev->smc_wreg = NULL;
>   	adev->pcie_rreg = &soc15_pcie_rreg;
> @@ -1007,6 +1012,12 @@ static int soc15_common_hw_init(void *handle)
>   	soc15_program_aspm(adev);
>   	/* setup nbio registers */
>   	adev->nbio_funcs->init_registers(adev);
> +	/* remap HDP registers to a hole in mmio space,
> +	 * for the purpose of expose those registers
> +	 * to process space
> +	 */
> +	if (adev->nbio_funcs->remap_hdp_registers)
> +		adev->nbio_funcs->remap_hdp_registers(adev);
>   	/* enable the doorbell aperture */
>   	soc15_enable_doorbell_aperture(adev, true);
>   	/* HW doorbell routing policy: doorbell writing not
> diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
> index 5e5b42a..44565b23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/vi.c
> +++ b/drivers/gpu/drm/amd/amdgpu/vi.c
> @@ -1037,6 +1037,7 @@ static int vi_common_early_init(void *handle)
>   		adev->smc_rreg = &vi_smc_rreg;
>   		adev->smc_wreg = &vi_smc_wreg;
>   	}
> +	adev->rmmio_remap.bus_addr = ULLONG_MAX;
>   	adev->pcie_rreg = &vi_pcie_rreg;
>   	adev->pcie_wreg = &vi_pcie_wreg;
>   	adev->uvd_ctx_rreg = &vi_uvd_ctx_rreg;
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index dc067ed..7524e6e 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -426,6 +426,13 @@ struct kfd_ioctl_import_dmabuf_args {
>   	__u32 dmabuf_fd;	/* to KFD */
>   };
>   
> +/* Register offset inside the remapped mmio page
> + */
> +enum kfd_mmio_remap {
> +	HDP_MEM_FLUSH_CNTL = 0,

This needs some prefix to disambiguate. Something to make it clear that 
this is not the MMIO register address, but an offset in a remapped 
address range. This is a bit verbose, but it's the best I can come up with:

KFD_MMIO_REMAP_HPD_MEM_FLUSH_CNTL = 0,
KFD_MMIO_REMAP_HDP_REG_FLUSH_CNTL = 4,

Regards,
   Felix


> +	HDP_REG_FLUSH_CNTL = 4,
> +};
> +
>   #define AMDKFD_IOCTL_BASE 'K'
>   #define AMDKFD_IO(nr)			_IO(AMDKFD_IOCTL_BASE, nr)
>   #define AMDKFD_IOR(nr, type)		_IOR(AMDKFD_IOCTL_BASE, nr, type)


More information about the amd-gfx mailing list