[PATCH 19/48] drm/radeon/kms: add support for MC/VM setup on SI

Jerome Glisse j.glisse at gmail.com
Tue Mar 20 16:20:32 PDT 2012


On Tue, 2012-03-20 at 17:18 -0400, alexdeucher at gmail.com wrote:
> From: Alex Deucher <alexander.deucher at amd.com>
> 
> Sets up the VM and adds support for the new VM ioctls.
> 
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>  drivers/gpu/drm/radeon/si.c  |  328 ++++++++++++++++++++++++++++++++++++++++++
>  drivers/gpu/drm/radeon/sid.h |  219 ++++++++++++++++++++++++++++
>  2 files changed, 547 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
> index 58ad900..ad91c5f 100644
> --- a/drivers/gpu/drm/radeon/si.c
> +++ b/drivers/gpu/drm/radeon/si.c
> @@ -1608,3 +1608,331 @@ int si_asic_reset(struct radeon_device *rdev)
>  	return si_gpu_soft_reset(rdev);
>  }
>  
> +/* MC */
> +static void si_mc_program(struct radeon_device *rdev)
> +{
> +	struct evergreen_mc_save save;
> +	u32 tmp;
> +	int i, j;
> +
> +	/* Initialize HDP */
> +	for (i = 0, j = 0; i < 32; i++, j += 0x18) {
> +		WREG32((0x2c14 + j), 0x00000000);
> +		WREG32((0x2c18 + j), 0x00000000);
> +		WREG32((0x2c1c + j), 0x00000000);
> +		WREG32((0x2c20 + j), 0x00000000);
> +		WREG32((0x2c24 + j), 0x00000000);
> +	}
> +	WREG32(HDP_REG_COHERENCY_FLUSH_CNTL, 0);
> +
> +	evergreen_mc_stop(rdev, &save);
> +	if (radeon_mc_wait_for_idle(rdev)) {
> +		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
> +	}
> +	/* Lockout access through VGA aperture*/
> +	WREG32(VGA_HDP_CONTROL, VGA_MEMORY_DISABLE);
> +	/* Update configuration */
> +	WREG32(MC_VM_SYSTEM_APERTURE_LOW_ADDR,
> +	       rdev->mc.vram_start >> 12);
> +	WREG32(MC_VM_SYSTEM_APERTURE_HIGH_ADDR,
> +	       rdev->mc.vram_end >> 12);
> +	WREG32(MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR,
> +	       rdev->vram_scratch.gpu_addr >> 12);
> +	tmp = ((rdev->mc.vram_end >> 24) & 0xFFFF) << 16;
> +	tmp |= ((rdev->mc.vram_start >> 24) & 0xFFFF);
> +	WREG32(MC_VM_FB_LOCATION, tmp);
> +	/* XXX double check these! */
> +	WREG32(HDP_NONSURFACE_BASE, (rdev->mc.vram_start >> 8));
> +	WREG32(HDP_NONSURFACE_INFO, (2 << 7) | (1 << 30));
> +	WREG32(HDP_NONSURFACE_SIZE, 0x3FFFFFFF);
> +	WREG32(MC_VM_AGP_BASE, 0);
> +	WREG32(MC_VM_AGP_TOP, 0x0FFFFFFF);
> +	WREG32(MC_VM_AGP_BOT, 0x0FFFFFFF);
> +	if (radeon_mc_wait_for_idle(rdev)) {
> +		dev_warn(rdev->dev, "Wait for MC idle timedout !\n");
> +	}
> +	evergreen_mc_resume(rdev, &save);
> +	/* we need to own VRAM, so turn off the VGA renderer here
> +	 * to stop it overwriting our objects */
> +	rv515_vga_render_disable(rdev);
> +}
> +
> +/* SI MC address space is 40 bits */
> +static void si_vram_location(struct radeon_device *rdev,
> +			     struct radeon_mc *mc, u64 base)
> +{
> +	mc->vram_start = base;
> +	if (mc->mc_vram_size > (0xFFFFFFFFFFULL - base + 1)) {

Why not ((1ULL << 40ULL) - 1ULL), so I don't have to count the number
of Fs in 0xFFFFFFFFFFULL? ;)

> +		dev_warn(rdev->dev, "limiting VRAM to PCI aperture size\n");
> +		mc->real_vram_size = mc->aper_size;
> +		mc->mc_vram_size = mc->aper_size;
> +	}
> +	mc->vram_end = mc->vram_start + mc->mc_vram_size - 1;
> +	dev_info(rdev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n",
> +			mc->mc_vram_size >> 20, mc->vram_start,
> +			mc->vram_end, mc->real_vram_size >> 20);
> +}
> +
> +static void si_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
> +{
> +	u64 size_af, size_bf;
> +
> +	size_af = ((0xFFFFFFFFFFULL - mc->vram_end) + mc->gtt_base_align) & ~mc->gtt_base_align;
> +	size_bf = mc->vram_start & ~mc->gtt_base_align;
> +	if (size_bf > size_af) {
> +		if (mc->gtt_size > size_bf) {
> +			dev_warn(rdev->dev, "limiting GTT\n");
> +			mc->gtt_size = size_bf;
> +		}
> +		mc->gtt_start = (mc->vram_start & ~mc->gtt_base_align) - mc->gtt_size;
> +	} else {
> +		if (mc->gtt_size > size_af) {
> +			dev_warn(rdev->dev, "limiting GTT\n");
> +			mc->gtt_size = size_af;
> +		}
> +		mc->gtt_start = (mc->vram_end + 1 + mc->gtt_base_align) & ~mc->gtt_base_align;
> +	}
> +	mc->gtt_end = mc->gtt_start + mc->gtt_size - 1;
> +	dev_info(rdev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
> +			mc->gtt_size >> 20, mc->gtt_start, mc->gtt_end);
> +}
> +
> +static void si_vram_gtt_location(struct radeon_device *rdev,
> +				 struct radeon_mc *mc)
> +{
> +	if (mc->mc_vram_size > 0xFFC0000000ULL) {
> +		/* leave room for at least 1024M GTT */
> +		dev_warn(rdev->dev, "limiting VRAM\n");
> +		mc->real_vram_size = 0xFFC0000000ULL;
> +		mc->mc_vram_size = 0xFFC0000000ULL;
> +	}
> +	si_vram_location(rdev, &rdev->mc, 0);
> +	rdev->mc.gtt_base_align = 0;
> +	si_gtt_location(rdev, mc);
> +}
> +
> +static int si_mc_init(struct radeon_device *rdev)
> +{
> +	u32 tmp;
> +	int chansize, numchan;
> +
> +	/* Get VRAM informations */
> +	rdev->mc.vram_is_ddr = true;
> +	tmp = RREG32(MC_ARB_RAMCFG);
> +	if (tmp & CHANSIZE_OVERRIDE) {
> +		chansize = 16;
> +	} else if (tmp & CHANSIZE_MASK) {
> +		chansize = 64;
> +	} else {
> +		chansize = 32;
> +	}
> +	tmp = RREG32(MC_SHARED_CHMAP);
> +	switch ((tmp & NOOFCHAN_MASK) >> NOOFCHAN_SHIFT) {
> +	case 0:
> +	default:
> +		numchan = 1;
> +		break;
> +	case 1:
> +		numchan = 2;
> +		break;
> +	case 2:
> +		numchan = 4;
> +		break;
> +	case 3:
> +		numchan = 8;
> +		break;
> +	case 4:
> +		numchan = 3;
> +		break;
> +	case 5:
> +		numchan = 6;
> +		break;
> +	case 6:
> +		numchan = 10;
> +		break;
> +	case 7:
> +		numchan = 12;
> +		break;
> +	case 8:
> +		numchan = 16;
> +		break;
> +	}
> +	rdev->mc.vram_width = numchan * chansize;
> +	/* Could aper size report 0 ? */
> +	rdev->mc.aper_base = pci_resource_start(rdev->pdev, 0);
> +	rdev->mc.aper_size = pci_resource_len(rdev->pdev, 0);
> +	/* size in MB on si */
> +	rdev->mc.mc_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
> +	rdev->mc.real_vram_size = RREG32(CONFIG_MEMSIZE) * 1024 * 1024;
> +	rdev->mc.visible_vram_size = rdev->mc.aper_size;
> +	si_vram_gtt_location(rdev, &rdev->mc);
> +	radeon_update_bandwidth_info(rdev);
> +
> +	return 0;
> +}
> +
> +/*
> + * GART
> + */
> +void si_pcie_gart_tlb_flush(struct radeon_device *rdev)
> +{
> +	/* flush hdp cache */
> +	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
> +
> +	/* bits 0-15 are the VM contexts0-15 */
> +	WREG32(VM_INVALIDATE_REQUEST, 1);
> +}
> +
> +int si_pcie_gart_enable(struct radeon_device *rdev)
> +{
> +	int r, i;
> +
> +	if (rdev->gart.robj == NULL) {
> +		dev_err(rdev->dev, "No VRAM object for PCIE GART.\n");
> +		return -EINVAL;
> +	}
> +	r = radeon_gart_table_vram_pin(rdev);
> +	if (r)
> +		return r;
> +	radeon_gart_restore(rdev);
> +	/* Setup TLB control */
> +	WREG32(MC_VM_MX_L1_TLB_CNTL,
> +	       (0xA << 7) |
> +	       ENABLE_L1_TLB |
> +	       SYSTEM_ACCESS_MODE_NOT_IN_SYS |
> +	       ENABLE_ADVANCED_DRIVER_MODEL |
> +	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
> +	/* Setup L2 cache */
> +	WREG32(VM_L2_CNTL, ENABLE_L2_CACHE |
> +	       ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
> +	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
> +	       EFFECTIVE_L2_QUEUE_SIZE(7) |
> +	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
> +	WREG32(VM_L2_CNTL2, INVALIDATE_ALL_L1_TLBS | INVALIDATE_L2_CACHE);
> +	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
> +	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
> +	/* setup context0 */
> +	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
> +	WREG32(VM_CONTEXT0_PAGE_TABLE_END_ADDR, rdev->mc.gtt_end >> 12);
> +	WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR, rdev->gart.table_addr >> 12);
> +	WREG32(VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR,
> +			(u32)(rdev->dummy_page.addr >> 12));
> +	WREG32(VM_CONTEXT0_CNTL2, 0);
> +	WREG32(VM_CONTEXT0_CNTL, (ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
> +				  RANGE_PROTECTION_FAULT_ENABLE_DEFAULT));
> +
> +	WREG32(0x15D4, 0);
> +	WREG32(0x15D8, 0);
> +	WREG32(0x15DC, 0);
> +
> +	/* empty context1-15 */
> +	/* FIXME start with 1G, once using 2 level pt switch to full
> +	 * vm size space
> +	 */
> +	/* set vm size, must be a multiple of 4 */
> +	WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
> +	WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
> +	for (i = 1; i < 16; i++) {
> +		if (i < 8)
> +			WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
> +			       rdev->gart.table_addr >> 12);
> +		else
> +			WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((i - 8) << 2),
> +			       rdev->gart.table_addr >> 12);
> +	}
> +
> +	/* enable context1-15 */
> +	WREG32(VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR,
> +	       (u32)(rdev->dummy_page.addr >> 12));
> +	WREG32(VM_CONTEXT1_CNTL2, 0);
> +	WREG32(VM_CONTEXT1_CNTL, ENABLE_CONTEXT | PAGE_TABLE_DEPTH(0) |
> +				RANGE_PROTECTION_FAULT_ENABLE_DEFAULT);
> +
> +	si_pcie_gart_tlb_flush(rdev);
> +	DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n",
> +		 (unsigned)(rdev->mc.gtt_size >> 20),
> +		 (unsigned long long)rdev->gart.table_addr);
> +	rdev->gart.ready = true;
> +	return 0;
> +}
> +
> +void si_pcie_gart_disable(struct radeon_device *rdev)
> +{
> +	/* Disable all tables */
> +	WREG32(VM_CONTEXT0_CNTL, 0);
> +	WREG32(VM_CONTEXT1_CNTL, 0);
> +	/* Setup TLB control */
> +	WREG32(MC_VM_MX_L1_TLB_CNTL, SYSTEM_ACCESS_MODE_NOT_IN_SYS |
> +	       SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU);
> +	/* Setup L2 cache */
> +	WREG32(VM_L2_CNTL, ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE |
> +	       ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE |
> +	       EFFECTIVE_L2_QUEUE_SIZE(7) |
> +	       CONTEXT1_IDENTITY_ACCESS_MODE(1));
> +	WREG32(VM_L2_CNTL2, 0);
> +	WREG32(VM_L2_CNTL3, L2_CACHE_BIGK_ASSOCIATIVITY |
> +	       L2_CACHE_BIGK_FRAGMENT_SIZE(0));
> +	radeon_gart_table_vram_unpin(rdev);
> +}
> +
> +void si_pcie_gart_fini(struct radeon_device *rdev)
> +{
> +	si_pcie_gart_disable(rdev);
> +	radeon_gart_table_vram_free(rdev);
> +	radeon_gart_fini(rdev);
> +}
> +
> +/*
> + * vm
> + */
> +int si_vm_init(struct radeon_device *rdev)
> +{
> +	/* number of VMs */
> +	rdev->vm_manager.nvm = 16;
> +	/* base offset of vram pages */
> +	rdev->vm_manager.vram_base_offset = 0;
> +
> +	return 0;
> +}
> +
> +void si_vm_fini(struct radeon_device *rdev)
> +{
> +}
> +
> +int si_vm_bind(struct radeon_device *rdev, struct radeon_vm *vm, int id)
> +{
> +	if (id < 8)
> +		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (id << 2), vm->pt_gpu_addr >> 12);
> +	else
> +		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((id - 8) << 2),
> +		       vm->pt_gpu_addr >> 12);
> +	/* flush hdp cache */
> +	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
> +	/* bits 0-15 are the VM contexts0-15 */
> +	WREG32(VM_INVALIDATE_REQUEST, 1 << id);
> +	return 0;
> +}
> +
> +void si_vm_unbind(struct radeon_device *rdev, struct radeon_vm *vm)
> +{
> +	if (vm->id < 8)
> +		WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (vm->id << 2), 0);
> +	else
> +		WREG32(VM_CONTEXT8_PAGE_TABLE_BASE_ADDR + ((vm->id - 8) << 2), 0);
> +	/* flush hdp cache */
> +	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
> +	/* bits 0-15 are the VM contexts0-15 */
> +	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
> +}
> +
> +void si_vm_tlb_flush(struct radeon_device *rdev, struct radeon_vm *vm)
> +{
> +	if (vm->id == -1)
> +		return;
> +
> +	/* flush hdp cache */
> +	WREG32(HDP_MEM_COHERENCY_FLUSH_CNTL, 0x1);
> +	/* bits 0-15 are the VM contexts0-15 */
> +	WREG32(VM_INVALIDATE_REQUEST, 1 << vm->id);
> +}
> +
> diff --git a/drivers/gpu/drm/radeon/sid.h b/drivers/gpu/drm/radeon/sid.h
> index 4c6ff1c..4d9cdc8 100644
> --- a/drivers/gpu/drm/radeon/sid.h
> +++ b/drivers/gpu/drm/radeon/sid.h
> @@ -50,6 +50,9 @@
>  #define SI_MAX_TCC               16
>  #define SI_MAX_TCC_MASK          0xFFFF
>  
> +#define VGA_HDP_CONTROL  				0x328
> +#define		VGA_MEMORY_DISABLE				(1 << 4)
> +
>  #define DMIF_ADDR_CONFIG  				0xBD4
>  
>  #define	SRBM_STATUS				        0xE50
> @@ -57,11 +60,88 @@
>  #define	CC_SYS_RB_BACKEND_DISABLE			0xe80
>  #define	GC_USER_SYS_RB_BACKEND_DISABLE			0xe84
>  
> +#define VM_L2_CNTL					0x1400
> +#define		ENABLE_L2_CACHE					(1 << 0)
> +#define		ENABLE_L2_FRAGMENT_PROCESSING			(1 << 1)
> +#define		L2_CACHE_PTE_ENDIAN_SWAP_MODE(x)		((x) << 2)
> +#define		L2_CACHE_PDE_ENDIAN_SWAP_MODE(x)		((x) << 4)
> +#define		ENABLE_L2_PTE_CACHE_LRU_UPDATE_BY_WRITE		(1 << 9)
> +#define		ENABLE_L2_PDE0_CACHE_LRU_UPDATE_BY_WRITE	(1 << 10)
> +#define		EFFECTIVE_L2_QUEUE_SIZE(x)			(((x) & 7) << 15)
> +#define		CONTEXT1_IDENTITY_ACCESS_MODE(x)		(((x) & 3) << 19)
> +#define VM_L2_CNTL2					0x1404
> +#define		INVALIDATE_ALL_L1_TLBS				(1 << 0)
> +#define		INVALIDATE_L2_CACHE				(1 << 1)
> +#define		INVALIDATE_CACHE_MODE(x)			((x) << 26)
> +#define			INVALIDATE_PTE_AND_PDE_CACHES		0
> +#define			INVALIDATE_ONLY_PTE_CACHES		1
> +#define			INVALIDATE_ONLY_PDE_CACHES		2
> +#define VM_L2_CNTL3					0x1408
> +#define		BANK_SELECT(x)					((x) << 0)
> +#define		L2_CACHE_UPDATE_MODE(x)				((x) << 6)
> +#define		L2_CACHE_BIGK_FRAGMENT_SIZE(x)			((x) << 15)
> +#define		L2_CACHE_BIGK_ASSOCIATIVITY			(1 << 20)
> +#define	VM_L2_STATUS					0x140C
> +#define		L2_BUSY						(1 << 0)
> +#define VM_CONTEXT0_CNTL				0x1410
> +#define		ENABLE_CONTEXT					(1 << 0)
> +#define		PAGE_TABLE_DEPTH(x)				(((x) & 3) << 1)
> +#define		RANGE_PROTECTION_FAULT_ENABLE_DEFAULT		(1 << 4)
> +#define VM_CONTEXT1_CNTL				0x1414
> +#define VM_CONTEXT0_CNTL2				0x1430
> +#define VM_CONTEXT1_CNTL2				0x1434
> +#define	VM_CONTEXT8_PAGE_TABLE_BASE_ADDR		0x1438
> +#define	VM_CONTEXT9_PAGE_TABLE_BASE_ADDR		0x143c
> +#define	VM_CONTEXT10_PAGE_TABLE_BASE_ADDR		0x1440
> +#define	VM_CONTEXT11_PAGE_TABLE_BASE_ADDR		0x1444
> +#define	VM_CONTEXT12_PAGE_TABLE_BASE_ADDR		0x1448
> +#define	VM_CONTEXT13_PAGE_TABLE_BASE_ADDR		0x144c
> +#define	VM_CONTEXT14_PAGE_TABLE_BASE_ADDR		0x1450
> +#define	VM_CONTEXT15_PAGE_TABLE_BASE_ADDR		0x1454
> +
> +#define VM_INVALIDATE_REQUEST				0x1478
> +#define VM_INVALIDATE_RESPONSE				0x147c
> +
> +#define VM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR	0x1518
> +#define VM_CONTEXT1_PROTECTION_FAULT_DEFAULT_ADDR	0x151c
> +
> +#define	VM_CONTEXT0_PAGE_TABLE_BASE_ADDR		0x153c
> +#define	VM_CONTEXT1_PAGE_TABLE_BASE_ADDR		0x1540
> +#define	VM_CONTEXT2_PAGE_TABLE_BASE_ADDR		0x1544
> +#define	VM_CONTEXT3_PAGE_TABLE_BASE_ADDR		0x1548
> +#define	VM_CONTEXT4_PAGE_TABLE_BASE_ADDR		0x154c
> +#define	VM_CONTEXT5_PAGE_TABLE_BASE_ADDR		0x1550
> +#define	VM_CONTEXT6_PAGE_TABLE_BASE_ADDR		0x1554
> +#define	VM_CONTEXT7_PAGE_TABLE_BASE_ADDR		0x1558
> +#define	VM_CONTEXT0_PAGE_TABLE_START_ADDR		0x155c
> +#define	VM_CONTEXT1_PAGE_TABLE_START_ADDR		0x1560
> +
> +#define	VM_CONTEXT0_PAGE_TABLE_END_ADDR			0x157C
> +#define	VM_CONTEXT1_PAGE_TABLE_END_ADDR			0x1580
> +
>  #define MC_SHARED_CHMAP						0x2004
>  #define		NOOFCHAN_SHIFT					12
>  #define		NOOFCHAN_MASK					0x0000f000
>  #define MC_SHARED_CHREMAP					0x2008
>  
> +#define	MC_VM_FB_LOCATION				0x2024
> +#define	MC_VM_AGP_TOP					0x2028
> +#define	MC_VM_AGP_BOT					0x202C
> +#define	MC_VM_AGP_BASE					0x2030
> +#define	MC_VM_SYSTEM_APERTURE_LOW_ADDR			0x2034
> +#define	MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2038
> +#define	MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x203C
> +
> +#define	MC_VM_MX_L1_TLB_CNTL				0x2064
> +#define		ENABLE_L1_TLB					(1 << 0)
> +#define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
> +#define		SYSTEM_ACCESS_MODE_PA_ONLY			(0 << 3)
> +#define		SYSTEM_ACCESS_MODE_USE_SYS_MAP			(1 << 3)
> +#define		SYSTEM_ACCESS_MODE_IN_SYS			(2 << 3)
> +#define		SYSTEM_ACCESS_MODE_NOT_IN_SYS			(3 << 3)
> +#define		SYSTEM_APERTURE_UNMAPPED_ACCESS_PASS_THRU	(0 << 5)
> +#define		ENABLE_ADVANCED_DRIVER_MODEL			(1 << 6)
> +
>  #define	MC_ARB_RAMCFG					0x2760
>  #define		NOOFBANK_SHIFT					0
>  #define		NOOFBANK_MASK					0x00000003
> @@ -73,19 +153,29 @@
>  #define		NOOFCOLS_MASK					0x000000C0
>  #define		CHANSIZE_SHIFT					8
>  #define		CHANSIZE_MASK					0x00000100
> +#define		CHANSIZE_OVERRIDE				(1 << 11)
>  #define		NOOFGROUPS_SHIFT				12
>  #define		NOOFGROUPS_MASK					0x00001000
>  
>  #define	HDP_HOST_PATH_CNTL				0x2C00
> +#define	HDP_NONSURFACE_BASE				0x2C04
> +#define	HDP_NONSURFACE_INFO				0x2C08
> +#define	HDP_NONSURFACE_SIZE				0x2C0C
>  
>  #define HDP_ADDR_CONFIG  				0x2F48
>  #define HDP_MISC_CNTL					0x2F4C
>  #define 	HDP_FLUSH_INVALIDATE_CACHE			(1 << 0)
>  
> +#define	CONFIG_MEMSIZE					0x5428
> +
> +#define HDP_MEM_COHERENCY_FLUSH_CNTL			0x5480
> +
>  #define	BIF_FB_EN						0x5490
>  #define		FB_READ_EN					(1 << 0)
>  #define		FB_WRITE_EN					(1 << 1)
>  
> +#define HDP_REG_COHERENCY_FLUSH_CNTL			0x54A0
> +
>  #define	DC_LB_MEMORY_SPLIT					0x6b0c
>  #define		DC_LB_MEMORY_CONFIG(x)				((x) << 20)
>  
> @@ -321,5 +411,134 @@
>  #define	TCP_CHAN_STEER_LO				0xac0c
>  #define	TCP_CHAN_STEER_HI				0xac10
>  
> +/*
> + * PM4
> + */
> +#define	PACKET_TYPE0	0
> +#define	PACKET_TYPE1	1
> +#define	PACKET_TYPE2	2
> +#define	PACKET_TYPE3	3
> +
> +#define CP_PACKET_GET_TYPE(h) (((h) >> 30) & 3)
> +#define CP_PACKET_GET_COUNT(h) (((h) >> 16) & 0x3FFF)
> +#define CP_PACKET0_GET_REG(h) (((h) & 0xFFFF) << 2)
> +#define CP_PACKET3_GET_OPCODE(h) (((h) >> 8) & 0xFF)
> +#define PACKET0(reg, n)	((PACKET_TYPE0 << 30) |				\
> +			 (((reg) >> 2) & 0xFFFF) |			\
> +			 ((n) & 0x3FFF) << 16)
> +#define CP_PACKET2			0x80000000
> +#define		PACKET2_PAD_SHIFT		0
> +#define		PACKET2_PAD_MASK		(0x3fffffff << 0)
> +
> +#define PACKET2(v)	(CP_PACKET2 | REG_SET(PACKET2_PAD, (v)))
> +
> +#define PACKET3(op, n)	((PACKET_TYPE3 << 30) |				\
> +			 (((op) & 0xFF) << 8) |				\
> +			 ((n) & 0x3FFF) << 16)
> +
> +/* Packet 3 types */
> +#define	PACKET3_NOP					0x10
> +#define	PACKET3_SET_BASE				0x11
> +#define		PACKET3_BASE_INDEX(x)                  ((x) << 0)
> +#define			GDS_PARTITION_BASE		2
> +#define			CE_PARTITION_BASE		3
> +#define	PACKET3_CLEAR_STATE				0x12
> +#define	PACKET3_INDEX_BUFFER_SIZE			0x13
> +#define	PACKET3_DISPATCH_DIRECT				0x15
> +#define	PACKET3_DISPATCH_INDIRECT			0x16
> +#define	PACKET3_ALLOC_GDS				0x1B
> +#define	PACKET3_WRITE_GDS_RAM				0x1C
> +#define	PACKET3_ATOMIC_GDS				0x1D
> +#define	PACKET3_ATOMIC					0x1E
> +#define	PACKET3_OCCLUSION_QUERY				0x1F
> +#define	PACKET3_SET_PREDICATION				0x20
> +#define	PACKET3_REG_RMW					0x21
> +#define	PACKET3_COND_EXEC				0x22
> +#define	PACKET3_PRED_EXEC				0x23
> +#define	PACKET3_DRAW_INDIRECT				0x24
> +#define	PACKET3_DRAW_INDEX_INDIRECT			0x25
> +#define	PACKET3_INDEX_BASE				0x26
> +#define	PACKET3_DRAW_INDEX_2				0x27
> +#define	PACKET3_CONTEXT_CONTROL				0x28
> +#define	PACKET3_INDEX_TYPE				0x2A
> +#define	PACKET3_DRAW_INDIRECT_MULTI			0x2C
> +#define	PACKET3_DRAW_INDEX_AUTO				0x2D
> +#define	PACKET3_DRAW_INDEX_IMMD				0x2E
> +#define	PACKET3_NUM_INSTANCES				0x2F
> +#define	PACKET3_DRAW_INDEX_MULTI_AUTO			0x30
> +#define	PACKET3_INDIRECT_BUFFER_CONST			0x31
> +#define	PACKET3_INDIRECT_BUFFER				0x32
> +#define	PACKET3_STRMOUT_BUFFER_UPDATE			0x34
> +#define	PACKET3_DRAW_INDEX_OFFSET_2			0x35
> +#define	PACKET3_DRAW_INDEX_MULTI_ELEMENT		0x36
> +#define	PACKET3_WRITE_DATA				0x37
> +#define	PACKET3_DRAW_INDEX_INDIRECT_MULTI		0x38
> +#define	PACKET3_MEM_SEMAPHORE				0x39
> +#define	PACKET3_MPEG_INDEX				0x3A
> +#define	PACKET3_COPY_DW					0x3B
> +#define	PACKET3_WAIT_REG_MEM				0x3C
> +#define	PACKET3_MEM_WRITE				0x3D
> +#define	PACKET3_COPY_DATA				0x40
> +#define	PACKET3_PFP_SYNC_ME				0x42
> +#define	PACKET3_SURFACE_SYNC				0x43
> +#              define PACKET3_DEST_BASE_0_ENA      (1 << 0)
> +#              define PACKET3_DEST_BASE_1_ENA      (1 << 1)
> +#              define PACKET3_CB0_DEST_BASE_ENA    (1 << 6)
> +#              define PACKET3_CB1_DEST_BASE_ENA    (1 << 7)
> +#              define PACKET3_CB2_DEST_BASE_ENA    (1 << 8)
> +#              define PACKET3_CB3_DEST_BASE_ENA    (1 << 9)
> +#              define PACKET3_CB4_DEST_BASE_ENA    (1 << 10)
> +#              define PACKET3_CB5_DEST_BASE_ENA    (1 << 11)
> +#              define PACKET3_CB6_DEST_BASE_ENA    (1 << 12)
> +#              define PACKET3_CB7_DEST_BASE_ENA    (1 << 13)
> +#              define PACKET3_DB_DEST_BASE_ENA     (1 << 14)
> +#              define PACKET3_DEST_BASE_2_ENA      (1 << 19)
> +#              define PACKET3_DEST_BASE_3_ENA      (1 << 21)
> +#              define PACKET3_TCL1_ACTION_ENA      (1 << 22)
> +#              define PACKET3_TC_ACTION_ENA        (1 << 23)
> +#              define PACKET3_CB_ACTION_ENA        (1 << 25)
> +#              define PACKET3_DB_ACTION_ENA        (1 << 26)
> +#              define PACKET3_SH_KCACHE_ACTION_ENA (1 << 27)
> +#              define PACKET3_SH_ICACHE_ACTION_ENA (1 << 29)
> +#define	PACKET3_ME_INITIALIZE				0x44
> +#define		PACKET3_ME_INITIALIZE_DEVICE_ID(x) ((x) << 16)
> +#define	PACKET3_COND_WRITE				0x45
> +#define	PACKET3_EVENT_WRITE				0x46
> +#define	PACKET3_EVENT_WRITE_EOP				0x47
> +#define	PACKET3_EVENT_WRITE_EOS				0x48
> +#define	PACKET3_PREAMBLE_CNTL				0x4A
> +#              define PACKET3_PREAMBLE_BEGIN_CLEAR_STATE     (2 << 28)
> +#              define PACKET3_PREAMBLE_END_CLEAR_STATE       (3 << 28)
> +#define	PACKET3_ONE_REG_WRITE				0x57
> +#define	PACKET3_LOAD_CONFIG_REG				0x5F
> +#define	PACKET3_LOAD_CONTEXT_REG			0x60
> +#define	PACKET3_LOAD_SH_REG				0x61
> +#define	PACKET3_SET_CONFIG_REG				0x68
> +#define		PACKET3_SET_CONFIG_REG_START			0x00008000
> +#define		PACKET3_SET_CONFIG_REG_END			0x0000b000
> +#define	PACKET3_SET_CONTEXT_REG				0x69
> +#define		PACKET3_SET_CONTEXT_REG_START			0x00028000
> +#define		PACKET3_SET_CONTEXT_REG_END			0x00029000
> +#define	PACKET3_SET_CONTEXT_REG_INDIRECT		0x73
> +#define	PACKET3_SET_RESOURCE_INDIRECT			0x74
> +#define	PACKET3_SET_SH_REG				0x76
> +#define		PACKET3_SET_SH_REG_START			0x0000b000
> +#define		PACKET3_SET_SH_REG_END				0x0000c000
> +#define	PACKET3_SET_SH_REG_OFFSET			0x77
> +#define	PACKET3_ME_WRITE				0x7A
> +#define	PACKET3_SCRATCH_RAM_WRITE			0x7D
> +#define	PACKET3_SCRATCH_RAM_READ			0x7E
> +#define	PACKET3_CE_WRITE				0x7F
> +#define	PACKET3_LOAD_CONST_RAM				0x80
> +#define	PACKET3_WRITE_CONST_RAM				0x81
> +#define	PACKET3_WRITE_CONST_RAM_OFFSET			0x82
> +#define	PACKET3_DUMP_CONST_RAM				0x83
> +#define	PACKET3_INCREMENT_CE_COUNTER			0x84
> +#define	PACKET3_INCREMENT_DE_COUNTER			0x85
> +#define	PACKET3_WAIT_ON_CE_COUNTER			0x86
> +#define	PACKET3_WAIT_ON_DE_COUNTER			0x87
> +#define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
> +#define	PACKET3_SET_CE_DE_COUNTERS			0x89
> +#define	PACKET3_WAIT_ON_AVAIL_BUFFER			0x8A
>  
>  #endif

Good as is

Cheers,
Jerome




More information about the dri-devel mailing list