[Mesa-dev] [PATCH 2/2] radeonsi: add support for Vega12

Marek Olšák maraeo at gmail.com
Thu Mar 22 15:15:44 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
---
 include/pci_ids/radeonsi_pci_ids.h              |  6 +++++
 src/amd/common/ac_llvm_util.c                   |  1 +
 src/amd/common/ac_surface.c                     | 33 +++++++++++++++++++------
 src/amd/common/amd_family.h                     |  1 +
 src/gallium/drivers/radeonsi/si_get.c           |  1 +
 src/gallium/drivers/radeonsi/si_pipe.c          |  2 ++
 src/gallium/drivers/radeonsi/si_state.c         |  4 ++-
 src/gallium/drivers/radeonsi/si_state_binning.c |  1 +
 8 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index 6a3594eabc9..62b130307a3 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,11 +219,17 @@ CHIPSET(0x699F, POLARIS12)
 CHIPSET(0x6860, VEGA10)
 CHIPSET(0x6861, VEGA10)
 CHIPSET(0x6862, VEGA10)
 CHIPSET(0x6863, VEGA10)
 CHIPSET(0x6864, VEGA10)
 CHIPSET(0x6867, VEGA10)
 CHIPSET(0x6868, VEGA10)
 CHIPSET(0x687F, VEGA10)
 CHIPSET(0x686C, VEGA10)
 
+CHIPSET(0x69A0, VEGA12)
+CHIPSET(0x69A1, VEGA12)
+CHIPSET(0x69A2, VEGA12)
+CHIPSET(0x69A3, VEGA12)
+CHIPSET(0x69AF, VEGA12)
+
 CHIPSET(0x15DD, RAVEN)
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index bb9e873af81..f3db1c5a4a4 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -107,20 +107,21 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_FIJI:
 		return "fiji";
 	case CHIP_STONEY:
 		return "stoney";
 	case CHIP_POLARIS10:
 		return "polaris10";
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
 		return "polaris11";
 	case CHIP_VEGA10:
+	case CHIP_VEGA12:
 	case CHIP_RAVEN:
 		return "gfx900";
 	default:
 		return "";
 	}
 }
 
 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options)
 {
 	assert(family >= CHIP_TAHITI);
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 603b7058bdc..12dfc0cb1f2 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -128,20 +128,24 @@ static void addrlib_family_rev_id(enum radeon_family family,
 		*addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE);
 		break;
 	case CHIP_POLARIS12:
 		*addrlib_family = FAMILY_VI;
 		*addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE);
 		break;
 	case CHIP_VEGA10:
 		*addrlib_family = FAMILY_AI;
 		*addrlib_revid = get_first(AMDGPU_VEGA10_RANGE);
 		break;
+	case CHIP_VEGA12:
+		*addrlib_family = FAMILY_AI;
+		*addrlib_revid = get_first(AMDGPU_VEGA12_RANGE);
+		break;
 	case CHIP_RAVEN:
 		*addrlib_family = FAMILY_RV;
 		*addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
 		break;
 	default:
 		fprintf(stderr, "amdgpu: Unknown family.\n");
 	}
 }
 
 static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
@@ -898,22 +902,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 	if (in->flags.depth) {
 		assert(in->swizzleMode != ADDR_SW_LINEAR);
 
 		/* HTILE */
 		ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
 		ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
 
 		hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
 		hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
 
-		hin.hTileFlags.pipeAligned = 1;
-		hin.hTileFlags.rbAligned = 1;
+		hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+		hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned;
 		hin.depthFlags = in->flags;
 		hin.swizzleMode = in->swizzleMode;
 		hin.unalignedWidth = in->width;
 		hin.unalignedHeight = in->height;
 		hin.numSlices = in->numSlices;
 		hin.numMipLevels = in->numMipLevels;
 
 		ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
 		if (ret != ADDR_OK)
 			return ret;
@@ -960,22 +964,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 		    !compressed &&
 		    in->swizzleMode != ADDR_SW_LINEAR) {
 			ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
 			ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
 			ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
 
 			din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
 			dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
 			dout.pMipInfo = meta_mip_info;
 
-			din.dccKeyFlags.pipeAligned = 1;
-			din.dccKeyFlags.rbAligned = 1;
+			din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+			din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
 			din.colorFlags = in->flags;
 			din.resourceType = in->resourceType;
 			din.swizzleMode = in->swizzleMode;
 			din.bpp = in->bpp;
 			din.unalignedWidth = in->width;
 			din.unalignedHeight = in->height;
 			din.numSlices = in->numSlices;
 			din.numFrags = in->numFrags;
 			din.numMipLevels = in->numMipLevels;
 			din.dataSurfaceSize = out.surfSize;
@@ -1081,22 +1085,28 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 		}
 
 		/* CMASK */
 		if (in->swizzleMode != ADDR_SW_LINEAR) {
 			ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
 			ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
 
 			cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
 			cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
 
-			cin.cMaskFlags.pipeAligned = 1;
-			cin.cMaskFlags.rbAligned = 1;
+			if (in->numSamples) {
+				/* FMASK is always aligned. */
+				cin.cMaskFlags.pipeAligned = 1;
+				cin.cMaskFlags.rbAligned = 1;
+			} else {
+				cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+				cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned;
+			}
 			cin.colorFlags = in->flags;
 			cin.resourceType = in->resourceType;
 			cin.unalignedWidth = in->width;
 			cin.unalignedHeight = in->height;
 			cin.numSlices = in->numSlices;
 
 			if (in->numSamples > 1)
 				cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
 			else
 				cin.swizzleMode = in->swizzleMode;
@@ -1109,20 +1119,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
 			surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned;
 			surf->u.gfx9.cmask_size = cout.cmaskBytes;
 			surf->u.gfx9.cmask_alignment = cout.baseAlign;
 		}
 	}
 
 	return 0;
 }
 
 static int gfx9_compute_surface(ADDR_HANDLE addrlib,
+				const struct radeon_info *info,
 				const struct ac_surf_config *config,
 				enum radeon_surf_mode mode,
 				struct radeon_surf *surf)
 {
 	bool compressed;
 	ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
 	int r;
 
 	assert(!(surf->flags & RADEON_SURF_FMASK));
 
@@ -1189,20 +1200,24 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
 	AddrSurfInfoIn.width = config->info.width;
 	AddrSurfInfoIn.height = config->info.height;
 
 	if (config->is_3d)
 		AddrSurfInfoIn.numSlices = config->info.depth;
 	else if (config->is_cube)
 		AddrSurfInfoIn.numSlices = 6;
 	else
 		AddrSurfInfoIn.numSlices = config->info.array_size;
 
+	/* This is propagated to HTILE/DCC/CMASK. */
+	AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
+	AddrSurfInfoIn.flags.metaRbUnaligned = 0;
+
 	switch (mode) {
 	case RADEON_SURF_MODE_LINEAR_ALIGNED:
 		assert(config->info.samples <= 1);
 		assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
 		AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
 		break;
 
 	case RADEON_SURF_MODE_1D:
 	case RADEON_SURF_MODE_2D:
 		if (surf->flags & RADEON_SURF_IMPORTED) {
@@ -1314,29 +1329,33 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
 		case ADDR_SW_4KB_Z_X:
 		case ADDR_SW_64KB_Z_X:
 		case ADDR_SW_VAR_Z_X:
 			surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
 			break;
 
 		default:
 			assert(0);
 	}
 
+	/* Temporary Vega12 workaround: inflate the FMASK size 8x to prevent VM faults and hangs. */
+	if (info->family == CHIP_VEGA12)
+		surf->u.gfx9.fmask_size *= 8;
+
 	return 0;
 }
 
 int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
 		       const struct ac_surf_config *config,
 		       enum radeon_surf_mode mode,
 		       struct radeon_surf *surf)
 {
 	int r;
 
 	r = surf_config_sanity(config);
 	if (r)
 		return r;
 
 	if (info->chip_class >= GFX9)
-		return gfx9_compute_surface(addrlib, config, mode, surf);
+		return gfx9_compute_surface(addrlib, info, config, mode, surf);
 	else
 		return gfx6_compute_surface(addrlib, info, config, mode, surf);
 }
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index c62d0aa527a..285111f2a2a 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -86,20 +86,21 @@ enum radeon_family {
     CHIP_MULLINS,
     CHIP_TONGA,
     CHIP_ICELAND,
     CHIP_CARRIZO,
     CHIP_FIJI,
     CHIP_STONEY,
     CHIP_POLARIS10,
     CHIP_POLARIS11,
     CHIP_POLARIS12,
     CHIP_VEGA10,
+    CHIP_VEGA12,
     CHIP_RAVEN,
     CHIP_LAST,
 };
 
 enum chip_class {
     CLASS_UNKNOWN = 0,
     R300,
     R400,
     R500,
     R600,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index b4ca5bea943..fc2be33b3e4 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -68,20 +68,21 @@ const char *si_get_family_name(const struct si_screen *sscreen)
 	case CHIP_MULLINS: return "AMD MULLINS";
 	case CHIP_TONGA: return "AMD TONGA";
 	case CHIP_ICELAND: return "AMD ICELAND";
 	case CHIP_CARRIZO: return "AMD CARRIZO";
 	case CHIP_FIJI: return "AMD FIJI";
 	case CHIP_POLARIS10: return "AMD POLARIS10";
 	case CHIP_POLARIS11: return "AMD POLARIS11";
 	case CHIP_POLARIS12: return "AMD POLARIS12";
 	case CHIP_STONEY: return "AMD STONEY";
 	case CHIP_VEGA10: return "AMD VEGA10";
+	case CHIP_VEGA12: return "AMD VEGA12";
 	case CHIP_RAVEN: return "AMD RAVEN";
 	default: return "AMD unknown";
 	}
 }
 
 static bool si_have_tgsi_compute(struct si_screen *sscreen)
 {
 	/* Old kernels disallowed some register writes for SI
 	 * that are used for indirect dispatches. */
 	return (sscreen->info.chip_class >= CIK ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3d787d58cd1..fa9ee43389a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -821,20 +821,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 					    sscreen->info.family <= CHIP_POLARIS12) ||
 					   sscreen->info.family == CHIP_VEGA10 ||
 					   sscreen->info.family == CHIP_RAVEN;
 	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
 					sscreen->info.family == CHIP_RAVEN;
 
 	if (sscreen->debug_flags & DBG(DPBB)) {
 		sscreen->dpbb_allowed = true;
 	} else {
 		/* Only enable primitive binning on Raven by default. */
+		/* TODO: Investigate if binning is profitable on Vega12. */
 		sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
 					!(sscreen->debug_flags & DBG(NO_DPBB));
 	}
 
 	if (sscreen->debug_flags & DBG(DFSM)) {
 		sscreen->dfsm_allowed = sscreen->dpbb_allowed;
 	} else {
 		sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
 					!(sscreen->debug_flags & DBG(NO_DFSM));
 	}
@@ -848,20 +849,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 	/* Some chips have RB+ registers, but don't support RB+. Those must
 	 * always disable it.
 	 */
 	if (sscreen->info.family == CHIP_STONEY ||
 	    sscreen->info.chip_class >= GFX9) {
 		sscreen->has_rbplus = true;
 
 		sscreen->rbplus_allowed =
 			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
 			(sscreen->info.family == CHIP_STONEY ||
+			 sscreen->info.family == CHIP_VEGA12 ||
 			 sscreen->info.family == CHIP_RAVEN);
 	}
 
 	sscreen->dcc_msaa_allowed =
 		!(sscreen->debug_flags & DBG(NO_DCC_MSAA)) &&
 		(sscreen->debug_flags & DBG(DCC_MSAA) ||
 		 sscreen->info.chip_class == VI);
 
 	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
 
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1bfb3c34aa7..b4165a4669b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1668,21 +1668,22 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
 		case PIPE_FORMAT_RGTC2_UNORM:
 		case PIPE_FORMAT_LATC2_UNORM:
 			return V_008F14_IMG_DATA_FORMAT_BC5;
 		default:
 			goto out_unknown;
 		}
 	}
 
 	if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
 	    (sscreen->info.family == CHIP_STONEY ||
-	     sscreen->info.chip_class >= GFX9)) {
+	     sscreen->info.family == CHIP_VEGA10 ||
+	     sscreen->info.family == CHIP_RAVEN)) {
 		switch (format) {
 		case PIPE_FORMAT_ETC1_RGB8:
 		case PIPE_FORMAT_ETC2_RGB8:
 		case PIPE_FORMAT_ETC2_SRGB8:
 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
 		case PIPE_FORMAT_ETC2_RGB8A1:
 		case PIPE_FORMAT_ETC2_SRGB8A1:
 			return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
 		case PIPE_FORMAT_ETC2_RGBA8:
 		case PIPE_FORMAT_ETC2_SRGBA8:
@@ -5038,20 +5039,21 @@ static void si_init_config(struct si_context *sctx)
 	}
 	si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
 		      RADEON_PRIO_BORDER_COLORS);
 
 	if (sctx->b.chip_class >= GFX9) {
 		unsigned num_se = sscreen->info.max_se;
 		unsigned pc_lines = 0;
 
 		switch (sctx->b.family) {
 		case CHIP_VEGA10:
+		case CHIP_VEGA12:
 			pc_lines = 4096;
 			break;
 		case CHIP_RAVEN:
 			pc_lines = 1024;
 			break;
 		default:
 			assert(0);
 		}
 
 		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 686701d718f..0f50ea755cb 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -407,20 +407,21 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
 					 blend->blend_enable_4bit) != 0;
 	}
 
 	/* Tunable parameters. Also test with DFSM enabled/disabled. */
 	unsigned context_states_per_bin; /* allowed range: [0, 5] */
 	unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
 	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
 
 	switch (sctx->b.family) {
 	case CHIP_VEGA10:
+	case CHIP_VEGA12:
 	case CHIP_RAVEN:
 		/* Tuned for Raven. Vega might need different values. */
 		context_states_per_bin = 5;
 		persistent_states_per_bin = 31;
 		fpovs_per_batch = 63;
 		break;
 	default:
 		assert(0);
 	}
 
-- 
2.15.1



More information about the mesa-dev mailing list