[Mesa-dev] [PATCH] radeonsi: add support for Raven2 (v2)

Marek Olšák maraeo at gmail.com
Mon Oct 29 21:39:30 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

v2: fix enabling primitive binning
---
 src/amd/addrlib/amdgpu_asic_addr.h              |  2 ++
 src/amd/addrlib/gfx9/gfx9addrlib.cpp            |  2 +-
 src/amd/common/ac_gpu_info.c                    |  6 ++++++
 src/amd/common/ac_llvm_util.c                   |  2 ++
 src/amd/common/ac_surface.c                     |  4 ++++
 src/amd/common/amd_family.h                     |  1 +
 src/amd/common/gfx9d.h                          |  3 +++
 src/gallium/drivers/radeonsi/si_pipe.c          | 10 ++++++----
 src/gallium/drivers/radeonsi/si_state.c         |  4 +++-
 src/gallium/drivers/radeonsi/si_state_binning.c |  1 +
 10 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h
index e5838d42a3c..7436c5493e1 100644
--- a/src/amd/addrlib/amdgpu_asic_addr.h
+++ b/src/amd/addrlib/amdgpu_asic_addr.h
@@ -83,20 +83,21 @@
 
 #define AMDGPU_CARRIZO_RANGE    0x01, 0x21
 #define AMDGPU_BRISTOL_RANGE    0x10, 0x21
 #define AMDGPU_STONEY_RANGE     0x61, 0xFF
 
 #define AMDGPU_VEGA10_RANGE     0x01, 0x14
 #define AMDGPU_VEGA12_RANGE     0x14, 0x28
 #define AMDGPU_VEGA20_RANGE     0x28, 0xFF
 
 #define AMDGPU_RAVEN_RANGE      0x01, 0x81
+#define AMDGPU_RAVEN2_RANGE     0x81, 0xFF
 
 #define AMDGPU_EXPAND_FIX(x) x
 #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
 #define AMDGPU_IN_RANGE(val, ...)   AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
 
 
 // ASICREV_IS(eRevisionId, revisionName)
 #define ASICREV_IS(r, rn)              AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
 #define ASICREV_IS_TAHITI_P(r)         ASICREV_IS(r, TAHITI)
 #define ASICREV_IS_PITCAIRN_PM(r)      ASICREV_IS(r, PITCAIRN)
@@ -125,12 +126,13 @@
 #define ASICREV_IS_CARRIZO_BRISTOL(r)  ASICREV_IS(r, BRISTOL)
 #define ASICREV_IS_STONEY(r)           ASICREV_IS(r, STONEY)
 
 #define ASICREV_IS_VEGA10_M(r)         ASICREV_IS(r, VEGA10)
 #define ASICREV_IS_VEGA10_P(r)         ASICREV_IS(r, VEGA10)
 #define ASICREV_IS_VEGA12_P(r)         ASICREV_IS(r, VEGA12)
 #define ASICREV_IS_VEGA12_p(r)         ASICREV_IS(r, VEGA12)
 #define ASICREV_IS_VEGA20_P(r)         ASICREV_IS(r, VEGA20)
 
 #define ASICREV_IS_RAVEN(r)            ASICREV_IS(r, RAVEN)
+#define ASICREV_IS_RAVEN2(r)           ASICREV_IS(r, RAVEN2)
 
 #endif // _AMDGPU_ASIC_ADDR_H
diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
index d27aabbb60c..f115242c89c 100644
--- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
+++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
@@ -1284,21 +1284,21 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
                 m_settings.htileAlignFix = 1;
                 m_settings.applyAliasFix = 1;
             }
 
             m_settings.metaBaseAlignFix = 1;
 
             m_settings.depthPipeXorDisable = 1;
             break;
         case FAMILY_RV:
             m_settings.isArcticIsland = 1;
-            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
+            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision);
 
             if (m_settings.isRaven)
             {
                 m_settings.isDcn1   = 1;
             }
 
             m_settings.metaBaseAlignFix = 1;
 
             if (ASICREV_IS_RAVEN(uChipRevision))
             {
diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 2c70fb2c721..689f544c18b 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -307,20 +307,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
 		info->name = #cfamily; \
 		break;
 #include "pci_ids/radeonsi_pci_ids.h"
 #undef CHIPSET
 
 	default:
 		fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
 		return false;
 	}
 
+	/* Raven2 uses the same PCI IDs as Raven1, but different revision IDs. */
+	if (info->family == CHIP_RAVEN && amdinfo->chip_rev >= 0x8) {
+		info->family = CHIP_RAVEN2;
+		info->name = "RAVEN2";
+	}
+
 	if (info->family >= CHIP_VEGA10)
 		info->chip_class = GFX9;
 	else if (info->family >= CHIP_TONGA)
 		info->chip_class = VI;
 	else if (info->family >= CHIP_BONAIRE)
 		info->chip_class = CIK;
 	else if (info->family >= CHIP_TAHITI)
 		info->chip_class = SI;
 	else {
 		fprintf(stderr, "amdgpu: Unknown family.\n");
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index cd3525187a0..69d9f7b9f3f 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -128,20 +128,22 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
 	case CHIP_VEGAM:
 		return "polaris11";
 	case CHIP_VEGA10:
 		return "gfx900";
 	case CHIP_RAVEN:
 		return "gfx902";
 	case CHIP_VEGA12:
 		return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
 	case CHIP_VEGA20:
 		return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902";
+	case CHIP_RAVEN2:
+		return "gfx902"; /* TODO: use gfx909 when it's available */
 	default:
 		return "";
 	}
 }
 
 static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
 						     enum ac_target_machine_options tm_options,
 						     LLVMCodeGenOptLevel level,
 						     const char **out_triple)
 {
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 94723dc9c09..1f7e2344625 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -144,20 +144,24 @@ static void addrlib_family_rev_id(enum radeon_family family,
 		*addrlib_revid = get_first(AMDGPU_VEGA12_RANGE);
 		break;
 	case CHIP_VEGA20:
 		*addrlib_family = FAMILY_AI;
 		*addrlib_revid = get_first(AMDGPU_VEGA20_RANGE);
 		break;
 	case CHIP_RAVEN:
 		*addrlib_family = FAMILY_RV;
 		*addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
 		break;
+	case CHIP_RAVEN2:
+		*addrlib_family = FAMILY_RV;
+		*addrlib_revid = get_first(AMDGPU_RAVEN2_RANGE);
+		break;
 	default:
 		fprintf(stderr, "amdgpu: Unknown family.\n");
 	}
 }
 
 static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
 {
 	return malloc(pInput->sizeInBytes);
 }
 
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index a282898be06..185ba029763 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -90,20 +90,21 @@ enum radeon_family {
     CHIP_FIJI,
     CHIP_STONEY,
     CHIP_POLARIS10,
     CHIP_POLARIS11,
     CHIP_POLARIS12,
     CHIP_VEGAM,
     CHIP_VEGA10,
     CHIP_VEGA12,
     CHIP_VEGA20,
     CHIP_RAVEN,
+    CHIP_RAVEN2,
     CHIP_LAST,
 };
 
 enum chip_class {
     CLASS_UNKNOWN = 0,
     R300,
     R400,
     R500,
     R600,
     R700,
diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
index d18e6655d33..2e790c54699 100644
--- a/src/amd/common/gfx9d.h
+++ b/src/amd/common/gfx9d.h
@@ -4450,20 +4450,23 @@
 #define R_028424_CB_DCC_CONTROL                                         0x028424
 #define   S_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((unsigned)(x) & 0x1) << 0)
 #define   G_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) & 0x1)
 #define   C_028424_OVERWRITE_COMBINER_DISABLE                         0xFFFFFFFE
 #define   S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((unsigned)(x) & 0x1) << 1)
 #define   G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) >> 1) & 0x1)
 #define   C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE             0xFFFFFFFD
 #define   S_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((unsigned)(x) & 0x1F) << 2)
 #define   G_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) >> 2) & 0x1F)
 #define   C_028424_OVERWRITE_COMBINER_WATERMARK                       0xFFFFFF83
+#define   S_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((unsigned)(x) & 0x1) << 10) /* Raven2+ */
+#define   G_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((x) >> 10) & 0x1)
+#define   C_028424_DISABLE_CONSTANT_ENCODE_REG                        0xFFFFFBFF
 #define R_02842C_DB_STENCIL_CONTROL                                     0x02842C
 #define   S_02842C_STENCILFAIL(x)                                     (((unsigned)(x) & 0x0F) << 0)
 #define   G_02842C_STENCILFAIL(x)                                     (((x) >> 0) & 0x0F)
 #define   C_02842C_STENCILFAIL                                        0xFFFFFFF0
 #define   S_02842C_STENCILZPASS(x)                                    (((unsigned)(x) & 0x0F) << 4)
 #define   G_02842C_STENCILZPASS(x)                                    (((x) >> 4) & 0x0F)
 #define   C_02842C_STENCILZPASS                                       0xFFFFFF0F
 #define   S_02842C_STENCILZFAIL(x)                                    (((unsigned)(x) & 0x0F) << 8)
 #define   G_02842C_STENCILZFAIL(x)                                    (((x) >> 8) & 0x0F)
 #define   C_02842C_STENCILZFAIL                                       0xFFFFF0FF
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 6118b8076f1..490a3714836 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -1026,24 +1026,25 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
 					    sscreen->info.family <= CHIP_POLARIS12) ||
 					   sscreen->info.family == CHIP_VEGA10 ||
 					   sscreen->info.family == CHIP_RAVEN;
 	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
 					sscreen->info.family == CHIP_RAVEN;
 
 	if (sscreen->debug_flags & DBG(DPBB)) {
 		sscreen->dpbb_allowed = true;
 	} else {
-		/* Only enable primitive binning on Raven by default. */
+		/* Only enable primitive binning on APUs by default. */
 		/* TODO: Investigate if binning is profitable on Vega12. */
-		sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
-					!(sscreen->debug_flags & DBG(NO_DPBB));
+		sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) &&
+					(sscreen->info.family == CHIP_RAVEN ||
+					 sscreen->info.family == CHIP_RAVEN2);
 	}
 
 	if (sscreen->debug_flags & DBG(DFSM)) {
 		sscreen->dfsm_allowed = sscreen->dpbb_allowed;
 	} else {
 		sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
 					!(sscreen->debug_flags & DBG(NO_DFSM));
 	}
 
 	/* While it would be nice not to have this flag, we are constrained
@@ -1056,21 +1057,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
 	 * always disable it.
 	 */
 	if (sscreen->info.family == CHIP_STONEY ||
 	    sscreen->info.chip_class >= GFX9) {
 		sscreen->has_rbplus = true;
 
 		sscreen->rbplus_allowed =
 			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
 			(sscreen->info.family == CHIP_STONEY ||
 			 sscreen->info.family == CHIP_VEGA12 ||
-			 sscreen->info.family == CHIP_RAVEN);
+			 sscreen->info.family == CHIP_RAVEN ||
+			 sscreen->info.family == CHIP_RAVEN2);
 	}
 
 	sscreen->dcc_msaa_allowed =
 		!(sscreen->debug_flags & DBG(NO_DCC_MSAA));
 
 	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
 
 	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
 	sscreen->use_monolithic_shaders =
 		(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 43d76d19916..0293bdfa791 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -113,21 +113,22 @@ static void si_emit_cb_render_state(struct si_context *sctx)
 				  blend &&
 				  blend->blend_enable_4bit & cb_target_mask &&
 				  sctx->framebuffer.nr_samples >= 2;
 		unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;
 
 		radeon_opt_set_context_reg(
 				sctx, R_028424_CB_DCC_CONTROL,
 				SI_TRACKED_CB_DCC_CONTROL,
 				S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
 				S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
-				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable));
+				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
+				S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->family == CHIP_RAVEN2));
 	}
 
 	/* RB+ register settings. */
 	if (sctx->screen->rbplus_allowed) {
 		unsigned spi_shader_col_format =
 			sctx->ps_shader.cso ?
 			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
 		unsigned sx_ps_downconvert = 0;
 		unsigned sx_blend_opt_epsilon = 0;
 		unsigned sx_blend_opt_control = 0;
@@ -5093,20 +5094,21 @@ static void si_init_config(struct si_context *sctx)
 		unsigned num_se = sscreen->info.max_se;
 		unsigned pc_lines = 0;
 
 		switch (sctx->family) {
 		case CHIP_VEGA10:
 		case CHIP_VEGA12:
 		case CHIP_VEGA20:
 			pc_lines = 4096;
 			break;
 		case CHIP_RAVEN:
+		case CHIP_RAVEN2:
 			pc_lines = 1024;
 			break;
 		default:
 			assert(0);
 		}
 
 		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
 			       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
 			       S_028C48_MAX_PRIM_PER_BATCH(1023));
 		si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 70c129242d1..3516e561282 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -400,20 +400,21 @@ void si_emit_dpbb_state(struct si_context *sctx)
 	/* Tunable parameters. Also test with DFSM enabled/disabled. */
 	unsigned context_states_per_bin; /* allowed range: [0, 5] */
 	unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
 	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
 
 	switch (sctx->family) {
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
 	case CHIP_RAVEN:
+	case CHIP_RAVEN2:
 		/* Tuned for Raven. Vega might need different values. */
 		context_states_per_bin = 5;
 		persistent_states_per_bin = 31;
 		fpovs_per_batch = 63;
 		break;
 	default:
 		assert(0);
 	}
 
 	/* Emit registers. */
-- 
2.17.1



More information about the mesa-dev mailing list