[Mesa-dev] [PATCH] radeonsi: add support for Raven2 (v2)

Samuel Pitoiset samuel.pitoiset at gmail.com
Tue Oct 30 09:18:57 UTC 2018


Reviewed-by: Samuel Pitoiset <samuel.pitoiset at gmail.com>

On 10/29/18 10:39 PM, Marek Olšák wrote:
> From: Marek Olšák <marek.olsak at amd.com>
> 
> v2: fix enabling primitive binning
> ---
>   src/amd/addrlib/amdgpu_asic_addr.h              |  2 ++
>   src/amd/addrlib/gfx9/gfx9addrlib.cpp            |  2 +-
>   src/amd/common/ac_gpu_info.c                    |  6 ++++++
>   src/amd/common/ac_llvm_util.c                   |  2 ++
>   src/amd/common/ac_surface.c                     |  4 ++++
>   src/amd/common/amd_family.h                     |  1 +
>   src/amd/common/gfx9d.h                          |  3 +++
>   src/gallium/drivers/radeonsi/si_pipe.c          | 10 ++++++----
>   src/gallium/drivers/radeonsi/si_state.c         |  4 +++-
>   src/gallium/drivers/radeonsi/si_state_binning.c |  1 +
>   10 files changed, 29 insertions(+), 6 deletions(-)
> 
> diff --git a/src/amd/addrlib/amdgpu_asic_addr.h b/src/amd/addrlib/amdgpu_asic_addr.h
> index e5838d42a3c..7436c5493e1 100644
> --- a/src/amd/addrlib/amdgpu_asic_addr.h
> +++ b/src/amd/addrlib/amdgpu_asic_addr.h
> @@ -83,20 +83,21 @@
>   
>   #define AMDGPU_CARRIZO_RANGE    0x01, 0x21
>   #define AMDGPU_BRISTOL_RANGE    0x10, 0x21
>   #define AMDGPU_STONEY_RANGE     0x61, 0xFF
>   
>   #define AMDGPU_VEGA10_RANGE     0x01, 0x14
>   #define AMDGPU_VEGA12_RANGE     0x14, 0x28
>   #define AMDGPU_VEGA20_RANGE     0x28, 0xFF
>   
>   #define AMDGPU_RAVEN_RANGE      0x01, 0x81
> +#define AMDGPU_RAVEN2_RANGE     0x81, 0xFF
>   
>   #define AMDGPU_EXPAND_FIX(x) x
>   #define AMDGPU_RANGE_HELPER(val, min, max) ((val >= min) && (val < max))
>   #define AMDGPU_IN_RANGE(val, ...)   AMDGPU_EXPAND_FIX(AMDGPU_RANGE_HELPER(val, __VA_ARGS__))
>   
>   
>   // ASICREV_IS(eRevisionId, revisionName)
>   #define ASICREV_IS(r, rn)              AMDGPU_IN_RANGE(r, AMDGPU_##rn##_RANGE)
>   #define ASICREV_IS_TAHITI_P(r)         ASICREV_IS(r, TAHITI)
>   #define ASICREV_IS_PITCAIRN_PM(r)      ASICREV_IS(r, PITCAIRN)
> @@ -125,12 +126,13 @@
>   #define ASICREV_IS_CARRIZO_BRISTOL(r)  ASICREV_IS(r, BRISTOL)
>   #define ASICREV_IS_STONEY(r)           ASICREV_IS(r, STONEY)
>   
>   #define ASICREV_IS_VEGA10_M(r)         ASICREV_IS(r, VEGA10)
>   #define ASICREV_IS_VEGA10_P(r)         ASICREV_IS(r, VEGA10)
>   #define ASICREV_IS_VEGA12_P(r)         ASICREV_IS(r, VEGA12)
>   #define ASICREV_IS_VEGA12_p(r)         ASICREV_IS(r, VEGA12)
>   #define ASICREV_IS_VEGA20_P(r)         ASICREV_IS(r, VEGA20)
>   
>   #define ASICREV_IS_RAVEN(r)            ASICREV_IS(r, RAVEN)
> +#define ASICREV_IS_RAVEN2(r)           ASICREV_IS(r, RAVEN2)
>   
>   #endif // _AMDGPU_ASIC_ADDR_H
> diff --git a/src/amd/addrlib/gfx9/gfx9addrlib.cpp b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
> index d27aabbb60c..f115242c89c 100644
> --- a/src/amd/addrlib/gfx9/gfx9addrlib.cpp
> +++ b/src/amd/addrlib/gfx9/gfx9addrlib.cpp
> @@ -1284,21 +1284,21 @@ ChipFamily Gfx9Lib::HwlConvertChipFamily(
>                   m_settings.htileAlignFix = 1;
>                   m_settings.applyAliasFix = 1;
>               }
>   
>               m_settings.metaBaseAlignFix = 1;
>   
>               m_settings.depthPipeXorDisable = 1;
>               break;
>           case FAMILY_RV:
>               m_settings.isArcticIsland = 1;
> -            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision);
> +            m_settings.isRaven        = ASICREV_IS_RAVEN(uChipRevision) || ASICREV_IS_RAVEN2(uChipRevision);
>   
>               if (m_settings.isRaven)
>               {
>                   m_settings.isDcn1   = 1;
>               }
>   
>               m_settings.metaBaseAlignFix = 1;
>   
>               if (ASICREV_IS_RAVEN(uChipRevision))
>               {
> diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
> index 2c70fb2c721..689f544c18b 100644
> --- a/src/amd/common/ac_gpu_info.c
> +++ b/src/amd/common/ac_gpu_info.c
> @@ -307,20 +307,26 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
>   		info->name = #cfamily; \
>   		break;
>   #include "pci_ids/radeonsi_pci_ids.h"
>   #undef CHIPSET
>   
>   	default:
>   		fprintf(stderr, "amdgpu: Invalid PCI ID.\n");
>   		return false;
>   	}
>   
> +	/* Raven2 uses the same PCI IDs as Raven1, but different revision IDs. */
> +	if (info->family == CHIP_RAVEN && amdinfo->chip_rev >= 0x8) {
> +		info->family = CHIP_RAVEN2;
> +		info->name = "RAVEN2";
> +	}
> +
>   	if (info->family >= CHIP_VEGA10)
>   		info->chip_class = GFX9;
>   	else if (info->family >= CHIP_TONGA)
>   		info->chip_class = VI;
>   	else if (info->family >= CHIP_BONAIRE)
>   		info->chip_class = CIK;
>   	else if (info->family >= CHIP_TAHITI)
>   		info->chip_class = SI;
>   	else {
>   		fprintf(stderr, "amdgpu: Unknown family.\n");
> diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
> index cd3525187a0..69d9f7b9f3f 100644
> --- a/src/amd/common/ac_llvm_util.c
> +++ b/src/amd/common/ac_llvm_util.c
> @@ -128,20 +128,22 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
>   	case CHIP_VEGAM:
>   		return "polaris11";
>   	case CHIP_VEGA10:
>   		return "gfx900";
>   	case CHIP_RAVEN:
>   		return "gfx902";
>   	case CHIP_VEGA12:
>   		return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
>   	case CHIP_VEGA20:
>   		return HAVE_LLVM >= 0x0700 ? "gfx906" : "gfx902";
> +	case CHIP_RAVEN2:
> +		return "gfx902"; /* TODO: use gfx909 when it's available */
>   	default:
>   		return "";
>   	}
>   }
>   
>   static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
>   						     enum ac_target_machine_options tm_options,
>   						     LLVMCodeGenOptLevel level,
>   						     const char **out_triple)
>   {
> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
> index 94723dc9c09..1f7e2344625 100644
> --- a/src/amd/common/ac_surface.c
> +++ b/src/amd/common/ac_surface.c
> @@ -144,20 +144,24 @@ static void addrlib_family_rev_id(enum radeon_family family,
>   		*addrlib_revid = get_first(AMDGPU_VEGA12_RANGE);
>   		break;
>   	case CHIP_VEGA20:
>   		*addrlib_family = FAMILY_AI;
>   		*addrlib_revid = get_first(AMDGPU_VEGA20_RANGE);
>   		break;
>   	case CHIP_RAVEN:
>   		*addrlib_family = FAMILY_RV;
>   		*addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
>   		break;
> +	case CHIP_RAVEN2:
> +		*addrlib_family = FAMILY_RV;
> +		*addrlib_revid = get_first(AMDGPU_RAVEN2_RANGE);
> +		break;
>   	default:
>   		fprintf(stderr, "amdgpu: Unknown family.\n");
>   	}
>   }
>   
>   static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
>   {
>   	return malloc(pInput->sizeInBytes);
>   }
>   
> diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
> index a282898be06..185ba029763 100644
> --- a/src/amd/common/amd_family.h
> +++ b/src/amd/common/amd_family.h
> @@ -90,20 +90,21 @@ enum radeon_family {
>       CHIP_FIJI,
>       CHIP_STONEY,
>       CHIP_POLARIS10,
>       CHIP_POLARIS11,
>       CHIP_POLARIS12,
>       CHIP_VEGAM,
>       CHIP_VEGA10,
>       CHIP_VEGA12,
>       CHIP_VEGA20,
>       CHIP_RAVEN,
> +    CHIP_RAVEN2,
>       CHIP_LAST,
>   };
>   
>   enum chip_class {
>       CLASS_UNKNOWN = 0,
>       R300,
>       R400,
>       R500,
>       R600,
>       R700,
> diff --git a/src/amd/common/gfx9d.h b/src/amd/common/gfx9d.h
> index d18e6655d33..2e790c54699 100644
> --- a/src/amd/common/gfx9d.h
> +++ b/src/amd/common/gfx9d.h
> @@ -4450,20 +4450,23 @@
>   #define R_028424_CB_DCC_CONTROL                                         0x028424
>   #define   S_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((unsigned)(x) & 0x1) << 0)
>   #define   G_028424_OVERWRITE_COMBINER_DISABLE(x)                      (((x) >> 0) & 0x1)
>   #define   C_028424_OVERWRITE_COMBINER_DISABLE                         0xFFFFFFFE
>   #define   S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((unsigned)(x) & 0x1) << 1)
>   #define   G_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(x)          (((x) >> 1) & 0x1)
>   #define   C_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE             0xFFFFFFFD
>   #define   S_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((unsigned)(x) & 0x1F) << 2)
>   #define   G_028424_OVERWRITE_COMBINER_WATERMARK(x)                    (((x) >> 2) & 0x1F)
>   #define   C_028424_OVERWRITE_COMBINER_WATERMARK                       0xFFFFFF83
> +#define   S_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((unsigned)(x) & 0x1) << 10) /* Raven2+ */
> +#define   G_028424_DISABLE_CONSTANT_ENCODE_REG(x)                     (((x) >> 10) & 0x1)
> +#define   C_028424_DISABLE_CONSTANT_ENCODE_REG                        0xFFFFFBFF
>   #define R_02842C_DB_STENCIL_CONTROL                                     0x02842C
>   #define   S_02842C_STENCILFAIL(x)                                     (((unsigned)(x) & 0x0F) << 0)
>   #define   G_02842C_STENCILFAIL(x)                                     (((x) >> 0) & 0x0F)
>   #define   C_02842C_STENCILFAIL                                        0xFFFFFFF0
>   #define   S_02842C_STENCILZPASS(x)                                    (((unsigned)(x) & 0x0F) << 4)
>   #define   G_02842C_STENCILZPASS(x)                                    (((x) >> 4) & 0x0F)
>   #define   C_02842C_STENCILZPASS                                       0xFFFFFF0F
>   #define   S_02842C_STENCILZFAIL(x)                                    (((unsigned)(x) & 0x0F) << 8)
>   #define   G_02842C_STENCILZFAIL(x)                                    (((x) >> 8) & 0x0F)
>   #define   C_02842C_STENCILZFAIL                                       0xFFFFF0FF
> diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
> index 6118b8076f1..490a3714836 100644
> --- a/src/gallium/drivers/radeonsi/si_pipe.c
> +++ b/src/gallium/drivers/radeonsi/si_pipe.c
> @@ -1026,24 +1026,25 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
>   	sscreen->has_msaa_sample_loc_bug = (sscreen->info.family >= CHIP_POLARIS10 &&
>   					    sscreen->info.family <= CHIP_POLARIS12) ||
>   					   sscreen->info.family == CHIP_VEGA10 ||
>   					   sscreen->info.family == CHIP_RAVEN;
>   	sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
>   					sscreen->info.family == CHIP_RAVEN;
>   
>   	if (sscreen->debug_flags & DBG(DPBB)) {
>   		sscreen->dpbb_allowed = true;
>   	} else {
> -		/* Only enable primitive binning on Raven by default. */
> +		/* Only enable primitive binning on APUs by default. */
>   		/* TODO: Investigate if binning is profitable on Vega12. */
> -		sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
> -					!(sscreen->debug_flags & DBG(NO_DPBB));
> +		sscreen->dpbb_allowed = !(sscreen->debug_flags & DBG(NO_DPBB)) &&
> +					(sscreen->info.family == CHIP_RAVEN ||
> +					 sscreen->info.family == CHIP_RAVEN2);
>   	}
>   
>   	if (sscreen->debug_flags & DBG(DFSM)) {
>   		sscreen->dfsm_allowed = sscreen->dpbb_allowed;
>   	} else {
>   		sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
>   					!(sscreen->debug_flags & DBG(NO_DFSM));
>   	}
>   
>   	/* While it would be nice not to have this flag, we are constrained
> @@ -1056,21 +1057,22 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
>   	 * always disable it.
>   	 */
>   	if (sscreen->info.family == CHIP_STONEY ||
>   	    sscreen->info.chip_class >= GFX9) {
>   		sscreen->has_rbplus = true;
>   
>   		sscreen->rbplus_allowed =
>   			!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
>   			(sscreen->info.family == CHIP_STONEY ||
>   			 sscreen->info.family == CHIP_VEGA12 ||
> -			 sscreen->info.family == CHIP_RAVEN);
> +			 sscreen->info.family == CHIP_RAVEN ||
> +			 sscreen->info.family == CHIP_RAVEN2);
>   	}
>   
>   	sscreen->dcc_msaa_allowed =
>   		!(sscreen->debug_flags & DBG(NO_DCC_MSAA));
>   
>   	sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
>   
>   	(void) mtx_init(&sscreen->shader_parts_mutex, mtx_plain);
>   	sscreen->use_monolithic_shaders =
>   		(sscreen->debug_flags & DBG(MONOLITHIC_SHADERS)) != 0;
> diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
> index 43d76d19916..0293bdfa791 100644
> --- a/src/gallium/drivers/radeonsi/si_state.c
> +++ b/src/gallium/drivers/radeonsi/si_state.c
> @@ -113,21 +113,22 @@ static void si_emit_cb_render_state(struct si_context *sctx)
>   				  blend &&
>   				  blend->blend_enable_4bit & cb_target_mask &&
>   				  sctx->framebuffer.nr_samples >= 2;
>   		unsigned watermark = sctx->framebuffer.dcc_overwrite_combiner_watermark;
>   
>   		radeon_opt_set_context_reg(
>   				sctx, R_028424_CB_DCC_CONTROL,
>   				SI_TRACKED_CB_DCC_CONTROL,
>   				S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(1) |
>   				S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
> -				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable));
> +				S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) |
> +				S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->family == CHIP_RAVEN2));
>   	}
>   
>   	/* RB+ register settings. */
>   	if (sctx->screen->rbplus_allowed) {
>   		unsigned spi_shader_col_format =
>   			sctx->ps_shader.cso ?
>   			sctx->ps_shader.current->key.part.ps.epilog.spi_shader_col_format : 0;
>   		unsigned sx_ps_downconvert = 0;
>   		unsigned sx_blend_opt_epsilon = 0;
>   		unsigned sx_blend_opt_control = 0;
> @@ -5093,20 +5094,21 @@ static void si_init_config(struct si_context *sctx)
>   		unsigned num_se = sscreen->info.max_se;
>   		unsigned pc_lines = 0;
>   
>   		switch (sctx->family) {
>   		case CHIP_VEGA10:
>   		case CHIP_VEGA12:
>   		case CHIP_VEGA20:
>   			pc_lines = 4096;
>   			break;
>   		case CHIP_RAVEN:
> +		case CHIP_RAVEN2:
>   			pc_lines = 1024;
>   			break;
>   		default:
>   			assert(0);
>   		}
>   
>   		si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
>   			       S_028C48_MAX_ALLOC_COUNT(MIN2(128, pc_lines / (4 * num_se))) |
>   			       S_028C48_MAX_PRIM_PER_BATCH(1023));
>   		si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL,
> diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
> index 70c129242d1..3516e561282 100644
> --- a/src/gallium/drivers/radeonsi/si_state_binning.c
> +++ b/src/gallium/drivers/radeonsi/si_state_binning.c
> @@ -400,20 +400,21 @@ void si_emit_dpbb_state(struct si_context *sctx)
>   	/* Tunable parameters. Also test with DFSM enabled/disabled. */
>   	unsigned context_states_per_bin; /* allowed range: [0, 5] */
>   	unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
>   	unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
>   
>   	switch (sctx->family) {
>   	case CHIP_VEGA10:
>   	case CHIP_VEGA12:
>   	case CHIP_VEGA20:
>   	case CHIP_RAVEN:
> +	case CHIP_RAVEN2:
>   		/* Tuned for Raven. Vega might need different values. */
>   		context_states_per_bin = 5;
>   		persistent_states_per_bin = 31;
>   		fpovs_per_batch = 63;
>   		break;
>   	default:
>   		assert(0);
>   	}
>   
>   	/* Emit registers. */
> 


More information about the mesa-dev mailing list