[Mesa-dev] [PATCH 2/2] radeonsi: add support for Vega12
Marek Olšák
maraeo at gmail.com
Thu Mar 22 15:15:44 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>
---
include/pci_ids/radeonsi_pci_ids.h | 6 +++++
src/amd/common/ac_llvm_util.c | 1 +
src/amd/common/ac_surface.c | 33 +++++++++++++++++++------
src/amd/common/amd_family.h | 1 +
src/gallium/drivers/radeonsi/si_get.c | 1 +
src/gallium/drivers/radeonsi/si_pipe.c | 2 ++
src/gallium/drivers/radeonsi/si_state.c | 4 ++-
src/gallium/drivers/radeonsi/si_state_binning.c | 1 +
8 files changed, 41 insertions(+), 8 deletions(-)
diff --git a/include/pci_ids/radeonsi_pci_ids.h b/include/pci_ids/radeonsi_pci_ids.h
index 6a3594eabc9..62b130307a3 100644
--- a/include/pci_ids/radeonsi_pci_ids.h
+++ b/include/pci_ids/radeonsi_pci_ids.h
@@ -219,11 +219,17 @@ CHIPSET(0x699F, POLARIS12)
CHIPSET(0x6860, VEGA10)
CHIPSET(0x6861, VEGA10)
CHIPSET(0x6862, VEGA10)
CHIPSET(0x6863, VEGA10)
CHIPSET(0x6864, VEGA10)
CHIPSET(0x6867, VEGA10)
CHIPSET(0x6868, VEGA10)
CHIPSET(0x687F, VEGA10)
CHIPSET(0x686C, VEGA10)
+CHIPSET(0x69A0, VEGA12)
+CHIPSET(0x69A1, VEGA12)
+CHIPSET(0x69A2, VEGA12)
+CHIPSET(0x69A3, VEGA12)
+CHIPSET(0x69AF, VEGA12)
+
CHIPSET(0x15DD, RAVEN)
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index bb9e873af81..f3db1c5a4a4 100644
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -107,20 +107,21 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
case CHIP_FIJI:
return "fiji";
case CHIP_STONEY:
return "stoney";
case CHIP_POLARIS10:
return "polaris10";
case CHIP_POLARIS11:
case CHIP_POLARIS12:
return "polaris11";
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
return "gfx900";
default:
return "";
}
}
LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options)
{
assert(family >= CHIP_TAHITI);
diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 603b7058bdc..12dfc0cb1f2 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -128,20 +128,24 @@ static void addrlib_family_rev_id(enum radeon_family family,
*addrlib_revid = get_first(AMDGPU_POLARIS11_RANGE);
break;
case CHIP_POLARIS12:
*addrlib_family = FAMILY_VI;
*addrlib_revid = get_first(AMDGPU_POLARIS12_RANGE);
break;
case CHIP_VEGA10:
*addrlib_family = FAMILY_AI;
*addrlib_revid = get_first(AMDGPU_VEGA10_RANGE);
break;
+ case CHIP_VEGA12:
+ *addrlib_family = FAMILY_AI;
+ *addrlib_revid = get_first(AMDGPU_VEGA12_RANGE);
+ break;
case CHIP_RAVEN:
*addrlib_family = FAMILY_RV;
*addrlib_revid = get_first(AMDGPU_RAVEN_RANGE);
break;
default:
fprintf(stderr, "amdgpu: Unknown family.\n");
}
}
static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT * pInput)
@@ -898,22 +902,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
if (in->flags.depth) {
assert(in->swizzleMode != ADDR_SW_LINEAR);
/* HTILE */
ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
- hin.hTileFlags.pipeAligned = 1;
- hin.hTileFlags.rbAligned = 1;
+ hin.hTileFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+ hin.hTileFlags.rbAligned = !in->flags.metaRbUnaligned;
hin.depthFlags = in->flags;
hin.swizzleMode = in->swizzleMode;
hin.unalignedWidth = in->width;
hin.unalignedHeight = in->height;
hin.numSlices = in->numSlices;
hin.numMipLevels = in->numMipLevels;
ret = Addr2ComputeHtileInfo(addrlib, &hin, &hout);
if (ret != ADDR_OK)
return ret;
@@ -960,22 +964,22 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
!compressed &&
in->swizzleMode != ADDR_SW_LINEAR) {
ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {};
din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
dout.pMipInfo = meta_mip_info;
- din.dccKeyFlags.pipeAligned = 1;
- din.dccKeyFlags.rbAligned = 1;
+ din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+ din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
din.colorFlags = in->flags;
din.resourceType = in->resourceType;
din.swizzleMode = in->swizzleMode;
din.bpp = in->bpp;
din.unalignedWidth = in->width;
din.unalignedHeight = in->height;
din.numSlices = in->numSlices;
din.numFrags = in->numFrags;
din.numMipLevels = in->numMipLevels;
din.dataSurfaceSize = out.surfSize;
@@ -1081,22 +1085,28 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
}
/* CMASK */
if (in->swizzleMode != ADDR_SW_LINEAR) {
ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
- cin.cMaskFlags.pipeAligned = 1;
- cin.cMaskFlags.rbAligned = 1;
+ if (in->numSamples) {
+ /* FMASK is always aligned. */
+ cin.cMaskFlags.pipeAligned = 1;
+ cin.cMaskFlags.rbAligned = 1;
+ } else {
+ cin.cMaskFlags.pipeAligned = !in->flags.metaPipeUnaligned;
+ cin.cMaskFlags.rbAligned = !in->flags.metaRbUnaligned;
+ }
cin.colorFlags = in->flags;
cin.resourceType = in->resourceType;
cin.unalignedWidth = in->width;
cin.unalignedHeight = in->height;
cin.numSlices = in->numSlices;
if (in->numSamples > 1)
cin.swizzleMode = surf->u.gfx9.fmask.swizzle_mode;
else
cin.swizzleMode = in->swizzleMode;
@@ -1109,20 +1119,21 @@ static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
surf->u.gfx9.cmask.pipe_aligned = cin.cMaskFlags.pipeAligned;
surf->u.gfx9.cmask_size = cout.cmaskBytes;
surf->u.gfx9.cmask_alignment = cout.baseAlign;
}
}
return 0;
}
static int gfx9_compute_surface(ADDR_HANDLE addrlib,
+ const struct radeon_info *info,
const struct ac_surf_config *config,
enum radeon_surf_mode mode,
struct radeon_surf *surf)
{
bool compressed;
ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
int r;
assert(!(surf->flags & RADEON_SURF_FMASK));
@@ -1189,20 +1200,24 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
AddrSurfInfoIn.width = config->info.width;
AddrSurfInfoIn.height = config->info.height;
if (config->is_3d)
AddrSurfInfoIn.numSlices = config->info.depth;
else if (config->is_cube)
AddrSurfInfoIn.numSlices = 6;
else
AddrSurfInfoIn.numSlices = config->info.array_size;
+ /* This is propagated to HTILE/DCC/CMASK. */
+ AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
+ AddrSurfInfoIn.flags.metaRbUnaligned = 0;
+
switch (mode) {
case RADEON_SURF_MODE_LINEAR_ALIGNED:
assert(config->info.samples <= 1);
assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
break;
case RADEON_SURF_MODE_1D:
case RADEON_SURF_MODE_2D:
if (surf->flags & RADEON_SURF_IMPORTED) {
@@ -1314,29 +1329,33 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
case ADDR_SW_4KB_Z_X:
case ADDR_SW_64KB_Z_X:
case ADDR_SW_VAR_Z_X:
surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
break;
default:
assert(0);
}
+ /* Temporary workaround to prevent VM faults and hangs. */
+ if (info->family == CHIP_VEGA12)
+ surf->u.gfx9.fmask_size *= 8;
+
return 0;
}
int ac_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
const struct ac_surf_config *config,
enum radeon_surf_mode mode,
struct radeon_surf *surf)
{
int r;
r = surf_config_sanity(config);
if (r)
return r;
if (info->chip_class >= GFX9)
- return gfx9_compute_surface(addrlib, config, mode, surf);
+ return gfx9_compute_surface(addrlib, info, config, mode, surf);
else
return gfx6_compute_surface(addrlib, info, config, mode, surf);
}
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index c62d0aa527a..285111f2a2a 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -86,20 +86,21 @@ enum radeon_family {
CHIP_MULLINS,
CHIP_TONGA,
CHIP_ICELAND,
CHIP_CARRIZO,
CHIP_FIJI,
CHIP_STONEY,
CHIP_POLARIS10,
CHIP_POLARIS11,
CHIP_POLARIS12,
CHIP_VEGA10,
+ CHIP_VEGA12,
CHIP_RAVEN,
CHIP_LAST,
};
enum chip_class {
CLASS_UNKNOWN = 0,
R300,
R400,
R500,
R600,
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index b4ca5bea943..fc2be33b3e4 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -68,20 +68,21 @@ const char *si_get_family_name(const struct si_screen *sscreen)
case CHIP_MULLINS: return "AMD MULLINS";
case CHIP_TONGA: return "AMD TONGA";
case CHIP_ICELAND: return "AMD ICELAND";
case CHIP_CARRIZO: return "AMD CARRIZO";
case CHIP_FIJI: return "AMD FIJI";
case CHIP_POLARIS10: return "AMD POLARIS10";
case CHIP_POLARIS11: return "AMD POLARIS11";
case CHIP_POLARIS12: return "AMD POLARIS12";
case CHIP_STONEY: return "AMD STONEY";
case CHIP_VEGA10: return "AMD VEGA10";
+ case CHIP_VEGA12: return "AMD VEGA12";
case CHIP_RAVEN: return "AMD RAVEN";
default: return "AMD unknown";
}
}
static bool si_have_tgsi_compute(struct si_screen *sscreen)
{
/* Old kernels disallowed some register writes for SI
* that are used for indirect dispatches. */
return (sscreen->info.chip_class >= CIK ||
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 3d787d58cd1..fa9ee43389a 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -821,20 +821,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
sscreen->info.family <= CHIP_POLARIS12) ||
sscreen->info.family == CHIP_VEGA10 ||
sscreen->info.family == CHIP_RAVEN;
sscreen->has_ls_vgpr_init_bug = sscreen->info.family == CHIP_VEGA10 ||
sscreen->info.family == CHIP_RAVEN;
if (sscreen->debug_flags & DBG(DPBB)) {
sscreen->dpbb_allowed = true;
} else {
/* Only enable primitive binning on Raven by default. */
+ /* TODO: Investigate if binning is profitable on Vega12. */
sscreen->dpbb_allowed = sscreen->info.family == CHIP_RAVEN &&
!(sscreen->debug_flags & DBG(NO_DPBB));
}
if (sscreen->debug_flags & DBG(DFSM)) {
sscreen->dfsm_allowed = sscreen->dpbb_allowed;
} else {
sscreen->dfsm_allowed = sscreen->dpbb_allowed &&
!(sscreen->debug_flags & DBG(NO_DFSM));
}
@@ -848,20 +849,21 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws,
/* Some chips have RB+ registers, but don't support RB+. Those must
* always disable it.
*/
if (sscreen->info.family == CHIP_STONEY ||
sscreen->info.chip_class >= GFX9) {
sscreen->has_rbplus = true;
sscreen->rbplus_allowed =
!(sscreen->debug_flags & DBG(NO_RB_PLUS)) &&
(sscreen->info.family == CHIP_STONEY ||
+ sscreen->info.family == CHIP_VEGA12 ||
sscreen->info.family == CHIP_RAVEN);
}
sscreen->dcc_msaa_allowed =
!(sscreen->debug_flags & DBG(NO_DCC_MSAA)) &&
(sscreen->debug_flags & DBG(DCC_MSAA) ||
sscreen->info.chip_class == VI);
sscreen->cpdma_prefetch_writes_memory = sscreen->info.chip_class <= VI;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1bfb3c34aa7..b4165a4669b 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -1668,21 +1668,22 @@ static uint32_t si_translate_texformat(struct pipe_screen *screen,
case PIPE_FORMAT_RGTC2_UNORM:
case PIPE_FORMAT_LATC2_UNORM:
return V_008F14_IMG_DATA_FORMAT_BC5;
default:
goto out_unknown;
}
}
if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
(sscreen->info.family == CHIP_STONEY ||
- sscreen->info.chip_class >= GFX9)) {
+ sscreen->info.family == CHIP_VEGA10 ||
+ sscreen->info.family == CHIP_RAVEN)) {
switch (format) {
case PIPE_FORMAT_ETC1_RGB8:
case PIPE_FORMAT_ETC2_RGB8:
case PIPE_FORMAT_ETC2_SRGB8:
return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
case PIPE_FORMAT_ETC2_RGB8A1:
case PIPE_FORMAT_ETC2_SRGB8A1:
return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
case PIPE_FORMAT_ETC2_RGBA8:
case PIPE_FORMAT_ETC2_SRGBA8:
@@ -5038,20 +5039,21 @@ static void si_init_config(struct si_context *sctx)
}
si_pm4_add_bo(pm4, sctx->border_color_buffer, RADEON_USAGE_READ,
RADEON_PRIO_BORDER_COLORS);
if (sctx->b.chip_class >= GFX9) {
unsigned num_se = sscreen->info.max_se;
unsigned pc_lines = 0;
switch (sctx->b.family) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
pc_lines = 4096;
break;
case CHIP_RAVEN:
pc_lines = 1024;
break;
default:
assert(0);
}
si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1,
diff --git a/src/gallium/drivers/radeonsi/si_state_binning.c b/src/gallium/drivers/radeonsi/si_state_binning.c
index 686701d718f..0f50ea755cb 100644
--- a/src/gallium/drivers/radeonsi/si_state_binning.c
+++ b/src/gallium/drivers/radeonsi/si_state_binning.c
@@ -407,20 +407,21 @@ void si_emit_dpbb_state(struct si_context *sctx, struct r600_atom *state)
blend->blend_enable_4bit) != 0;
}
/* Tunable parameters. Also test with DFSM enabled/disabled. */
unsigned context_states_per_bin; /* allowed range: [0, 5] */
unsigned persistent_states_per_bin; /* allowed range: [0, 31] */
unsigned fpovs_per_batch; /* allowed range: [0, 255], 0 = unlimited */
switch (sctx->b.family) {
case CHIP_VEGA10:
+ case CHIP_VEGA12:
case CHIP_RAVEN:
/* Tuned for Raven. Vega might need different values. */
context_states_per_bin = 5;
persistent_states_per_bin = 31;
fpovs_per_batch = 63;
break;
default:
assert(0);
}
--
2.15.1
More information about the mesa-dev mailing list