Mesa (master): radeonsi: use a C++ template to decrease draw_vbo overhead by 13 %
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Dec 9 21:57:53 UTC 2020
Module: Mesa
Branch: master
Commit: 2b09bde1f5450152ce121a5f58943e01223ff783
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2b09bde1f5450152ce121a5f58943e01223ff783
Author: Marek Olšák <marek.olsak at amd.com>
Date: Wed Aug 26 00:16:23 2020 -0400
radeonsi: use a C++ template to decrease draw_vbo overhead by 13 %
With GALLIUM_THREAD=0 to disable draw merging.
Before:
1, DrawElements ( 1 VBO| 0 UBO| 0 ) w/ no state change, 8736
After:
1, DrawElements ( 1 VBO| 0 UBO| 0 ) w/ no state change, 10059
Reviewed-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7807>
---
src/amd/common/amd_family.h | 2 +
src/gallium/drivers/radeonsi/si_pipe.h | 38 +++
src/gallium/drivers/radeonsi/si_state_draw.cpp | 360 ++++++++++++++----------
src/gallium/drivers/radeonsi/si_state_shaders.c | 4 +
4 files changed, 263 insertions(+), 141 deletions(-)
diff --git a/src/amd/common/amd_family.h b/src/amd/common/amd_family.h
index cfb3f47f835..a6175e8b4fd 100644
--- a/src/amd/common/amd_family.h
+++ b/src/amd/common/amd_family.h
@@ -126,6 +126,8 @@ enum chip_class
GFX9,
GFX10,
GFX10_3,
+
+ NUM_GFX_VERSIONS,
};
enum ring_type
diff --git a/src/gallium/drivers/radeonsi/si_pipe.h b/src/gallium/drivers/radeonsi/si_pipe.h
index b0b7d5469fa..0f1c94ac7e6 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.h
+++ b/src/gallium/drivers/radeonsi/si_pipe.h
@@ -131,6 +131,26 @@ extern "C" {
#define SI_RESOURCE_FLAG_UNCACHED (PIPE_RESOURCE_FLAG_DRV_PRIV << 12)
#define SI_RESOURCE_FLAG_DRIVER_INTERNAL (PIPE_RESOURCE_FLAG_DRV_PRIV << 13)
+enum si_has_gs {
+ GS_OFF,
+ GS_ON,
+};
+
+enum si_has_tess {
+ TESS_OFF,
+ TESS_ON,
+};
+
+enum si_has_ngg {
+ NGG_OFF,
+ NGG_ON,
+};
+
+enum si_has_prim_discard_cs {
+ PRIM_DISCARD_CS_OFF,
+ PRIM_DISCARD_CS_ON,
+};
+
enum si_clear_code
{
DCC_CLEAR_COLOR_0000 = 0x00000000,
@@ -884,6 +904,12 @@ struct si_small_prim_cull_info {
float scale[2], translate[2];
};
+typedef void (*pipe_draw_vbo_func)(struct pipe_context *pipe,
+ const struct pipe_draw_info *info,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count *draws,
+ unsigned num_draws);
+
struct si_context {
struct pipe_context b; /* base class */
@@ -1268,6 +1294,8 @@ struct si_context {
* a context flush.
*/
struct hash_table *dirty_implicit_resources;
+
+ pipe_draw_vbo_func draw_vbo[NUM_GFX_VERSIONS - GFX6][2][2][2][2];
};
/* si_blit.c */
@@ -1912,6 +1940,16 @@ static inline unsigned si_get_shader_wave_size(struct si_shader *shader)
shader->key.opt.vs_as_prim_discard_cs);
}
+static inline void si_select_draw_vbo(struct si_context *sctx)
+{
+ sctx->b.draw_vbo = sctx->draw_vbo[sctx->chip_class - GFX6]
+ [!!sctx->tes_shader.cso]
+ [!!sctx->gs_shader.cso]
+ [sctx->ngg]
+ [si_compute_prim_discard_enabled(sctx)];
+ assert(sctx->b.draw_vbo);
+}
+
#define PRINT_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s - " fmt, __FILE__, __LINE__, __func__, ##args)
diff --git a/src/gallium/drivers/radeonsi/si_state_draw.cpp b/src/gallium/drivers/radeonsi/si_state_draw.cpp
index 2460a33dccc..e1ad9999674 100644
--- a/src/gallium/drivers/radeonsi/si_state_draw.cpp
+++ b/src/gallium/drivers/radeonsi/si_state_draw.cpp
@@ -521,7 +521,7 @@ static bool num_instanced_prims_less_than(const struct pipe_draw_info *info,
}
}
-ALWAYS_INLINE
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS> ALWAYS_INLINE
static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
@@ -533,9 +533,9 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
unsigned primgroup_size;
unsigned ia_multi_vgt_param;
- if (sctx->tes_shader.cso) {
+ if (HAS_TESS) {
primgroup_size = num_patches; /* must be a multiple of NUM_PATCHES */
- } else if (sctx->gs_shader.cso) {
+ } else if (HAS_GS) {
primgroup_size = 64; /* recommended with a GS */
} else {
primgroup_size = 128; /* recommended without a GS and tess */
@@ -552,9 +552,9 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
ia_multi_vgt_param =
sctx->ia_multi_vgt_param[key.index] | S_028AA8_PRIMGROUP_SIZE(primgroup_size - 1);
- if (sctx->gs_shader.cso) {
+ if (HAS_GS) {
/* GS requirement. */
- if (sctx->chip_class <= GFX8 &&
+ if (GFX_VERSION <= GFX8 &&
SI_GS_PER_ES / primgroup_size >= sctx->screen->gs_table_depth - 3)
ia_multi_vgt_param |= S_028AA8_PARTIAL_ES_WAVE_ON(1);
@@ -562,7 +562,8 @@ static unsigned si_get_ia_multi_vgt_param(struct si_context *sctx,
* The hw doc says all multi-SE chips are affected, but Vulkan
* only applies it to Hawaii. Do what Vulkan does.
*/
- if (sctx->family == CHIP_HAWAII && G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
+ if (GFX_VERSION == GFX7 &&
+ sctx->family == CHIP_HAWAII && G_028AA8_SWITCH_ON_EOI(ia_multi_vgt_param) &&
num_instanced_prims_less_than(info, indirect, prim, min_vertex_count, instance_count, 2))
sctx->flags |= SI_CONTEXT_VGT_FLUSH;
}
@@ -597,7 +598,7 @@ static unsigned si_conv_prim_to_gs_out(unsigned mode)
}
/* rast_prim is the primitive type after GS. */
-ALWAYS_INLINE
+template<si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE
static void si_emit_rasterizer_prim_state(struct si_context *sctx)
{
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
@@ -620,7 +621,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
}
unsigned gs_out_prim = si_conv_prim_to_gs_out(rast_prim);
- if (unlikely(gs_out_prim != sctx->last_gs_out_prim && (sctx->ngg || sctx->gs_shader.cso))) {
+ if (unlikely(gs_out_prim != sctx->last_gs_out_prim && (NGG || HAS_GS))) {
radeon_set_context_reg(cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, gs_out_prim);
sctx->last_gs_out_prim = gs_out_prim;
}
@@ -628,7 +629,7 @@ static void si_emit_rasterizer_prim_state(struct si_context *sctx)
if (initial_cdw != cs->current.cdw)
sctx->context_roll = true;
- if (sctx->ngg) {
+ if (NGG) {
struct si_shader *hw_vs = si_get_vs_state(sctx);
if (hw_vs->uses_vs_state_provoking_vertex) {
@@ -696,7 +697,7 @@ static bool si_prim_restart_index_changed(struct si_context *sctx, bool primitiv
sctx->last_restart_index == SI_RESTART_INDEX_UNKNOWN);
}
-ALWAYS_INLINE
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS> ALWAYS_INLINE
static void si_emit_ia_multi_vgt_param(struct si_context *sctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
enum pipe_prim_type prim, unsigned num_patches,
@@ -707,15 +708,16 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx, const struct pip
unsigned ia_multi_vgt_param;
ia_multi_vgt_param =
- si_get_ia_multi_vgt_param(sctx, info, indirect, prim, num_patches, instance_count,
- primitive_restart, min_vertex_count);
+ si_get_ia_multi_vgt_param<GFX_VERSION, HAS_TESS, HAS_GS>
+ (sctx, info, indirect, prim, num_patches, instance_count, primitive_restart,
+ min_vertex_count);
/* Draw state. */
if (ia_multi_vgt_param != sctx->last_multi_vgt_param) {
- if (sctx->chip_class == GFX9)
+ if (GFX_VERSION == GFX9)
radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030960_IA_MULTI_VGT_PARAM, 4,
ia_multi_vgt_param);
- else if (sctx->chip_class >= GFX7)
+ else if (GFX_VERSION >= GFX7)
radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
else
radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
@@ -727,14 +729,14 @@ static void si_emit_ia_multi_vgt_param(struct si_context *sctx, const struct pip
/* GFX10 removed IA_MULTI_VGT_PARAM in exchange for GE_CNTL.
* We overload last_multi_vgt_param.
*/
-ALWAYS_INLINE
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE
static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
{
union si_vgt_param_key key = sctx->ia_multi_vgt_param_key;
unsigned ge_cntl;
- if (sctx->ngg) {
- if (sctx->tes_shader.cso) {
+ if (NGG) {
+ if (HAS_TESS) {
ge_cntl = S_03096C_PRIM_GRP_SIZE(num_patches) |
S_03096C_VERT_GRP_SIZE(0) |
S_03096C_BREAK_WAVE_AT_EOI(key.u.tess_uses_prim_id);
@@ -745,10 +747,10 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
unsigned primgroup_size;
unsigned vertgroup_size;
- if (sctx->tes_shader.cso) {
+ if (HAS_TESS) {
primgroup_size = num_patches; /* must be a multiple of NUM_PATCHES */
vertgroup_size = 0;
- } else if (sctx->gs_shader.cso) {
+ } else if (HAS_GS) {
unsigned vgt_gs_onchip_cntl = sctx->gs_shader.current->ctx_reg.gs.vgt_gs_onchip_cntl;
primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl);
vertgroup_size = G_028A44_ES_VERTS_PER_SUBGRP(vgt_gs_onchip_cntl);
@@ -769,7 +771,7 @@ static void gfx10_emit_ge_cntl(struct si_context *sctx, unsigned num_patches)
}
}
-ALWAYS_INLINE
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG> ALWAYS_INLINE
static void si_emit_draw_registers(struct si_context *sctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
enum pipe_prim_type prim, unsigned num_patches,
@@ -779,16 +781,17 @@ static void si_emit_draw_registers(struct si_context *sctx, const struct pipe_dr
struct radeon_cmdbuf *cs = &sctx->gfx_cs;
unsigned vgt_prim = si_conv_pipe_prim(prim);
- if (sctx->chip_class >= GFX10)
- gfx10_emit_ge_cntl(sctx, num_patches);
+ if (GFX_VERSION >= GFX10)
+ gfx10_emit_ge_cntl<GFX_VERSION, HAS_TESS, HAS_GS, NGG>(sctx, num_patches);
else
- si_emit_ia_multi_vgt_param(sctx, info, indirect, prim, num_patches, instance_count,
- primitive_restart, min_vertex_count);
+ si_emit_ia_multi_vgt_param<GFX_VERSION, HAS_TESS, HAS_GS>
+ (sctx, info, indirect, prim, num_patches, instance_count, primitive_restart,
+ min_vertex_count);
if (vgt_prim != sctx->last_prim) {
- if (sctx->chip_class >= GFX10)
+ if (GFX_VERSION >= GFX10)
radeon_set_uconfig_reg(cs, R_030908_VGT_PRIMITIVE_TYPE, vgt_prim);
- else if (sctx->chip_class >= GFX7)
+ else if (GFX_VERSION >= GFX7)
radeon_set_uconfig_reg_idx(cs, sctx->screen, R_030908_VGT_PRIMITIVE_TYPE, 1, vgt_prim);
else
radeon_set_config_reg(cs, R_008958_VGT_PRIMITIVE_TYPE, vgt_prim);
@@ -798,7 +801,7 @@ static void si_emit_draw_registers(struct si_context *sctx, const struct pipe_dr
/* Primitive restart. */
if (primitive_restart != sctx->last_primitive_restart_en) {
- if (sctx->chip_class >= GFX9)
+ if (GFX_VERSION >= GFX9)
radeon_set_uconfig_reg(cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, primitive_restart);
else
radeon_set_context_reg(cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, primitive_restart);
@@ -812,6 +815,7 @@ static void si_emit_draw_registers(struct si_context *sctx, const struct pipe_dr
}
}
+template <chip_class GFX_VERSION, si_has_ngg NGG, si_has_prim_discard_cs ALLOW_PRIM_DISCARD_CS>
static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
const struct pipe_draw_start_count *draws,
@@ -852,19 +856,19 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
case 2:
index_type =
V_028A7C_VGT_INDEX_16 |
- (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_16_BIT : 0);
+ (SI_BIG_ENDIAN && GFX_VERSION <= GFX7 ? V_028A7C_VGT_DMA_SWAP_16_BIT : 0);
break;
case 4:
index_type =
V_028A7C_VGT_INDEX_32 |
- (SI_BIG_ENDIAN && sctx->chip_class <= GFX7 ? V_028A7C_VGT_DMA_SWAP_32_BIT : 0);
+ (SI_BIG_ENDIAN && GFX_VERSION <= GFX7 ? V_028A7C_VGT_DMA_SWAP_32_BIT : 0);
break;
default:
assert(!"unreachable");
return;
}
- if (sctx->chip_class >= GFX9) {
+ if (GFX_VERSION >= GFX9) {
radeon_set_uconfig_reg_idx(cs, sctx->screen, R_03090C_VGT_INDEX_TYPE, 2, index_type);
} else {
radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
@@ -874,7 +878,8 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
sctx->last_index_size = index_size;
}
- if (original_index_size) {
+ /* If !ALLOW_PRIM_DISCARD_CS, index_size == original_index_size. */
+ if (!ALLOW_PRIM_DISCARD_CS || original_index_size) {
index_max_size = (indexbuf->width0 - index_offset) >> util_logbase2(original_index_size);
/* Skip draw calls with 0-sized index buffers.
* They cause a hang on some chips, like Navi10-14.
@@ -891,7 +896,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
/* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
* so the state must be re-emitted before the next indexed draw.
*/
- if (sctx->chip_class >= GFX7)
+ if (GFX_VERSION >= GFX7)
sctx->last_index_size = -1;
}
@@ -1018,7 +1023,7 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
set_draw_id &= info->increment_draw_id;
if (index_size) {
- if (dispatch_prim_discard_cs) {
+ if (ALLOW_PRIM_DISCARD_CS && dispatch_prim_discard_cs) {
for (unsigned i = 0; i < num_draws; i++) {
uint64_t va = index_va + draws[0].start * original_index_size;
@@ -1049,13 +1054,13 @@ static void si_emit_draw_packets(struct si_context *sctx, const struct pipe_draw
* can be changed between draws and GS fast launch must be disabled.
* NOT_EOP doesn't work on gfx9 and older.
*/
- S_0287F0_NOT_EOP(sctx->chip_class >= GFX10 &&
+ S_0287F0_NOT_EOP(GFX_VERSION >= GFX10 &&
!set_draw_id &&
i < num_draws - 1));
}
} else {
/* Set the index buffer for fast launch. The VS prolog will load the indices. */
- if (sctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0)) {
+ if (NGG && sctx->ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0)) {
index_max_size = (indexbuf->width0 - index_offset) >> util_logbase2(original_index_size);
radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, si_resource(indexbuf),
@@ -1587,7 +1592,7 @@ void si_emit_cache_flush(struct si_context *sctx)
sctx->flags = 0;
}
-ALWAYS_INLINE
+template <chip_class GFX_VERSION> ALWAYS_INLINE
static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
{
unsigned i, count = sctx->num_vertex_elements;
@@ -1651,7 +1656,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
uint64_t va = buf->gpu_address + offset;
int64_t num_records = (int64_t)buf->b.b.width0 - offset;
- if (sctx->chip_class != GFX8 && vb->stride) {
+ if (GFX_VERSION != GFX8 && vb->stride) {
/* Round up by rounding down and adding 1 */
num_records = (num_records - velems->format_size[i]) / vb->stride + 1;
}
@@ -1663,7 +1668,7 @@ static bool si_upload_vertex_buffer_descriptors(struct si_context *sctx)
* - 1: index >= NUM_RECORDS (Structured)
* - 3: offset >= NUM_RECORDS (Raw)
*/
- if (sctx->chip_class >= GFX10)
+ if (GFX_VERSION >= GFX10)
rsrc_word3 |= S_008F0C_OOB_SELECT(vb->stride ? V_008F0C_OOB_SELECT_STRUCTURED
: V_008F0C_OOB_SELECT_RAW);
@@ -1760,6 +1765,7 @@ static void si_get_draw_start_count(struct si_context *sctx, const struct pipe_d
}
}
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG>
static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
enum pipe_prim_type prim, unsigned instance_count,
@@ -1768,8 +1774,8 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
{
unsigned num_patches = 0;
- si_emit_rasterizer_prim_state(sctx);
- if (sctx->tes_shader.cso)
+ si_emit_rasterizer_prim_state<HAS_GS, NGG>(sctx);
+ if (HAS_TESS)
si_emit_derived_tess_state(sctx, info, &num_patches);
/* Emit state atoms. */
@@ -1795,8 +1801,9 @@ static void si_emit_all_states(struct si_context *sctx, const struct pipe_draw_i
/* Emit draw states. */
si_emit_vs_state(sctx, info);
- si_emit_draw_registers(sctx, info, indirect, prim, num_patches, instance_count,
- primitive_restart, min_vertex_count);
+ si_emit_draw_registers<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
+ (sctx, info, indirect, prim, num_patches, instance_count, primitive_restart,
+ min_vertex_count);
}
static bool si_all_vs_resources_read_only(struct si_context *sctx, struct pipe_resource *indexbuf)
@@ -1891,6 +1898,8 @@ static ALWAYS_INLINE bool pd_msg(const char *s)
pipe_resource_reference(&indexbuf, NULL); \
} while (0)
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS, si_has_ngg NGG,
+ si_has_prim_discard_cs ALLOW_PRIM_DISCARD_CS>
static void si_draw_vbo(struct pipe_context *ctx,
const struct pipe_draw_info *info,
const struct pipe_draw_indirect_info *indirect,
@@ -1920,7 +1929,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
struct si_shader_selector *vs = sctx->vs_shader.cso;
if (unlikely(!vs || sctx->num_vertex_elements < vs->num_vs_inputs ||
(!sctx->ps_shader.cso && !rs->rasterizer_discard) ||
- (!!sctx->tes_shader.cso != (prim == PIPE_PRIM_PATCHES)))) {
+ (HAS_TESS != (prim == PIPE_PRIM_PATCHES)))) {
assert(0);
return;
}
@@ -1949,10 +1958,10 @@ static void si_draw_vbo(struct pipe_context *ctx,
* This must be done after si_decompress_textures, which can call
* draw_vbo recursively, and before si_update_shaders, which uses
* current_rast_prim for this draw_vbo call. */
- if (sctx->gs_shader.cso) {
+ if (HAS_GS) {
/* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
rast_prim = sctx->gs_shader.cso->rast_prim;
- } else if (sctx->tes_shader.cso) {
+ } else if (HAS_TESS) {
/* Only possibilities: POINTS, LINE_STRIP, TRIANGLES */
rast_prim = sctx->tes_shader.cso->rast_prim;
} else if (util_rast_prim_is_triangles(prim)) {
@@ -1971,12 +1980,12 @@ static void si_draw_vbo(struct pipe_context *ctx,
sctx->do_update_shaders = true;
}
- if (sctx->tes_shader.cso) {
+ if (HAS_TESS) {
struct si_shader_selector *tcs = sctx->tcs_shader.cso;
/* The rarely occuring tcs == NULL case is not optimized. */
bool same_patch_vertices =
- sctx->chip_class >= GFX9 &&
+ GFX_VERSION >= GFX9 &&
tcs && info->vertices_per_patch == tcs->info.base.tess.tcs_vertices_out;
if (sctx->same_patch_vertices != same_patch_vertices) {
@@ -1984,7 +1993,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
sctx->do_update_shaders = true;
}
- if (sctx->screen->info.has_ls_vgpr_init_bug) {
+ if (GFX_VERSION == GFX9 && sctx->screen->info.has_ls_vgpr_init_bug) {
/* Determine whether the LS VGPR fix should be applied.
*
* It is only required when num input CPs > num output CPs,
@@ -2002,7 +2011,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
}
}
- if (sctx->chip_class <= GFX9 && sctx->gs_shader.cso) {
+ if (GFX_VERSION <= GFX9 && HAS_GS) {
/* Determine whether the GS triangle strip adjacency fix should
* be applied. Rotate every other triangle if
* - triangle strips with adjacency are fed to the GS and
@@ -2010,7 +2019,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
* when the restart occurs after an odd number of triangles).
*/
bool gs_tri_strip_adj_fix =
- !sctx->tes_shader.cso && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY && !primitive_restart;
+ !HAS_TESS && prim == PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY && !primitive_restart;
if (gs_tri_strip_adj_fix != sctx->gs_tri_strip_adj_fix) {
sctx->gs_tri_strip_adj_fix = gs_tri_strip_adj_fix;
@@ -2021,7 +2030,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
if (index_size) {
/* Translate or upload, if needed. */
/* 8-bit indices are supported on GFX8. */
- if (sctx->chip_class <= GFX7 && index_size == 1) {
+ if (GFX_VERSION <= GFX7 && index_size == 1) {
unsigned start, count, start_offset, size, offset;
void *ptr;
@@ -2056,7 +2065,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
/* info->start will be added by the drawing code */
index_offset -= start_offset;
- } else if (sctx->chip_class <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) {
+ } else if (GFX_VERSION <= GFX7 && si_resource(indexbuf)->TC_L2_dirty) {
/* GFX8 reads index buffers through TC L2, so it doesn't
* need this. */
sctx->flags |= SI_CONTEXT_WB_L2;
@@ -2077,7 +2086,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
si_context_add_resource_size(sctx, indirect->buffer);
/* Indirect buffers use TC L2 on GFX9, but not older hw. */
- if (sctx->chip_class <= GFX8) {
+ if (GFX_VERSION <= GFX8) {
if (indirect->buffer && si_resource(indirect->buffer)->TC_L2_dirty) {
sctx->flags |= SI_CONTEXT_WB_L2;
si_resource(indirect->buffer)->TC_L2_dirty = false;
@@ -2101,7 +2110,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
}
/* Determine if we can use the primitive discard compute shader. */
- if (si_compute_prim_discard_enabled(sctx) &&
+ if (ALLOW_PRIM_DISCARD_CS &&
(avg_direct_count > sctx->prim_discard_vertex_count_threshold
? (sctx->compute_num_verts_rejected += total_direct_count, true)
: /* Add, then return true. */
@@ -2130,8 +2139,8 @@ static void si_draw_vbo(struct pipe_context *ctx,
(!sctx->num_pipeline_stat_queries && !sctx->streamout.prims_gen_query_enabled) ||
pd_msg("pipestat or primgen query")) &&
(!sctx->vertex_elements->instance_divisor_is_fetched || pd_msg("loads instance divisors")) &&
- (!sctx->tes_shader.cso || pd_msg("uses tess")) &&
- (!sctx->gs_shader.cso || pd_msg("uses GS")) &&
+ (!HAS_TESS || pd_msg("uses tess")) &&
+ (!HAS_GS || pd_msg("uses GS")) &&
(!sctx->ps_shader.cso->info.uses_primid || pd_msg("PS uses PrimID")) &&
!rs->polygon_mode_enabled &&
#if SI_PRIM_DISCARD_DEBUG /* same as cso->prim_discard_cs_allowed */
@@ -2176,64 +2185,66 @@ static void si_draw_vbo(struct pipe_context *ctx,
}
}
- if (prim_discard_cs_instancing != sctx->prim_discard_cs_instancing) {
+ if (ALLOW_PRIM_DISCARD_CS &&
+ prim_discard_cs_instancing != sctx->prim_discard_cs_instancing) {
sctx->prim_discard_cs_instancing = prim_discard_cs_instancing;
sctx->do_update_shaders = true;
}
/* Update NGG culling settings. */
uint8_t old_ngg_culling = sctx->ngg_culling;
- struct si_shader_selector *hw_vs;
- if (sctx->ngg && !dispatch_prim_discard_cs && rast_prim == PIPE_PRIM_TRIANGLES &&
- (hw_vs = si_get_vs(sctx)->cso) &&
- (avg_direct_count > hw_vs->ngg_cull_vert_threshold ||
- (!index_size &&
- avg_direct_count > hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold &&
- prim & ((1 << PIPE_PRIM_TRIANGLES) |
- (1 << PIPE_PRIM_TRIANGLE_STRIP))))) {
- uint8_t ngg_culling = 0;
-
- if (rs->rasterizer_discard) {
- ngg_culling |= SI_NGG_CULL_FRONT_FACE | SI_NGG_CULL_BACK_FACE;
- } else {
- /* Polygon mode can't use view and small primitive culling,
- * because it draws points or lines where the culling depends
- * on the point or line width.
- */
- if (!rs->polygon_mode_enabled)
- ngg_culling |= SI_NGG_CULL_VIEW_SMALLPRIMS;
-
- if (sctx->viewports.y_inverted ? rs->cull_back : rs->cull_front)
- ngg_culling |= SI_NGG_CULL_FRONT_FACE;
- if (sctx->viewports.y_inverted ? rs->cull_front : rs->cull_back)
- ngg_culling |= SI_NGG_CULL_BACK_FACE;
- }
+ if (GFX_VERSION >= GFX10) {
+ struct si_shader_selector *hw_vs;
+ if (NGG && !dispatch_prim_discard_cs && rast_prim == PIPE_PRIM_TRIANGLES &&
+ (hw_vs = si_get_vs(sctx)->cso) &&
+ (avg_direct_count > hw_vs->ngg_cull_vert_threshold ||
+ (!index_size &&
+ avg_direct_count > hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold &&
+ prim & ((1 << PIPE_PRIM_TRIANGLES) |
+ (1 << PIPE_PRIM_TRIANGLE_STRIP))))) {
+ uint8_t ngg_culling = 0;
+
+ if (rs->rasterizer_discard) {
+ ngg_culling |= SI_NGG_CULL_FRONT_FACE | SI_NGG_CULL_BACK_FACE;
+ } else {
+ /* Polygon mode can't use view and small primitive culling,
+ * because it draws points or lines where the culling depends
+ * on the point or line width.
+ */
+ if (!rs->polygon_mode_enabled)
+ ngg_culling |= SI_NGG_CULL_VIEW_SMALLPRIMS;
+
+ if (sctx->viewports.y_inverted ? rs->cull_back : rs->cull_front)
+ ngg_culling |= SI_NGG_CULL_FRONT_FACE;
+ if (sctx->viewports.y_inverted ? rs->cull_front : rs->cull_back)
+ ngg_culling |= SI_NGG_CULL_BACK_FACE;
+ }
- /* Use NGG fast launch for certain primitive types.
- * A draw must have at least 1 full primitive.
- */
- if (ngg_culling &&
- hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold < UINT32_MAX &&
- min_direct_count >= 3 && !sctx->tes_shader.cso &&
- !sctx->gs_shader.cso) {
- if (prim == PIPE_PRIM_TRIANGLES && !index_size) {
- ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
- } else if (prim == PIPE_PRIM_TRIANGLE_STRIP && !primitive_restart) {
- ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP |
- SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(MIN2(index_size, 3));
- /* The index buffer will be emulated. */
- index_size = 0;
+ /* Use NGG fast launch for certain primitive types.
+ * A draw must have at least 1 full primitive.
+ */
+ if (ngg_culling &&
+ hw_vs->ngg_cull_nonindexed_fast_launch_vert_threshold < UINT32_MAX &&
+ min_direct_count >= 3 && !HAS_TESS && !HAS_GS) {
+ if (prim == PIPE_PRIM_TRIANGLES && !index_size) {
+ ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_LIST;
+ } else if (prim == PIPE_PRIM_TRIANGLE_STRIP && !primitive_restart) {
+ ngg_culling |= SI_NGG_CULL_GS_FAST_LAUNCH_TRI_STRIP |
+ SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(MIN2(index_size, 3));
+ /* The index buffer will be emulated. */
+ index_size = 0;
+ }
}
- }
- if (ngg_culling != old_ngg_culling) {
- /* If shader compilation is not ready, this setting will be rejected. */
- sctx->ngg_culling = ngg_culling;
+ if (ngg_culling != old_ngg_culling) {
+ /* If shader compilation is not ready, this setting will be rejected. */
+ sctx->ngg_culling = ngg_culling;
+ sctx->do_update_shaders = true;
+ }
+ } else if (old_ngg_culling) {
+ sctx->ngg_culling = 0;
sctx->do_update_shaders = true;
}
- } else if (old_ngg_culling) {
- sctx->ngg_culling = false;
- sctx->do_update_shaders = true;
}
if (sctx->shader_has_inlinable_uniforms_mask &
@@ -2255,29 +2266,31 @@ static void si_draw_vbo(struct pipe_context *ctx,
*
* This is the setting that is used by the draw.
*/
- uint8_t ngg_culling = si_get_vs(sctx)->current->key.opt.ngg_culling;
- if (sctx->chip_class == GFX10 &&
- !(old_ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) &&
- ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
- sctx->flags |= SI_CONTEXT_VGT_FLUSH;
+ if (GFX_VERSION >= GFX10) {
+ uint8_t ngg_culling = si_get_vs(sctx)->current->key.opt.ngg_culling;
+ if (GFX_VERSION == GFX10 &&
+ !(old_ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL) &&
+ ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_ALL)
+ sctx->flags |= SI_CONTEXT_VGT_FLUSH;
+
+ if (old_ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0) &&
+ !(ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0))) {
+ /* Need to re-set these, because we have bound an index buffer there. */
+ sctx->shader_pointers_dirty |=
+ (1u << si_const_and_shader_buffer_descriptors_idx(PIPE_SHADER_GEOMETRY)) |
+ (1u << si_sampler_and_image_descriptors_idx(PIPE_SHADER_GEOMETRY));
+ si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
+ }
- if (old_ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0) &&
- !(ngg_culling & SI_NGG_CULL_GS_FAST_LAUNCH_INDEX_SIZE_PACKED(~0))) {
- /* Need to re-set these, because we have bound an index buffer there. */
- sctx->shader_pointers_dirty |=
- (1u << si_const_and_shader_buffer_descriptors_idx(PIPE_SHADER_GEOMETRY)) |
- (1u << si_sampler_and_image_descriptors_idx(PIPE_SHADER_GEOMETRY));
- si_mark_atom_dirty(sctx, &sctx->atoms.s.shader_pointers);
+ /* Set this to the correct value determined by si_update_shaders. */
+ sctx->ngg_culling = ngg_culling;
}
-
- /* Set this to the correct value determined by si_update_shaders. */
- sctx->ngg_culling = ngg_culling;
}
si_need_gfx_cs_space(sctx, num_draws);
/* If we're using a secure context, determine if cs must be secure or not */
- if (unlikely(radeon_uses_secure_bos(sctx->ws))) {
+ if (GFX_VERSION >= GFX9 && unlikely(radeon_uses_secure_bos(sctx->ws))) {
bool secure = si_gfx_resources_check_encrypted(sctx);
if (secure != sctx->ws->cs_is_secure(&sctx->gfx_cs)) {
si_flush_gfx_cs(sctx, RADEON_FLUSH_ASYNC_START_NEXT_GFX_IB_NOW |
@@ -2295,7 +2308,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
if (unlikely(!si_upload_graphics_shader_descriptors(sctx) ||
(sctx->vertex_buffers_dirty &&
sctx->num_vertex_elements &&
- !si_upload_vertex_buffer_descriptors(sctx)))) {
+ !si_upload_vertex_buffer_descriptors<GFX_VERSION>(sctx)))) {
DRAW_CLEANUP;
return;
}
@@ -2307,7 +2320,7 @@ static void si_draw_vbo(struct pipe_context *ctx,
unsigned masked_atoms = 0;
bool gfx9_scissor_bug = false;
- if (sctx->screen->info.has_gfx9_scissor_bug) {
+ if (GFX_VERSION == GFX9 && sctx->screen->info.has_gfx9_scissor_bug) {
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
gfx9_scissor_bug = true;
@@ -2329,8 +2342,9 @@ static void si_draw_vbo(struct pipe_context *ctx,
masked_atoms |= si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
/* Emit all states except possibly render condition. */
- si_emit_all_states(sctx, info, indirect, prim, instance_count, min_direct_count,
- primitive_restart, masked_atoms);
+ si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
+ (sctx, info, indirect, prim, instance_count, min_direct_count,
+ primitive_restart, masked_atoms);
sctx->emit_cache_flush(sctx);
/* <-- CUs are idle here. */
@@ -2339,22 +2353,23 @@ static void si_draw_vbo(struct pipe_context *ctx,
sctx->dirty_atoms &= ~si_get_atom_bit(sctx, &sctx->atoms.s.render_cond);
}
- if (gfx9_scissor_bug &&
+ if (GFX_VERSION == GFX9 && gfx9_scissor_bug &&
(sctx->context_roll || si_is_atom_dirty(sctx, &sctx->atoms.s.scissors))) {
sctx->atoms.s.scissors.emit(sctx);
sctx->dirty_atoms &= ~si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
}
assert(sctx->dirty_atoms == 0);
- si_emit_draw_packets(sctx, info, indirect, draws, num_draws,
- indexbuf, index_size, index_offset, instance_count,
- dispatch_prim_discard_cs, original_index_size);
+ si_emit_draw_packets<GFX_VERSION, NGG, ALLOW_PRIM_DISCARD_CS>
+ (sctx, info, indirect, draws, num_draws, indexbuf, index_size,
+ index_offset, instance_count, dispatch_prim_discard_cs,
+ original_index_size);
/* <-- CUs are busy here. */
/* Start prefetches after the draw has been started. Both will run
* in parallel, but starting the draw first is more important.
*/
- if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
+ if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, false);
} else {
/* If we don't wait for idle, start prefetches first, then set
@@ -2364,31 +2379,36 @@ static void si_draw_vbo(struct pipe_context *ctx,
sctx->emit_cache_flush(sctx);
/* Only prefetch the API VS and VBO descriptors. */
- if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
+ if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, true);
- si_emit_all_states(sctx, info, indirect, prim, instance_count, min_direct_count,
- primitive_restart, masked_atoms);
+ si_emit_all_states<GFX_VERSION, HAS_TESS, HAS_GS, NGG>
+ (sctx, info, indirect, prim, instance_count, min_direct_count,
+ primitive_restart, masked_atoms);
- if (gfx9_scissor_bug &&
+ if (GFX_VERSION == GFX9 && gfx9_scissor_bug &&
(sctx->context_roll || si_is_atom_dirty(sctx, &sctx->atoms.s.scissors))) {
sctx->atoms.s.scissors.emit(sctx);
sctx->dirty_atoms &= ~si_get_atom_bit(sctx, &sctx->atoms.s.scissors);
}
assert(sctx->dirty_atoms == 0);
- si_emit_draw_packets(sctx, info, indirect, draws, num_draws,
- indexbuf, index_size, index_offset, instance_count,
- dispatch_prim_discard_cs, original_index_size);
+ si_emit_draw_packets<GFX_VERSION, NGG, ALLOW_PRIM_DISCARD_CS>
+ (sctx, info, indirect, draws, num_draws, indexbuf, index_size,
+ index_offset, instance_count,
+ dispatch_prim_discard_cs, original_index_size);
/* Prefetch the remaining shaders after the draw has been
* started. */
- if (sctx->chip_class >= GFX7 && sctx->prefetch_L2_mask)
+ if (GFX_VERSION >= GFX7 && sctx->prefetch_L2_mask)
cik_emit_prefetch_L2(sctx, false);
}
- /* Clear the context roll flag after the draw call. */
- sctx->context_roll = false;
+ /* Clear the context roll flag after the draw call.
+ * Only used by the gfx9 scissor bug.
+ */
+ if (GFX_VERSION == GFX9)
+ sctx->context_roll = false;
if (unlikely(sctx->current_saved_cs)) {
si_trace_emit(sctx);
@@ -2397,7 +2417,8 @@ static void si_draw_vbo(struct pipe_context *ctx,
/* Workaround for a VGT hang when streamout is enabled.
* It must be done after drawing. */
- if ((sctx->family == CHIP_HAWAII || sctx->family == CHIP_TONGA || sctx->family == CHIP_FIJI) &&
+ if ((GFX_VERSION == GFX7 || GFX_VERSION == GFX8) &&
+ (sctx->family == CHIP_HAWAII || sctx->family == CHIP_TONGA || sctx->family == CHIP_FIJI) &&
si_get_strmout_en(sctx)) {
sctx->flags |= SI_CONTEXT_VGT_STREAMOUT_SYNC;
}
@@ -2457,7 +2478,7 @@ static void si_draw_rectangle(struct blitter_context *blitter, void *vertex_elem
sctx->vertex_buffer_pointer_dirty = false;
sctx->vertex_buffer_user_sgprs_dirty = false;
- si_draw_vbo(pipe, &info, NULL, &draw, 1);
+ pipe->draw_vbo(pipe, &info, NULL, &draw, 1);
}
extern "C"
@@ -2475,10 +2496,67 @@ void si_trace_emit(struct si_context *sctx)
u_log_flush(sctx->log);
}
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS,
+ si_has_ngg NGG, si_has_prim_discard_cs ALLOW_PRIM_DISCARD_CS>
+static void si_init_draw_vbo(struct si_context *sctx)
+{
+ /* Prim discard CS is only useful on gfx7+ because gfx6 doesn't have async compute. */
+ if (ALLOW_PRIM_DISCARD_CS && GFX_VERSION < GFX7)
+ return;
+
+ if (NGG && GFX_VERSION < GFX10)
+ return;
+
+ sctx->draw_vbo[GFX_VERSION - GFX6][HAS_TESS][HAS_GS][NGG][ALLOW_PRIM_DISCARD_CS] =
+ si_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG, ALLOW_PRIM_DISCARD_CS>;
+}
+
+template <chip_class GFX_VERSION, si_has_tess HAS_TESS, si_has_gs HAS_GS>
+static void si_init_draw_vbo_all_internal_options(struct si_context *sctx)
+{
+ si_init_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG_OFF, PRIM_DISCARD_CS_OFF>(sctx);
+ si_init_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG_OFF, PRIM_DISCARD_CS_ON>(sctx);
+ si_init_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG_ON, PRIM_DISCARD_CS_OFF>(sctx);
+ si_init_draw_vbo<GFX_VERSION, HAS_TESS, HAS_GS, NGG_ON, PRIM_DISCARD_CS_ON>(sctx);
+}
+
+template <chip_class GFX_VERSION>
+static void si_init_draw_vbo_all_pipeline_options(struct si_context *sctx)
+{
+ si_init_draw_vbo_all_internal_options<GFX_VERSION, TESS_OFF, GS_OFF>(sctx);
+ si_init_draw_vbo_all_internal_options<GFX_VERSION, TESS_OFF, GS_ON>(sctx);
+ si_init_draw_vbo_all_internal_options<GFX_VERSION, TESS_ON, GS_OFF>(sctx);
+ si_init_draw_vbo_all_internal_options<GFX_VERSION, TESS_ON, GS_ON>(sctx);
+}
+
+static void si_init_draw_vbo_all_families(struct si_context *sctx)
+{
+ si_init_draw_vbo_all_pipeline_options<GFX6>(sctx);
+ si_init_draw_vbo_all_pipeline_options<GFX7>(sctx);
+ si_init_draw_vbo_all_pipeline_options<GFX8>(sctx);
+ si_init_draw_vbo_all_pipeline_options<GFX9>(sctx);
+ si_init_draw_vbo_all_pipeline_options<GFX10>(sctx);
+ si_init_draw_vbo_all_pipeline_options<GFX10_3>(sctx);
+}
+
+static void si_invalid_draw_vbo(struct pipe_context *pipe,
+ const struct pipe_draw_info *info,
+ const struct pipe_draw_indirect_info *indirect,
+ const struct pipe_draw_start_count *draws,
+ unsigned num_draws)
+{
+ unreachable("vertex shader not bound");
+}
+
extern "C"
void si_init_draw_functions(struct si_context *sctx)
{
- sctx->b.draw_vbo = si_draw_vbo;
+ si_init_draw_vbo_all_families(sctx);
+
+ /* Bind a fake draw_vbo, so that draw_vbo isn't NULL, which would skip
+ * initialization of callbacks in upper layers (such as u_threaded_context).
+ */
+ sctx->b.draw_vbo = si_invalid_draw_vbo;
sctx->blitter->draw_rectangle = si_draw_rectangle;
si_init_ia_multi_vgt_param_table(sctx);
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 73fcf719671..d9a7f38c38f 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -3011,6 +3011,7 @@ static void si_bind_vs_shader(struct pipe_context *ctx, void *state)
si_shader_change_notify(sctx);
si_update_common_shader_state(sctx, sel, PIPE_SHADER_VERTEX);
+ si_select_draw_vbo(sctx);
si_update_vs_viewport_state(sctx);
si_update_streamout_state(sctx);
si_update_clip_regs(sctx, old_hw_vs, old_hw_vs_variant, si_get_vs(sctx)->cso,
@@ -3059,6 +3060,7 @@ bool si_update_ngg(struct si_context *sctx)
sctx->ngg = new_ngg;
sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
+ si_select_draw_vbo(sctx);
return true;
}
return false;
@@ -3081,6 +3083,7 @@ static void si_bind_gs_shader(struct pipe_context *ctx, void *state)
sctx->ia_multi_vgt_param_key.u.uses_gs = sel != NULL;
si_update_common_shader_state(sctx, sel, PIPE_SHADER_GEOMETRY);
+ si_select_draw_vbo(sctx);
sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
ngg_changed = si_update_ngg(sctx);
@@ -3132,6 +3135,7 @@ static void si_bind_tes_shader(struct pipe_context *ctx, void *state)
si_update_tess_uses_prim_id(sctx);
si_update_common_shader_state(sctx, sel, PIPE_SHADER_TESS_EVAL);
+ si_select_draw_vbo(sctx);
sctx->last_gs_out_prim = -1; /* reset this so that it gets updated */
bool ngg_changed = si_update_ngg(sctx);
More information about the mesa-commit mailing list