[Mesa-dev] [PATCH 4/4] radeonsi: implement GL_KHR_blend_equation_advanced without MSAA
Marek Olšák
maraeo at gmail.com
Tue Jan 30 15:48:42 UTC 2018
From: Marek Olšák <marek.olsak at amd.com>
I'm not fully convinced that the hw can do it with MSAA.
Maybe without CMASK, FMASK, and with sample shading, i.e. the slowest
possible way.
---
docs/features.txt | 2 +-
docs/relnotes/18.1.0.html | 2 +-
src/gallium/drivers/radeonsi/si_descriptors.c | 61 +++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_get.c | 2 +-
src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 45 +++++++++++++++++
src/gallium/drivers/radeonsi/si_state.c | 1 +
src/gallium/drivers/radeonsi/si_state.h | 5 ++
src/gallium/drivers/radeonsi/si_state_shaders.c | 1 +
8 files changed, 116 insertions(+), 3 deletions(-)
diff --git a/docs/features.txt b/docs/features.txt
index 2e110d9..6226629 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -262,21 +262,21 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, r600, radeonsi
Additional functionality not covered above:
glMemoryBarrierByRegion DONE
glGetTexLevelParameter[fi]v - needs updates DONE
glGetBooleani_v - restrict to GLES enums
gl_HelperInvocation support DONE (i965, r600)
GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
GL_EXT_color_buffer_float DONE (all drivers)
- GL_KHR_blend_equation_advanced DONE (i965, nvc0)
+ GL_KHR_blend_equation_advanced DONE (i965, nvc0, radeonsi)
GL_KHR_debug DONE (all drivers)
GL_KHR_robustness DONE (i965, nvc0, radeonsi)
GL_KHR_texture_compression_astc_ldr DONE (freedreno, i965/gen9+)
GL_OES_copy_image DONE (all drivers)
GL_OES_draw_buffers_indexed DONE (all drivers that support GL_ARB_draw_buffers_blend)
GL_OES_draw_elements_base_vertex DONE (all drivers)
GL_OES_geometry_shader DONE (i965/hsw+, nvc0, radeonsi)
GL_OES_gpu_shader5 DONE (all drivers that support GL_ARB_gpu_shader5)
GL_OES_primitive_bounding_box DONE (i965/gen7+, nvc0, radeonsi)
GL_OES_sample_shading DONE (i965, nvc0, r600, radeonsi)
diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
index ddacbb4..e15ee87 100644
--- a/docs/relnotes/18.1.0.html
+++ b/docs/relnotes/18.1.0.html
@@ -37,21 +37,21 @@ TBD.
</pre>
<h2>New features</h2>
<p>
Note: some of the new features are only available with certain drivers.
</p>
<ul>
-TBD
+<li>GL_KHR_blend_equation_advanced on radeonsi</li>
</ul>
<h2>Bug fixes</h2>
<ul>
TBD
</ul>
<h2>Changes</h2>
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 17115e1..98086a7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -854,20 +854,81 @@ si_images_update_needs_color_decompress_mask(struct si_images *images)
if (color_needs_decompression(rtex)) {
images->needs_color_decompress_mask |= 1 << i;
} else {
images->needs_color_decompress_mask &= ~(1 << i);
}
}
}
}
+void si_update_ps_colorbuf0_slot(struct si_context *sctx)
+{
+ struct si_buffer_resources *buffers = &sctx->rw_buffers;
+ struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+ unsigned slot = SI_PS_IMAGE_COLORBUF0;
+ struct pipe_surface *surf = NULL;
+
+ /* si_texture_disable_dcc can get us here again. */
+ if (sctx->blitter->running)
+ return;
+
+ /* See whether FBFETCH is used and color buffer 0 is set. */
+ if (sctx->ps_shader.cso &&
+ sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] &&
+ sctx->framebuffer.state.nr_cbufs &&
+ sctx->framebuffer.state.cbufs[0])
+ surf = sctx->framebuffer.state.cbufs[0];
+
+ if (!buffers->buffers[slot] && !surf)
+ return;
+
+ struct r600_texture *tex = surf ? (struct r600_texture*)surf->texture : NULL;
+ if (tex) {
+ struct pipe_image_view view;
+
+ /* There is no automatic decompression for this resource slot. */
+ assert(!tex->is_depth);
+
+ if (tex->resource.b.b.nr_samples <= 1) {
+ /* Disable DCC and CMASK. */
+ si_texture_disable_dcc(&sctx->b, tex);
+ si_eliminate_fast_color_clear(&sctx->b, tex);
+ si_texture_discard_cmask(sctx->screen, tex);
+ }
+
+ view.resource = surf->texture;
+ view.format = surf->format;
+ view.access = PIPE_IMAGE_ACCESS_READ;
+ view.u.tex.first_layer = surf->u.tex.first_layer;
+ view.u.tex.last_layer = surf->u.tex.last_layer;
+ view.u.tex.level = surf->u.tex.level;
+
+ /* Set the descriptor. */
+ uint32_t *desc = descs->list + slot*4;
+ si_set_shader_image_desc(sctx, &view, true, desc);
+
+ pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b);
+ radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+ &tex->resource, RADEON_USAGE_READ,
+ RADEON_PRIO_SHADER_RW_IMAGE);
+ buffers->enabled_mask |= 1u << slot;
+ } else {
+ /* Clear the descriptor. */
+ memset(descs->list + slot*4, 0, 8*4);
+ pipe_resource_reference(&buffers->buffers[slot], NULL);
+ buffers->enabled_mask &= ~(1u << slot);
+ }
+
+ sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
+}
+
/* SAMPLER STATES */
static void si_bind_sampler_states(struct pipe_context *ctx,
enum pipe_shader_type shader,
unsigned start, unsigned count, void **states)
{
struct si_context *sctx = (struct si_context *)ctx;
struct si_samplers *samplers = &sctx->samplers[shader];
struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
struct si_sampler_state **sstates = (struct si_sampler_state**)states;
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index e38565c..eea705d 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -181,20 +181,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
case PIPE_CAP_QUERY_SO_OVERFLOW:
case PIPE_CAP_MEMOBJ:
case PIPE_CAP_LOAD_CONSTBUF:
case PIPE_CAP_INT64:
case PIPE_CAP_INT64_DIVMOD:
case PIPE_CAP_TGSI_CLOCK:
case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+ case PIPE_CAP_TGSI_FS_FBFETCH:
return 1;
case PIPE_CAP_TGSI_VOTE:
return HAVE_LLVM >= 0x0400;
case PIPE_CAP_TGSI_BALLOT:
return HAVE_LLVM >= 0x0500;
case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
return !SI_BIG_ENDIAN && sscreen->info.has_userptr;
@@ -255,21 +256,20 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
/* Unsupported features. */
case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
case PIPE_CAP_USER_VERTEX_BUFFERS:
case PIPE_CAP_FAKE_SW_MSAA:
case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
case PIPE_CAP_VERTEXID_NOBASE:
case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
case PIPE_CAP_MAX_WINDOW_RECTANGLES:
- case PIPE_CAP_TGSI_FS_FBFETCH:
case PIPE_CAP_TGSI_MUL_ZERO_WINS:
case PIPE_CAP_UMA:
case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
case PIPE_CAP_POST_DEPTH_COVERAGE:
case PIPE_CAP_TILE_RASTER_ORDER:
case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
case PIPE_CAP_CONTEXT_PRIORITY_MASK:
return 0;
case PIPE_CAP_NATIVE_FENCE_FD:
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index c80dc8e..58f3bda 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1927,20 +1927,63 @@ static void si_llvm_emit_txqs(
samples = LLVMBuildLShr(ctx->ac.builder, samples,
LLVMConstInt(ctx->i32, 16, 0), "");
samples = LLVMBuildAnd(ctx->ac.builder, samples,
LLVMConstInt(ctx->i32, 0xf, 0), "");
samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
samples, "");
emit_data->output[emit_data->chan] = samples;
}
+static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
+ struct lp_build_tgsi_context *bld_base,
+ struct lp_build_emit_data *emit_data)
+{
+ struct si_shader_context *ctx = si_shader_context(bld_base);
+ struct ac_image_args args = {};
+ LLVMValueRef resource, addr;
+
+ /* Ignore src0, because KHR_blend_func_extended disallows multiple render
+ * targets.
+ */
+
+ /* Load the image descriptor. */
+ STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+ resource = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
+ resource = LLVMBuildPointerCast(ctx->ac.builder, resource,
+ ac_array_in_const_addr_space(ctx->v8i32), "");
+ resource = ac_build_load_to_sgpr(&ctx->ac, resource,
+ LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
+
+ /* Get the current pixel address. */
+ LLVMValueRef pos_fixed = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_FIXED_PT);
+ LLVMValueRef pos[4] = {
+ LLVMBuildAnd(ctx->ac.builder, pos_fixed,
+ LLVMConstInt(ctx->i32, 0xffff, 0), ""),
+ LLVMBuildLShr(ctx->ac.builder, pos_fixed,
+ LLVMConstInt(ctx->i32, 16, 0), ""),
+ /* Set 0 for layered rendering. (do we care?) */
+ ctx->ac.i32_0,
+ LLVMGetUndef(ctx->i32),
+ };
+ addr = ac_build_gather_values(&ctx->ac, pos, ARRAY_SIZE(pos));
+
+ args.opcode = ac_image_load;
+ args.resource = resource;
+ args.addr = addr;
+ args.dmask = 0xf;
+ args.da = true;
+
+ emit_data->output[emit_data->chan] =
+ ac_build_image_opcode(&ctx->ac, &args);
+}
+
static const struct lp_build_tgsi_action tex_action = {
.fetch_args = tex_fetch_args,
.emit = build_tex_intrinsic,
};
/**
* Setup actions for TGSI memory opcode, including texture opcodes.
*/
void si_shader_context_init_mem(struct si_shader_context *ctx)
{
@@ -1959,20 +2002,22 @@ void si_shader_context_init_mem(struct si_shader_context *ctx)
bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args;
bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
+ bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch;
+
bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
tmpl.fetch_args = atomic_fetch_args;
tmpl.emit = atomic_emit;
bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f88bf29..a6ec427 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2890,20 +2890,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
if (!surf->depth_initialized) {
si_init_depth_surface(sctx, surf);
}
if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
sctx->framebuffer.DB_has_shader_readable_metadata = true;
si_context_add_resource_size(ctx, surf->base.texture);
}
+ si_update_ps_colorbuf0_slot(sctx);
si_update_poly_offset_state(sctx);
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
if (sctx->screen->dpbb_allowed)
si_mark_atom_dirty(sctx, &sctx->dpbb_state);
if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
si_mark_atom_dirty(sctx, &sctx->msaa_config);
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 5233be7..ea5038d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -210,20 +210,24 @@ enum {
SI_VS_STREAMOUT_BUF1,
SI_VS_STREAMOUT_BUF2,
SI_VS_STREAMOUT_BUF3,
SI_HS_CONST_DEFAULT_TESS_LEVELS,
SI_VS_CONST_INSTANCE_DIVISORS,
SI_VS_CONST_CLIP_PLANES,
SI_PS_CONST_POLY_STIPPLE,
SI_PS_CONST_SAMPLE_POSITIONS,
+ /* Image descriptor of color buffer 0 for KHR_blend_equation_advanced. */
+ SI_PS_IMAGE_COLORBUF0,
+ SI_PS_IMAGE_COLORBUF0_HI,
+
SI_NUM_RW_BUFFERS,
};
/* Indices into sctx->descriptors, laid out so that gfx and compute pipelines
* are contiguous:
*
* 0 - rw buffers
* 1 - vertex const and shader buffers
* 2 - vertex samplers and images
* 3 - fragment const and shader buffer
@@ -317,20 +321,21 @@ struct si_buffer_resources {
si_pm4_block_idx(member)); \
} while(0)
/* si_descriptors.c */
void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
struct r600_texture *tex,
const struct legacy_surf_level *base_level_info,
unsigned base_level, unsigned first_level,
unsigned block_width, bool is_stencil,
uint32_t *state);
+void si_update_ps_colorbuf0_slot(struct si_context *sctx);
void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
uint slot, struct pipe_constant_buffer *cbuf);
void si_get_shader_buffers(struct si_context *sctx,
enum pipe_shader_type shader,
uint start_slot, uint count,
struct pipe_shader_buffer *sbuf);
void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
struct pipe_resource *buffer,
unsigned stride, unsigned num_records,
bool add_tid, bool swizzle,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2c1d990..2cd48f5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2379,20 +2379,21 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
si_mark_atom_dirty(sctx, &sctx->cb_render_state);
if (sctx->screen->has_out_of_order_rast &&
(!old_sel ||
old_sel->info.writes_memory != sel->info.writes_memory ||
old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] !=
sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]))
si_mark_atom_dirty(sctx, &sctx->msaa_config);
}
si_set_active_descriptors_for_shader(sctx, sel);
+ si_update_ps_colorbuf0_slot(sctx);
}
static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
{
if (shader->is_optimized) {
util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
&shader->ready);
}
util_queue_fence_destroy(&shader->ready);
--
2.7.4
More information about the mesa-dev
mailing list