[Mesa-dev] [PATCH 4/4] radeonsi: implement GL_KHR_blend_equation_advanced without MSAA

Marek Olšák maraeo at gmail.com
Tue Jan 30 15:48:42 UTC 2018


From: Marek Olšák <marek.olsak at amd.com>

I'm not fully convinced that the hardware can do it with MSAA.
It might work without CMASK and FMASK and with sample shading, i.e. in the
slowest possible way.
---
 docs/features.txt                                 |  2 +-
 docs/relnotes/18.1.0.html                         |  2 +-
 src/gallium/drivers/radeonsi/si_descriptors.c     | 61 +++++++++++++++++++++++
 src/gallium/drivers/radeonsi/si_get.c             |  2 +-
 src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c | 45 +++++++++++++++++
 src/gallium/drivers/radeonsi/si_state.c           |  1 +
 src/gallium/drivers/radeonsi/si_state.h           |  5 ++
 src/gallium/drivers/radeonsi/si_state_shaders.c   |  1 +
 8 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/docs/features.txt b/docs/features.txt
index 2e110d9..6226629 100644
--- a/docs/features.txt
+++ b/docs/features.txt
@@ -262,21 +262,21 @@ GLES3.1, GLSL ES 3.1 -- all DONE: i965/hsw+, nvc0, r600, radeonsi
 
   Additional functionality not covered above:
       glMemoryBarrierByRegion                           DONE
       glGetTexLevelParameter[fi]v - needs updates       DONE
       glGetBooleani_v - restrict to GLES enums
       gl_HelperInvocation support                       DONE (i965, r600)
 
 GLES3.2, GLSL ES 3.2 -- all DONE: i965/gen9+
 
   GL_EXT_color_buffer_float                             DONE (all drivers)
-  GL_KHR_blend_equation_advanced                        DONE (i965, nvc0)
+  GL_KHR_blend_equation_advanced                        DONE (i965, nvc0, radeonsi)
   GL_KHR_debug                                          DONE (all drivers)
   GL_KHR_robustness                                     DONE (i965, nvc0, radeonsi)
   GL_KHR_texture_compression_astc_ldr                   DONE (freedreno, i965/gen9+)
   GL_OES_copy_image                                     DONE (all drivers)
   GL_OES_draw_buffers_indexed                           DONE (all drivers that support GL_ARB_draw_buffers_blend)
   GL_OES_draw_elements_base_vertex                      DONE (all drivers)
   GL_OES_geometry_shader                                DONE (i965/hsw+, nvc0, radeonsi)
   GL_OES_gpu_shader5                                    DONE (all drivers that support GL_ARB_gpu_shader5)
   GL_OES_primitive_bounding_box                         DONE (i965/gen7+, nvc0, radeonsi)
   GL_OES_sample_shading                                 DONE (i965, nvc0, r600, radeonsi)
diff --git a/docs/relnotes/18.1.0.html b/docs/relnotes/18.1.0.html
index ddacbb4..e15ee87 100644
--- a/docs/relnotes/18.1.0.html
+++ b/docs/relnotes/18.1.0.html
@@ -37,21 +37,21 @@ TBD.
 </pre>
 
 
 <h2>New features</h2>
 
 <p>
 Note: some of the new features are only available with certain drivers.
 </p>
 
 <ul>
-TBD
+<li>GL_KHR_blend_equation_advanced on radeonsi</li>
 </ul>
 
 <h2>Bug fixes</h2>
 
 <ul>
 TBD
 </ul>
 
 <h2>Changes</h2>
 
diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index 17115e1..98086a7 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -854,20 +854,81 @@ si_images_update_needs_color_decompress_mask(struct si_images *images)
 
 			if (color_needs_decompression(rtex)) {
 				images->needs_color_decompress_mask |= 1 << i;
 			} else {
 				images->needs_color_decompress_mask &= ~(1 << i);
 			}
 		}
 	}
 }
 
+void si_update_ps_colorbuf0_slot(struct si_context *sctx)
+{
+	struct si_buffer_resources *buffers = &sctx->rw_buffers;
+	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+	unsigned slot = SI_PS_IMAGE_COLORBUF0;
+	struct pipe_surface *surf = NULL;
+
+	/* si_texture_disable_dcc can get us here again. */
+	if (sctx->blitter->running)
+		return;
+
+	/* See whether FBFETCH is used and color buffer 0 is set. */
+	if (sctx->ps_shader.cso &&
+	    sctx->ps_shader.cso->info.opcode_count[TGSI_OPCODE_FBFETCH] &&
+	    sctx->framebuffer.state.nr_cbufs &&
+	    sctx->framebuffer.state.cbufs[0])
+		surf = sctx->framebuffer.state.cbufs[0];
+
+	if (!buffers->buffers[slot] && !surf)
+		return;
+
+	struct r600_texture *tex = surf ? (struct r600_texture*)surf->texture : NULL;
+	if (tex) {
+		struct pipe_image_view view;
+
+		/* There is no automatic decompression for this resource slot. */
+		assert(!tex->is_depth);
+
+		if (tex->resource.b.b.nr_samples <= 1) {
+			/* Disable DCC and CMASK. */
+			si_texture_disable_dcc(&sctx->b, tex);
+			si_eliminate_fast_color_clear(&sctx->b, tex);
+			si_texture_discard_cmask(sctx->screen, tex);
+		}
+
+		view.resource = surf->texture;
+		view.format = surf->format;
+		view.access = PIPE_IMAGE_ACCESS_READ;
+		view.u.tex.first_layer = surf->u.tex.first_layer;
+		view.u.tex.last_layer = surf->u.tex.last_layer;
+		view.u.tex.level = surf->u.tex.level;
+
+		/* Set the descriptor. */
+		uint32_t *desc = descs->list + slot*4;
+		si_set_shader_image_desc(sctx, &view, true, desc);
+
+		pipe_resource_reference(&buffers->buffers[slot], &tex->resource.b.b);
+		radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx,
+					  &tex->resource, RADEON_USAGE_READ,
+					  RADEON_PRIO_SHADER_RW_IMAGE);
+		buffers->enabled_mask |= 1u << slot;
+	} else {
+		/* Clear the descriptor. */
+		memset(descs->list + slot*4, 0, 8*4);
+		pipe_resource_reference(&buffers->buffers[slot], NULL);
+		buffers->enabled_mask &= ~(1u << slot);
+	}
+
+	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
+}
+
 /* SAMPLER STATES */
 
 static void si_bind_sampler_states(struct pipe_context *ctx,
                                    enum pipe_shader_type shader,
                                    unsigned start, unsigned count, void **states)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
 	struct si_samplers *samplers = &sctx->samplers[shader];
 	struct si_descriptors *desc = si_sampler_and_image_descriptors(sctx, shader);
 	struct si_sampler_state **sstates = (struct si_sampler_state**)states;
diff --git a/src/gallium/drivers/radeonsi/si_get.c b/src/gallium/drivers/radeonsi/si_get.c
index e38565c..eea705d 100644
--- a/src/gallium/drivers/radeonsi/si_get.c
+++ b/src/gallium/drivers/radeonsi/si_get.c
@@ -181,20 +181,21 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	case PIPE_CAP_QUERY_SO_OVERFLOW:
 	case PIPE_CAP_MEMOBJ:
 	case PIPE_CAP_LOAD_CONSTBUF:
 	case PIPE_CAP_INT64:
 	case PIPE_CAP_INT64_DIVMOD:
 	case PIPE_CAP_TGSI_CLOCK:
 	case PIPE_CAP_CAN_BIND_CONST_BUFFER_AS_VERTEX:
 	case PIPE_CAP_ALLOW_MAPPED_BUFFERS_DURING_EXECUTION:
 	case PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS:
 	case PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET:
+	case PIPE_CAP_TGSI_FS_FBFETCH:
 		return 1;
 
 	case PIPE_CAP_TGSI_VOTE:
 		return HAVE_LLVM >= 0x0400;
 
 	case PIPE_CAP_TGSI_BALLOT:
 		return HAVE_LLVM >= 0x0500;
 
 	case PIPE_CAP_RESOURCE_FROM_USER_MEMORY:
 		return !SI_BIG_ENDIAN && sscreen->info.has_userptr;
@@ -255,21 +256,20 @@ static int si_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
 	/* Unsupported features. */
 	case PIPE_CAP_BUFFER_SAMPLER_VIEW_RGBA_ONLY:
 	case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
 	case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
 	case PIPE_CAP_USER_VERTEX_BUFFERS:
 	case PIPE_CAP_FAKE_SW_MSAA:
 	case PIPE_CAP_TEXTURE_GATHER_OFFSETS:
 	case PIPE_CAP_VERTEXID_NOBASE:
 	case PIPE_CAP_PRIMITIVE_RESTART_FOR_PATCHES:
 	case PIPE_CAP_MAX_WINDOW_RECTANGLES:
-	case PIPE_CAP_TGSI_FS_FBFETCH:
 	case PIPE_CAP_TGSI_MUL_ZERO_WINS:
 	case PIPE_CAP_UMA:
 	case PIPE_CAP_POLYGON_MODE_FILL_RECTANGLE:
 	case PIPE_CAP_POST_DEPTH_COVERAGE:
 	case PIPE_CAP_TILE_RASTER_ORDER:
 	case PIPE_CAP_MAX_COMBINED_SHADER_OUTPUT_RESOURCES:
 	case PIPE_CAP_CONTEXT_PRIORITY_MASK:
 		return 0;
 
 	case PIPE_CAP_NATIVE_FENCE_FD:
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
index c80dc8e..58f3bda 100644
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_mem.c
@@ -1927,20 +1927,63 @@ static void si_llvm_emit_txqs(
 	samples = LLVMBuildLShr(ctx->ac.builder, samples,
 				LLVMConstInt(ctx->i32, 16, 0), "");
 	samples = LLVMBuildAnd(ctx->ac.builder, samples,
 			       LLVMConstInt(ctx->i32, 0xf, 0), "");
 	samples = LLVMBuildShl(ctx->ac.builder, ctx->i32_1,
 			       samples, "");
 
 	emit_data->output[emit_data->chan] = samples;
 }
 
+static void si_llvm_emit_fbfetch(const struct lp_build_tgsi_action *action,
+				 struct lp_build_tgsi_context *bld_base,
+				 struct lp_build_emit_data *emit_data)
+{
+	struct si_shader_context *ctx = si_shader_context(bld_base);
+	struct ac_image_args args = {};
+	LLVMValueRef resource, addr;
+
+	/* Ignore src0, because KHR_blend_equation_advanced disallows multiple
+	 * render targets.
+	 */
+
+	/* Load the image descriptor. */
+	STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
+	resource = LLVMGetParam(ctx->main_fn, ctx->param_rw_buffers);
+	resource = LLVMBuildPointerCast(ctx->ac.builder, resource,
+					ac_array_in_const_addr_space(ctx->v8i32), "");
+	resource = ac_build_load_to_sgpr(&ctx->ac, resource,
+				LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
+
+	/* Get the current pixel address. */
+	LLVMValueRef pos_fixed = LLVMGetParam(ctx->main_fn, SI_PARAM_POS_FIXED_PT);
+	LLVMValueRef pos[4] = {
+		LLVMBuildAnd(ctx->ac.builder, pos_fixed,
+			     LLVMConstInt(ctx->i32, 0xffff, 0), ""),
+		LLVMBuildLShr(ctx->ac.builder, pos_fixed,
+			      LLVMConstInt(ctx->i32, 16, 0), ""),
+		/* Set 0 for layered rendering. (do we care?) */
+		ctx->ac.i32_0,
+		LLVMGetUndef(ctx->i32),
+	};
+	addr = ac_build_gather_values(&ctx->ac, pos, ARRAY_SIZE(pos));
+
+	args.opcode = ac_image_load;
+	args.resource = resource;
+	args.addr = addr;
+	args.dmask = 0xf;
+	args.da = true;
+
+	emit_data->output[emit_data->chan] =
+		ac_build_image_opcode(&ctx->ac, &args);
+}
+
 static const struct lp_build_tgsi_action tex_action = {
 	.fetch_args = tex_fetch_args,
 	.emit = build_tex_intrinsic,
 };
 
 /**
  * Setup actions for TGSI memory opcode, including texture opcodes.
  */
 void si_shader_context_init_mem(struct si_shader_context *ctx)
 {
@@ -1959,20 +2002,22 @@ void si_shader_context_init_mem(struct si_shader_context *ctx)
 	bld_base->op_actions[TGSI_OPCODE_TXF_LZ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXL] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXL2] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXP] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].fetch_args = txq_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
 	bld_base->op_actions[TGSI_OPCODE_TG4] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_LODQ] = tex_action;
 	bld_base->op_actions[TGSI_OPCODE_TXQS].emit = si_llvm_emit_txqs;
 
+	bld_base->op_actions[TGSI_OPCODE_FBFETCH].emit = si_llvm_emit_fbfetch;
+
 	bld_base->op_actions[TGSI_OPCODE_LOAD].fetch_args = load_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
 	bld_base->op_actions[TGSI_OPCODE_STORE].fetch_args = store_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_STORE].emit = store_emit;
 	bld_base->op_actions[TGSI_OPCODE_RESQ].fetch_args = resq_fetch_args;
 	bld_base->op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;
 
 	tmpl.fetch_args = atomic_fetch_args;
 	tmpl.emit = atomic_emit;
 	bld_base->op_actions[TGSI_OPCODE_ATOMUADD] = tmpl;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index f88bf29..a6ec427 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2890,20 +2890,21 @@ static void si_set_framebuffer_state(struct pipe_context *ctx,
 		if (!surf->depth_initialized) {
 			si_init_depth_surface(sctx, surf);
 		}
 
 		if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level))
 			sctx->framebuffer.DB_has_shader_readable_metadata = true;
 
 		si_context_add_resource_size(ctx, surf->base.texture);
 	}
 
+	si_update_ps_colorbuf0_slot(sctx);
 	si_update_poly_offset_state(sctx);
 	si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 	si_mark_atom_dirty(sctx, &sctx->framebuffer.atom);
 
 	if (sctx->screen->dpbb_allowed)
 		si_mark_atom_dirty(sctx, &sctx->dpbb_state);
 
 	if (sctx->framebuffer.any_dst_linear != old_any_dst_linear)
 		si_mark_atom_dirty(sctx, &sctx->msaa_config);
 
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 5233be7..ea5038d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -210,20 +210,24 @@ enum {
 	SI_VS_STREAMOUT_BUF1,
 	SI_VS_STREAMOUT_BUF2,
 	SI_VS_STREAMOUT_BUF3,
 
 	SI_HS_CONST_DEFAULT_TESS_LEVELS,
 	SI_VS_CONST_INSTANCE_DIVISORS,
 	SI_VS_CONST_CLIP_PLANES,
 	SI_PS_CONST_POLY_STIPPLE,
 	SI_PS_CONST_SAMPLE_POSITIONS,
 
+	/* Image descriptor of color buffer 0 for KHR_blend_equation_advanced. */
+	SI_PS_IMAGE_COLORBUF0,
+	SI_PS_IMAGE_COLORBUF0_HI,
+
 	SI_NUM_RW_BUFFERS,
 };
 
 /* Indices into sctx->descriptors, laid out so that gfx and compute pipelines
  * are contiguous:
  *
  *  0 - rw buffers
  *  1 - vertex const and shader buffers
  *  2 - vertex samplers and images
  *  3 - fragment const and shader buffer
@@ -317,20 +321,21 @@ struct si_buffer_resources {
 				  si_pm4_block_idx(member)); \
 	} while(0)
 
 /* si_descriptors.c */
 void si_set_mutable_tex_desc_fields(struct si_screen *sscreen,
 				    struct r600_texture *tex,
 				    const struct legacy_surf_level *base_level_info,
 				    unsigned base_level, unsigned first_level,
 				    unsigned block_width, bool is_stencil,
 				    uint32_t *state);
+void si_update_ps_colorbuf0_slot(struct si_context *sctx);
 void si_get_pipe_constant_buffer(struct si_context *sctx, uint shader,
 				 uint slot, struct pipe_constant_buffer *cbuf);
 void si_get_shader_buffers(struct si_context *sctx,
 			   enum pipe_shader_type shader,
 			   uint start_slot, uint count,
 			   struct pipe_shader_buffer *sbuf);
 void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
 			struct pipe_resource *buffer,
 			unsigned stride, unsigned num_records,
 			bool add_tid, bool swizzle,
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c b/src/gallium/drivers/radeonsi/si_state_shaders.c
index 2c1d990..2cd48f5 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -2379,20 +2379,21 @@ static void si_bind_ps_shader(struct pipe_context *ctx, void *state)
 			si_mark_atom_dirty(sctx, &sctx->cb_render_state);
 
 		if (sctx->screen->has_out_of_order_rast &&
 		    (!old_sel ||
 		     old_sel->info.writes_memory != sel->info.writes_memory ||
 		     old_sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL] !=
 		     sel->info.properties[TGSI_PROPERTY_FS_EARLY_DEPTH_STENCIL]))
 			si_mark_atom_dirty(sctx, &sctx->msaa_config);
 	}
 	si_set_active_descriptors_for_shader(sctx, sel);
+	si_update_ps_colorbuf0_slot(sctx);
 }
 
 static void si_delete_shader(struct si_context *sctx, struct si_shader *shader)
 {
 	if (shader->is_optimized) {
 		util_queue_drop_job(&sctx->screen->shader_compiler_queue_low_priority,
 				    &shader->ready);
 	}
 
 	util_queue_fence_destroy(&shader->ready);
-- 
2.7.4



More information about the mesa-dev mailing list