[Mesa-dev] [PATCH 1/9] gallium/radeon: add barrier_flags to r600_common_screen

Fri Sep 16 13:57:03 UTC 2016

From: Nicolai Hähnle <nicolai.haehnle at amd.com>

There are driver-specific context flags for barriers that are not covered
by the Gallium barrier interfaces.

The R600 settings of these flags may not be optimal, but we're not going
to use them yet anyway.
---
 src/gallium/drivers/r600/r600_pipe.c          |  6 ++++++
 src/gallium/drivers/radeon/r600_pipe_common.h | 12 ++++++++++++
 src/gallium/drivers/radeonsi/si_pipe.c        |  5 +++++
 3 files changed, 23 insertions(+)

diff --git a/src/gallium/drivers/r600/r600_pipe.c b/src/gallium/drivers/r600/r600_pipe.c
index c09821d..0799ba2 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -673,20 +673,26 @@ struct pipe_screen *r600_screen_create(struct radeon_winsys *ws)
 		rscreen->has_compressed_msaa_texturing = true;
 		break;
 	default:
 		rscreen->has_msaa = FALSE;
 		rscreen->has_compressed_msaa_texturing = false;
 	}
 
 	rscreen->b.has_cp_dma = rscreen->b.info.drm_minor >= 27 &&
 			      !(rscreen->b.debug_flags & DBG_NO_CP_DMA);
 
+	rscreen->b.barrier_flags.cp_to_L2 =
+		R600_CONTEXT_INV_VERTEX_CACHE |
+		R600_CONTEXT_INV_TEX_CACHE |
+		R600_CONTEXT_INV_CONST_CACHE;
+	rscreen->b.barrier_flags.compute_to_L2 = R600_CONTEXT_PS_PARTIAL_FLUSH;
+
 	rscreen->global_pool = compute_memory_pool_new(rscreen);
 
 	/* Create the auxiliary context. This must be done last. */
 	rscreen->b.aux_context = rscreen->b.b.context_create(&rscreen->b.b, NULL, 0);
 
 #if 0 /* This is for testing whether aux_context and buffer clearing work correctly. */
 	struct pipe_resource templ = {};
 
 	templ.width0 = 4;
 	templ.height0 = 2048;
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h b/src/gallium/drivers/radeon/r600_pipe_common.h
index d9f22e4..dd33eab 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -408,20 +408,32 @@ struct r600_common_screen {
 	 * contexts' compressed texture binding masks.
 	 */
 	unsigned			compressed_colortex_counter;
 
 	/* Atomically increment this counter when an existing texture's
 	 * backing buffer or tile mode parameters have changed that requires
 	 * recomputation of shader descriptors.
 	 */
 	unsigned			dirty_tex_descriptor_counter;
 
+	struct {
+		/* Context flags to set so that all writes from earlier jobs
+		 * in the CP are seen by L2 clients.
+		 */
+		unsigned cp_to_L2;
+
+		/* Context flags to set so that all writes from earlier
+		 * compute jobs are seen by L2 clients.
+		 */
+		unsigned compute_to_L2;
+	} barrier_flags;
+
 	void (*query_opaque_metadata)(struct r600_common_screen *rscreen,
 				      struct r600_texture *rtex,
 				      struct radeon_bo_metadata *md);
 
 	void (*apply_opaque_metadata)(struct r600_common_screen *rscreen,
 				    struct r600_texture *rtex,
 				    struct radeon_bo_metadata *md);
 };
 
 /* This encapsulates a state or an operation which can emitted into the GPU
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c
index 8f9e6f5..730be9d 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -811,20 +811,25 @@ struct pipe_screen *radeonsi_screen_create(struct radeon_winsys *ws)
 		 sscreen->b.info.pfp_fw_version >= 121 &&
 		 sscreen->b.info.me_fw_version >= 87);
 
 	sscreen->b.has_cp_dma = true;
 	sscreen->b.has_streamout = true;
 	pipe_mutex_init(sscreen->shader_parts_mutex);
 	sscreen->use_monolithic_shaders =
 		HAVE_LLVM < 0x0308 ||
 		(sscreen->b.debug_flags & DBG_MONOLITHIC_SHADERS) != 0;
 
+	sscreen->b.barrier_flags.cp_to_L2 = SI_CONTEXT_INV_SMEM_L1 |
+					    SI_CONTEXT_INV_VMEM_L1 |
+					    SI_CONTEXT_INV_GLOBAL_L2;
+	sscreen->b.barrier_flags.compute_to_L2 = SI_CONTEXT_CS_PARTIAL_FLUSH;
+
 	if (debug_get_bool_option("RADEON_DUMP_SHADERS", false))
 		sscreen->b.debug_flags |= DBG_FS | DBG_VS | DBG_GS | DBG_PS | DBG_CS;
 
 	/* Only enable as many threads as we have target machines and CPUs. */
 	num_cpus = sysconf(_SC_NPROCESSORS_ONLN);
 	num_compiler_threads = MIN2(num_cpus, ARRAY_SIZE(sscreen->tm));
 
 	for (i = 0; i < num_compiler_threads; i++)
 		sscreen->tm[i] = si_create_llvm_target_machine(sscreen);
 
-- 
2.7.4