[Mesa-dev] [PATCH 3/7] radeonsi: merge si_set_streamout_targets with si_common_set_streamout_targets

Marek Olšák maraeo at gmail.com
Sat Oct 7 22:47:02 UTC 2017


From: Marek Olšák <marek.olsak at amd.com>

---
 src/gallium/drivers/radeonsi/si_descriptors.c     | 114 ---------------------
 src/gallium/drivers/radeonsi/si_state.h           |   4 -
 src/gallium/drivers/radeonsi/si_state_streamout.c | 117 ++++++++++++++++++++--
 3 files changed, 109 insertions(+), 126 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
index dd1f1e9..cda7d94 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -1356,133 +1356,20 @@ void si_set_ring_buffer(struct pipe_context *ctx, uint slot,
 		buffers->enabled_mask |= 1u << slot;
 	} else {
 		/* Clear the descriptor. */
 		memset(descs->list + slot*4, 0, sizeof(uint32_t) * 4);
 		buffers->enabled_mask &= ~(1u << slot);
 	}
 
 	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
 }
 
-/* STREAMOUT BUFFERS */
-
-static void si_set_streamout_targets(struct pipe_context *ctx,
-				     unsigned num_targets,
-				     struct pipe_stream_output_target **targets,
-				     const unsigned *offsets)
-{
-	struct si_context *sctx = (struct si_context *)ctx;
-	struct si_buffer_resources *buffers = &sctx->rw_buffers;
-	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
-	unsigned old_num_targets = sctx->streamout.num_targets;
-	unsigned i, bufidx;
-
-	/* We are going to unbind the buffers. Mark which caches need to be flushed. */
-	if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
-		/* Since streamout uses vector writes which go through TC L2
-		 * and most other clients can use TC L2 as well, we don't need
-		 * to flush it.
-		 *
-		 * The only cases which requires flushing it is VGT DMA index
-		 * fetching (on <= CIK) and indirect draw data, which are rare
-		 * cases. Thus, flag the TC L2 dirtiness in the resource and
-		 * handle it at draw call time.
-		 */
-		for (i = 0; i < sctx->streamout.num_targets; i++)
-			if (sctx->streamout.targets[i])
-				r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
-
-		/* Invalidate the scalar cache in case a streamout buffer is
-		 * going to be used as a constant buffer.
-		 *
-		 * Invalidate TC L1, because streamout bypasses it (done by
-		 * setting GLC=1 in the store instruction), but it can contain
-		 * outdated data of streamout buffers.
-		 *
-		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
-		 * used as an input immediately.
-		 */
-		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
-				 SI_CONTEXT_INV_VMEM_L1 |
-				 SI_CONTEXT_VS_PARTIAL_FLUSH;
-	}
-
-	/* All readers of the streamout targets need to be finished before we can
-	 * start writing to the targets.
-	 */
-	if (num_targets)
-		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
-		                 SI_CONTEXT_CS_PARTIAL_FLUSH;
-
-	/* Streamout buffers must be bound in 2 places:
-	 * 1) in VGT by setting the VGT_STRMOUT registers
-	 * 2) as shader resources
-	 */
-
-	/* Set the VGT regs. */
-	si_common_set_streamout_targets(ctx, num_targets, targets, offsets);
-
-	/* Set the shader resources.*/
-	for (i = 0; i < num_targets; i++) {
-		bufidx = SI_VS_STREAMOUT_BUF0 + i;
-
-		if (targets[i]) {
-			struct pipe_resource *buffer = targets[i]->buffer;
-			uint64_t va = r600_resource(buffer)->gpu_address;
-
-			/* Set the descriptor.
-			 *
-			 * On VI, the format must be non-INVALID, otherwise
-			 * the buffer will be considered not bound and store
-			 * instructions will be no-ops.
-			 */
-			uint32_t *desc = descs->list + bufidx*4;
-			desc[0] = va;
-			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
-			desc[2] = 0xffffffff;
-			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
-				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
-				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
-				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
-				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
-
-			/* Set the resource. */
-			pipe_resource_reference(&buffers->buffers[bufidx],
-						buffer);
-			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
-							    (struct r600_resource*)buffer,
-							    buffers->shader_usage,
-							    RADEON_PRIO_SHADER_RW_BUFFER,
-							    true);
-			r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
-
-			buffers->enabled_mask |= 1u << bufidx;
-		} else {
-			/* Clear the descriptor and unset the resource. */
-			memset(descs->list + bufidx*4, 0,
-			       sizeof(uint32_t) * 4);
-			pipe_resource_reference(&buffers->buffers[bufidx],
-						NULL);
-			buffers->enabled_mask &= ~(1u << bufidx);
-		}
-	}
-	for (; i < old_num_targets; i++) {
-		bufidx = SI_VS_STREAMOUT_BUF0 + i;
-		/* Clear the descriptor and unset the resource. */
-		memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
-		pipe_resource_reference(&buffers->buffers[bufidx], NULL);
-		buffers->enabled_mask &= ~(1u << bufidx);
-	}
-
-	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
-}
-
 static void si_desc_reset_buffer_offset(struct pipe_context *ctx,
 					uint32_t *desc, uint64_t old_buf_va,
 					struct pipe_resource *new_buf)
 {
 	/* Retrieve the buffer offset from the descriptor. */
 	uint64_t old_desc_va =
 		desc[0] | ((uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32);
 
 	assert(old_buf_va <= old_desc_va);
 	uint64_t offset_within_buffer = old_desc_va - old_buf_va;
@@ -2687,21 +2574,20 @@ void si_init_all_descriptors(struct si_context *sctx)
 
 	sctx->descriptors_dirty = u_bit_consecutive(0, SI_NUM_DESCS);
 
 	/* Set pipe_context functions. */
 	sctx->b.b.bind_sampler_states = si_bind_sampler_states;
 	sctx->b.b.set_shader_images = si_set_shader_images;
 	sctx->b.b.set_constant_buffer = si_pipe_set_constant_buffer;
 	sctx->b.b.set_polygon_stipple = si_set_polygon_stipple;
 	sctx->b.b.set_shader_buffers = si_set_shader_buffers;
 	sctx->b.b.set_sampler_views = si_set_sampler_views;
-	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->b.b.create_texture_handle = si_create_texture_handle;
 	sctx->b.b.delete_texture_handle = si_delete_texture_handle;
 	sctx->b.b.make_texture_handle_resident = si_make_texture_handle_resident;
 	sctx->b.b.create_image_handle = si_create_image_handle;
 	sctx->b.b.delete_image_handle = si_delete_image_handle;
 	sctx->b.b.make_image_handle_resident = si_make_image_handle_resident;
 	sctx->b.invalidate_buffer = si_invalidate_buffer;
 	sctx->b.rebind_buffer = si_rebind_buffer;
 
 	/* Shader user data. */
diff --git a/src/gallium/drivers/radeonsi/si_state.h b/src/gallium/drivers/radeonsi/si_state.h
index 9d29878..a686d0d 100644
--- a/src/gallium/drivers/radeonsi/si_state.h
+++ b/src/gallium/drivers/radeonsi/si_state.h
@@ -418,24 +418,20 @@ void si_draw_rectangle(struct blitter_context *blitter,
 		       void *vertex_elements_cso,
 		       blitter_get_vs_func get_vs,
 		       int x1, int y1, int x2, int y2,
 		       float depth, unsigned num_instances,
 		       enum blitter_attrib_type type,
 		       const union blitter_attrib *attrib);
 void si_trace_emit(struct si_context *sctx);
 
 /* si_state_streamout.c */
 void si_streamout_buffers_dirty(struct si_context *sctx);
-void si_common_set_streamout_targets(struct pipe_context *ctx,
-				     unsigned num_targets,
-				     struct pipe_stream_output_target **targets,
-				     const unsigned *offset);
 void si_emit_streamout_end(struct si_context *sctx);
 void si_update_prims_generated_query_state(struct si_context *sctx,
 					   unsigned type, int diff);
 void si_init_streamout_functions(struct si_context *sctx);
 
 
 static inline unsigned
 si_tile_mode_index(struct r600_texture *rtex, unsigned level, bool stencil)
 {
 	if (stencil)
diff --git a/src/gallium/drivers/radeonsi/si_state_streamout.c b/src/gallium/drivers/radeonsi/si_state_streamout.c
index 92c5c59..35fbcea 100644
--- a/src/gallium/drivers/radeonsi/si_state_streamout.c
+++ b/src/gallium/drivers/radeonsi/si_state_streamout.c
@@ -19,20 +19,21 @@
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  *
  * Authors: Marek Olšák <maraeo at gmail.com>
  *
  */
 
 #include "si_pipe.h"
 #include "si_state.h"
+#include "sid.h"
 #include "radeon/r600_cs.h"
 
 #include "util/u_memory.h"
 
 static void si_set_streamout_enable(struct si_context *sctx, bool enable);
 
 static inline void si_so_target_reference(struct si_streamout_target **dst,
 					  struct pipe_stream_output_target *src)
 {
 	pipe_so_target_reference((struct pipe_stream_output_target**)dst, src);
@@ -83,60 +84,159 @@ static void si_so_target_destroy(struct pipe_context *ctx,
 
 void si_streamout_buffers_dirty(struct si_context *sctx)
 {
 	if (!sctx->streamout.enabled_mask)
 		return;
 
 	si_mark_atom_dirty(sctx, &sctx->streamout.begin_atom);
 	si_set_streamout_enable(sctx, true);
 }
 
-void si_common_set_streamout_targets(struct pipe_context *ctx,
+static void si_set_streamout_targets(struct pipe_context *ctx,
 				     unsigned num_targets,
 				     struct pipe_stream_output_target **targets,
 				     const unsigned *offsets)
 {
 	struct si_context *sctx = (struct si_context *)ctx;
-	unsigned i;
-        unsigned enabled_mask = 0, append_bitmask = 0;
+	struct si_buffer_resources *buffers = &sctx->rw_buffers;
+	struct si_descriptors *descs = &sctx->descriptors[SI_DESCS_RW_BUFFERS];
+	unsigned old_num_targets = sctx->streamout.num_targets;
+	unsigned i, bufidx;
 
-	/* Stop streamout. */
+	/* We are going to unbind the buffers. Mark which caches need to be flushed. */
 	if (sctx->streamout.num_targets && sctx->streamout.begin_emitted) {
-		si_emit_streamout_end(sctx);
+		/* Since streamout uses vector writes which go through TC L2
+		 * and most other clients can use TC L2 as well, we don't need
+		 * to flush it.
+		 *
+		 * The only cases which require flushing it are VGT DMA index
+		 * fetching (on <= CIK) and indirect draw data, which are rare
+		 * cases. Thus, flag the TC L2 dirtiness in the resource and
+		 * handle it at draw call time.
+		 */
+		for (i = 0; i < sctx->streamout.num_targets; i++)
+			if (sctx->streamout.targets[i])
+				r600_resource(sctx->streamout.targets[i]->b.buffer)->TC_L2_dirty = true;
+
+		/* Invalidate the scalar cache in case a streamout buffer is
+		 * going to be used as a constant buffer.
+		 *
+		 * Invalidate TC L1, because streamout bypasses it (done by
+		 * setting GLC=1 in the store instruction), but it can contain
+		 * outdated data of streamout buffers.
+		 *
+		 * VS_PARTIAL_FLUSH is required if the buffers are going to be
+		 * used as an input immediately.
+		 */
+		sctx->b.flags |= SI_CONTEXT_INV_SMEM_L1 |
+				 SI_CONTEXT_INV_VMEM_L1 |
+				 SI_CONTEXT_VS_PARTIAL_FLUSH;
 	}
 
+	/* All readers of the streamout targets need to be finished before we can
+	 * start writing to the targets.
+	 */
+	if (num_targets)
+		sctx->b.flags |= SI_CONTEXT_PS_PARTIAL_FLUSH |
+		                 SI_CONTEXT_CS_PARTIAL_FLUSH;
+
+	/* Streamout buffers must be bound in 2 places:
+	 * 1) in VGT by setting the VGT_STRMOUT registers
+	 * 2) as shader resources
+	 */
+
+	/* Stop streamout. */
+	if (sctx->streamout.num_targets && sctx->streamout.begin_emitted)
+		si_emit_streamout_end(sctx);
+
 	/* Set the new targets. */
+	unsigned enabled_mask = 0, append_bitmask = 0;
 	for (i = 0; i < num_targets; i++) {
 		si_so_target_reference(&sctx->streamout.targets[i], targets[i]);
 		if (!targets[i])
 			continue;
 
 		r600_context_add_resource_size(ctx, targets[i]->buffer);
 		enabled_mask |= 1 << i;
+
 		if (offsets[i] == ((unsigned)-1))
 			append_bitmask |= 1 << i;
 	}
-	for (; i < sctx->streamout.num_targets; i++) {
+
+	for (; i < sctx->streamout.num_targets; i++)
 		si_so_target_reference(&sctx->streamout.targets[i], NULL);
-	}
 
 	sctx->streamout.enabled_mask = enabled_mask;
-
 	sctx->streamout.num_targets = num_targets;
 	sctx->streamout.append_bitmask = append_bitmask;
 
+	/* Update dirty state bits. */
 	if (num_targets) {
 		si_streamout_buffers_dirty(sctx);
 	} else {
 		si_set_atom_dirty(sctx, &sctx->streamout.begin_atom, false);
 		si_set_streamout_enable(sctx, false);
 	}
+
+	/* Set the shader resources. */
+	for (i = 0; i < num_targets; i++) {
+		bufidx = SI_VS_STREAMOUT_BUF0 + i;
+
+		if (targets[i]) {
+			struct pipe_resource *buffer = targets[i]->buffer;
+			uint64_t va = r600_resource(buffer)->gpu_address;
+
+			/* Set the descriptor.
+			 *
+			 * On VI, the format must be non-INVALID, otherwise
+			 * the buffer will be considered not bound and store
+			 * instructions will be no-ops.
+			 */
+			uint32_t *desc = descs->list + bufidx*4;
+			desc[0] = va;
+			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
+			desc[2] = 0xffffffff;
+			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
+				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
+				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
+				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
+				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
+
+			/* Set the resource. */
+			pipe_resource_reference(&buffers->buffers[bufidx],
+						buffer);
+			radeon_add_to_buffer_list_check_mem(&sctx->b, &sctx->b.gfx,
+							    (struct r600_resource*)buffer,
+							    buffers->shader_usage,
+							    RADEON_PRIO_SHADER_RW_BUFFER,
+							    true);
+			r600_resource(buffer)->bind_history |= PIPE_BIND_STREAM_OUTPUT;
+
+			buffers->enabled_mask |= 1u << bufidx;
+		} else {
+			/* Clear the descriptor and unset the resource. */
+			memset(descs->list + bufidx*4, 0,
+			       sizeof(uint32_t) * 4);
+			pipe_resource_reference(&buffers->buffers[bufidx],
+						NULL);
+			buffers->enabled_mask &= ~(1u << bufidx);
+		}
+	}
+	for (; i < old_num_targets; i++) {
+		bufidx = SI_VS_STREAMOUT_BUF0 + i;
+		/* Clear the descriptor and unset the resource. */
+		memset(descs->list + bufidx*4, 0, sizeof(uint32_t) * 4);
+		pipe_resource_reference(&buffers->buffers[bufidx], NULL);
+		buffers->enabled_mask &= ~(1u << bufidx);
+	}
+
+	sctx->descriptors_dirty |= 1u << SI_DESCS_RW_BUFFERS;
 }
 
 static void si_flush_vgt_streamout(struct si_context *sctx)
 {
 	struct radeon_winsys_cs *cs = sctx->b.gfx.cs;
 	unsigned reg_strmout_cntl;
 
 	/* The register is at different places on different ASICs. */
 	if (sctx->b.chip_class >= CIK) {
 		reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
@@ -306,13 +406,14 @@ void si_update_prims_generated_query_state(struct si_context *sctx,
 
 		if (old_strmout_en != si_get_strmout_en(sctx))
 			si_mark_atom_dirty(sctx, &sctx->streamout.enable_atom);
 	}
 }
 
 void si_init_streamout_functions(struct si_context *sctx)
 {
 	sctx->b.b.create_stream_output_target = si_create_so_target;
 	sctx->b.b.stream_output_target_destroy = si_so_target_destroy;
+	sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
 	sctx->streamout.begin_atom.emit = si_emit_streamout_begin;
 	sctx->streamout.enable_atom.emit = si_emit_streamout_enable;
 }
-- 
2.7.4



More information about the mesa-dev mailing list