[Mesa-dev] [PATCH 09/13] gallium/radeon: implement basic parts of PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE

Nicolai Hähnle nhaehnle at gmail.com
Thu Jul 27 19:14:19 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

---
 src/gallium/drivers/radeon/r600_query.c | 96 ++++++++++++++++++++++++---------
 1 file changed, 71 insertions(+), 25 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 9f33bac..4c727d6 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -648,6 +648,12 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_screen *rscree
 		query->num_cs_dw_end = 6;
 		query->stream = index;
 		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
+		query->result_size = 32 * 4;
+		query->num_cs_dw_begin = 6 * 4;
+		query->num_cs_dw_end = 6 * 4;
+		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		/* 11 values on EG, 8 on R600. */
 		query->result_size = (rscreen->chip_class >= EVERGREEN ? 11 : 8) * 16;
@@ -696,9 +702,9 @@ static void r600_update_occlusion_query_state(struct r600_common_context *rctx,
 	}
 }
 
-static unsigned event_type_for_stream(struct r600_query_hw *query)
+static unsigned event_type_for_stream(unsigned stream)
 {
-	switch (query->stream) {
+	switch (stream) {
 	default:
 	case 0: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS;
 	case 1: return EVENT_TYPE_SAMPLE_STREAMOUTSTATS1;
@@ -707,6 +713,15 @@ static unsigned event_type_for_stream(struct r600_query_hw *query)
 	}
 }
 
+static void emit_sample_streamout(struct radeon_winsys_cs *cs, uint64_t va,
+				  unsigned stream)
+{	/* Emit one 4-dword EVENT_WRITE packet: the streamout-stats sample event for 'stream', then address 'va'. */
+	radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0)); /* packet header */
+	radeon_emit(cs, EVENT_TYPE(event_type_for_stream(stream)) | EVENT_INDEX(3)); /* event chosen per stream */
+	radeon_emit(cs, va); /* address, low part (presumably truncated to 32 bits by radeon_emit - TODO confirm) */
+	radeon_emit(cs, va >> 32); /* address, bits 32 and up */
+}
+
 static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
 					struct r600_query_hw *query,
 					struct r600_resource *buffer,
@@ -726,10 +741,11 @@ static void r600_query_hw_do_emit_start(struct r600_common_context *ctx,
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
+		emit_sample_streamout(cs, va, query->stream);
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		for (unsigned stream = 0; stream < 4; ++stream)
+			emit_sample_streamout(cs, va + 32 * stream, stream);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		if (ctx->chip_class >= SI) {
@@ -821,11 +837,13 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
-		va += query->result_size/2;
-		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
-		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
-		radeon_emit(cs, va);
-		radeon_emit(cs, va >> 32);
+		va += 16;
+		emit_sample_streamout(cs, va, query->stream);
+		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		va += 16;
+		for (unsigned stream = 0; stream < 4; ++stream)
+			emit_sample_streamout(cs, va + 32 * stream, stream);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
 		va += 8;
@@ -885,10 +903,29 @@ static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
 	r600_update_prims_generated_query_state(ctx, query->b.type, -1);
 }
 
+static void emit_set_predicate(struct r600_common_context *ctx,
+			       struct r600_resource *buf, uint64_t va,
+			       uint32_t op)
+{	/* Emit one SET_PREDICATION packet for address 'va' with operation bits 'op', then a reloc for 'buf'. */
+	struct radeon_winsys_cs *cs = ctx->gfx.cs;
+
+	if (ctx->chip_class >= GFX9) { /* GFX9 and later: 'op' gets its own dword, address follows */
+		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
+		radeon_emit(cs, op);
+		radeon_emit(cs, va); /* address, low part */
+		radeon_emit(cs, va >> 32); /* address, bits 32 and up */
+	} else { /* older chips: 'op' shares a dword with address bits 32..39 */
+		radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
+		radeon_emit(cs, va); /* address, low part */
+		radeon_emit(cs, op | ((va >> 32) & 0xFF)); /* only 8 high address bits are kept here */
+	}
+	r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_READ,
+			RADEON_PRIO_QUERY); /* 'buf' is registered with read usage at query priority */
+}
+
 static void r600_emit_query_predication(struct r600_common_context *ctx,
 					struct r600_atom *atom)
 {
-	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 	struct r600_query_hw *query = (struct r600_query_hw *)ctx->render_cond;
 	struct r600_query_buffer *qbuf;
 	uint32_t op;
@@ -907,6 +944,7 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 		op = PRED_OP(PREDICATION_OP_ZPASS);
 		break;
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
 		op = PRED_OP(PREDICATION_OP_PRIMCOUNT);
 		invert = !invert;
 		break;
@@ -931,22 +969,19 @@ static void r600_emit_query_predication(struct r600_common_context *ctx,
 		while (results_base < qbuf->results_end) {
 			uint64_t va = va_base + results_base;
 
-			if (ctx->chip_class >= GFX9) {
-				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 2, 0));
-				radeon_emit(cs, op);
-				radeon_emit(cs, va);
-				radeon_emit(cs, va >> 32);
+			if (query->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE) {
+				for (unsigned stream = 0; stream < 4; ++stream) {
+					emit_set_predicate(ctx, qbuf->buf, va + 32 * stream, op);
+
+					/* set CONTINUE bit for all packets except the first */
+					op |= PREDICATION_CONTINUE;
+				}
 			} else {
-				radeon_emit(cs, PKT3(PKT3_SET_PREDICATION, 1, 0));
-				radeon_emit(cs, va);
-				radeon_emit(cs, op | ((va >> 32) & 0xFF));
+				emit_set_predicate(ctx, qbuf->buf, va, op);
+				op |= PREDICATION_CONTINUE;
 			}
-			r600_emit_reloc(ctx, &ctx->gfx, qbuf->buf, RADEON_USAGE_READ,
-					RADEON_PRIO_QUERY);
-			results_base += query->result_size;
 
-			/* set CONTINUE bit for all packets except the first */
-			op |= PREDICATION_CONTINUE;
+			results_base += query->result_size;
 		}
 	}
 }
@@ -1185,6 +1220,14 @@ static void r600_query_hw_add_result(struct r600_common_screen *rscreen,
 			r600_query_read_result(buffer, 2, 6, true) !=
 			r600_query_read_result(buffer, 0, 4, true);
 		break;
+	case PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE:
+		for (unsigned stream = 0; stream < 4; ++stream) {
+			result->b = result->b ||
+				r600_query_read_result(buffer, 2, 6, true) !=
+				r600_query_read_result(buffer, 0, 4, true);
+			buffer = (char *)buffer + 32;
+		}
+		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		if (rscreen->chip_class >= EVERGREEN) {
 			result->pipeline_statistics.ps_invocations +=
@@ -1698,6 +1741,9 @@ static void r600_render_condition(struct pipe_context *ctx,
 	if (query) {
 		for (qbuf = &rquery->buffer; qbuf; qbuf = qbuf->previous)
 			atom->num_dw += (qbuf->results_end / rquery->result_size) * 5;
+
+		if (rquery->b.type == PIPE_QUERY_SO_OVERFLOW_ANY_PREDICATE)
+			atom->num_dw *= 4;
 	}
 
 	rctx->set_atom_dirty(rctx, atom, query != NULL);
-- 
2.9.3



More information about the mesa-dev mailing list