[Mesa-dev] [PATCH 5/9] gallium/radeon: add query fences and r600_get_hw_query_params

Nicolai Hähnle nhaehnle at gmail.com
Fri Sep 16 13:57:07 UTC 2016


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

We will support the waiting option in ARB_query_buffer_object using
WAIT_REG_MEM on an appropriate fence-like dword. Some queries (the
streamout-related ones) conveniently write their results with the highest
bit set, and we can just use the high dword of the end result as the fence;
for others (timestamp, time-elapsed and pipeline-statistics queries), we
have to write a fence explicitly.

ZPASS_DONE for occlusion queries writes its results with the high bit
set, but it writes up to 8 pairs of results (one for each DB). We have
to wait for all of these results, so let's just add an explicit fence.

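The new function, r600_get_hw_query_params, fills in a
struct r600_hw_query_params describing the layout of a single result slot
(the offsets of the start value, end value and fence dword, plus the stride
and count of start/end pairs). It has no callers yet; it will be used by
subsequent patches.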
---
 src/gallium/drivers/radeon/r600_query.c | 107 +++++++++++++++++++++++++++-----
 1 file changed, 91 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index 2c3d530..b9041eb 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -19,20 +19,28 @@
  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  */
 
 #include "r600_query.h"
 #include "r600_cs.h"
 #include "util/u_memory.h"
 
+struct r600_hw_query_params {
+	unsigned start_offset;
+	unsigned end_offset;
+	unsigned fence_offset;
+	unsigned pair_stride;
+	unsigned pair_count;
+};
+
 /* Queries without buffer handling or suspend/resume. */
 struct r600_query_sw {
 	struct r600_query b;
 
 	uint64_t begin_result;
 	uint64_t end_result;
 	/* Fence for GPU_FINISHED. */
 	struct pipe_fence_handle *fence;
 };
 
@@ -352,21 +360,21 @@ static bool r600_query_hw_prepare_buffer(struct r600_common_context *ctx,
 		return false;
 
 	memset(results, 0, buffer->b.b.width0);
 
 	if (query->b.type == PIPE_QUERY_OCCLUSION_COUNTER ||
 	    query->b.type == PIPE_QUERY_OCCLUSION_PREDICATE) {
 		unsigned num_results;
 		unsigned i, j;
 
 		/* Set top bits for unused backends. */
-		num_results = buffer->b.b.width0 / (16 * ctx->max_db);
+		num_results = buffer->b.b.width0 / query->result_size;
 		for (j = 0; j < num_results; j++) {
 			for (i = 0; i < ctx->max_db; i++) {
 				if (!(ctx->backend_mask & (1<<i))) {
 					results[(i * 4)+1] = 0x80000000;
 					results[(i * 4)+3] = 0x80000000;
 				}
 			}
 			results += 4 * ctx->max_db;
 		}
 	}
@@ -422,50 +430,52 @@ static struct pipe_query *r600_query_hw_create(struct r600_common_context *rctx,
 		return NULL;
 
 	query->b.type = query_type;
 	query->b.ops = &query_hw_ops;
 	query->ops = &query_hw_default_hw_ops;
 
 	switch (query_type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		query->result_size = 16 * rctx->max_db;
+		query->result_size += 16; /* for the fence + alignment */
 		query->num_cs_dw_begin = 6;
-		query->num_cs_dw_end = 6;
+		query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen);
 		query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
-		query->result_size = 16;
+		query->result_size = 24;
 		query->num_cs_dw_begin = 8;
-		query->num_cs_dw_end = 8;
+		query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen);
 		break;
 	case PIPE_QUERY_TIMESTAMP:
-		query->result_size = 8;
-		query->num_cs_dw_end = 8;
+		query->result_size = 16;
+		query->num_cs_dw_end = 8 + r600_gfx_write_fence_dwords(rctx->screen);
 		query->flags = R600_QUERY_HW_FLAG_NO_START;
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
 		query->result_size = 32;
 		query->num_cs_dw_begin = 6;
 		query->num_cs_dw_end = 6;
 		query->stream = index;
 		query->flags |= R600_QUERY_HW_FLAG_PREDICATE;
 		break;
 	case PIPE_QUERY_PIPELINE_STATISTICS:
 		/* 11 values on EG, 8 on R600. */
 		query->result_size = (rctx->chip_class >= EVERGREEN ? 11 : 8) * 16;
+		query->result_size += 8; /* for the fence + alignment */
 		query->num_cs_dw_begin = 6;
-		query->num_cs_dw_end = 6;
+		query->num_cs_dw_end = 6 + r600_gfx_write_fence_dwords(rctx->screen);
 		break;
 	default:
 		assert(0);
 		FREE(query);
 		return NULL;
 	}
 
 	if (!r600_query_hw_init(rctx, query)) {
 		FREE(query);
 		return NULL;
@@ -598,49 +608,61 @@ static void r600_query_hw_do_emit_stop(struct r600_common_context *ctx,
 	struct radeon_winsys_cs *cs = ctx->gfx.cs;
 
 	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER:
 	case PIPE_QUERY_OCCLUSION_PREDICATE:
 		va += 8;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1));
 		radeon_emit(cs, va);
 		radeon_emit(cs, (va >> 32) & 0xFFFF);
+
+		va += ctx->max_db * 16 - 8;
+		r600_gfx_write_fence(ctx, va, 0, 0x80000000);
 		break;
 	case PIPE_QUERY_PRIMITIVES_EMITTED:
 	case PIPE_QUERY_PRIMITIVES_GENERATED:
 	case PIPE_QUERY_SO_STATISTICS:
 	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
 		va += query->result_size/2;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(event_type_for_stream(query)) | EVENT_INDEX(3));
 		radeon_emit(cs, va);
 		radeon_emit(cs, (va >> 32) & 0xFFFF);
 		break;
 	case PIPE_QUERY_TIME_ELAPSED:
-		va += query->result_size/2;
+		va += 8;
 		/* fall through */
 	case PIPE_QUERY_TIMESTAMP:
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_BOTTOM_OF_PIPE_TS) | EVENT_INDEX(5));
 		radeon_emit(cs, va);
 		radeon_emit(cs, (3 << 29) | ((va >> 32) & 0xFFFF));
 		radeon_emit(cs, 0);
 		radeon_emit(cs, 0);
+
+		va += 8;
+		r600_gfx_write_fence(ctx, va, 0, 0x80000000);
 		break;
-	case PIPE_QUERY_PIPELINE_STATISTICS:
-		va += query->result_size/2;
+	case PIPE_QUERY_PIPELINE_STATISTICS: {
+		unsigned sample_size = (query->result_size - 8) / 2;
+
+		va += sample_size;
 		radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 2, 0));
 		radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SAMPLE_PIPELINESTAT) | EVENT_INDEX(2));
 		radeon_emit(cs, va);
 		radeon_emit(cs, (va >> 32) & 0xFFFF);
+
+		va += sample_size;
+		r600_gfx_write_fence(ctx, va, 0, 0x80000000);
 		break;
+	}
 	default:
 		assert(0);
 	}
 	r600_emit_reloc(ctx, &ctx->gfx, query->buffer.buf, RADEON_USAGE_WRITE,
 			RADEON_PRIO_QUERY);
 }
 
 static void r600_query_hw_emit_stop(struct r600_common_context *ctx,
 				    struct r600_query_hw *query)
 {
@@ -825,20 +847,75 @@ bool r600_query_hw_end(struct r600_common_context *rctx,
 
 	if (!(query->flags & R600_QUERY_HW_FLAG_NO_START))
 		LIST_DELINIT(&query->list);
 
 	if (!query->buffer.buf)
 		return false;
 
 	return true;
 }
 
+static void r600_get_hw_query_params(struct r600_common_context *rctx,
+				     struct r600_query_hw *rquery, int index,
+				     struct r600_hw_query_params *params)
+{
+	params->pair_stride = 0;
+	params->pair_count = 1;
+
+	switch (rquery->b.type) {
+	case PIPE_QUERY_OCCLUSION_COUNTER:
+	case PIPE_QUERY_OCCLUSION_PREDICATE:
+		params->start_offset = 0;
+		params->end_offset = 8;
+		params->fence_offset = rctx->max_db * 16;
+		params->pair_stride = 16;
+		params->pair_count = rctx->max_db;
+		break;
+	case PIPE_QUERY_TIME_ELAPSED:
+		params->start_offset = 0;
+		params->end_offset = 8;
+		params->fence_offset = 16;
+		break;
+	case PIPE_QUERY_TIMESTAMP:
+		params->start_offset = 0;
+		params->end_offset = 0;
+		params->fence_offset = 8;
+		break;
+	case PIPE_QUERY_PRIMITIVES_EMITTED:
+		params->start_offset = 8;
+		params->end_offset = 24;
+		params->fence_offset = params->end_offset + 4;
+		break;
+	case PIPE_QUERY_PRIMITIVES_GENERATED:
+		params->start_offset = 0;
+		params->end_offset = 16;
+		params->fence_offset = params->end_offset + 4;
+		break;
+	case PIPE_QUERY_SO_STATISTICS:
+		params->start_offset = 8 - index * 8;
+		params->end_offset = 24 - index * 8;
+		params->fence_offset = params->end_offset + 4;
+		break;
+	case PIPE_QUERY_PIPELINE_STATISTICS:
+	{
+		/* Offsets apply to EG+ */
+		static const unsigned offsets[] = {56, 48, 24, 32, 40, 16, 8, 0, 64, 72, 80};
+		params->start_offset = offsets[index];
+		params->end_offset = 88 + offsets[index];
+		params->fence_offset = 2 * 88;
+		break;
+	}
+	default:
+		unreachable("r600_get_hw_query_params unsupported");
+	}
+}
+
 static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned end_index,
 				       bool test_status_bit)
 {
 	uint32_t *current_result = (uint32_t*)map;
 	uint64_t start, end;
 
 	start = (uint64_t)current_result[start_index] |
 		(uint64_t)current_result[start_index+1] << 32;
 	end = (uint64_t)current_result[end_index] |
 	      (uint64_t)current_result[end_index+1] << 32;
@@ -850,34 +927,32 @@ static unsigned r600_query_read_result(void *map, unsigned start_index, unsigned
 	return 0;
 }
 
 static void r600_query_hw_add_result(struct r600_common_context *ctx,
 				     struct r600_query_hw *query,
 				     void *buffer,
 				     union pipe_query_result *result)
 {
 	switch (query->b.type) {
 	case PIPE_QUERY_OCCLUSION_COUNTER: {
-		unsigned results_base = 0;
-		while (results_base != query->result_size) {
+		for (unsigned i = 0; i < ctx->max_db; ++i) {
+			unsigned results_base = i * 16;
 			result->u64 +=
 				r600_query_read_result(buffer + results_base, 0, 2, true);
-			results_base += 16;
 		}
 		break;
 	}
 	case PIPE_QUERY_OCCLUSION_PREDICATE: {
-		unsigned results_base = 0;
-		while (results_base != query->result_size) {
+		for (unsigned i = 0; i < ctx->max_db; ++i) {
+			unsigned results_base = i * 16;
 			result->b = result->b ||
 				r600_query_read_result(buffer + results_base, 0, 2, true) != 0;
-			results_base += 16;
 		}
 		break;
 	}
 	case PIPE_QUERY_TIME_ELAPSED:
 		result->u64 += r600_query_read_result(buffer, 0, 2, false);
 		break;
 	case PIPE_QUERY_TIMESTAMP:
 	{
 		uint32_t *current_result = (uint32_t*)buffer;
 		result->u64 = (uint64_t)current_result[0] |
-- 
2.7.4



More information about the mesa-dev mailing list