Mesa (master): r600: add occlusion query support

Wed Oct 28 19:58:58 UTC 2009

Module: Mesa
Branch: master
Commit: 660acd60d00366c97fbe7caf3995a75ce935a19b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=660acd60d00366c97fbe7caf3995a75ce935a19b

Author: Alex Deucher <alexdeucher at gmail.com>
Date:   Wed Oct 28 15:36:53 2009 -0400

r600: add occlusion query support

Based on initial patch from Stephan Schmid <stephan_2303 at gmx.de>.

Basic idea is to dump the zpass count at the start and end of the query
and subtract to get the total number of visible fragments.  HW writes
alternating qwords for up to 4 DBs.  On the first pass, we start at
buffer address + 0; on the second pass, we start at buffer address + 8
(bytes). The resulting buffer at the end of the query looks like:

qw[0]: db0 start
qw[1]: db0 end
...
qw[6]: db3 start
qw[7]: db3 end

The MSB of each qword is the valid bit and the lower 63 bits are
the zpass count for that DB.

OQ on RV740 is disabled at the moment as it only seems to report
results for half of its DBs.  This needs further investigation.

Signed-off-by: Alex Deucher <alexdeucher at gmail.com>

---

 src/mesa/drivers/dri/r600/r600_context.c      |   31 ++++++++++++--
 src/mesa/drivers/dri/r600/r700_chip.c         |   54 +++++++++++++++++++++++++
 src/mesa/drivers/dri/r600/r700_state.c        |    1 +
 src/mesa/drivers/dri/radeon/radeon_queryobj.c |   33 ++++++++++++---
 4 files changed, 109 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index c1bf76d..6de151d 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r600_cmdbuf.h"
 #include "r600_emit.h"
 #include "radeon_bocs_wrapper.h"
+#include "radeon_queryobj.h"
 
 #include "r700_state.h"
 #include "r700_ioctl.h"
@@ -73,11 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "utils.h"
 #include "xmlpool.h"		/* for symbolic values of enum-type options */
 
-/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
-int future_hw_tcl_on = 1;
-int hw_tcl_on = 1;
-
 #define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
 #define need_GL_ARB_point_parameters
 #define need_GL_ARB_vertex_program
 #define need_GL_EXT_blend_equation_separate
@@ -98,6 +96,7 @@ static const struct dri_extension card_extensions[] = {
   /* *INDENT-OFF* */
   {"GL_ARB_depth_texture",		NULL},
   {"GL_ARB_fragment_program",		NULL},
+  {"GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions},
   {"GL_ARB_multitexture",		NULL},
   {"GL_ARB_point_parameters",		GL_ARB_point_parameters_functions},
   {"GL_ARB_shadow",			NULL},
@@ -204,6 +203,24 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
 		context->radeon.Fallback &= ~bit;
 }
 
+static void r600_emit_query_finish(radeonContextPtr radeon)
+{
+	context_t *context = (context_t*) radeon;
+	BATCH_LOCALS(&context->radeon);
+
+	struct radeon_query_object *query = radeon->query.current;
+
+	BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+	R600_OUT_BATCH(ZPASS_DONE);
+	R600_OUT_BATCH(query->curr_offset + 8); /* hw writes qwords */
+	R600_OUT_BATCH(0x00000000);
+	R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	END_BATCH();
+	assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+	query->emitted_begin = GL_FALSE;
+}
+
 static void r600_init_vtbl(radeonContextPtr radeon)
 {
 	radeon->vtbl.get_lock = r600_get_lock;
@@ -212,6 +229,7 @@ static void r600_init_vtbl(radeonContextPtr radeon)
 	radeon->vtbl.swtcl_flush = NULL;
 	radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
 	radeon->vtbl.fallback = r600_fallback;
+	radeon->vtbl.emit_query_finish = r600_emit_query_finish;
 }
 
 static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
@@ -302,6 +320,10 @@ static void r600InitGLExtensions(GLcontext *ctx)
 	{
 		_mesa_enable_extension(ctx, "GL_EXT_texture_compression_s3tc");
 	}
+
+	/* XXX: RV740 only seems to report results from half of its DBs */
+	if (r600->radeon.radeonScreen->chip_family == CHIP_FAMILY_RV740)
+		_mesa_disable_extension(ctx, "GL_ARB_occlusion_query");
 }
 
 /* Create the device specific rendering context.
@@ -340,6 +362,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
 	r700InitStateFuncs(&functions);
 	r600InitTextureFuncs(&functions);
 	r700InitShaderFuncs(&functions);
+	radeonInitQueryObjFunctions(&functions);
 	r700InitIoctlFuncs(&functions);
 	radeonInitBufferObjectFuncs(&functions);
 
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 75b97c5..ace3d24 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -1100,6 +1100,32 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
 	COMMIT_BATCH();
 }
 
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+	BATCH_LOCALS(radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	/* clear the buffer */
+	radeon_bo_map(query->bo, GL_FALSE);
+	memset(query->bo->ptr, 0, 4 * 2 * sizeof(uint64_t)); /* 4 DBs, 2 qwords each */
+	radeon_bo_unmap(query->bo);
+
+	radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+				      query->bo,
+				      0, RADEON_GEM_DOMAIN_GTT);
+
+	BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+	R600_OUT_BATCH(ZPASS_DONE);
+	R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+	R600_OUT_BATCH(0x00000000);
+	R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	END_BATCH();
+	query->emitted_begin = GL_TRUE;
+}
+
 static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
 {
 	return atom->cmd_size;
@@ -1208,6 +1234,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
 	return count;
 }
 
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+	int count;
+
+	if (!query || query->emitted_begin)
+		count = 0;
+	else
+		count = atom->cmd_size;
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+	return count;
+}
+
 #define ALLOC_STATE( ATOM, CHK, SZ, EMIT )				\
 do {									\
 	context->atoms.ATOM.cmd_size = (SZ);				\
@@ -1221,6 +1261,19 @@ do {									\
 	insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
 } while (0)
 
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+	radeon->query.queryobj.cmd_size = (SZ);
+	radeon->query.queryobj.cmd = NULL;
+	radeon->query.queryobj.name = "queryobj";
+	radeon->query.queryobj.idx = 0;
+	radeon->query.queryobj.check = check_queryobj;
+	radeon->query.queryobj.dirty = GL_FALSE;
+	radeon->query.queryobj.emit = r700SendQueryBegin;
+	radeon->hw.max_state_size += (SZ);
+	insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
 void r600InitAtoms(context_t *context)
 {
 	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
@@ -1260,6 +1313,7 @@ void r600InitAtoms(context_t *context)
 	ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
 	ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
 	ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+	r600_init_query_stateobj(&context->radeon, 6 * 2);
 
 	context->radeon.hw.is_dirty = GL_TRUE;
 	context->radeon.hw.all_dirty = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 9a6a68a..0b67636 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1675,6 +1675,7 @@ void r700InitState(GLcontext * ctx) //-------------------
     SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
     SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
     SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+    SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
 
     r700->DB_ALPHA_TO_MASK.u32All = 0;
     SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
index b79d864..6539c36 100644
--- a/src/mesa/drivers/dri/radeon/radeon_queryobj.c
+++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
@@ -47,8 +47,8 @@ static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q)
 
 static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
 {
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
 	struct radeon_query_object *query = (struct radeon_query_object *)q;
-	uint32_t *result;
 	int i;
 
 	radeon_print(RADEON_STATE, RADEON_VERBOSE,
@@ -57,12 +57,33 @@ static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
 
 	radeon_bo_map(query->bo, GL_FALSE);
 
-	result = query->bo->ptr;
-
 	query->Base.Result = 0;
-	for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
-		query->Base.Result += result[i];
-		radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+	if (IS_R600_CLASS(radeon->radeonScreen)) {
+		/* ZPASS EVENT writes alternating qwords
+		 * At query start we set the start offset to 0 and
+		 * hw writes zpass start counts to qwords 0, 2, 4, 6.
+		 * At query end we set the start offset to 8 and
+		 * hw writes zpass end counts to qwords 1, 3, 5, 7.
+		 * then we substract. MSB is the valid bit.
+		 */
+		uint64_t *result = query->bo->ptr;
+		for (i = 0; i < 8; i += 2) {
+			uint64_t start = result[i];
+			uint64_t end = result[i + 1];
+			if ((start & 0x8000000000000000) && (end & 0x8000000000000000)) {
+				uint64_t query_count = end - start;
+				query->Base.Result += query_count;
+
+			}
+			radeon_print(RADEON_STATE, RADEON_TRACE,
+				     "%d start: %lx, end: %lx %ld\n", i, start, end, end - start);
+		}
+	} else {
+		uint32_t *result = query->bo->ptr;
+		for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+			query->Base.Result += result[i];
+			radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+		}
 	}
 
 	radeon_bo_unmap(query->bo);