[PATCH] Implement support for GPU timings of calls when proofiling with glretrace.
Carl Worth
cworth at cworth.org
Tue Jul 31 15:00:27 PDT 2012
This augments the existing support for measuring CPU time of each call.
For OpenGL, the GPU time spent for each rendering call is measured
asynchronously via GL_TIME_ELAPSED queries (in what was previously
known as the GL_ARB_timer_query extension).
Stubs are provided here for D3D timers as well, (which I have no idea
how to implement).
---
I think this is usable as is. If I recall correctly, José wants no
regressions in functionality, but functionality that's new for one
backend (and not yet implemented for another) is OK. So I don't think
this should need to block on D3D support.
Do look over things and let me know if you see anything that should be
changed.
Thanks,
-Carl
common/trace_model.hpp | 15 +++++++++
common/trace_parser_flags.cpp | 3 +-
retrace/d3dretrace_main.cpp | 28 +++++++++++++++++
retrace/glretrace_main.cpp | 57 ++++++++++++++++++++++++++++++++++
retrace/retrace.cpp | 59 +++++++++++++++++++++++++++++------
retrace/retrace.hpp | 23 ++++++++++++++
retrace/retrace_main.cpp | 69 +++++++++++++++++++++++++++++++++++++++++
7 files changed, 243 insertions(+), 11 deletions(-)
diff --git a/common/trace_model.hpp b/common/trace_model.hpp
index bb8936e..6ecfbee 100644
--- a/common/trace_model.hpp
+++ b/common/trace_model.hpp
@@ -449,6 +449,21 @@ enum {
* Whether this call is verbose (i.e., not usually interesting).
*/
CALL_FLAG_VERBOSE = (1 << 7),
+
+ /**
+ * Whether this call begins a block of calls that collectively form
+ * a rendering operation. Note that such a call will *not* have the
+ * CALL_FLAG_RENDER flag set.
+ */
+ CALL_FLAG_RENDER_BEGIN = (1 << 8),
+
+ /**
+ * Whether this call ends a block of calls that collectively form
+ * a rendering operation. Note that such a call will also have the
+ * CALL_FLAG_RENDER flag set.
+ */
+ CALL_FLAG_RENDER_END = (1 << 9),
+
};
diff --git a/common/trace_parser_flags.cpp b/common/trace_parser_flags.cpp
index 9704063..1726389 100644
--- a/common/trace_parser_flags.cpp
+++ b/common/trace_parser_flags.cpp
@@ -87,6 +87,7 @@ callFlagTable[] = {
{ "eglGetProcAddress", CALL_FLAG_NO_SIDE_EFFECTS | CALL_FLAG_VERBOSE },
{ "eglQueryString", CALL_FLAG_NO_SIDE_EFFECTS | CALL_FLAG_VERBOSE },
{ "eglSwapBuffers", CALL_FLAG_SWAPBUFFERS },
+ { "glBegin", CALL_FLAG_RENDER_BEGIN },
{ "glBindFramebuffer", CALL_FLAG_SWAP_RENDERTARGET },
{ "glBindFramebufferEXT", CALL_FLAG_SWAP_RENDERTARGET },
{ "glBindFramebufferOES", CALL_FLAG_SWAP_RENDERTARGET },
@@ -116,7 +117,7 @@ callFlagTable[] = {
{ "glDrawRangeElements", CALL_FLAG_RENDER },
{ "glDrawRangeElementsBaseVertex", CALL_FLAG_RENDER },
{ "glDrawRangeElementsEXT", CALL_FLAG_RENDER },
- { "glEnd", CALL_FLAG_RENDER },
+ { "glEnd", CALL_FLAG_RENDER | CALL_FLAG_RENDER_END },
{ "glFrameTerminatorGREMEDY", CALL_FLAG_END_FRAME },
{ "glGetError", CALL_FLAG_NO_SIDE_EFFECTS }, // verbose will be set later for GL_NO_ERROR
{ "glGetString", CALL_FLAG_NO_SIDE_EFFECTS | CALL_FLAG_VERBOSE },
diff --git a/retrace/d3dretrace_main.cpp b/retrace/d3dretrace_main.cpp
index 40d53ea..71856c2 100644
--- a/retrace/d3dretrace_main.cpp
+++ b/retrace/d3dretrace_main.cpp
@@ -89,6 +89,34 @@ void
retrace::waitForInput(void) {
}
+int
+timerGetCookie(unsigned *cookie) {
+ /* FIXME: Not yet implemented. */
+ return 1;
+}
+
+void
+timerStart(unsigned cookie) {
+ /* FIXME: Not yet implemented. */
+}
+
+void
+timerStop(unsigned cookie) {
+ /* FIXME: Not yet implemented. */
+}
+
+int
+timerResultAvailable(unsigned cookie) {
+ /* FIXME: Not yet implemented. */
+ return 0;
+}
+
+float
+timerGetResultUsec(unsigned cookie) {
+ /* FIXME: Not yet implemented. */
+ return 0.0;
+}
+
void
retrace::cleanUp(void) {
}
diff --git a/retrace/glretrace_main.cpp b/retrace/glretrace_main.cpp
index 49be505..f151db5 100644
--- a/retrace/glretrace_main.cpp
+++ b/retrace/glretrace_main.cpp
@@ -151,6 +151,63 @@ retrace::waitForInput(void) {
}
}
+/* Fail with non-zero return value for the following cases:
+ *
+ * No current OpenGL context
+ */
+int
+retrace::timerGetCookie(unsigned *cookie) {
+
+ if (!glretrace::currentContext) {
+ return 1;
+ }
+
+ glGenQueries(1, cookie);
+
+ if (*cookie == 0) {
+ std::cerr << "glGenQueries returned unexpected (invalid) value of 0. Disabling one query.\n";
+ return 1;
+ }
+
+ return 0;
+}
+
+void
+retrace::timerStart(unsigned cookie) {
+ glBeginQuery(GL_TIME_ELAPSED, cookie);
+}
+
+void
+retrace::timerStop(unsigned cookie) {
+ glEndQuery(GL_TIME_ELAPSED);
+}
+
+int
+retrace::timerResultAvailable(unsigned cookie) {
+ int available;
+
+ glGetQueryObjectiv(cookie, GL_QUERY_RESULT_AVAILABLE, &available);
+
+ return available;
+}
+
+float
+retrace::timerGetResultUsec(unsigned cookie) {
+ GLuint64 elapsed;
+
+ glGetQueryObjectui64v(cookie, GL_QUERY_RESULT, &elapsed);
+
+ glDeleteQueries(1, &cookie);
+
+ if (elapsed == (GLuint64) -1) {
+ std::cout << "Ignoring invalid elapsed time value of (GLuint64) -1 from cookie " << cookie << "\n";
+ elapsed = 0;
+ }
+
+ /* GL timer result is in nanoseconds */
+ return (float) elapsed / 1000.;
+}
+
void
retrace::cleanUp(void) {
glws::cleanup();
diff --git a/retrace/retrace.cpp b/retrace/retrace.cpp
index 25c3f64..fe3757c 100644
--- a/retrace/retrace.cpp
+++ b/retrace/retrace.cpp
@@ -36,6 +36,7 @@ namespace retrace {
static bool call_dumped = false;
+static unsigned gpu_timer_cookie = 0;
static void dumpCall(trace::Call &call) {
@@ -112,17 +113,55 @@ void Retracer::retrace(trace::Call &call) {
assert(callback);
assert(callbacks[id] == callback);
- if (retrace::profiling) {
- long long startTime = os::getTime();
+ if (retrace::profiling && callback != &retrace::ignore) {
+ long long cpuStartTime = os::getTime();
+ bool is_render_call = false;
+ bool gpu_timer_result_available = false;
+
+ /* Is this call something that performs rendering? */
+ if ((call.flags & trace::CALL_FLAG_RENDER ||
+ call.flags & trace::CALL_FLAG_RENDER_BEGIN ||
+ call.flags & trace::CALL_FLAG_RENDER_END ||
+ call.flags & trace::CALL_FLAG_SWAP_RENDERTARGET ||
+ call.flags & trace::CALL_FLAG_END_FRAME)) {
+
+ is_render_call = true;
+ }
+
+ /* Start timer only at the beginning of render calls,
+ * (notably, not at calls like glEnd that are the end of a
+ * block of render calls to be timed as a single unit). */
+ if (is_render_call && !(call.flags & trace::CALL_FLAG_RENDER_END)) {
+
+ if (timerGetCookie(&gpu_timer_cookie) == 0) {
+ timerStart(gpu_timer_cookie);
+ } else {
+ gpu_timer_cookie = 0;
+ }
+ }
+
callback(call);
- long long stopTime = os::getTime();
- float timeInterval = (stopTime - startTime) * (1.0E6 / os::timeFrequency);
-
- std::cout
- << call.no << " "
- << "[" << timeInterval << " usec] "
- ;
- trace::dump(call, std::cout, trace::DUMP_FLAG_NO_CALL_NO | trace::DUMP_FLAG_NO_COLOR);
+
+ /* Stop time only at the end of render calls, (notably, not at
+ * calls like glBegin that are at the beginning of a block of
+ * render calls to be timed as a single unit). */
+ if (is_render_call && !(call.flags & trace::CALL_FLAG_RENDER_BEGIN)) {
+
+ /* Don't call timerStop if timerGetCookie failed somehow. */
+ if (gpu_timer_cookie) {
+ timerStop(gpu_timer_cookie);
+ gpu_timer_result_available = true;
+ }
+ }
+
+ long long cpuStopTime = os::getTime();
+ float cpuTimeInterval = (cpuStopTime - cpuStartTime) * (1.0E6 / os::timeFrequency);
+
+ if (gpu_timer_result_available) {
+ addProfileResult(call.no, cpuTimeInterval, gpu_timer_cookie);
+ } else {
+ addProfileResult(call.no, cpuTimeInterval);
+ }
} else {
callback(call);
}
diff --git a/retrace/retrace.hpp b/retrace/retrace.hpp
index a019de7..ae30d8c 100644
--- a/retrace/retrace.hpp
+++ b/retrace/retrace.hpp
@@ -212,6 +212,12 @@ addCallbacks(retrace::Retracer &retracer);
void
frameComplete(trace::Call &call);
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed, unsigned gpuTimerCookie);
+
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed);
+
image::Image *
getSnapshot(void);
@@ -224,6 +230,23 @@ flushRendering(void);
void
waitForInput(void);
+/* Returns: 0 for success, any other valid for failure (in which case
+ * the returned cookie value should not be used). */
+int
+timerGetCookie(unsigned *cookie);
+
+void
+timerStart(unsigned cookie);
+
+void
+timerStop(unsigned cookie);
+
+int
+timerResultAvailable(unsigned cookie);
+
+float
+timerGetResultUsec(unsigned cookie);
+
void
cleanUp(void);
diff --git a/retrace/retrace_main.cpp b/retrace/retrace_main.cpp
index f7fb711..c8a73bf 100644
--- a/retrace/retrace_main.cpp
+++ b/retrace/retrace_main.cpp
@@ -63,6 +63,14 @@ bool coreProfile = false;
static unsigned frameNo = 0;
+struct ProfileResult {
+ unsigned callNo;
+ float cpuTimeElapsed;
+ int gpuTimerCookie;
+ int gpuTimerAvailable;
+};
+
+std::list<ProfileResult> profileResults;
void
frameComplete(trace::Call &call) {
@@ -115,6 +123,61 @@ takeSnapshot(unsigned call_no) {
return;
}
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed, unsigned gpuTimerCookie) {
+ ProfileResult result;
+
+ result.callNo = callNo;
+ result.cpuTimeElapsed = cpuTimeElapsed;
+ result.gpuTimerCookie = gpuTimerCookie;
+ result.gpuTimerAvailable = 1;
+
+ profileResults.push_back(result);
+}
+
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed) {
+ ProfileResult result;
+
+ result.callNo = callNo;
+ result.cpuTimeElapsed = cpuTimeElapsed;
+ result.gpuTimerCookie = 0;
+ result.gpuTimerAvailable = 0;
+
+ profileResults.push_back(result);
+}
+
+static void
+dumpAvailableProfileResults(void) {
+
+ static bool firstResult = true;
+
+ while (! profileResults.empty()) {
+ ProfileResult &front = profileResults.front();
+
+ if (front.gpuTimerAvailable && ! timerResultAvailable(front.gpuTimerCookie)) {
+ break;
+ }
+
+ if (firstResult) {
+ std::cout << "# Call CPU(usec) GPU(usec)\n";
+ firstResult = false;
+ }
+
+ std::cout << front.callNo << " " << front.cpuTimeElapsed << " ";
+
+ if (front.gpuTimerAvailable) {
+ float gpuTimeElapsed = timerGetResultUsec (front.gpuTimerCookie);
+ std::cout << gpuTimeElapsed;
+ } else {
+ std::cout << "0";
+ }
+
+ std::cout << "\n";
+
+ profileResults.pop_front();
+ }
+}
static void
mainLoop() {
@@ -155,6 +218,10 @@ mainLoop() {
takeSnapshot(call->no);
}
+ if (swapRenderTarget) {
+ dumpAvailableProfileResults();
+ }
+
if (call->no >= dumpStateCallNo &&
dumpState(std::cout)) {
exit(0);
@@ -166,6 +233,8 @@ mainLoop() {
// Reached the end of trace
flushRendering();
+ dumpAvailableProfileResults();
+
long long endTime = os::getTime();
float timeInterval = (endTime - startTime) * (1.0 / os::timeFrequency);
--
1.7.10
More information about the apitrace
mailing list