[PATCH] Implement support for GPU timings of calls when proofiling with glretrace.

Carl Worth cworth at cworth.org
Tue Jul 31 15:00:27 PDT 2012


This augments the existing support for measuring CPU time of each call.

For OpenGL, the GPU time spent for each rendering call is measured
asynchronously via GL_TIME_ELAPSED queries (in what was previously
known as the GL_ARB_timer_query extension).

Stubs are provided here for D3D timers as well, (which I have no idea
how to implement).
---

I think this is usable as is. If I recall correctly, José wants no
regressions in functionality, but functionality that's new for one
backend (and not yet implemented for another) is OK. So I don't think
this should need to block on D3D support.

Do look over things and let me know if you see anything that should be
changed.

Thanks,

-Carl

 common/trace_model.hpp        |   15 +++++++++
 common/trace_parser_flags.cpp |    3 +-
 retrace/d3dretrace_main.cpp   |   28 +++++++++++++++++
 retrace/glretrace_main.cpp    |   57 ++++++++++++++++++++++++++++++++++
 retrace/retrace.cpp           |   59 +++++++++++++++++++++++++++++------
 retrace/retrace.hpp           |   23 ++++++++++++++
 retrace/retrace_main.cpp      |   69 +++++++++++++++++++++++++++++++++++++++++
 7 files changed, 243 insertions(+), 11 deletions(-)

diff --git a/common/trace_model.hpp b/common/trace_model.hpp
index bb8936e..6ecfbee 100644
--- a/common/trace_model.hpp
+++ b/common/trace_model.hpp
@@ -449,6 +449,21 @@ enum {
      * Whether this call is verbose (i.e., not usually interesting).
      */
     CALL_FLAG_VERBOSE                  = (1 << 7),
+
+    /**
+     * Whether this call begins a block of calls that collectively form
+     * a rendering operation. Note that such a call will *not* have the
+     * CALL_FLAG_RENDER flag set.
+     */
+    CALL_FLAG_RENDER_BEGIN	       = (1 << 8),
+
+    /**
+     * Whether this call ends a block of calls that collectively form
+     * a rendering operation. Note that such a call will also have the
+     * CALL_FLAG_RENDER flag set.
+     */
+    CALL_FLAG_RENDER_END	       = (1 << 9),
+
 };
 
 
diff --git a/common/trace_parser_flags.cpp b/common/trace_parser_flags.cpp
index 9704063..1726389 100644
--- a/common/trace_parser_flags.cpp
+++ b/common/trace_parser_flags.cpp
@@ -87,6 +87,7 @@ callFlagTable[] = {
     { "eglGetProcAddress",                             CALL_FLAG_NO_SIDE_EFFECTS | CALL_FLAG_VERBOSE },
     { "eglQueryString",                                CALL_FLAG_NO_SIDE_EFFECTS | CALL_FLAG_VERBOSE },
     { "eglSwapBuffers",                                CALL_FLAG_SWAPBUFFERS },
+    { "glBegin",                                       CALL_FLAG_RENDER_BEGIN },
     { "glBindFramebuffer",                             CALL_FLAG_SWAP_RENDERTARGET },
     { "glBindFramebufferEXT",                          CALL_FLAG_SWAP_RENDERTARGET },
     { "glBindFramebufferOES",                          CALL_FLAG_SWAP_RENDERTARGET },
@@ -116,7 +117,7 @@ callFlagTable[] = {
     { "glDrawRangeElements",                           CALL_FLAG_RENDER },
     { "glDrawRangeElementsBaseVertex",                 CALL_FLAG_RENDER },
     { "glDrawRangeElementsEXT",                        CALL_FLAG_RENDER },
-    { "glEnd",                                         CALL_FLAG_RENDER },
+    { "glEnd",                                         CALL_FLAG_RENDER | CALL_FLAG_RENDER_END },
     { "glFrameTerminatorGREMEDY",                      CALL_FLAG_END_FRAME },
     { "glGetError",                                    CALL_FLAG_NO_SIDE_EFFECTS }, // verbose will be set later for GL_NO_ERROR 
     { "glGetString",                                   CALL_FLAG_NO_SIDE_EFFECTS | CALL_FLAG_VERBOSE },
diff --git a/retrace/d3dretrace_main.cpp b/retrace/d3dretrace_main.cpp
index 40d53ea..71856c2 100644
--- a/retrace/d3dretrace_main.cpp
+++ b/retrace/d3dretrace_main.cpp
@@ -89,6 +89,34 @@ void
 retrace::waitForInput(void) {
 }
 
+int
+timerGetCookie(unsigned *cookie) {
+    /* FIXME: Not yet implemented. */
+    return 1;
+}
+
+void
+timerStart(unsigned cookie) {
+    /* FIXME: Not yet implemented. */
+}
+
+void
+timerStop(unsigned cookie) {
+    /* FIXME: Not yet implemented. */
+}
+
+int
+timerResultAvailable(unsigned cookie) {
+    /* FIXME: Not yet implemented. */
+    return 0;
+}
+
+float
+timerGetResultUsec(unsigned cookie) {
+    /* FIXME: Not yet implemented. */
+    return 0.0;
+}
+
 void
 retrace::cleanUp(void) {
 }
diff --git a/retrace/glretrace_main.cpp b/retrace/glretrace_main.cpp
index 49be505..f151db5 100644
--- a/retrace/glretrace_main.cpp
+++ b/retrace/glretrace_main.cpp
@@ -151,6 +151,63 @@ retrace::waitForInput(void) {
     }
 }
 
+/* Fail with non-zero return value for the following cases:
+ *
+ * 	No current OpenGL context
+ */
+int
+retrace::timerGetCookie(unsigned *cookie) {
+
+    if (!glretrace::currentContext) {
+        return 1;
+    }
+
+    glGenQueries(1, cookie);
+
+    if (*cookie == 0) {
+        std::cerr << "glGenQueries returned unexpected (invalid) value of 0. Disabling one query.\n";
+        return 1;
+    }
+
+    return 0;
+}
+
+void
+retrace::timerStart(unsigned cookie) {
+    glBeginQuery(GL_TIME_ELAPSED, cookie);
+}
+
+void
+retrace::timerStop(unsigned cookie) {
+    glEndQuery(GL_TIME_ELAPSED);
+}
+
+int
+retrace::timerResultAvailable(unsigned cookie) {
+    int available;
+
+    glGetQueryObjectiv(cookie, GL_QUERY_RESULT_AVAILABLE, &available);
+
+    return available;
+}
+
+float
+retrace::timerGetResultUsec(unsigned cookie) {
+    GLuint64 elapsed;
+
+    glGetQueryObjectui64v(cookie, GL_QUERY_RESULT, &elapsed);
+
+    glDeleteQueries(1, &cookie);
+
+    if (elapsed == (GLuint64) -1) {
+        std::cout << "Ignoring invalid elapsed time value of (GLuint64) -1 from cookie " << cookie << "\n";
+        elapsed = 0;
+    }
+
+    /* GL timer result is in nanoseconds */
+    return (float) elapsed / 1000.;
+}
+
 void
 retrace::cleanUp(void) {
     glws::cleanup();
diff --git a/retrace/retrace.cpp b/retrace/retrace.cpp
index 25c3f64..fe3757c 100644
--- a/retrace/retrace.cpp
+++ b/retrace/retrace.cpp
@@ -36,6 +36,7 @@ namespace retrace {
 
 
 static bool call_dumped = false;
+static unsigned gpu_timer_cookie = 0;
 
 
 static void dumpCall(trace::Call &call) {
@@ -112,17 +113,55 @@ void Retracer::retrace(trace::Call &call) {
     assert(callback);
     assert(callbacks[id] == callback);
 
-    if (retrace::profiling) {
-        long long startTime = os::getTime();
+    if (retrace::profiling && callback != &retrace::ignore) {
+        long long cpuStartTime = os::getTime();
+        bool is_render_call = false;
+        bool gpu_timer_result_available = false;
+
+        /* Is this call something that performs rendering? */
+        if ((call.flags & trace::CALL_FLAG_RENDER ||
+             call.flags & trace::CALL_FLAG_RENDER_BEGIN ||
+             call.flags & trace::CALL_FLAG_RENDER_END ||
+             call.flags & trace::CALL_FLAG_SWAP_RENDERTARGET ||
+             call.flags & trace::CALL_FLAG_END_FRAME)) {
+
+            is_render_call = true;
+        }
+
+        /* Start timer only at the beginning of render calls,
+         * (notably, not at calls like glEnd that are the end of a
+         * block of render calls to be timed as a single unit). */
+        if (is_render_call && !(call.flags & trace::CALL_FLAG_RENDER_END)) {
+
+            if (timerGetCookie(&gpu_timer_cookie) == 0) {
+                timerStart(gpu_timer_cookie);
+            } else {
+                gpu_timer_cookie = 0;
+            }
+        }
+
         callback(call);
-        long long stopTime = os::getTime();
-        float timeInterval = (stopTime - startTime) * (1.0E6 / os::timeFrequency);
-
-        std::cout
-            << call.no << " "
-            << "[" << timeInterval << " usec] "
-        ;
-        trace::dump(call, std::cout, trace::DUMP_FLAG_NO_CALL_NO | trace::DUMP_FLAG_NO_COLOR);
+
+        /* Stop time only at the end of render calls, (notably, not at
+         * calls like glBegin that are at the beginning of a block of
+         * render calls to be timed as a single unit). */
+        if (is_render_call && !(call.flags & trace::CALL_FLAG_RENDER_BEGIN)) {
+
+            /* Don't call timerStop if timerGetCookie failed somehow. */
+            if (gpu_timer_cookie) {
+                timerStop(gpu_timer_cookie);
+                gpu_timer_result_available = true;
+            }
+        }
+
+        long long cpuStopTime = os::getTime();
+        float cpuTimeInterval = (cpuStopTime - cpuStartTime) * (1.0E6 / os::timeFrequency);
+
+        if (gpu_timer_result_available) {
+            addProfileResult(call.no, cpuTimeInterval, gpu_timer_cookie);
+        } else {
+            addProfileResult(call.no, cpuTimeInterval);
+        }
     } else {
         callback(call);
     }
diff --git a/retrace/retrace.hpp b/retrace/retrace.hpp
index a019de7..ae30d8c 100644
--- a/retrace/retrace.hpp
+++ b/retrace/retrace.hpp
@@ -212,6 +212,12 @@ addCallbacks(retrace::Retracer &retracer);
 void
 frameComplete(trace::Call &call);
 
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed, unsigned gpuTimerCookie);
+
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed);
+
 image::Image *
 getSnapshot(void);
 
@@ -224,6 +230,23 @@ flushRendering(void);
 void
 waitForInput(void);
 
+/* Returns: 0 for success, any other valid for failure (in which case
+ * the returned cookie value should not be used). */
+int
+timerGetCookie(unsigned *cookie);
+
+void
+timerStart(unsigned cookie);
+
+void
+timerStop(unsigned cookie);
+
+int
+timerResultAvailable(unsigned cookie);
+
+float
+timerGetResultUsec(unsigned cookie);
+
 void
 cleanUp(void);
 
diff --git a/retrace/retrace_main.cpp b/retrace/retrace_main.cpp
index f7fb711..c8a73bf 100644
--- a/retrace/retrace_main.cpp
+++ b/retrace/retrace_main.cpp
@@ -63,6 +63,14 @@ bool coreProfile = false;
 
 static unsigned frameNo = 0;
 
+struct ProfileResult {
+    unsigned callNo;
+    float cpuTimeElapsed;
+    int gpuTimerCookie;
+    int gpuTimerAvailable;
+};
+
+std::list<ProfileResult> profileResults;
 
 void
 frameComplete(trace::Call &call) {
@@ -115,6 +123,61 @@ takeSnapshot(unsigned call_no) {
     return;
 }
 
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed, unsigned gpuTimerCookie) {
+    ProfileResult result;
+
+    result.callNo = callNo;
+    result.cpuTimeElapsed = cpuTimeElapsed;
+    result.gpuTimerCookie = gpuTimerCookie;
+    result.gpuTimerAvailable = 1;
+
+    profileResults.push_back(result);
+}
+
+void
+addProfileResult(unsigned callNo, float cpuTimeElapsed) {
+    ProfileResult result;
+
+    result.callNo = callNo;
+    result.cpuTimeElapsed = cpuTimeElapsed;
+    result.gpuTimerCookie = 0;
+    result.gpuTimerAvailable = 0;
+
+    profileResults.push_back(result);
+}
+
+static void
+dumpAvailableProfileResults(void) {
+
+    static bool firstResult = true;
+
+    while (! profileResults.empty()) {
+        ProfileResult &front = profileResults.front();
+
+        if (front.gpuTimerAvailable && ! timerResultAvailable(front.gpuTimerCookie)) {
+            break;
+        }
+
+        if (firstResult) {
+            std::cout << "# Call CPU(usec) GPU(usec)\n";
+            firstResult = false;
+        }
+
+        std::cout << front.callNo << " " << front.cpuTimeElapsed << " ";
+
+        if (front.gpuTimerAvailable) {
+            float gpuTimeElapsed = timerGetResultUsec (front.gpuTimerCookie);
+            std::cout << gpuTimeElapsed;
+        } else {
+            std::cout << "0";
+        }
+
+        std::cout << "\n";
+
+        profileResults.pop_front();
+    }
+}
 
 static void
 mainLoop() {
@@ -155,6 +218,10 @@ mainLoop() {
             takeSnapshot(call->no);
         }
 
+        if (swapRenderTarget) {
+            dumpAvailableProfileResults();
+        }
+
         if (call->no >= dumpStateCallNo &&
             dumpState(std::cout)) {
             exit(0);
@@ -166,6 +233,8 @@ mainLoop() {
     // Reached the end of trace
     flushRendering();
 
+    dumpAvailableProfileResults();
+
     long long endTime = os::getTime();
     float timeInterval = (endTime - startTime) * (1.0 / os::timeFrequency);
 
-- 
1.7.10



More information about the apitrace mailing list