[PATCH 1/2] Add a basic support of logging cpu and gpu time

Liu Aleaxander Aleaxander at gmail.com
Tue Aug 23 18:55:03 PDT 2011


Logging GPU time is a kind of trick, since you can't get the gpu time
immediately after your opengl call. Thus we need to move it at the end of
frame to reduce the time query overhead.

Well, that would introduce another problem, as the trace_writer doesn't
know the real opengl call for the specified gpu time. Thus I introduce
another log file for time. Please see comments inline for more info.

Signed-off-by: Liu Aleaxander <Aleaxander at gmail.com>
---
 gltrace.py       |   10 ++++++++++
 glxtrace.py      |   27 +++++++++++++++++++++++++++
 stdapi.py        |    3 ++-
 trace.py         |   17 +++++++++++++++++
 trace_model.cpp  |    9 +++++++++
 trace_model.hpp  |    6 +++++-
 trace_parser.cpp |   23 ++++++++++++++++++++++-
 trace_parser.hpp |    3 +++
 trace_writer.cpp |   36 ++++++++++++++++++++++++++++++++++++
 trace_writer.hpp |    9 +++++++++
 10 files changed, 140 insertions(+), 3 deletions(-)

diff --git a/gltrace.py b/gltrace.py
index af57142..ca6c44c 100644
--- a/gltrace.py
+++ b/gltrace.py
@@ -463,6 +463,8 @@ class GlTracer(Tracer):
             print '        GLint size = 0;'
             print '        GLenum type = 0;'
             print '        GLchar name[256];'
+            print '        gpu_queries[query_index] = cpu_time[query_index] = 0;'
+            print '        query_index++;'
             # TODO: Use ACTIVE_ATTRIBUTE_MAX_LENGTH instead of 256
             print '        __glGetActiveAttrib(program, attrib, sizeof name, NULL, &size, &type, name);'
             print "        if (name[0] != 'g' || name[1] != 'l' || name[2] != '_') {"
@@ -481,6 +483,8 @@ class GlTracer(Tracer):
             print '        GLint size = 0;'
             print '        GLenum type = 0;'
             print '        GLcharARB name[256];'
+            print '        gpu_queries[query_index] = cpu_time[query_index] = 0;'
+            print '        query_index++;'
             # TODO: Use ACTIVE_ATTRIBUTE_MAX_LENGTH instead of 256
             print '        __glGetActiveAttribARB(programObj, attrib, sizeof name, NULL, &size, &type, name);'
             print "        if (name[0] != 'g' || name[1] != 'l' || name[2] != '_') {"
@@ -536,6 +540,8 @@ class GlTracer(Tracer):
         print '        __writer.endEnter();'
         print '        __writer.beginLeave(__call);'
         print '        __writer.endLeave();'
+	print '        gpu_queries[query_index] = cpu_time[query_index] = 0;'
+	print '        query_index++;'
        
     buffer_targets = [
         'ARRAY_BUFFER',
@@ -731,6 +737,8 @@ class GlTracer(Tracer):
             print '            __writer.endEnter();'
             print '            __writer.beginLeave(__call);'
             print '            __writer.endLeave();'
+            print '            gpu_queries[query_index] = cpu_time[query_index] = 0;'
+            print '            query_index++;'
             print '        }'
             print '    }'
             self.array_epilog(api, uppercase_name)
@@ -804,6 +812,8 @@ class GlTracer(Tracer):
             print '                    __writer.endEnter();'
             print '                    __writer.beginLeave(__call);'
             print '                    __writer.endLeave();'
+            print '                   gpu_queries[query_index] = cpu_time[query_index] = 0;'
+            print '                   query_index++;'
             print '                }'
             print '            }'
             print '        }'
diff --git a/glxtrace.py b/glxtrace.py
index 6b5d786..24a76c5 100644
--- a/glxtrace.py
+++ b/glxtrace.py
@@ -47,6 +47,24 @@ class GlxTracer(GlTracer):
         # Take snapshots
         if function.name == 'glXSwapBuffers':
             print '    glsnapshot::snapshot(__call);'
+	    print '    Trace::LocalWriter::write_time tm;'
+	    print '    if (last_gpu_query > 0) {'
+	    print '        int done = 0;'
+	    print '        while (!done)'
+	    print '            __glGetQueryObjectiv(last_gpu_query, GL_QUERY_RESULT_AVAILABLE, &done);'
+	    print '    }'
+	    print '    for (int i = 0; i < query_index; i++) {'
+	    print '        GLuint gpu_time;'
+	    print '        if (gpu_queries[i]) {'
+	    print '            __glGetQueryObjectuiv(gpu_queries[i], GL_QUERY_RESULT, &gpu_time);'
+	    print '            __glDeleteQueries(1, &gpu_queries[i]);'
+	    print '        } else'
+	    print '            gpu_time = 0;'
+	    print '        tm.cpu_time = cpu_time[i];'
+	    print '        tm.gpu_time = gpu_time / 1000.0;'
+	    print '        __writer.writeTime(&tm);'
+	    print '   }'
+	    print '   query_index = 0;'
         if function.name in ('glFinish', 'glFlush'):
             print '    GLint __draw_framebuffer = 0;'
             print '    __glGetIntegerv(GL_DRAW_FRAMEBUFFER_BINDING, &__draw_framebuffer);'
@@ -85,6 +103,15 @@ if __name__ == '__main__':
     print '#include "glsize.hpp"'
     print '#include "glsnapshot.hpp"'
     print
+    print '#include <sys/time.h>'
+    print
+    print '// I hope this would be enough'
+    print '#define MAX_QUERIES		8192'
+    print 'static GLuint gpu_queries[MAX_QUERIES];'
+    print 'static double cpu_time[MAX_QUERIES];'
+    print 'static int query_index = 0;'
+    print 'static int last_gpu_query = 0;'
+    print
     print 'static __GLXextFuncPtr __unwrap_proc_addr(const GLubyte * procName, __GLXextFuncPtr procPtr);'
     print
 
diff --git a/stdapi.py b/stdapi.py
index 8852345..57049d2 100644
--- a/stdapi.py
+++ b/stdapi.py
@@ -215,7 +215,7 @@ class Function:
 
     __id = 0
 
-    def __init__(self, type, name, args, call = '', fail = None, sideeffects=True):
+    def __init__(self, type, name, args, call = '', fail = None, sideeffects=True, loggputime=False):
         self.id = Function.__id
         Function.__id += 1
 
@@ -239,6 +239,7 @@ class Function:
         self.call = call
         self.fail = fail
         self.sideeffects = sideeffects
+        self.loggputime = loggputime
 
     def prototype(self, name=None):
         if name is not None:
diff --git a/trace.py b/trace.py
index 90cc426..62e90a0 100644
--- a/trace.py
+++ b/trace.py
@@ -347,13 +347,30 @@ class Tracer:
         print
 
     def trace_function_impl_body(self, function):
+        print '    long long t0, t1;'
         print '    unsigned __call = __writer.beginEnter(&__%s_sig);' % (function.name,)
         for arg in function.args:
             if not arg.output:
                 self.unwrap_arg(function, arg)
                 self.dump_arg(function, arg)
         print '    __writer.endEnter();'
+	print '    if (query_index < MAX_QUERIES) {'
+	if function.loggputime:
+	    print '        __glGenQueries(1, &gpu_queries[query_index]);'
+	    print '        __glBeginQuery(GL_TIME_ELAPSED, gpu_queries[query_index]);'
+	    print '        last_gpu_query = gpu_queries[query_index];'
+	else:
+	    print '        gpu_queries[query_index] = 0;'
+	print '        t0 = OS::GetTime();'
+	print '    }'
         self.dispatch_function(function)
+	print '    if (query_index < MAX_QUERIES) {'
+	print '        t1 = OS::GetTime();'
+	if function.loggputime:
+	    print '        __glEndQuery(GL_TIME_ELAPSED);'
+	print '        cpu_time[query_index] = (double)(t1 - t0);'
+	print '        query_index++;'
+	print '    }'
         print '    __writer.beginLeave(__call);'
         for arg in function.args:
             if arg.output:
diff --git a/trace_model.cpp b/trace_model.cpp
index 79bb757..b29601e 100644
--- a/trace_model.cpp
+++ b/trace_model.cpp
@@ -24,6 +24,7 @@
  **************************************************************************/
 
 
+#include <iomanip>
 #include "formatter.hpp"
 #include "trace_model.hpp"
 
@@ -329,6 +330,14 @@ public:
             os << " = ";
             _visit(call->ret);
         }
+
+	// print out the time just when cpu time bigger than 0.005 ms or has gpu time
+	if (call->cpu_time > 5 || call->gpu_time > 0) {
+		os << "  { cpu time: " << std::setprecision(3) << call->cpu_time / 1000 << "ms";
+		if (call->gpu_time > 0)
+			os << ", gpu time: " << std::setprecision(3) << call->gpu_time / 1000 << "ms";
+		os << "}";
+	}
         os << "\n";
     }
 };
diff --git a/trace_model.hpp b/trace_model.hpp
index 5c51bba..06c885e 100644
--- a/trace_model.hpp
+++ b/trace_model.hpp
@@ -319,11 +319,15 @@ class Call
 {
 public:
     unsigned no;
+    double cpu_time;
+    double gpu_time;
     const FunctionSig *sig;
     std::vector<Value *> args;
     Value *ret;
 
-    Call(FunctionSig *_sig) : sig(_sig), args(_sig->num_args), ret(0) { }
+    Call(FunctionSig *_sig) : sig(_sig), args(_sig->num_args), ret(0){
+         cpu_time = gpu_time = -1.0;
+    }
     ~Call();
 
     inline const char * name(void) const {
diff --git a/trace_parser.cpp b/trace_parser.cpp
index 1d3c08a..9039a52 100644
--- a/trace_parser.cpp
+++ b/trace_parser.cpp
@@ -27,9 +27,11 @@
 
 #include <assert.h>
 #include <stdlib.h>
+#include <string.h>
 
 #include <zlib.h>
 
+#include "os.hpp"
 #include "trace_parser.hpp"
 
 
@@ -52,6 +54,7 @@ Parser::~Parser() {
 
 
 bool Parser::open(const char *filename) {
+    char TimeFile[PATH_MAX];
     file = gzopen(filename, "rb");
     if (!file) {
         return false;
@@ -63,6 +66,10 @@ bool Parser::open(const char *filename) {
         return false;
     }
 
+    strcpy(TimeFile, filename);
+    strcat(TimeFile, ".time");
+    time_file = fopen(TimeFile, "r");
+
     return true;
 }
 
@@ -98,6 +105,7 @@ void Parser::close(void) {
 
 
 Call *Parser::parse_call(void) {
+    Call *ret;
     do {
         int c = read_byte();
         switch(c) {
@@ -105,7 +113,9 @@ Call *Parser::parse_call(void) {
             parse_enter();
             break;
         case Trace::EVENT_LEAVE:
-            return parse_leave();
+            ret = parse_leave();
+            read_time(&ret->cpu_time, &ret->gpu_time);
+            return ret;
         default:
             std::cerr << "error: unknown event " << c << "\n";
             exit(1);
@@ -452,5 +462,16 @@ inline int Parser::read_byte(void) {
     return c;
 }
 
+void Parser::read_time(double *cpu_time, double *gpu_time) {
+    if (time_file == NULL) {
+        *cpu_time = *gpu_time = -1;
+        return;
+    }
+    
+    fread(cpu_time, sizeof(double), 1, time_file);
+    fread(gpu_time, sizeof(double), 1, time_file);
+}
+
+
 
 } /* namespace Trace */
diff --git a/trace_parser.hpp b/trace_parser.hpp
index 0a01051..9f4d19e 100644
--- a/trace_parser.hpp
+++ b/trace_parser.hpp
@@ -41,6 +41,7 @@ class Parser
 {
 protected:
     void *file;
+    FILE *time_file;
 
     typedef std::list<Call *> CallList;
     CallList calls;
@@ -110,6 +111,8 @@ protected:
     unsigned long long read_uint(void);
 
     inline int read_byte(void);
+
+    void read_time(double *cpu_time, double *gpu_time);
 };
 
 
diff --git a/trace_writer.cpp b/trace_writer.cpp
index 7fcd0e6..8fa947d 100644
--- a/trace_writer.cpp
+++ b/trace_writer.cpp
@@ -311,6 +311,7 @@ LocalWriter::open(void) {
 
     const char *szExtension = "trace";
     char szFileName[PATH_MAX];
+    char TimeFileName[PATH_MAX];
     const char *lpFileName;
 
     lpFileName = getenv("TRACE_FILE");
@@ -344,6 +345,41 @@ LocalWriter::open(void) {
     OS::DebugMessage("apitrace: tracing to %s\n", szFileName);
 
     Writer::open(szFileName);
+
+    // Open another file to log the cpu and gpu time
+    //
+    // The reason why to open another file to log the CPU and GPU time is
+    // that the GPU time is got asynchronously, which means you can NOT get
+    // the GPU time immediately after tracing the corresponding function.
+    // Instead, we put the GPU time query in glXSwapBuffer, the last call
+    // in a frame to reduce the query overhead.
+    //
+    // Thus, the tracedump(or even, qapitrace) side, they don't know where
+    // the time value stored, which means it's hard for them to dump the
+    // time info while dumping functions's info.
+    //
+    // Then, here comes a solustion simply doesn't break the old trace format:
+    // open another file to log the time info, while tracedump(or qapitrace)
+    // dump the function, it tries to read an entry(with a order of cpu_time
+    // and gpu time). If found, the tracedump will show the time info in the
+    // following way:
+    //
+    //    call_no function_name(...args...) = ret_value { cpu time: xxx gpu time: xxx }
+    //
+    strcpy(TimeFileName, szFileName);
+    strcat(TimeFileName, ".time");
+    time_file = fopen(TimeFileName, "w");
+    if (time_file)
+        OS::DebugMessage("apitrace: tracing time to %s\n", TimeFileName);
+
+ }
+ 
+void
+LocalWriter::writeTime(struct write_time *tm) {
+    if (time_file == NULL)
+        return;
+
+    fwrite(tm, sizeof(*tm), 1, time_file);
 }
 
 unsigned LocalWriter::beginEnter(const FunctionSig *sig) {
diff --git a/trace_writer.hpp b/trace_writer.hpp
index 20a1530..ff325b3 100644
--- a/trace_writer.hpp
+++ b/trace_writer.hpp
@@ -94,6 +94,7 @@ namespace Trace {
 
         void writeCall(Call *call);
 
+
     protected:
         void inline _write(const void *sBuffer, size_t dwBytesToWrite);
         void inline _writeByte(char c);
@@ -115,7 +116,15 @@ namespace Trace {
      */
     class LocalWriter : public Writer {
     protected:
+	FILE *time_file;	// Used to log CPU and GPU time
     public:
+	struct write_time {
+            double cpu_time;
+            double gpu_time;
+	};
+	void writeTime(struct write_time *tm);
+
+
         void open(void);
 
         unsigned beginEnter(const FunctionSig *sig);
-- 
1.7.4.4




More information about the apitrace mailing list