<br>
<div class="gmail_quote">>On Mon, Oct 24, 2011 at 5:44 AM, José Fonseca <span dir="ltr"><<a href="mailto:jose.r.fonseca@gmail.com" target="_blank">jose.r.fonseca@gmail.com</a>></span> wrote:</div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px">>Yes. I was planning to refactor the last patch that Yuanhan Liu<br>
>posted to this list such that the recording was done in glretrace<br>>instead of apitrace, but been busy with other stuff.<br>Here is the patch for cpu/gpu time logging during retracing. </span><span class="Apple-style-span" style="color: rgb(32, 32, 32); font-family: 'Droid Sans', arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255); ">Used Yuanhan Liu's patch as a reference.</span><div>
<div><span style="background-color:rgb(255,255,255)"><div><span style="background-color: rgb(255, 255, 255); font-family: 'Droid Sans', arial, sans-serif; color: rgb(32, 32, 32); font-size: 13px; ">This patch logs the cpu and gpu time duration per gl call. cpu time is still skewed by snappy decompression. </span></div>
<div><span style="background-color: rgb(255, 255, 255); font-family: 'Droid Sans', arial, sans-serif; color: rgb(32, 32, 32); font-size: 13px; ">I guess It can be fixed by moving the decompression to a separate thread. </span></div>
<div><span style="background-color: rgb(255, 255, 255); font-family: 'Droid Sans', arial, sans-serif; color: rgb(32, 32, 32); font-size: 13px; "><br></span></div></span></div>
<div>
<div><span style="background-color:rgb(255,255,255)">diff --git a/glretrace.py b/glretrace.py<br>index e80fedb..3a3fb96 100644<br>--- a/glretrace.py<br>+++ b/glretrace.py<br>@@ -239,9 +239,28 @@ class GlRetracer(Retracer):<br>
<br> if <a href="http://function.name" target="_blank">function.name</a> == 'memcpy':<br> print ' if (!dest || !src || !n) return;'<br>+<br>+ print ' long long t0, t1;'<br>
+ print ' if (query_index < MAX_QUERIES) {'<br>
+ if function.loggputime:<br>+ print ' __glGenQueries(1, &gpu_queries[query_index]);'<br>+ print ' __glBeginQuery(GL_TIME_ELAPSED, gpu_queries[query_index]);'<br>
+ print ' last_gpu_query = gpu_queries[query_index];'<br>+ else:<br>+ print ' gpu_queries[query_index] = 0;'<br>+ print ' t0 = OS::GetTime();'<br>
+ print ' }'<br> <br> Retracer.call_function(self, function)<br> <br>+ print ' if (query_index < MAX_QUERIES) {'<br>+ print ' t1 = OS::GetTime();'<br>+ if function.loggputime:<br>
+ print ' __glEndQuery(GL_TIME_ELAPSED);'<br>+ print ' cpu_time[query_index] = (double)(t1 - t0) * 1.0E-3;'<br>+ print ' query_index++;'<br>+ print ' }'<br>+ <br>
# Error checking<br> if <a href="http://function.name" target="_blank">function.name</a> == "glBegin":<br> print ' glretrace::insideGlBeginEnd = true;'<br>@@ -392,7 +411,13 @@ if __name__ == '__main__':<br>
#include "glretrace.hpp"<br> #include "glstate.hpp"<br> <br>-<br>+#define MAX_QUERIES 20000<br>+GLuint gpu_queries[MAX_QUERIES];<br>+int last_gpu_query = 0;<br>+int query_index = 0;<br>+int last_query_index = 0;<br>
+double cpu_time[MAX_QUERIES];<br>+double gpu_time[MAX_QUERIES];<br> '''<br> api = glapi.glapi<br> retracer = GlRetracer()<br>diff --git a/glretrace_glx.cpp b/glretrace_glx.cpp<br>index 8e2e469..df63034 100644<br>
--- a/glretrace_glx.cpp<br>+++ b/glretrace_glx.cpp<br>@@ -28,9 +28,12 @@<br> #include "retrace.hpp"<br> #include "glretrace.hpp"<br> <br>-<br> using namespace glretrace;<br>-<br>+extern GLuint gpu_queries[];<br>
+extern int last_gpu_query;<br>+extern int query_index;<br>+extern int last_query_index;<br>+extern double gpu_time[];<br> <br> typedef std::map<unsigned long, glws::Drawable *> DrawableMap;<br> typedef std::map<unsigned long long, glws::Context *> ContextMap;<br>
@@ -128,6 +131,23 @@ static void retrace_glXSwapBuffers(Trace::Call &call) {<br> } else {<br> glFlush();<br> }<br>+<br>+ if (last_gpu_query > 0) {<br>+ int done = 0;<br>+ while (!done)<br>
+ __glGetQueryObjectiv(last_gpu_query, GL_QUERY_RESULT_AVAILABLE, &done);<br>+ }<br>+ for (int i = last_query_index; i < query_index; i++) {<br>+ GLuint time_gpu;<br>+ if (gpu_queries[i]) {<br>
+ __glGetQueryObjectuiv(gpu_queries[i], GL_QUERY_RESULT, &time_gpu);<br>+ __glDeleteQueries(1, &gpu_queries[i]);<br>+ } else<br>+ time_gpu = 0;<br>+ gpu_time[i] = time_gpu * 1.0E-6;<br>
+ }<br>+ last_gpu_query = 0;<br>+ last_query_index = query_index;<br> }<br> <br> static void retrace_glXCreateNewContext(Trace::Call &call) {<br>diff --git a/glretrace_main.cpp b/glretrace_main.cpp<br>
index 4e4b8ae..a41e14a 100644<br>--- a/glretrace_main.cpp<br>+++ b/glretrace_main.cpp<br>@@ -32,6 +32,9 @@<br> #include "glstate.hpp"<br> #include "glretrace.hpp"<br> <br>+extern double cpu_time[];<br>
+extern double gpu_time[];<br>+extern int query_index;<br> <br> namespace glretrace {<br> <br>@@ -200,20 +203,19 @@ static void display(void) {<br> Trace::Call *call;<br> <br> while ((call = parser.parse_call())) {<br>
- retracer.retrace(*call);<br>-<br>- if (!insideGlBeginEnd &&<br>+ retracer.retrace(*call);<br>+ <br>+ if (!insideGlBeginEnd &&<br> drawable && context &&<br>
call->no >= dump_state) {<br> glstate::dumpCurrentContext(std::cout);<br> exit(0);<br> }<br>-<br> delete call;<br> }<br>-<br>- // Reached the end of trace<br>
- glFlush();<br>+ <br>+ // Reached the end of trace.Before logging endTime, call glFinish() to complete the execution of all previous gl calls.<br>+ glFinish();<br> <br> long long endTime = OS::GetTime();<br>
float timeInterval = (endTime - startTime) * 1.0E-6;<br>
@@ -222,16 +224,39 @@ static void display(void) {<br> std::cout << <br> "Rendered " << frame << " frames"<br> " in " << timeInterval << " secs,"<br>
- " average of " << (frame/timeInterval) << " fps\n";<br>+ " average of " << (frame/timeInterval) << " fps\n"<br>+ "Refer to time.log file for timing information\n";<br>
}<br>-<br>- if (wait) {<br>+ if (wait) <br> while (glws::processEvents()) {}<br>- } else {<br>- exit(0);<br>- }<br> }<br> <br>+static void reparser(void) {<br>+ Trace::Call *call;<br>+ FILE *timefile = fopen("time.log", "w+");<br>
+ int i = 0;<br>+ fprintf(timefile,"Call#\t\t Function Name\t\tCPU Time\t\t\tGPU Time\n");<br>+ while ( ( call = parser.parse_call() ) && ( i <= query_index) ) {<br>+ if ( !(call->name()[0] == 'g' && call->name()[1] == 'l' && call->name()[2] == 'X') && <br>
+ !(call->name()[2] == 'G' && call->name()[3] == 'e' && call->name()[4] == 't')) {<br>+ <br>+ fprintf(timefile,"%5d\t\t", call->no); <br>
+ fprintf(timefile,"%15s\t\t", call->name());<br>+ // Minimum cpu time logging can be set appropriately<br>+ if ( cpu_time[i] > 0.001 || gpu_time[i] )<br>+ fprintf(timefile,"cpu time = %4.4f ms\t\t", cpu_time[i]);<br>
+ if ( gpu_time[i] )<br>+ fprintf(timefile,"gpu time = %4.4f ms", gpu_time[i]);<br>+ fprintf(timefile,"\n");<br>+ i++; <br>+ }<br>+ delete call;<br>+ }<br>
+ if(timefile != NULL) <br>
+ fclose(timefile);<br>+ <br>+ exit(0);<br>+}<br> <br> static void usage(void) {<br> std::cout << <br>@@ -325,11 +350,21 @@ int main(int argc, char **argv)<br> std::cerr << "error: failed to open " << argv[i] << "\n";<br>
return 1;<br> }<br>-<br> display();<br> <br> parser.close();<br> }<br>+ // Reparsing is required to dump a logfile containing cpu time and gpu time <br>+ // for each gl call. As gpu time for a gl call is available only at the end of <br>
+ // a frame, It's really difficult to write gpu time when parsing for the first time.<br>+ // It is an overhead but doesn't effect the cpu or gpu time in anyway. <br>+ // logging of gpu time can be enabled or disabled by setting a flag in specs/glapi.py<br>
+ if (!parser.open(argv[1])) {<br>+ std::cerr << "error: failed to open " << argv[i] << "\n";<br>+ return 1;<br>+ }<br>+ reparser();<br>+ parser.close();<br>
<br> delete visual;<br> glws::cleanup();<br>diff --git a/specs/glapi.py b/specs/glapi.py<br>index 0268479..5566157 100644<br>--- a/specs/glapi.py<br>+++ b/specs/glapi.py<br>@@ -63,9 +63,9 @@ glapi.add_functions([<br>
GlFunction(Void, "glTexImage1D", [(GLenum, "target"), (GLint, "level"), (GLenum_int, "internalformat"), (GLsizei, "width"), (GLint, "border"), (GLenum, "format"), (GLenum, "type"), (Blob(Const(GLvoid), "__glTexImage1D_size(format, type, width)"), "pixels")]),<br>
GlFunction(Void, "glTexImage2D", [(GLenum, "target"), (GLint, "level"), (GLenum_int, "internalformat"), (GLsizei, "width"), (GLsizei, "height"), (GLint, "border"), (GLenum, "format"), (GLenum, "type"), (Blob(Const(GLvoid), "__glTexImage2D_size(format, type, width, height)"), "pixels")]),<br>
GlFunction(Void, "glDrawBuffer", [(GLenum, "mode")]),<br>- GlFunction(Void, "glClear", [(GLbitfield_attrib, "mask")]),<br>- GlFunction(Void, "glClearColor", [(GLclampf, "red"), (GLclampf, "green"), (GLclampf, "blue"), (GLclampf, "alpha")]),<br>
- GlFunction(Void, "glClearStencil", [(GLint, "s")]),<br>+ GlFunction(Void, "glClear", [(GLbitfield_attrib, "mask")],loggputime=True),<br>+ GlFunction(Void, "glClearColor", [(GLclampf, "red"), (GLclampf, "green"), (GLclampf, "blue"), (GLclampf, "alpha")],loggputime=True),<br>
+ GlFunction(Void, "glClearStencil", [(GLint, "s")],loggputime=True),<br> GlFunction(Void, "glClearDepth", [(GLclampd, "depth")]),<br> GlFunction(Void, "glStencilMask", [(GLuint, "mask")]),<br>
GlFunction(Void, "glColorMask", [(GLboolean, "red"), (GLboolean, "green"), (GLboolean, "blue"), (GLboolean, "alpha")]),<br>@@ -82,7 +82,7 @@ glapi.add_functions([<br>
GlFunction(Void, "glPixelStoref", [(GLenum, "pname"), (GLfloat, "param")]),<br> GlFunction(Void, "glPixelStorei", [(GLenum, "pname"), (GLint, "param")]),<br>
GlFunction(Void, "glReadBuffer", [(GLenum, "mode")]),<br>- GlFunction(Void, "glReadPixels", [(GLint, "x"), (GLint, "y"), (GLsizei, "width"), (GLsizei, "height"), (GLenum, "format"), (GLenum, "type"), Out(GLpointer, "pixels")]),<br>
+ GlFunction(Void, "glReadPixels", [(GLint, "x"), (GLint, "y"), (GLsizei, "width"), (GLsizei, "height"), (GLenum, "format"), (GLenum, "type"), Out(GLpointer, "pixels")],loggputime=True),<br>
GlFunction(Void, "glGetBooleanv", [(GLenum, "pname"), Out(Array(GLboolean, "__gl_param_size(pname)"), "params")], sideeffects=False),<br> GlFunction(Void, "glGetDoublev", [(GLenum, "pname"), Out(Array(GLdouble, "__gl_param_size(pname)"), "params")], sideeffects=False),<br>
GlFunction(GLenum_error, "glGetError", [], sideeffects=False),<br>@@ -322,7 +322,7 @@ glapi.add_functions([<br> GlFunction(Void, "glPixelMapuiv", [(GLenum, "map"), (GLsizei, "mapsize"), (Array(Const(GLuint), "mapsize"), "values")]),<br>
GlFunction(Void, "glPixelMapusv", [(GLenum, "map"), (GLsizei, "mapsize"), (Array(Const(GLushort), "mapsize"), "values")]),<br> GlFunction(Void, "glCopyPixels", [(GLint, "x"), (GLint, "y"), (GLsizei, "width"), (GLsizei, "height"), (GLenum, "type")]),<br>
- GlFunction(Void, "glDrawPixels", [(GLsizei, "width"), (GLsizei, "height"), (GLenum, "format"), (GLenum, "type"), (Blob(Const(GLvoid), "__glDrawPixels_size(format, type, width, height)"), "pixels")]),<br>
+ GlFunction(Void, "glDrawPixels", [(GLsizei, "width"), (GLsizei, "height"), (GLenum, "format"), (GLenum, "type"), (Blob(Const(GLvoid), "__glDrawPixels_size(format, type, width, height)"), "pixels")], loggputime=True),<br>
GlFunction(Void, "glGetClipPlane", [(GLenum, "plane"), Out(Array(GLdouble, 4), "equation")], sideeffects=False),<br> GlFunction(Void, "glGetLightfv", [(GLenum, "light"), (GLenum, "pname"), Out(Array(GLfloat, "__gl_param_size(pname)"), "params")], sideeffects=False),<br>
GlFunction(Void, "glGetLightiv", [(GLenum, "light"), (GLenum, "pname"), Out(Array(GLint, "__gl_param_size(pname)"), "params")], sideeffects=False),<br>@@ -359,8 +359,8 @@ glapi.add_functions([<br>
GlFunction(Void, "glTranslatef", [(GLfloat, "x"), (GLfloat, "y"), (GLfloat, "z")]),<br> <br> # GL_VERSION_1_1<br>- GlFunction(Void, "glDrawArrays", [(GLenum_mode, "mode"), (GLint, "first"), (GLsizei, "count")]),<br>
- GlFunction(Void, "glDrawElements", [(GLenum_mode, "mode"), (GLsizei, "count"), (GLenum, "type"), (GLpointerConst, "indices")]),<br>+ GlFunction(Void, "glDrawArrays", [(GLenum_mode, "mode"), (GLint, "first"), (GLsizei, "count")], loggputime=True),<br>
+ GlFunction(Void, "glDrawElements", [(GLenum_mode, "mode"), (GLsizei, "count"), (GLenum, "type"), (GLpointerConst, "indices")], loggputime=True),<br> GlFunction(Void, "glGetPointerv", [(GLenum, "pname"), Out(Pointer(GLpointer), "params")], sideeffects=False),<br>
GlFunction(Void, "glPolygonOffset", [(GLfloat, "factor"), (GLfloat, "units")]),<br> GlFunction(Void, "glCopyTexImage1D", [(GLenum, "target"), (GLint, "level"), (GLenum, "internalformat"), (GLint, "x"), (GLint, "y"), (GLsizei, "width"), (GLint, "border")]),<br>
diff --git a/specs/stdapi.py b/specs/stdapi.py<br>index 0b2fe82..2617813 100644<br>--- a/specs/stdapi.py<br>+++ b/specs/stdapi.py<br>@@ -254,7 +254,7 @@ class Function:<br> # 0-3 are reserved to memcpy, malloc, free, and realloc<br>
__id = 4<br> <br>- def __init__(self, type, name, args, call = '', fail = None, sideeffects=True):<br>+ def __init__(self, type, name, args, call = '', fail = None, sideeffects=True, loggputime=False):<br>
<a href="http://self.id" target="_blank">self.id</a> = Function.__id<br> Function.__id += 1<br> <br>@@ -278,6 +278,7 @@ class Function:<br> self.call = call<br> self.fail = fail<br> self.sideeffects = sideeffects<br>
+ self.loggputime = loggputime<br> <br> def prototype(self, name=None):<br> if name is not None:</span></div>
<div><span style="background-color:rgb(255,255,255)"><br></span> </div>
<div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px"><br>>My idea was to have glretrace output the timings to stdout in a TSV format like:<br>
># call no function name cpu start cpu stop gpu start gpu stop<br>>50 glDrawArrays 1.4 1.5 2.4 2.5<br>> ...<br>>The GUI could then use this to produce a joint visualization of both<br>>cpu and gpu timelines.</span></div>
<div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px">As gpu executes the gl commands asynchronously, </span><span class="Apple-style-span" style="color: rgb(32, 32, 32); font-family: 'Droid Sans', arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255); ">I didn't find a way to query the gpu start time.</span></div>
<div><span class="Apple-style-span" style="color: rgb(32, 32, 32); font-family: 'Droid Sans', arial, sans-serif; font-size: 13px; background-color: rgb(255, 255, 255); ">Is there any way to query the gpu start/stop time per gl call?</span></div>
<div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px"><br>
I'll appreciate your comments on above patch.</span></div></div><div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px"><br></span></div>
<div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px">Sample time.log file:</span></div><div><span style="background-color:rgb(255,255,255);font-family:'Droid Sans', arial, sans-serif;color:rgb(32,32,32);font-size:13px"><div>
Call# Function Name CPU Time GPU Time</div><div> 7 glShadeModel cpu time = 0.0060 ms </div><div> 8 glClearColor cpu time = 0.0040 ms</div>
<div> 9 glClear cpu time = 0.3240 ms gpu time = 0.0040 ms</div><div> 10 glColor3f cpu time = 0.0120 ms</div><div> 11 glPushMatrix cpu time = 0.0050 ms</div>
<div> 12 glRotatef cpu time = 0.0040 ms</div><div> 13 glBegin cpu time = 0.0200 ms</div><div> 14 glVertex2f cpu time = 0.0040 ms</div>
<div> 15 glVertex2f </div><div> 16 glVertex2f</div><div> 17 glEnd cpu time = 0.0020 ms</div><div> 18 glPopMatrix cpu time = 16.2530 ms</div>
<div> 21 glFlush cpu time = 0.0020 ms</div><div> 22 glViewport cpu time = 0.0030 ms</div><div> 23 glMatrixMode cpu time = 0.0020 ms</div>
<div> 24 glLoadIdentity cpu time = 0.0020 ms</div><div> 25 glOrtho cpu time = 0.0030 ms</div><div> 27 glFlush</div><div> 28 glViewport</div>
<div> 29 glMatrixMode</div><div> 30 glLoadIdentity</div><div> 31 glOrtho</div><div> 33 glShadeModel</div><div> 34 glClearColor</div><div> 35 glClear cpu time = 40.7830 ms gpu time = 0.1890 ms</div>
<div> 36 glColor3f cpu time = 0.0030 ms</div><div> 37 glPushMatrix cpu time = 0.0020 ms</div><div> 38 glRotatef cpu time = 0.0030 ms</div>
<div><br></div></span></div><div><font class="Apple-style-span" color="#202020" face="'Droid Sans', arial, sans-serif"><br></font></div><div><font class="Apple-style-span" color="#202020" face="'Droid Sans', arial, sans-serif">Thanks</font></div>
<div><font class="Apple-style-span" color="#202020" face="'Droid Sans', arial, sans-serif">Anuj</font></div>
</div>