[Mesa-dev] [PATCH] gallivm: JIT symbol resolution with linux perf.

jfonseca at vmware.com jfonseca at vmware.com
Wed Apr 17 05:34:50 PDT 2013


From: José Fonseca <jfonseca at vmware.com>

Details on docs/llvmpipe.html
---
 bin/perf-annotate-jit                          |  240 ++++++++++++++++++++++++
 configure.ac                                   |    7 -
 docs/llvmpipe.html                             |   40 ++--
 src/gallium/auxiliary/gallivm/lp_bld_debug.cpp |  117 +++++++++---
 src/gallium/auxiliary/gallivm/lp_bld_debug.h   |    6 +-
 src/gallium/auxiliary/gallivm/lp_bld_init.c    |   11 +-
 src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  |   23 ---
 src/gallium/auxiliary/gallivm/lp_bld_misc.h    |    3 -
 8 files changed, 361 insertions(+), 86 deletions(-)
 create mode 100755 bin/perf-annotate-jit

diff --git a/bin/perf-annotate-jit b/bin/perf-annotate-jit
new file mode 100755
index 0000000..7ebc965
--- /dev/null
+++ b/bin/perf-annotate-jit
@@ -0,0 +1,240 @@
+#!/usr/bin/env python
+#
+# Copyright 2012 VMware Inc
+# Copyright 2008-2009 Jose Fonseca
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+# THE SOFTWARE.
+#
+
+
+import sys
+import os.path
+import re
+import optparse
+import subprocess
+
+
+class Parser:
+    """Abstract parser: subclasses must provide parse()."""
+
+    def __init__(self):
+        """Nothing to set up; present so subclasses can chain to it."""
+
+    def parse(self):
+        raise NotImplementedError()
+
+
+class LineParser(Parser):
+    """Line-oriented parser base that keeps a one-line lookahead buffer."""
+
+    def __init__(self, file):
+        Parser.__init__(self)
+        self._file = file
+        self.line_no = 0
+        self.__eof = False
+        self.__line = None  # stays None until the first readline()
+
+    def readline(self):
+        raw = self._file.readline()
+        if raw:
+            self.line_no += 1
+        else:
+            self.__eof = True
+        self.__line = raw.rstrip('\r\n')  # '' once the input is exhausted
+
+    def lookahead(self):
+        assert self.__line is not None
+        return self.__line
+
+    def consume(self):
+        """Return the current lookahead line and advance to the next one."""
+        assert self.__line is not None
+        current = self.__line
+        self.readline()
+        return current
+
+    def eof(self):
+        assert self.__line is not None
+        return self.__eof
+
+
+mapFile = None
+
+def lookupMap(filename, matchSymbol):
+    """Return the start address of matchSymbol in perf map filename, or None.
+
+    Also records filename in the global mapFile, which PerfParser.parse later
+    uses to locate the matching .asm listing.
+    """
+    global mapFile
+    mapFile = filename
+    with open(filename, 'rt') as stream:
+        for line in stream:
+            start, length, symbol = line.split()
+            if symbol == matchSymbol:
+                return int(start, 16)
+    return None
+
+def lookupAsm(filename, desiredFunction):
+    """Return [(offset, instr), ...] for desiredFunction; [] if it is absent."""
+    asm = []
+    with open(filename + '.asm', 'rt') as stream:
+        for line in stream:
+            if line == desiredFunction + ':\n':
+                break
+        for line in stream:
+            line = line.strip()
+            if not line:
+                break
+            addr, instr = line.split(':', 1)
+            asm.append((int(addr), instr))
+    return asm
+
+
+
+samples = {}
+
+
+class PerfParser(LineParser):
+    """Parser for linux perf callgraph output.
+
+    It expects output generated with
+
+        perf record -g
+        perf script
+
+    Leaf-frame hits inside `symbol` are tallied per function offset in the
+    module-level `samples` dict, then printed next to the disassembly.
+    """
+
+    def __init__(self, infile, symbol):
+        LineParser.__init__(self, infile)
+        self.symbol = symbol
+
+    def readline(self):
+        # Override LineParser.readline to ignore comment lines
+        while True:
+            LineParser.readline(self)
+            if self.eof() or not self.lookahead().startswith('#'):
+                break
+
+    def parse(self):
+        """Tally samples, print the annotated listing and exit the process."""
+        self.readline()  # prime the lookahead
+
+        while not self.eof():
+            self.parse_event()
+
+        if mapFile is None:
+            # lookupMap never ran, i.e. no sample landed in the symbol.
+            sys.stderr.write('error: no samples for %s\n' % self.symbol)
+            sys.exit(1)
+
+        asm = lookupAsm(mapFile, self.symbol)
+        total_samples = 0
+
+        sys.stdout.write('%s:\n' % self.symbol)
+        for address, instr in asm:
+            try:
+                sample = samples.pop(address)
+            except KeyError:
+                sys.stdout.write(6*' ')
+            else:
+                sys.stdout.write('%6u' % (sample))
+                total_samples += sample
+            sys.stdout.write('%6u: %s\n' % (address, instr))
+        sys.stdout.write('total: %u\n' % total_samples)
+        if samples:
+            # Offsets missing from the listing: report them, don't crash.
+            sys.stderr.write('warning: %u unmatched offsets\n' % len(samples))
+        sys.exit(0)
+
+    def parse_event(self):
+        """Consume one event: its header line plus the callchain below it."""
+        # The header carries nothing we need; an empty one is a stray blank.
+        if self.eof() or not self.consume():
+            return
+        self.parse_callchain()
+
+    def parse_callchain(self):
+        """Consume every frame of the event; return the list of counted hits.
+
+        Frames after the leaf are consumed too, else parse_event would mistake
+        them for the next event's header.
+        """
+        callchain = []
+        first = True
+        while self.lookahead():
+            if self.parse_call(first):
+                callchain.append(True)
+            first = False
+        # Step over the blank line terminating the callchain (no-op at EOF).
+        self.consume()
+        return callchain
+
+    call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>[^)]*)\)$')
+
+    def parse_call(self, first):
+        """Consume one frame line; return True if it was counted as a sample.
+
+        Only the leaf frame (`first`) executing self.symbol is counted, keyed
+        by its offset from the symbol's start address in the perf map.
+        """
+        line = self.consume()
+        if not first:
+            return None
+
+        mo = self.call_re.match(line)
+        if not mo:
+            return None
+        function_name = mo.group('symbol') or mo.group('address')
+        if function_name != self.symbol:
+            return None
+
+        # lookupMap opens the frame's module path as a perf map file.
+        start_address = lookupMap(mo.group('module'), function_name)
+        if start_address is None:
+            return None
+
+        offset = int(mo.group('address'), 16) - start_address
+        samples[offset] = samples.get(offset, 0) + 1
+        return True
+
+
+def main():
+    """Annotate the JIT disassembly of one symbol with `perf script` samples."""
+    optparser = optparse.OptionParser(
+        usage="\n\t%prog [options] symbol_name")
+    (options, args) = optparser.parse_args(sys.argv[1:])
+    if len(args) != 1:
+        optparser.error('wrong number of arguments')
+    symbol = args[0]
+
+    # perf's stderr is inherited on purpose: a pipe nobody drains would block
+    # perf once it fills, and would also hide perf's own error messages.
+    p = subprocess.Popen(['perf', 'script'], stdout=subprocess.PIPE)
+    parser = PerfParser(p.stdout, symbol)
+    parser.parse()
+
+
+if __name__ == '__main__':
+    main()
+
+
+# vim: set sw=4 et:
diff --git a/configure.ac b/configure.ac
index fb1f324..ba92258 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1601,13 +1601,6 @@ if test "x$enable_gallium_llvm" = xyes; then
             LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
         fi
 
-        if test "x$enable_debug" = xyes; then
-            # Debug builds require OProfileJIT if LLVM was built with support for it
-            if $LLVM_CONFIG --components | grep -q '\<oprofilejit\>'; then
-                LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit"
-            fi
-        fi
-
         if test "x$enable_opencl" = xyes; then
             LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
             # LLVM 3.3 >= 177971 requires IRReader
diff --git a/docs/llvmpipe.html b/docs/llvmpipe.html
index be03083..80f8a01 100644
--- a/docs/llvmpipe.html
+++ b/docs/llvmpipe.html
@@ -130,38 +130,38 @@ need to ask, don't even try it.
 
 <h1>Profiling</h1>
 
-To profile llvmpipe you should pass the options
-
+<p>
+To profile llvmpipe you should build as
+</p>
 <pre>
   scons build=profile <same-as-before>
 </pre>
 
+<p>
 This will ensure that frame pointers are used both in C and JIT functions, and
 that no tail call optimizations are done by gcc.
+</p>
 
-To better profile JIT code you'll need to build LLVM with oprofile integration.
-
-<pre>
-  ./configure \
-      --prefix=$install_dir \
-      --enable-optimized \
-      --disable-profiling \
-      --enable-targets=host-only \
-      --with-oprofile
-
-  make -C "$build_dir"
-  make -C "$build_dir" install
-
-  find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug
-</pre>
+<h2>Linux perf integration</h2>
 
-The you should define
+<p>
+On Linux, it is possible to have symbol resolution of JIT code with <a href="http://perf.wiki.kernel.org/">Linux perf</a>:
+</p>
 
 <pre>
-  export LLVM=/path/to/llvm-2.6-profile
+	perf record -g /my/application
+	perf report
 </pre>
 
-and rebuild.
+<p>
+When run inside Linux perf, llvmpipe will create a /tmp/perf-XXXXX.map file
+(XXXXX being the process id) with a symbol address table.  It also dumps
+assembly code to /tmp/perf-XXXXX.map.asm, which the bin/perf-annotate-jit
+script can use to produce a disassembly annotated with the samples.
+</p>
+
+<p>You can obtain a call graph via
+<a href="http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#linux_perf">Gprof2Dot</a>.</p>
 
 
 <h1>Unit testing</h1>
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
index af50fcc..ac8e10b 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
@@ -31,6 +31,7 @@
 #include <llvm/Target/TargetMachine.h>
 #include <llvm/Target/TargetInstrInfo.h>
 #include <llvm/Support/raw_ostream.h>
+#include <llvm/Support/Format.h>
 #include <llvm/Support/MemoryObject.h>
 
 #if HAVE_LLVM >= 0x0300
@@ -60,6 +61,11 @@
 
 #include "lp_bld_debug.h"
 
+#ifdef __linux__
+#include <sys/stat.h>
+#include <fcntl.h>
+#endif
+
 
 
 /**
@@ -174,8 +180,8 @@ public:
  * - http://blog.llvm.org/2010/01/x86-disassembler.html
  * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
  */
-extern "C" void
-lp_disassemble(const void* func)
+static size_t
+disassemble(const void* func, llvm::raw_ostream & Out)
 {
 #if HAVE_LLVM >= 0x0207
    using namespace llvm;
@@ -209,8 +215,8 @@ lp_disassemble(const void* func)
 #endif
 
    if (!AsmInfo) {
-      debug_printf("error: no assembly info for target %s\n", Triple.c_str());
-      return;
+      Out << "error: no assembly info for target " << Triple << "\n";
+      return 0;
    }
 
 #if HAVE_LLVM >= 0x0300
@@ -220,12 +226,10 @@ lp_disassemble(const void* func)
    OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler());
 #endif 
    if (!DisAsm) {
-      debug_printf("error: no disassembler for target %s\n", Triple.c_str());
-      return;
+      Out << "error: no disassembler for target " << Triple << "\n";
+      return 0;
    }
 
-   raw_debug_ostream Out;
-
 #if HAVE_LLVM >= 0x0300
    unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
 #else
@@ -235,14 +239,14 @@ lp_disassemble(const void* func)
 #if HAVE_LLVM >= 0x0301
    OwningPtr<const MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
    if (!MRI) {
-      debug_printf("error: no register info for target %s\n", Triple.c_str());
-      return;
+      Out << "error: no register info for target " << Triple.c_str() << "\n";
+      return 0;
    }
 
    OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
    if (!MII) {
-      debug_printf("error: no instruction info for target %s\n", Triple.c_str());
-      return;
+      Out << "error: no instruction info for target " << Triple.c_str() << "\n";
+      return 0;
    }
 #endif
 
@@ -260,8 +264,8 @@ lp_disassemble(const void* func)
          T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out));
 #endif
    if (!Printer) {
-      debug_printf("error: no instruction printer for target %s\n", Triple.c_str());
-      return;
+      Out << "error: no instruction printer for target " << Triple.c_str() << "\n";
+      return 0;
    }
 
 #if HAVE_LLVM >= 0x0301
@@ -300,7 +304,7 @@ lp_disassemble(const void* func)
        * so that between runs.
        */
 
-      debug_printf("%6lu:\t", (unsigned long)pc);
+      Out << llvm::format("%6lu:\t", (unsigned long)pc);
 
       if (!DisAsm->getInstruction(Inst, Size, memoryObject,
                                  pc,
@@ -309,7 +313,7 @@ lp_disassemble(const void* func)
 #else
 				  nulls())) {
 #endif
-         debug_printf("invalid\n");
+         Out << "invalid";
          pc += 1;
       }
 
@@ -320,25 +324,23 @@ lp_disassemble(const void* func)
       if (0) {
          unsigned i;
          for (i = 0; i < Size; ++i) {
-            debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]);
+            Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]);
          }
          for (; i < 16; ++i) {
-            debug_printf("   ");
+            Out << "   ";
          }
       }
 
       /*
        * Print the instruction.
        */
-
 #if HAVE_LLVM >= 0x0300
-      Printer->printInst(&Inst, Out, "");
+	 Printer->printInst(&Inst, Out, "");
 #elif HAVE_LLVM >= 0x208
-      Printer->printInst(&Inst, Out);
+	 Printer->printInst(&Inst, Out);
 #else
-      Printer->printInst(&Inst);
+	 Printer->printInst(&Inst);
 #endif
-      Out.flush();
 
       /*
        * Advance.
@@ -386,7 +388,7 @@ lp_disassemble(const void* func)
                 * Output the address relative to the function start, given
                 * that MC will print the addresses relative the current pc.
                 */
-               debug_printf("\t\t; %lu", (unsigned long)jump);
+               Out << "\t\t; " << jump;
 
                /*
                 * Ignore far jumps given it could be actually a tail return to
@@ -401,7 +403,7 @@ lp_disassemble(const void* func)
          }
       }
 
-      debug_printf("\n");
+      Out << "\n";
 
       /*
        * Stop disassembling on return statements, if there is no record of a
@@ -420,12 +422,73 @@ lp_disassemble(const void* func)
     */
 
    if (0) {
-      debug_printf("disassemble %p %p\n", bytes, bytes + pc);
+      _debug_printf("disassemble %p %p\n", bytes, bytes + pc);
    }
 
-   debug_printf("\n");
+   Out << "\n";
+   Out.flush();
+
+   return pc;
 #else /* HAVE_LLVM < 0x0207 */
    (void)func;
+   return 0;
 #endif /* HAVE_LLVM < 0x0207 */
 }
 
+/* Disassemble the JIT-compiled code at `code` through a raw_debug_ostream. */
+extern "C" void
+lp_disassemble(LLVMValueRef func, const void *code) {
+   raw_debug_ostream Out;
+   (void)func;   /* unused; kept so the signature matches lp_profile() */
+   disassemble(code, Out);
+}
+
+/*
+ * Linux perf profiler integration.
+ *
+ * See also:
+ * - http://penberg.blogspot.co.uk/2009/06/jato-has-profiler.html
+ * - https://github.com/penberg/jato/commit/73ad86847329d99d51b386f5aba692580d1f8fdc
+ * - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=80d496be89ed7dede5abee5c057634e80a31c82d
+ */
+extern "C" void
+lp_profile(LLVMValueRef func, const void *code)
+{
+#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE))
+   static boolean first_time = TRUE;
+   static FILE *perf_map_file = NULL;
+   static int perf_asm_fd = -1;
+   if (first_time) {
+      /*
+       * Function sizes come from the disassembler, which is leaky and slow,
+       * so only do this when running inside linux perf, as inferred from
+       * the PERF_BUILDID_DIR environment variable.
+       */
+      if (getenv("PERF_BUILDID_DIR")) {
+         pid_t pid = getpid();
+         char filename[256];
+         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
+         perf_map_file = fopen(filename, "wt");
+         util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
+         mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
+         /* O_TRUNC: do not keep a stale listing from a reused pid. */
+         perf_asm_fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, mode);
+      }
+      first_time = FALSE;
+   }
+   if (perf_map_file && perf_asm_fd >= 0) {   /* both outputs opened */
+      const char *symbol = LLVMGetValueName(func);
+      unsigned long addr = (uintptr_t)code;
+      llvm::raw_fd_ostream Out(perf_asm_fd, false);   /* false: keep the fd open */
+      Out << symbol << ":\n";
+      unsigned long size = disassemble(code, Out);
+      fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
+      fflush(perf_map_file);
+   }
+#else
+   (void)func;
+   (void)code;
+#endif
+}
+
+
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
index da873f3..ab83d98 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
@@ -83,7 +83,11 @@ lp_check_alignment(const void *ptr, unsigned alignment);
 
 
 void
-lp_disassemble(const void* func);
+lp_disassemble(LLVMValueRef func, const void *code);
+
+
+void
+lp_profile(LLVMValueRef func, const void *code);
 
 
 #ifdef __cplusplus
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
index 4fa5887..1153411 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
@@ -273,10 +273,6 @@ init_gallivm_engine(struct gallivm_state *gallivm)
          LLVMDisposeMessage(error);
          goto fail;
       }
-
-#if defined(DEBUG) || defined(PROFILE)
-      lp_register_oprofile_jit_event_listener(gallivm->engine);
-#endif
    }
 
    LLVMAddModuleProvider(gallivm->engine, gallivm->provider);//new
@@ -635,6 +631,7 @@ gallivm_compile_module(struct gallivm_state *gallivm)
 }
 
 
+
 func_pointer
 gallivm_jit_function(struct gallivm_state *gallivm,
                      LLVMValueRef func)
@@ -650,9 +647,13 @@ gallivm_jit_function(struct gallivm_state *gallivm,
    jit_func = pointer_to_func(code);
 
    if (gallivm_debug & GALLIVM_DEBUG_ASM) {
-      lp_disassemble(code);
+      lp_disassemble(func, code);
    }
 
+#if defined(PROFILE)
+   lp_profile(func, code);
+#endif
+
    /* Free the function body to save memory */
    lp_func_delete_body(func);
 
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
index 46cdbad..c512795 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
@@ -54,7 +54,6 @@
 #include <llvm-c/ExecutionEngine.h>
 #include <llvm/Target/TargetOptions.h>
 #include <llvm/ExecutionEngine/ExecutionEngine.h>
-#include <llvm/ExecutionEngine/JITEventListener.h>
 #if HAVE_LLVM >= 0x0301
 #include <llvm/ADT/Triple.h>
 #include <llvm/ExecutionEngine/JITMemoryManager.h>
@@ -75,28 +74,6 @@
 #include "lp_bld_misc.h"
 
 
-/**
- * Register the engine with oprofile.
- *
- * This allows to see the LLVM IR function names in oprofile output.
- *
- * To actually work LLVM needs to be built with the --with-oprofile configure
- * option.
- *
- * Also a oprofile:oprofile user:group is necessary. Which is not created by
- * default on some distributions.
- */
-extern "C" void
-lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE)
-{
-#if HAVE_LLVM >= 0x0301
-   llvm::unwrap(EE)->RegisterJITEventListener(llvm::JITEventListener::createOProfileJITEventListener());
-#else
-   llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener());
-#endif
-}
-
-
 extern "C" void
 lp_set_target_options(void)
 {
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
index 9ed7c34..1f735fb 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
@@ -41,9 +41,6 @@ extern "C" {
 
 
 extern void
-lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE);
-
-extern void
 lp_set_target_options(void);
 
 
-- 
1.7.9.5



More information about the mesa-dev mailing list