[Mesa-dev] [PATCH] gallivm: JIT symbol resolution with linux perf.
Roland Scheidegger
sroland at vmware.com
Wed Apr 17 08:06:46 PDT 2013
Am 17.04.2013 14:34, schrieb jfonseca at vmware.com:
> From: José Fonseca <jfonseca at vmware.com>
>
> Details on docs/llvmpipe.html
> ---
> bin/perf-annotate-jit | 240 ++++++++++++++++++++++++
> configure.ac | 7 -
> docs/llvmpipe.html | 40 ++--
> src/gallium/auxiliary/gallivm/lp_bld_debug.cpp | 117 +++++++++---
> src/gallium/auxiliary/gallivm/lp_bld_debug.h | 6 +-
> src/gallium/auxiliary/gallivm/lp_bld_init.c | 11 +-
> src/gallium/auxiliary/gallivm/lp_bld_misc.cpp | 23 ---
> src/gallium/auxiliary/gallivm/lp_bld_misc.h | 3 -
> 8 files changed, 361 insertions(+), 86 deletions(-)
> create mode 100755 bin/perf-annotate-jit
>
> diff --git a/bin/perf-annotate-jit b/bin/perf-annotate-jit
> new file mode 100755
> index 0000000..7ebc965
> --- /dev/null
> +++ b/bin/perf-annotate-jit
> @@ -0,0 +1,240 @@
> +#!/usr/bin/env python
> +#
> +# Copyright 2012 VMware Inc
> +# Copyright 2008-2009 Jose Fonseca
> +#
> +# Permission is hereby granted, free of charge, to any person obtaining a copy
> +# of this software and associated documentation files (the "Software"), to deal
> +# in the Software without restriction, including without limitation the rights
> +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
> +# copies of the Software, and to permit persons to whom the Software is
> +# furnished to do so, subject to the following conditions:
> +#
> +# The above copyright notice and this permission notice shall be included in
> +# all copies or substantial portions of the Software.
> +#
> +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
> +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
> +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
> +# THE SOFTWARE.
> +#
> +
> +
> +import sys
> +import os.path
> +import re
> +import optparse
> +import subprocess
> +
> +
> +class Parser:
> + """Parser interface."""
> +
> + def __init__(self):
> + pass
> +
> + def parse(self):
> + raise NotImplementedError
> +
> +
> +class LineParser(Parser):
> + """Base class for parsers that read line-based formats."""
> +
> + def __init__(self, file):
> + Parser.__init__(self)
> + self._file = file
> + self.__line = None
> + self.__eof = False
> + self.line_no = 0
> +
> + def readline(self):
> + line = self._file.readline()
> + if not line:
> + self.__line = ''
> + self.__eof = True
> + else:
> + self.line_no += 1
> + self.__line = line.rstrip('\r\n')
> +
> + def lookahead(self):
> + assert self.__line is not None
> + return self.__line
> +
> + def consume(self):
> + assert self.__line is not None
> + line = self.__line
> + self.readline()
> + return line
> +
> + def eof(self):
> + assert self.__line is not None
> + return self.__eof
> +
> +
> +mapFile = None
> +
> +def lookupMap(filename, matchSymbol):
> + global mapFile
> + mapFile = filename
> + stream = open(filename, 'rt')
> + for line in stream:
> + start, length, symbol = line.split()
> +
> + start = int(start, 16)
> + length = int(length,16)
> +
> + if symbol == matchSymbol:
> + return start
> +
> + return None
> +
> +def lookupAsm(filename, desiredFunction):
> + stream = open(filename + '.asm', 'rt')
> + while stream.readline() != desiredFunction + ':\n':
> + pass
> +
> + asm = []
> + line = stream.readline().strip()
> + while line:
> + addr, instr = line.split(':', 1)
> + addr = int(addr)
> + asm.append((addr, instr))
> + line = stream.readline().strip()
> +
> + return asm
> +
> +
> +
> +samples = {}
> +
> +
> +class PerfParser(LineParser):
> + """Parser for linux perf callgraph output.
> +
> + It expects output generated with
> +
> + perf record -g
> + perf script
> + """
> +
> + def __init__(self, infile, symbol):
> + LineParser.__init__(self, infile)
> + self.symbol = symbol
> +
> + def readline(self):
> + # Override LineParser.readline to ignore comment lines
> + while True:
> + LineParser.readline(self)
> + if self.eof() or not self.lookahead().startswith('#'):
> + break
> +
> + def parse(self):
> + # read lookahead
> + self.readline()
> +
> + while not self.eof():
> + self.parse_event()
> +
> + asm = lookupAsm(mapFile, self.symbol)
> +
> + addresses = samples.keys()
> + addresses.sort()
> + total_samples = 0
> +
> + sys.stdout.write('%s:\n' % self.symbol)
> + for address, instr in asm:
> + try:
> + sample = samples.pop(address)
> + except KeyError:
> + sys.stdout.write(6*' ')
> + else:
> + sys.stdout.write('%6u' % (sample))
> + total_samples += sample
> + sys.stdout.write('%6u: %s\n' % (address, instr))
> + print 'total:', total_samples
> + assert len(samples) == 0
> +
> + sys.exit(0)
> +
> + def parse_event(self):
> + if self.eof():
> + return
> +
> + line = self.consume()
> + assert line
> +
> + callchain = self.parse_callchain()
> + if not callchain:
> + return
> +
> + def parse_callchain(self):
> + callchain = []
> + while self.lookahead():
> + function = self.parse_call(len(callchain) == 0)
> + if function is None:
> + break
> + callchain.append(function)
> + if self.lookahead() == '':
> + self.consume()
> + return callchain
> +
> + call_re = re.compile(r'^\s+(?P<address>[0-9a-fA-F]+)\s+(?P<symbol>.*)\s+\((?P<module>[^)]*)\)$')
> +
> + def parse_call(self, first):
> + line = self.consume()
> + mo = self.call_re.match(line)
> + assert mo
> + if not mo:
> + return None
> +
> + if not first:
> + return None
> +
> + function_name = mo.group('symbol')
> + if not function_name:
> + function_name = mo.group('address')
> +
> + module = mo.group('module')
> +
> + function_id = function_name + ':' + module
> +
> + address = mo.group('address')
> + address = int(address, 16)
> +
> + if function_name != self.symbol:
> + return None
> +
> + start_address = lookupMap(module, function_name)
> + address -= start_address
> +
> + #print function_name, module, address
> +
> + samples[address] = samples.get(address, 0) + 1
> +
> + return True
> +
> +
> +def main():
> + """Main program."""
> +
> + optparser = optparse.OptionParser(
> + usage="\n\t%prog [options] symbol_name")
> + (options, args) = optparser.parse_args(sys.argv[1:])
> + if len(args) != 1:
> + optparser.error('wrong number of arguments')
> +
> + symbol = args[0]
> +
> + p = subprocess.Popen(['perf', 'script'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
> + parser = PerfParser(p.stdout, symbol)
> + parser.parse()
> +
> +
> +if __name__ == '__main__':
> + main()
> +
> +
> +# vim: set sw=4 et:
> diff --git a/configure.ac b/configure.ac
> index fb1f324..ba92258 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1601,13 +1601,6 @@ if test "x$enable_gallium_llvm" = xyes; then
> LLVM_COMPONENTS="${LLVM_COMPONENTS} mcjit"
> fi
>
> - if test "x$enable_debug" = xyes; then
> - # Debug builds require OProfileJIT if LLVM was built with support for it
> - if $LLVM_CONFIG --components | grep -q '\<oprofilejit\>'; then
> - LLVM_COMPONENTS="${LLVM_COMPONENTS} oprofilejit"
> - fi
> - fi
> -
> if test "x$enable_opencl" = xyes; then
> LLVM_COMPONENTS="${LLVM_COMPONENTS} ipo linker instrumentation"
> # LLVM 3.3 >= 177971 requires IRReader
> diff --git a/docs/llvmpipe.html b/docs/llvmpipe.html
> index be03083..80f8a01 100644
> --- a/docs/llvmpipe.html
> +++ b/docs/llvmpipe.html
> @@ -130,38 +130,38 @@ need to ask, don't even try it.
>
> <h1>Profiling</h1>
>
> -To profile llvmpipe you should pass the options
> -
> +<p>
> +To profile llvmpipe you should build as
> +</p>
> <pre>
> scons build=profile <same-as-before>
> </pre>
>
> +<p>
> This will ensure that frame pointers are used both in C and JIT functions, and
> that no tail call optimizations are done by gcc.
> +</p>
>
> -To better profile JIT code you'll need to build LLVM with oprofile integration.
> -
> -<pre>
> - ./configure \
> - --prefix=$install_dir \
> - --enable-optimized \
> - --disable-profiling \
> - --enable-targets=host-only \
> - --with-oprofile
> -
> - make -C "$build_dir"
> - make -C "$build_dir" install
> -
> - find "$install_dir/lib" -iname '*.a' -print0 | xargs -0 strip --strip-debug
> -</pre>
> +<h2>Linux perf integration</h2>
>
> -The you should define
> +<p>
> +On Linux, it is possible to have symbol resolution of JIT code with <a href="http://perf.wiki.kernel.org/">Linux perf</a>:
> +</p>
>
> <pre>
> - export LLVM=/path/to/llvm-2.6-profile
> + perf record -g /my/application
> + perf report
> </pre>
>
> -and rebuild.
> +<p>
> +When run inside Linux perf, llvmpipe will create a /tmp/perf-XXXXX.map file with a
> +symbol address table. It also dumps assembly code to /tmp/perf-XXXXX.map.asm,
> +which can be used by the bin/perf-annotate-jit script to produce disassembly of
> +the generated code annotated with the samples.
> +</p>
> +
> +<p>You can obtain a call graph via
> +<a href="http://code.google.com/p/jrfonseca/wiki/Gprof2Dot#linux_perf">Gprof2Dot</a>.</p>
>
>
> <h1>Unit testing</h1>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> index af50fcc..ac8e10b 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.cpp
> @@ -31,6 +31,7 @@
> #include <llvm/Target/TargetMachine.h>
> #include <llvm/Target/TargetInstrInfo.h>
> #include <llvm/Support/raw_ostream.h>
> +#include <llvm/Support/Format.h>
> #include <llvm/Support/MemoryObject.h>
>
> #if HAVE_LLVM >= 0x0300
> @@ -60,6 +61,11 @@
>
> #include "lp_bld_debug.h"
>
> +#ifdef __linux__
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#endif
> +
>
>
> /**
> @@ -174,8 +180,8 @@ public:
> * - http://blog.llvm.org/2010/01/x86-disassembler.html
> * - http://blog.llvm.org/2010/04/intro-to-llvm-mc-project.html
> */
> -extern "C" void
> -lp_disassemble(const void* func)
> +static size_t
> +disassemble(const void* func, llvm::raw_ostream & Out)
> {
> #if HAVE_LLVM >= 0x0207
> using namespace llvm;
> @@ -209,8 +215,8 @@ lp_disassemble(const void* func)
> #endif
>
> if (!AsmInfo) {
> - debug_printf("error: no assembly info for target %s\n", Triple.c_str());
> - return;
> + Out << "error: no assembly info for target " << Triple << "\n";
> + return 0;
> }
>
> #if HAVE_LLVM >= 0x0300
> @@ -220,12 +226,10 @@ lp_disassemble(const void* func)
> OwningPtr<const MCDisassembler> DisAsm(T->createMCDisassembler());
> #endif
> if (!DisAsm) {
> - debug_printf("error: no disassembler for target %s\n", Triple.c_str());
> - return;
> + Out << "error: no disassembler for target " << Triple << "\n";
> + return 0;
> }
>
> - raw_debug_ostream Out;
> -
> #if HAVE_LLVM >= 0x0300
> unsigned int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
> #else
> @@ -235,14 +239,14 @@ lp_disassemble(const void* func)
> #if HAVE_LLVM >= 0x0301
> OwningPtr<const MCRegisterInfo> MRI(T->createMCRegInfo(Triple));
> if (!MRI) {
> - debug_printf("error: no register info for target %s\n", Triple.c_str());
> - return;
> + Out << "error: no register info for target " << Triple.c_str() << "\n";
> + return 0;
> }
>
> OwningPtr<const MCInstrInfo> MII(T->createMCInstrInfo());
> if (!MII) {
> - debug_printf("error: no instruction info for target %s\n", Triple.c_str());
> - return;
> + Out << "error: no instruction info for target " << Triple.c_str() << "\n";
> + return 0;
> }
> #endif
>
> @@ -260,8 +264,8 @@ lp_disassemble(const void* func)
> T->createMCInstPrinter(AsmPrinterVariant, *AsmInfo, Out));
> #endif
> if (!Printer) {
> - debug_printf("error: no instruction printer for target %s\n", Triple.c_str());
> - return;
> + Out << "error: no instruction printer for target " << Triple.c_str() << "\n";
> + return 0;
> }
>
> #if HAVE_LLVM >= 0x0301
> @@ -300,7 +304,7 @@ lp_disassemble(const void* func)
> * so that between runs.
> */
>
> - debug_printf("%6lu:\t", (unsigned long)pc);
> + Out << llvm::format("%6lu:\t", (unsigned long)pc);
>
> if (!DisAsm->getInstruction(Inst, Size, memoryObject,
> pc,
> @@ -309,7 +313,7 @@ lp_disassemble(const void* func)
> #else
> nulls())) {
> #endif
> - debug_printf("invalid\n");
> + Out << "invalid";
> pc += 1;
> }
>
> @@ -320,25 +324,23 @@ lp_disassemble(const void* func)
> if (0) {
> unsigned i;
> for (i = 0; i < Size; ++i) {
> - debug_printf("%02x ", ((const uint8_t*)bytes)[pc + i]);
> + Out << llvm::format("%02x ", ((const uint8_t*)bytes)[pc + i]);
> }
> for (; i < 16; ++i) {
> - debug_printf(" ");
> + Out << " ";
> }
> }
>
> /*
> * Print the instruction.
> */
> -
> #if HAVE_LLVM >= 0x0300
> - Printer->printInst(&Inst, Out, "");
> + Printer->printInst(&Inst, Out, "");
> #elif HAVE_LLVM >= 0x208
> - Printer->printInst(&Inst, Out);
> + Printer->printInst(&Inst, Out);
> #else
> - Printer->printInst(&Inst);
> + Printer->printInst(&Inst);
> #endif
> - Out.flush();
>
> /*
> * Advance.
> @@ -386,7 +388,7 @@ lp_disassemble(const void* func)
> * Output the address relative to the function start, given
> * that MC will print the addresses relative the current pc.
> */
> - debug_printf("\t\t; %lu", (unsigned long)jump);
> + Out << "\t\t; " << jump;
>
> /*
> * Ignore far jumps given it could be actually a tail return to
> @@ -401,7 +403,7 @@ lp_disassemble(const void* func)
> }
> }
>
> - debug_printf("\n");
> + Out << "\n";
>
> /*
> * Stop disassembling on return statements, if there is no record of a
> @@ -420,12 +422,73 @@ lp_disassemble(const void* func)
> */
>
> if (0) {
> - debug_printf("disassemble %p %p\n", bytes, bytes + pc);
> + _debug_printf("disassemble %p %p\n", bytes, bytes + pc);
> }
>
> - debug_printf("\n");
> + Out << "\n";
> + Out.flush();
> +
> + return pc;
> #else /* HAVE_LLVM < 0x0207 */
> (void)func;
> + return 0;
> #endif /* HAVE_LLVM < 0x0207 */
> }
>
> +
> +extern "C" void
> +lp_disassemble(LLVMValueRef func, const void *code) {
> + raw_debug_ostream Out;
> + disassemble(code, Out);
> +}
> +
> +
> +/*
> + * Linux perf profiler integration.
> + *
> + * See also:
> + * - http://penberg.blogspot.co.uk/2009/06/jato-has-profiler.html
> + * - https://github.com/penberg/jato/commit/73ad86847329d99d51b386f5aba692580d1f8fdc
> + * - http://git.kernel.org/?p=linux/kernel/git/torvalds/linux.git;a=commitdiff;h=80d496be89ed7dede5abee5c057634e80a31c82d
> + */
> +extern "C" void
> +lp_profile(LLVMValueRef func, const void *code)
> +{
> +#if defined(__linux__) && (defined(DEBUG) || defined(PROFILE))
> + static boolean first_time = TRUE;
> + static FILE *perf_map_file = NULL;
> + static int perf_asm_fd = -1;
> + if (first_time) {
> + /*
> + * We rely on the disassembler for determining a function's size, but
> + * the disassembly is a leaky and slow operation, so avoid running
> + * this except when running inside linux perf, which can be inferred
> + * by the PERF_BUILDID_DIR environment variable.
> + */
> + if (getenv("PERF_BUILDID_DIR")) {
> + pid_t pid = getpid();
> + char filename[256];
> + util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map", (unsigned long long)pid);
> + perf_map_file = fopen(filename, "wt");
> + util_snprintf(filename, sizeof filename, "/tmp/perf-%llu.map.asm", (unsigned long long)pid);
> + mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH;
> + perf_asm_fd = open(filename, O_WRONLY | O_CREAT, mode);
> + }
> + first_time = FALSE;
> + }
> + if (perf_map_file) {
> + const char *symbol = LLVMGetValueName(func);
> + unsigned long addr = (uintptr_t)code;
> + llvm::raw_fd_ostream Out(perf_asm_fd, false);
> + Out << symbol << ":\n";
> + unsigned long size = disassemble(code, Out);
> + fprintf(perf_map_file, "%lx %lx %s\n", addr, size, symbol);
> + fflush(perf_map_file);
> + }
> +#else
> + (void)func;
> + (void)code;
> +#endif
> +}
> +
> +
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_debug.h b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
> index da873f3..ab83d98 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_debug.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_debug.h
> @@ -83,7 +83,11 @@ lp_check_alignment(const void *ptr, unsigned alignment);
>
>
> void
> -lp_disassemble(const void* func);
> +lp_disassemble(LLVMValueRef func, const void *code);
> +
> +
> +void
> +lp_profile(LLVMValueRef func, const void *code);
>
>
> #ifdef __cplusplus
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_init.c b/src/gallium/auxiliary/gallivm/lp_bld_init.c
> index 4fa5887..1153411 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_init.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_init.c
> @@ -273,10 +273,6 @@ init_gallivm_engine(struct gallivm_state *gallivm)
> LLVMDisposeMessage(error);
> goto fail;
> }
> -
> -#if defined(DEBUG) || defined(PROFILE)
> - lp_register_oprofile_jit_event_listener(gallivm->engine);
> -#endif
> }
>
> LLVMAddModuleProvider(gallivm->engine, gallivm->provider);//new
> @@ -635,6 +631,7 @@ gallivm_compile_module(struct gallivm_state *gallivm)
> }
>
>
> +
> func_pointer
> gallivm_jit_function(struct gallivm_state *gallivm,
> LLVMValueRef func)
> @@ -650,9 +647,13 @@ gallivm_jit_function(struct gallivm_state *gallivm,
> jit_func = pointer_to_func(code);
>
> if (gallivm_debug & GALLIVM_DEBUG_ASM) {
> - lp_disassemble(code);
> + lp_disassemble(func, code);
> }
>
> +#if defined(PROFILE)
> + lp_profile(func, code);
> +#endif
> +
> /* Free the function body to save memory */
> lp_func_delete_body(func);
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> index 46cdbad..c512795 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.cpp
> @@ -54,7 +54,6 @@
> #include <llvm-c/ExecutionEngine.h>
> #include <llvm/Target/TargetOptions.h>
> #include <llvm/ExecutionEngine/ExecutionEngine.h>
> -#include <llvm/ExecutionEngine/JITEventListener.h>
> #if HAVE_LLVM >= 0x0301
> #include <llvm/ADT/Triple.h>
> #include <llvm/ExecutionEngine/JITMemoryManager.h>
> @@ -75,28 +74,6 @@
> #include "lp_bld_misc.h"
>
>
> -/**
> - * Register the engine with oprofile.
> - *
> - * This allows to see the LLVM IR function names in oprofile output.
> - *
> - * To actually work LLVM needs to be built with the --with-oprofile configure
> - * option.
> - *
> - * Also a oprofile:oprofile user:group is necessary. Which is not created by
> - * default on some distributions.
> - */
> -extern "C" void
> -lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE)
> -{
> -#if HAVE_LLVM >= 0x0301
> - llvm::unwrap(EE)->RegisterJITEventListener(llvm::JITEventListener::createOProfileJITEventListener());
> -#else
> - llvm::unwrap(EE)->RegisterJITEventListener(llvm::createOProfileJITEventListener());
> -#endif
> -}
> -
> -
> extern "C" void
> lp_set_target_options(void)
> {
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_misc.h b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
> index 9ed7c34..1f735fb 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_misc.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_misc.h
> @@ -41,9 +41,6 @@ extern "C" {
>
>
> extern void
> -lp_register_oprofile_jit_event_listener(LLVMExecutionEngineRef EE);
> -
> -extern void
> lp_set_target_options(void);
>
Nice to see that going in, as it is really helpful for profiling.
Reviewed-by: Roland Scheidegger <sroland at vmware.com>
More information about the mesa-dev mailing list