[Mesa-dev] [PATCH 10/11] clover: Add function for building a clover::module for non-TGSI targets
Francisco Jerez
currojerez at riseup.net
Sat May 12 15:40:43 PDT 2012
Tom Stellard <tstellar at gmail.com> writes:
> ---
> .../state_trackers/clover/llvm/invocation.cpp | 174 +++++++++++++++++++-
> 1 files changed, 165 insertions(+), 9 deletions(-)
>
> diff --git a/src/gallium/state_trackers/clover/llvm/invocation.cpp b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> index 89e21bf..b31fddc 100644
> --- a/src/gallium/state_trackers/clover/llvm/invocation.cpp
> +++ b/src/gallium/state_trackers/clover/llvm/invocation.cpp
> @@ -22,24 +22,33 @@
>
> #include "core/compiler.hpp"
>
> -#if 0
> #include <clang/Frontend/CompilerInstance.h>
> #include <clang/Frontend/TextDiagnosticPrinter.h>
> #include <clang/CodeGen/CodeGenAction.h>
> +#include <llvm/Bitcode/BitstreamWriter.h>
> +#include <llvm/Bitcode/ReaderWriter.h>
> +#include <llvm/DerivedTypes.h>
> +#include <llvm/Linker.h>
> #include <llvm/LLVMContext.h>
> +#include <llvm/Module.h>
> +#include <llvm/PassManager.h>
> #include <llvm/Support/TargetSelect.h>
> #include <llvm/Support/MemoryBuffer.h>
> +#include <llvm/Support/PathV1.h>
> +#include <llvm/Target/TargetData.h>
> +#include <llvm/Transforms/IPO/PassManagerBuilder.h>
> +
> +#include "util/u_memory.h"
>
> #include <iostream>
> #include <iomanip>
> #include <fstream>
> #include <cstdio>
> -#endif
>
> using namespace clover;
>
> -#if 0
> namespace {
> +#if 0
> void
> build_binary(const std::string &source, const std::string &target,
> const std::string &name) {
> @@ -78,17 +87,164 @@ namespace {
> compat::istream cs(str);
> return module::deserialize(cs);
> }
> -}
> #endif
> + module
> + build_module_llvm(const std::string &source, const std::string &target,
> + const std::string &name) {
> +
> + /* Compile the kernel */
> + clang::CompilerInstance c;
> + module m;
> + clang::EmitLLVMOnlyAction act(&llvm::getGlobalContext());
> + std::string log;
> + std::string target_triple = target + "--";
> + llvm::raw_string_ostream s_log(log);
> +
> +#if HAVE_LLVM <= 0x0300
> + c.getFrontendOpts().Inputs.push_back(
> + std::make_pair(clang::IK_OpenCL, "cl_input"));
> +#else
> + c.getFrontendOpts().Inputs.push_back(
> + clang::FrontendInputFile("cl_input", clang::IK_OpenCL));
> +#endif
> + c.getFrontendOpts().ProgramAction = clang::frontend::EmitLLVMOnly;
> + c.getHeaderSearchOpts().UseBuiltinIncludes = true;
> +#if HAVE_LLVM < 0x0300
> + c.getHeaderSearchOpts().UseStandardIncludes = true;
> +#else
> + c.getHeaderSearchOpts().UseStandardSystemIncludes = true;
> +#endif
> + c.getHeaderSearchOpts().ResourceDir = CLANG_RESOURCE_DIR;
> +
> + /* Add libclc generic search path */
> + c.getHeaderSearchOpts().AddPath(LIBCLC_PATH "/generic/include/",
> + clang::frontend::Angled,
> + false, false, false);
> +
> + /* Add libclc target specific search path */
> + c.getHeaderSearchOpts().AddPath(LIBCLC_PATH + target + "/include/",
> + clang::frontend::Angled,
> + false, false, false);
> +
> + /* Add libclc include */
> + c.getPreprocessorOpts().Includes.push_back("clc/clc.h");
> + /* clc.h requires that this macro be defined: */
> + c.getPreprocessorOpts().addMacroDef("cl_clang_storage_class_specifiers");
> +
> + c.getLangOpts().NoBuiltin = true;
> + c.getTargetOpts().Triple = target_triple;
> + c.getInvocation().setLangDefaults(clang::IK_OpenCL);
> + c.createDiagnostics(0, NULL, new clang::TextDiagnosticPrinter(
> + s_log, c.getDiagnosticOpts()));
> +
> + c.getPreprocessorOpts().addRemappedFile(
> + "cl_input", llvm::MemoryBuffer::getMemBuffer(source));
> +
> + /* Compile the code */
> + if (!c.ExecuteAction(act))
> + throw build_error(log);
> +
> + /* Link the kernel with libclc */
> + llvm::PassManager PM;
> + llvm::PassManagerBuilder Builder;
> + bool isNative;
> + llvm::Module * mod = act.takeModule();
> + llvm::Linker linker("clover", mod);
> +
> + linker.LinkInFile(llvm::sys::Path(LIBCLC_PATH + target_triple + "/lib/builtins.bc"), isNative);
> + mod = linker.releaseModule();
> +
> + /* Run link time optimizations */
> + Builder.populateLTOPassManager(PM, false, true);
> + Builder.OptLevel = 2;
> + PM.run(*mod);
> +
> + /* Build the clover::module */
> + unsigned char * prog;
> + uint32_t prog_sz;
> +
> +#if HAVE_LLVM > 0x0300
> + llvm::SmallVector<char, 1024> llvm_bitcode;
> + llvm::raw_svector_ostream bitcode_ostream(llvm_bitcode);
> +#else
> + std::vector<unsigned char> llvm_bitcode;
> +#endif
> + llvm::BitstreamWriter writer(llvm_bitcode);
> +
> +#if HAVE_LLVM <= 0x0300
> + llvm::WriteBitcodeToStream(mod, writer);
> +#else
> + llvm::WriteBitcodeToFile(mod, bitcode_ostream);
> + bitcode_ostream.flush();
> +#endif
> +
> + prog_sz = llvm_bitcode.size() * sizeof(unsigned char);
> +
> + /* We need to add 4 to the program size, because we will
> + * be preprending the length of the program to the bitcode string. */
> + prog = (unsigned char *)MALLOC(prog_sz + 4);
> + ((uint32_t *)prog)[0] = prog_sz;
> + memcpy(prog + 4, &llvm_bitcode[0], prog_sz);
> +
> + std::string kernel_name;
> + compat::vector<module::argument> args;
> + const llvm::NamedMDNode * kernel_node =
> + mod->getNamedMetadata("opencl.kernels");
> + /* XXX: Support more than one kernel */
> + /* XXX: Error if there are no kernels */
> + assert(kernel_node->getNumOperands() == 1);
> +
> + llvm::Function * kernel_func = llvm::dyn_cast<llvm::Function>(
> + kernel_node->getOperand(0)->getOperand(0));
> + kernel_name = kernel_func->getName();
> +
> + for (llvm::Function::arg_iterator I = kernel_func->arg_begin(),
> + E = kernel_func->arg_end(); I != E; ++I) {
> + llvm::Argument & arg = *I;
> + llvm::Type * arg_type = arg.getType();
> + llvm::TargetData TD(kernel_func->getParent());
> + unsigned arg_size = TD.getTypeStoreSize(arg_type);
> +
> + if (llvm::isa<llvm::PointerType>(arg_type) and arg.hasByValAttr()) {
> + arg_type =
> + llvm::dyn_cast<llvm::PointerType>(arg_type)->getElementType();
> + }
> +
> + if (arg_type->isPointerTy()) {
> + /* XXX: Figure out LLVM->OpenCL address space mappings for each
> + * target. I think we need to ask clang what these are. For now,
> + * pretend everything is in the global address space. */
> + unsigned address_space = llvm::cast<llvm::PointerType>(arg_type)->getAddressSpace();
> + switch (address_space) {
> + default:
> + args.push_back(module::argument(module::argument::global, arg_size));
> + break;
> + }
> + } else {
> + args.push_back(module::argument(module::argument::scalar, arg_size));
> + }
> + }
> + m.syms.push_back(module::symbol(kernel_name, 0, 0, args ));
> + m.secs.push_back(module::section(0, module::section::text, prog_sz + 4,
> + compat::vector<char>((char *)prog, prog_sz + 4)));
> + return m;
> + }
> +}
>
> module
> clover::compile_program_llvm(const compat::string &source,
> const compat::string &target) {
> +
> + if (target == compat::string("TGSI")) {
> #if 0
> - build_binary(source, target, "cl_input");
> - module m = load_binary("cl_input.o");
> - std::remove("cl_input.o");
> - return m;
> + build_binary(source, target, "cl_input");
> + module m = load_binary("cl_input.o");
> + std::remove("cl_input.o");
> + return m;
> +#else
> + return module();
> + } else {
> + return build_module_llvm(source, target, "cl_input");
> + }
> #endif
> - return module();
> }
Hi Tom,
I'd really like to see this done in a different way. If I understand
correctly, with this patch LLVM is invoked twice: the first time here to
generate some R600-specific LLVM bytecode, and then again in the pipe
driver to run it through the R600 back-end and generate the actual
machine code. In particular, this means that:
- The state tracker needs a special case for radeon in order to know
that the compilation process has to be terminated prematurely before
it gets to generate the machine code.
- I'm not sure you can take advantage of clCreateProgramWithBinary()
because the state tracker doesn't have access to the final machine
code (this is probably going to be a minor problem in practice
though).
I can think of two different ways this could work (your solution would
be somewhere in between):
- The r600g LLVM back-end could support some well-defined output object
format (e.g. the one implemented by the clover::module classes), like
most of the other LLVM back-ends. You probably want to do this
anyway if you want to be able to compile CL programs off-line. If
you do it this way, the state tracker will just call clang to
completion using the appropriate target and pass the generated
machine code to the pipe driver.
If you think supporting different hardware versions with different
ISAs would be a problem under this scheme, we could have another
compute cap that would determine a specific variant of the ISA.
- Another option would be to forget about driver-specific IRs in the
compute API. The pipe driver would have the choice between TGSI and
LLVM; if it wants LLVM, the state tracker would do roughly what
you're doing here using some sort of "identity" LLVM target that
would do nothing but describe the peculiarities of the hardware
(e.g. endianness, widths of the supported primitive types), which in
turn would be queried from the pipe driver using compute caps.
Personally I think the latter would be closer to ideal, but I guess it
would also involve more work...
-------------- next part --------------
A non-text attachment was scrubbed...
Name: not available
Type: application/pgp-signature
Size: 229 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20120513/383b9742/attachment.pgp>
More information about the mesa-dev
mailing list