[Beignet] [PATCH 6/9 newRT] Add GenProgram::toBinaryFormat to generate ELF format binary.

junyan.he at inbox.com junyan.he at inbox.com
Sat Apr 1 09:43:31 UTC 2017


From: Junyan He <junyan.he at intel.com>

We add this function to generate a standard ELF format binary.
All the verbose information we need in runtime will be stored
in .note.gpu_info section. Then, we can separate the runtime
and compiler clearly.

V2:
Add OpenCL info such as Argument name, workgroup size, etc.
Add GPU version and OpenCL version info.
Use struct and template to clear up the code.

V3:
Fix some bugs.

V4:
Fix a compiler error

Signed-off-by: Junyan He <junyan.he at intel.com>
---
 backend/src/CMakeLists.txt              |   1 +
 backend/src/backend/context.cpp         |  25 +-
 backend/src/backend/gen_program.hpp     |  10 +-
 backend/src/backend/gen_program_elf.cpp | 628 ++++++++++++++++++++++++++++++++
 backend/src/backend/program.cpp         |   4 +-
 backend/src/backend/program.h           |  23 +-
 backend/src/backend/program.hpp         |   8 +-
 backend/src/gbe_bin_interpreter.cpp     |   1 +
 8 files changed, 680 insertions(+), 20 deletions(-)

diff --git a/backend/src/CMakeLists.txt b/backend/src/CMakeLists.txt
index ccfe671..3e7c20f 100644
--- a/backend/src/CMakeLists.txt
+++ b/backend/src/CMakeLists.txt
@@ -146,6 +146,7 @@ set (GBE_SRC
     backend/gen8_encoder.cpp
     backend/gen9_encoder.hpp
     backend/gen9_encoder.cpp
+    backend/compiler_api.cpp
     )
 
 set (GBE_LINK_LIBRARIES
diff --git a/backend/src/backend/context.cpp b/backend/src/backend/context.cpp
index e9ddd17..51ef3a7 100644
--- a/backend/src/backend/context.cpp
+++ b/backend/src/backend/context.cpp
@@ -473,35 +473,44 @@ namespace gbe
       kernel->args[argID].info.typeQual = arg.info.typeQual;
       kernel->args[argID].info.argName = arg.info.argName;
       kernel->args[argID].info.typeSize = arg.info.typeSize;
+      kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_PRIVATE;
       switch (arg.type) {
         case ir::FunctionArgument::VALUE:
+          kernel->args[argID].type = GBE_ARG_TYPE_VALUE;
+          kernel->args[argID].size = arg.size;
+          break;
         case ir::FunctionArgument::STRUCTURE:
-          kernel->args[argID].type = GBE_ARG_VALUE;
+          kernel->args[argID].type = GBE_ARG_TYPE_STRUCT;
           kernel->args[argID].size = arg.size;
           break;
         case ir::FunctionArgument::GLOBAL_POINTER:
-          kernel->args[argID].type = GBE_ARG_GLOBAL_PTR;
+          kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+          kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
           kernel->args[argID].size = sizeof(void*);
           kernel->args[argID].bti = arg.bti;
           break;
         case ir::FunctionArgument::CONSTANT_POINTER:
-          kernel->args[argID].type = GBE_ARG_CONSTANT_PTR;
+          kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+          kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_CONSTANT;
           kernel->args[argID].size = sizeof(void*);
           break;
         case ir::FunctionArgument::LOCAL_POINTER:
-          kernel->args[argID].type = GBE_ARG_LOCAL_PTR;
-          kernel->args[argID].size = 0;
+          kernel->args[argID].type = GBE_ARG_TYPE_POINTER;
+          kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_LOCAL;
+          kernel->args[argID].size = arg.size;
           break;
         case ir::FunctionArgument::IMAGE:
-          kernel->args[argID].type = GBE_ARG_IMAGE;
+          kernel->args[argID].type = GBE_ARG_TYPE_IMAGE;
+          /* image objects are always allocated from the global address space */
+          kernel->args[argID].arg_space_type = GBE_ADDRESS_SPACE_GLOBAL;
           kernel->args[argID].size = sizeof(void*);
           break;
         case ir::FunctionArgument::SAMPLER:
-          kernel->args[argID].type = GBE_ARG_SAMPLER;
+          kernel->args[argID].type = GBE_ARG_TYPE_SAMPLER;
           kernel->args[argID].size = sizeof(void*);
           break;
         case ir::FunctionArgument::PIPE:
-          kernel->args[argID].type = GBE_ARG_PIPE;
+          kernel->args[argID].type = GBE_ARG_TYPE_PIPE; // pipes keep their own arg type, they are not samplers
           kernel->args[argID].size = sizeof(void*);
           kernel->args[argID].bti = arg.bti;
           break;
diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp
index ff756e0..426a0da 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,10 +60,14 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+    GenProgramElfContext* elf_ctx;
+
   public:
     /*! Create an empty program */
     GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
-      Program(fast_relaxed_math), deviceID(deviceID),module((void*)mod), llvm_ctx((void*)ctx), asm_file_name(asm_fname) {}
+      Program(fast_relaxed_math), elf_ctx(NULL), deviceID(deviceID), module((void*)mod), llvm_ctx((void*)ctx),
+      asm_file_name(asm_fname) {}
     /*! Current device ID*/
     uint32_t deviceID;
     /*! Destroy the program */
@@ -70,6 +76,8 @@ namespace gbe
     virtual void CleanLlvmResource(void);
     /*! Implements base class */
     virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath, int profiling);
+    /*! Generate binary format */
+    virtual void *toBinaryFormat(size_t &ret_size);
     /*! Allocate an empty kernel. */
     virtual Kernel *allocateKernel(const std::string &name) {
       return GBE_NEW(GenKernel, name, deviceID);
diff --git a/backend/src/backend/gen_program_elf.cpp b/backend/src/backend/gen_program_elf.cpp
index efd45fe..feea392 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -15,16 +15,22 @@
  * License along with this library. If not, see <http://www.gnu.org/licenses/>.
  *
  */
+#include "src/gen/gen_device_pci_id.h"
 #include "ocl_common_defines.h"
 #include "elfio/elfio.hpp"
 #include "backend/gen_program.hpp"
 #include "backend/gen_program.hpp"
+#include "sys/cvar.hpp"
 #include <algorithm>
 #include <sstream>
 #include <streambuf>
+using namespace std;
 
 namespace gbe
 {
+
+BVAR(OCL_DUMP_ELF_FILE, false);
+
 /* The elf writer need to make sure seekp function work, so sstream
    can not work, and we do not want the fostream to generate the real
    file. We just want to keep the elf image in the memory. Implement
@@ -121,4 +127,626 @@ protected:
     return count;
   }
 };
+
+using namespace ELFIO;
+
+/* The format for one Gen Kernel function is following note section format
+ --------------------------
+ | GEN_NOTE_TYPE_GPU_INFO |
+ --------------------------
+ | Function Name size:4 |
+ ------------------------
+ | Desc size:4  |
+ ---------------------------
+ | The kernel name(strlen) |
+ -----------------------------------------------------------------------------------------------
+ | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM Used:4 | Arg Num:4 |
+ -----------------------------------------------------------------------------------------------
+   Then the format for each argument is
+ --------------------------------------------------------------------------------------------------------------------------
+ | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr) | BTI(if buffer):4 / Index(sampler and image):4 |
+ --------------------------------------------------------------------------------------------------------------------------
+   Then all sampler info
+ -----------------------------------
+ | Number:4 | SamplerInfo:4 | .....|
+ -----------------------------------
+   Then all image info
+ --------------------------------------------------------------------------------------------
+ | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel Order:4 | .......|
+ --------------------------------------------------------------------------------------------
+   Last is the map table of special virtual register and phy register
+ --------------------------------------------------------
+ | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.........|
+ --------------------------------------------------------  */
+
+/* The format for one Gen Kernel function's OpenCL info is following note section format
+ --------------------------
+ | GEN_NOTE_TYPE_CL_INFO  |
+ ----------------------------------------
+ | The kernel function's name: (strlen) |
+ ----------------------------------------
+ | Function's attribute string: (strlen)|
+ ----------------------------------------
+ | Work Group size: sizeof(size_t) * 3  |
+ ----------------------------------------
+ | Argument TypeName: (strlen) |
+ ---------------------------------
+ | Argument AccessQual: (strlen) |
+ ---------------------------------
+ | Argument Name: (strlen) |
+ ---------------------------  */
+
+/* The format for GPU version is:
+ ----------------------------
+ | GEN_NOTE_TYPE_GPU_VERSION |
+ -----------------------------
+ | GEN string (HasWell e.g.) |
+ -----------------------------
+ | GEN pci id |
+ --------------
+ | GEN version major:4 |
+ -----------------------
+ | GEN version minor:4 |
+ -----------------------  */
+
+/* The format for CL version is:
+ ----------------------------
+ | GEN_NOTE_TYPE_CL_VERSION |
+ ----------------------------------------
+ | CL version string (OpenCL 2.0  e.g.) |
+ ----------------------------------------
+ | CL version major:4 |
+ ----------------------
+ | CL version minor:4 |
+ ----------------------  */
+
+/* The format for Compiler info is:
+ -------------------------------
+ | GEN_NOTE_TYPE_COMPILER_INFO |
+ ----------------------------------------
+ | Compiler name (GBE_Compiler  e.g.) |
+ ----------------------------------------
+ | LLVM version major:4 |
+ ------------------------
+ | LLVM version minor:4 |
+ ------------------------ */
+
+class GenProgramElfContext // ELF writer state (sections, accessors, counters) used to serialize one GenProgram
+{
+public:
+  enum { // note type values passed to add_note()
+    GEN_NOTE_TYPE_CL_VERSION = 1,
+    GEN_NOTE_TYPE_GPU_VERSION = 2,
+    GEN_NOTE_TYPE_GPU_INFO = 3,
+    GEN_NOTE_TYPE_CL_INFO = 4,
+    GEN_NOTE_TYPE_COMPILER_INFO = 5,
+  };
+
+  struct KernelInfoHelper { // fixed-size head of a GPU_INFO note, filled by emitOneKernel
+    Elf32_Word simd;
+    Elf32_Word local_mem_size;
+    Elf32_Word scratch_size;
+    Elf32_Word stack_size;
+    Elf32_Word barrier_slm_used;
+    Elf32_Word arg_num;
+  };
+  struct ArgInfoHelper { // one record per kernel argument, written right after KernelInfoHelper
+    Elf32_Word index;
+    Elf32_Word size;
+    Elf32_Word type;
+    Elf32_Word offset;
+    Elf32_Word addr_space;
+    Elf32_Word align;
+    Elf32_Word extra; // BTI for global pointers, image / sampler table index for those arg types
+  };
+  struct ImageInfoHelper { // one record per image, written after the image count
+    Elf32_Word bti;
+    Elf32_Word width;
+    Elf32_Word height;
+    Elf32_Word depth;
+    Elf32_Word data_type;
+    Elf32_Word channel_order;
+  };
+  struct VirtRegMapHelper { // one record per curbe register reported by emitOneCurbeReg
+    Elf32_Word virt_reg;
+    Elf32_Word phy_reg;
+    Elf32_Word size;
+  };
+
+  GenProgram &genProg; // program this context was created for
+
+  elfio writer;
+  section *text_sec; // ".text", receives every kernel's code
+  section *sym_sec; // ".symtab"
+  section *strtab_sec; // ".strtab"
+  section *ker_info_sec; // ".note.gpu_info"
+  section *cl_info_sec; // ".note.cl_info"
+  section *rodata_sec; // ".rodata"; stays NULL unless toBinaryFormat finds global constant data
+  symbol_section_accessor *syma;
+  string_section_accessor *stra;
+  note_section_accessor *note_writer; // writes into ker_info_sec
+  note_section_accessor *cl_note_writer; // writes into cl_info_sec
+  Elf32_Word sym_num; // incremented once per emitted kernel
+  uint64_t bitcode_offset; // byte offset inside .text where the next kernel's code starts
+
+  GenProgramElfContext(GenProgram &prog);
+  ~GenProgramElfContext(void);
+
+  template <gbe_curbe_type curbe_enum, typename TYPE, int UNIFORM>
+  void emitOneCurbeReg(unsigned int &total_num, char *&ptr, GenKernel &kernel);
+  void emitOneKernel(GenKernel &kernel);
+  void emitOneKernelCLInfo(GenKernel &kernel);
+};
+
+GenProgramElfContext::GenProgramElfContext(GenProgram &prog) // creates the ELF container, its fixed sections and their accessors
+  : genProg(prog), text_sec(NULL), sym_sec(NULL), strtab_sec(NULL), ker_info_sec(NULL),
+    cl_info_sec(NULL), rodata_sec(NULL), syma(NULL), stra(NULL), note_writer(NULL),
+    cl_note_writer(NULL), sym_num(0), bitcode_offset(0)
+{
+  writer.create(ELFCLASS64, ELFDATA2LSB); // 64-bit class, little-endian data encoding
+  writer.set_os_abi(ELFOSABI_LINUX);
+  writer.set_type(ET_REL);
+  writer.set_machine(EM_INTEL205); // TODO: Some value of Intel GPU;
+
+  // Create code section
+  text_sec = writer.sections.add(".text");
+  text_sec->set_type(SHT_PROGBITS);
+  text_sec->set_flags(SHF_ALLOC | SHF_EXECINSTR);
+  text_sec->set_addr_align(4);
+
+  // Create string table section
+  strtab_sec = writer.sections.add(".strtab");
+  strtab_sec->set_type(SHT_STRTAB);
+  strtab_sec->set_addr_align(1);
+
+  // Create symbol table section
+  sym_sec = writer.sections.add(".symtab");
+  sym_sec->set_type(SHT_SYMTAB);
+  sym_sec->set_addr_align(0x4);
+  sym_sec->set_entry_size(writer.get_default_entry_size(SHT_SYMTAB));
+  sym_sec->set_link(strtab_sec->get_index()); // the symbol table links to .strtab created above
+  sym_sec->set_info(0x01);
+
+  // Create kernel info section
+  ker_info_sec = writer.sections.add(".note.gpu_info");
+  ker_info_sec->set_type(SHT_NOTE);
+  ker_info_sec->set_flags(SHF_ALLOC);
+  ker_info_sec->set_addr_align(0x04);
+
+  // Create cl info section
+  cl_info_sec = writer.sections.add(".note.cl_info");
+  cl_info_sec->set_type(SHT_NOTE);
+  cl_info_sec->set_flags(SHF_ALLOC);
+  cl_info_sec->set_addr_align(0x04);
+
+  // Create string table writer
+  stra = GBE_NEW(string_section_accessor, strtab_sec);
+  // Create symbol table writer
+  syma = GBE_NEW(symbol_section_accessor, writer, sym_sec);
+  // Create note writer
+  note_writer = GBE_NEW(note_section_accessor, writer, ker_info_sec);
+  // Create CL note writer
+  cl_note_writer = GBE_NEW(note_section_accessor, writer, cl_info_sec);
+}
+
+GenProgramElfContext::~GenProgramElfContext(void) // releases the four accessors allocated with GBE_NEW in the constructor
+{
+  if (syma)
+    GBE_DELETE(syma);
+  if (stra)
+    GBE_DELETE(stra);
+  if (note_writer)
+    GBE_DELETE(note_writer);
+  if (cl_note_writer)
+    GBE_DELETE(cl_note_writer);
+}
+
+/* Store one special virtual register mapping: when the kernel reports a curbe offset >= 0, write a VirtRegMapHelper at ptr, advance ptr and bump total_num */
+template <gbe_curbe_type curbe_enum, typename TYPE, int UNIFORM>
+void GenProgramElfContext::emitOneCurbeReg(unsigned int &total_num, char *&ptr, GenKernel &kernel)
+{
+  int32_t offset = kernel.getCurbeOffset(curbe_enum, 0);
+  if (offset >= 0) { // a negative offset means nothing is emitted for this register
+    VirtRegMapHelper *vri = reinterpret_cast<VirtRegMapHelper *>(ptr);
+    vri->virt_reg = curbe_enum;
+    vri->phy_reg = offset;
+    vri->size = UNIFORM ? sizeof(TYPE) : sizeof(TYPE) * kernel.getSIMDWidth(); // non-uniform: sizeof(TYPE) times the SIMD width
+    ptr += sizeof(VirtRegMapHelper);
+    total_num++;
+  }
+}
+template <> // GBE_CURBE_EXTRA_ARGUMENT: looked up with sub-type GBE_STACK_BUFFER, size is always sizeof(uint64_t)
+void GenProgramElfContext::emitOneCurbeReg<GBE_CURBE_EXTRA_ARGUMENT, uint64_t, 0>(
+  unsigned int &total_num, char *&ptr, GenKernel &kernel)
+{
+  int32_t offset = kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+  if (offset >= 0) {
+    VirtRegMapHelper *vri = reinterpret_cast<VirtRegMapHelper *>(ptr);
+    vri->virt_reg = GBE_CURBE_EXTRA_ARGUMENT;
+    vri->phy_reg = offset;
+    vri->size = sizeof(uint64_t);
+    ptr += sizeof(VirtRegMapHelper);
+    total_num++;
+  }
+}
+
+/* Emit one kernel's CL_INFO note: three uint32 lengths, then attribute string, work-group size and per-argument strings. */
+void GenProgramElfContext::emitOneKernelCLInfo(GenKernel &kernel)
+{
+  uint32_t all_str_len = 0;
+  uint32_t attr_size = 0;
+  size_t wg_sz[3];
+  uint32_t wg_sz_size = 0;
+  uint32_t arg_info_size = 0;
+
+  if ((kernel.getFunctionAttributes())[0] != 0)
+    attr_size = ::strlen(kernel.getFunctionAttributes()) + 1;
+  all_str_len = ALIGN(attr_size, 4);
+
+  kernel.getCompileWorkGroupSize(wg_sz);
+  if (wg_sz[0] > 0 || wg_sz[1] > 0 || wg_sz[2] > 0)
+    wg_sz_size = sizeof(size_t) * 3;
+  all_str_len = all_str_len + wg_sz_size;
+
+  for (unsigned int i = 0; i < kernel.getArgNum(); i++) {
+    KernelArgument::ArgInfo *arg_info = kernel.getArgInfo(i);
+    if (arg_info == NULL) {
+      assert(i == 0); // All have info or none has info
+      break;
+    }
+    arg_info_size += arg_info->typeName.length() + 1;
+    arg_info_size += arg_info->accessQual.length() + 1;
+    arg_info_size += arg_info->typeQual.length() + 1;
+    arg_info_size += arg_info->argName.length() + 1;
+    arg_info_size = ALIGN(arg_info_size, 4);
+  }
+  all_str_len = all_str_len + arg_info_size;
+
+  if (all_str_len == 0)
+    return;
+
+  all_str_len += 3 * sizeof(uint32_t); // The length themselves
+  char *cl_info = static_cast<char *>(GBE_MALLOC(all_str_len));
+  ::memset(cl_info, 0, all_str_len); // alignment padding is never written below; keep it deterministic
+  *reinterpret_cast<uint32_t *>(cl_info) = attr_size;
+  *reinterpret_cast<uint32_t *>(cl_info + sizeof(uint32_t)) = wg_sz_size;
+  *reinterpret_cast<uint32_t *>(cl_info + 2 * sizeof(uint32_t)) = arg_info_size;
+
+  size_t offset = 3 * sizeof(uint32_t);
+
+  if (attr_size > 0) {
+    ::memcpy(cl_info + offset, kernel.getFunctionAttributes(), attr_size); // attr_size == strlen + 1
+    offset += attr_size;
+    offset = ALIGN(offset, 4);
+  }
+
+  if (wg_sz_size > 0) {
+    ::memcpy(cl_info + offset, wg_sz, sizeof(size_t) * 3);
+    offset += wg_sz_size;
+  }
+
+  if (arg_info_size) {
+    for (unsigned int i = 0; i < kernel.getArgNum(); i++) {
+      KernelArgument::ArgInfo *arg_info = kernel.getArgInfo(i);
+      assert(arg_info != NULL);
+      if (arg_info->typeName.length() > 0)
+        ::memcpy(cl_info + offset, arg_info->typeName.c_str(), arg_info->typeName.length() + 1);
+      else
+        *(cl_info + offset) = 0;
+      offset += (arg_info->typeName.length() + 1);
+
+      if (arg_info->accessQual.length() > 0)
+        ::memcpy(cl_info + offset, arg_info->accessQual.c_str(), arg_info->accessQual.length() + 1);
+      else
+        *(cl_info + offset) = 0;
+      offset += (arg_info->accessQual.length() + 1);
+
+      if (arg_info->typeQual.length() > 0)
+        ::memcpy(cl_info + offset, arg_info->typeQual.c_str(), arg_info->typeQual.length() + 1);
+      else
+        *(cl_info + offset) = 0;
+      offset += (arg_info->typeQual.length() + 1);
+
+      if (arg_info->argName.length() > 0)
+        ::memcpy(cl_info + offset, arg_info->argName.c_str(), arg_info->argName.length() + 1);
+      else
+        *(cl_info + offset) = 0;
+      offset += (arg_info->argName.length() + 1);
+
+      offset = ALIGN(offset, 4);
+    }
+  }
+
+  assert(offset == all_str_len);
+
+  cl_note_writer->add_note(GEN_NOTE_TYPE_CL_INFO, kernel.getName(), cl_info, all_str_len);
+  GBE_FREE(cl_info);
+}
+
+/* Emit one kernel: append its code to .text, add its function symbol, write its GPU_INFO note, then its CL_INFO note. */
+void GenProgramElfContext::emitOneKernel(GenKernel &kernel)
+{
+  assert(text_sec != NULL);
+  assert(sym_sec != NULL);
+  assert(syma != NULL);
+  assert(stra != NULL);
+
+  sym_num++;
+
+  // Add the kernel's bitcode to .text section
+  text_sec->append_data(kernel.getCode(), kernel.getCodeSize());
+  // Add the kernel func as a symbol
+  syma->add_symbol(*stra, kernel.getName(), bitcode_offset, kernel.getCodeSize(),
+                   STB_GLOBAL, STT_FUNC, 0, text_sec->get_index());
+  bitcode_offset += kernel.getCodeSize();
+
+  uint32_t arg_num = kernel.getArgNum();
+
+  size_t sampler_data_sz = kernel.getSamplerSize() * sizeof(uint32_t);
+  uint32_t *sampler_data = NULL;
+  if (sampler_data_sz) {
+    sampler_data = static_cast<uint32_t *>(GBE_MALLOC(sampler_data_sz));
+    ::memset(sampler_data, 0, sampler_data_sz);
+    kernel.getSamplerData(sampler_data);
+  }
+
+  size_t image_data_sz = kernel.getImageSize() * sizeof(ImageInfo);
+  ImageInfo *image_data = NULL;
+  if (image_data_sz) {
+    image_data = static_cast<ImageInfo *>(GBE_MALLOC(image_data_sz));
+    ::memset(image_data, 0, image_data_sz);
+    kernel.getImageData(image_data);
+    std::sort(image_data, image_data + image_data_sz / sizeof(ImageInfo),
+              [](const ImageInfo &a, const ImageInfo &b) { return a.idx < b.idx; });
+  }
+  /* Upper bound of the note payload; the sampler part must be sized with the sampler bytes copied below. */
+  void *kernel_info = GBE_MALLOC(4 /* For align */ +
+                                 sizeof(KernelInfoHelper) + arg_num * sizeof(ArgInfoHelper) +
+                                 sizeof(Elf32_Word) /* For sampler num */ + sampler_data_sz +
+                                 sizeof(Elf32_Word) /* For image num */ +
+                                 ((image_data_sz / sizeof(ImageInfo)) * sizeof(ImageInfoHelper)) +
+                                 sizeof(Elf32_Word) /* For virt/phy num */ +
+                                 GBE_GEN_REG * sizeof(VirtRegMapHelper));
+  char *ptr = reinterpret_cast<char *>(ALIGN(reinterpret_cast<long>(kernel_info), 4));
+  KernelInfoHelper *ki = reinterpret_cast<KernelInfoHelper *>(ptr);
+  ki->simd = kernel.getSIMDWidth();
+  ki->local_mem_size = kernel.getSLMSize();
+  ki->scratch_size = kernel.getScratchSize();
+  ki->stack_size = kernel.getStackSize();
+  ki->barrier_slm_used = kernel.getUseSLM();
+  ki->arg_num = kernel.getArgNum();
+  ptr += sizeof(KernelInfoHelper);
+
+  for (unsigned int i = 0; i < arg_num; i++) {
+    ArgInfoHelper *argi = reinterpret_cast<ArgInfoHelper *>(ptr);
+    argi->index = i;
+    argi->size = kernel.getArgSize(i);
+    argi->type = kernel.getArgType(i);
+    argi->addr_space = kernel.getArgAddressSpace(i);
+    argi->align = kernel.getArgAlign(i);
+    /* extra: BTI for global pointers, or the index into the image / sampler tables
+       written below. Default to 0 so an unmatched argument never leaks heap bytes. */
+    argi->extra = 0;
+    if (argi->type == GBE_ARG_TYPE_POINTER && argi->addr_space == GBE_ADDRESS_SPACE_GLOBAL) {
+      argi->extra = kernel.getArgBTI(i);
+    } else if (argi->type == GBE_ARG_TYPE_IMAGE) {
+      assert(image_data_sz > 0);
+      for (size_t j = 0; j < image_data_sz / sizeof(ImageInfo); j++) {
+        if (image_data[j].arg_idx == static_cast<int32_t>(i)) {
+          argi->extra = static_cast<Elf32_Word>(j);
+          break;
+        }
+      }
+    } else if (argi->type == GBE_ARG_TYPE_SAMPLER) {
+      assert(sampler_data_sz > 0);
+      for (size_t j = 0; j < sampler_data_sz / sizeof(uint32_t); j++) {
+        if (((sampler_data[j] & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE) ==
+            static_cast<uint32_t>(i)) {
+          argi->extra = static_cast<Elf32_Word>(j);
+          break;
+        }
+      }
+    }
+
+    argi->offset = kernel.getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, i);
+    ptr += sizeof(ArgInfoHelper);
+  }
+
+  /* Store all the sampler info */
+  *(reinterpret_cast<Elf32_Word *>(ptr)) =
+    static_cast<Elf32_Word>(sampler_data_sz / sizeof(uint32_t)); // Sampler number
+  ptr = ptr + sizeof(Elf32_Word);
+  if (sampler_data_sz > 0) {
+    ::memcpy(ptr, sampler_data, sampler_data_sz);
+    GBE_FREE(sampler_data);
+    ptr = ptr + sampler_data_sz;
+  }
+
+  /* Store all the Image info */
+  *(reinterpret_cast<Elf32_Word *>(ptr)) =
+    static_cast<Elf32_Word>(image_data_sz / sizeof(ImageInfo)); // Image number
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  /* Store all the image info by index */
+  if (image_data_sz > 0) {
+    for (size_t i = 0; i < image_data_sz / sizeof(ImageInfo); i++) {
+      ImageInfoHelper *imgi = reinterpret_cast<ImageInfoHelper *>(ptr);
+      imgi->bti = image_data[i].idx;
+      imgi->width = image_data[i].wSlot;
+      imgi->height = image_data[i].hSlot;
+      imgi->depth = image_data[i].depthSlot;
+      imgi->data_type = image_data[i].dataTypeSlot;
+      imgi->channel_order = image_data[i].channelOrderSlot;
+      ptr = ptr + sizeof(ImageInfoHelper);
+    }
+
+    GBE_FREE(image_data);
+  }
+
+  Elf32_Word *p_virt_phy_num = reinterpret_cast<Elf32_Word *>(ptr);
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  unsigned int virt_phy_num = 0;
+
+  emitOneCurbeReg<GBE_CURBE_LOCAL_ID_X, Elf32_Word, 0>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_LOCAL_ID_Y, Elf32_Word, 0>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_LOCAL_ID_Z, Elf32_Word, 0>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_LOCAL_SIZE_X, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_LOCAL_SIZE_Y, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_LOCAL_SIZE_Z, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_ENQUEUED_LOCAL_SIZE_X, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GLOBAL_SIZE_X, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GLOBAL_SIZE_Y, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GLOBAL_SIZE_Z, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GLOBAL_OFFSET_X, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GLOBAL_OFFSET_Y, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GLOBAL_OFFSET_Z, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GROUP_NUM_X, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GROUP_NUM_Y, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_GROUP_NUM_Z, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_WORK_DIM, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_BLOCK_IP, Elf32_Half, 0>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_DW_BLOCK_IP, Elf32_Word, 0>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_THREAD_NUM, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_THREAD_ID, Elf32_Word, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_CONSTANT_ADDRSPACE, uint64_t, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_STACK_SIZE, uint64_t, 1>(virt_phy_num, ptr, kernel);
+  emitOneCurbeReg<GBE_CURBE_EXTRA_ARGUMENT, uint64_t, 0>(virt_phy_num, ptr, kernel);
+  *p_virt_phy_num = virt_phy_num;
+
+  Elf_Word total_sz = static_cast<char *>(ptr) - static_cast<char *>(kernel_info);
+  note_writer->add_note(GEN_NOTE_TYPE_GPU_INFO, kernel.getName(), kernel_info, total_sz);
+
+#if 0
+  for (int i = 0; i < (int)total_sz; i++) {
+    if (i % 16 == 0)
+      printf("\n");
+    if (i % 2 == 0)
+      printf(" ");
+    printf("%2.2x", ((unsigned char *)kernel_info)[i]);
+  }
+  printf("\n");
+  for (int i = 0; i < (int)total_sz / 4; i++) {
+    printf(" %d", ((unsigned int *)kernel_info)[i]);
+  }
+#endif
+
+  GBE_FREE(kernel_info);
+
+  emitOneKernelCLInfo(kernel);
+}
+
+/* Build the ELF image in memory. Returns a ::malloc'ed copy (not retained here, so the caller owns it) and its size in ret_size; NULL on failure. */
+void *GenProgram::toBinaryFormat(size_t &ret_size)
+{
+  ret_size = 0;
+  assert(elf_ctx == NULL);
+  elf_ctx = GBE_NEW(GenProgramElfContext, *this);
+
+  if (getGlobalConstantSize() > 0) {
+    elf_ctx->rodata_sec = elf_ctx->writer.sections.add(".rodata");
+    elf_ctx->rodata_sec->set_type(SHT_PROGBITS);
+    elf_ctx->rodata_sec->set_flags(SHF_ALLOC);
+    elf_ctx->rodata_sec->set_addr_align(1);
+
+    char *const_data = static_cast<char *>(GBE_MALLOC(getGlobalConstantSize()));
+    getGlobalConstantData(const_data);
+    elf_ctx->rodata_sec->set_data(const_data, getGlobalConstantSize());
+    GBE_FREE(const_data);
+  }
+
+  /* Add the note about GPU info */
+  std::string gpu_name;
+  Elf32_Word gpu_version[3] = {0, 0, 0}; // [0]=major, [1]=minor, [2]=pci-id. NOTE(review): format comment lists pci-id first, confirm with reader
+  if (IS_IVYBRIDGE(deviceID)) {
+    gpu_name = "IVYBridge";
+    gpu_version[0] = 7;
+    gpu_version[1] = 0;
+  } else if (IS_BAYTRAIL_T(deviceID)) {
+    gpu_name = "BayTrail";
+    gpu_version[0] = 7;
+    gpu_version[1] = 0;
+  } else if (IS_HASWELL(deviceID)) {
+    gpu_name = "HasWell";
+    gpu_version[0] = 7;
+    gpu_version[1] = 5;
+  } else if (IS_BROADWELL(deviceID)) {
+    gpu_name = "BroadWell";
+    gpu_version[0] = 8;
+    gpu_version[1] = 0;
+  } else if (IS_CHERRYVIEW(deviceID)) {
+    gpu_name = "CherryView";
+    gpu_version[0] = 8;
+    gpu_version[1] = 0;
+  } else if (IS_SKYLAKE(deviceID)) {
+    gpu_name = "SkyLake";
+    gpu_version[0] = 9;
+    gpu_version[1] = 0;
+  } else if (IS_BROXTON(deviceID)) {
+    gpu_name = "BroxTon";
+    gpu_version[0] = 9;
+    gpu_version[1] = 0;
+  }
+  gpu_version[2] = deviceID; // last valid slot of the 3-element array
+  elf_ctx->note_writer->add_note(GenProgramElfContext::GEN_NOTE_TYPE_GPU_VERSION,
+                                 gpu_name, gpu_version, sizeof(gpu_version));
+
+  /* Add note info about compiler */
+  std::string compiler_name("GBE Compiler");
+  Elf32_Word compiler_version[2]; // major and minor
+  compiler_version[0] = LLVM_VERSION_MAJOR;
+  compiler_version[1] = LLVM_VERSION_MINOR;
+  elf_ctx->note_writer->add_note(GenProgramElfContext::GEN_NOTE_TYPE_COMPILER_INFO,
+                                 compiler_name, compiler_version, sizeof(compiler_version));
+
+  bool write_cl_version = false;
+  uint32_t oclVersion = 0;
+  for (map<std::string, Kernel *>::const_iterator it = kernels.begin();
+       it != kernels.end(); ++it) {
+    GenKernel *k = static_cast<GenKernel *>(it->second);
+
+    if (write_cl_version == false) {
+      std::string ocl_version_str;
+      Elf32_Word cl_version[2] = {0, 0}; // major and minor
+      oclVersion = k->getOclVersion();
+      if (oclVersion == 120) {
+        ocl_version_str = "OpenCL 1.2";
+        cl_version[0] = 1;
+        cl_version[1] = 2;
+      } else if (oclVersion == 200) {
+        ocl_version_str = "OpenCL 2.0";
+        cl_version[0] = 2;
+        cl_version[1] = 0;
+      } else
+        assert(0);
+
+      elf_ctx->cl_note_writer->add_note(GenProgramElfContext::GEN_NOTE_TYPE_CL_VERSION,
+                                        ocl_version_str, cl_version, sizeof(cl_version));
+      write_cl_version = true; // one version note per program; later kernels are only checked
+    } else {
+      assert(oclVersion == k->getOclVersion());
+    }
+
+    elf_ctx->emitOneKernel(*k);
+  }
+
+  wmemstreambuf membuf(4096);
+  std::ostream oss(&membuf);
+  if (OCL_DUMP_ELF_FILE)
+    elf_ctx->writer.save("gbe_program_elf_dump.o");
+  elf_ctx->writer.save(oss);
+  GBE_DELETE(elf_ctx);
+  elf_ctx = NULL; // the entry assert requires NULL, so a later call must not see a dangling pointer
+
+  size_t elf_size = 0;
+  char *elf_mem = membuf.getcontent(elf_size);
+  void *p_elf_ret = (elf_size > 0) ? ::malloc(elf_size) : NULL;
+  if (p_elf_ret == NULL) // nothing was written, or the allocation failed
+    return NULL;
+
+  ::memcpy(p_elf_ret, elf_mem, elf_size);
+  ret_size = elf_size;
+  return p_elf_ret;
+}
+
 } /* namespace gbe */
diff --git a/backend/src/backend/program.cpp b/backend/src/backend/program.cpp
index 09c79d8..98d660d 100644
--- a/backend/src/backend/program.cpp
+++ b/backend/src/backend/program.cpp
@@ -1361,13 +1361,13 @@ EXTEND_QUOTE:
     return kernel->getArgAlign(argID);
   }
   static gbe_arg_type kernelGetArgType(gbe_kernel genKernel, uint32_t argID) {
-    if (genKernel == NULL) return GBE_ARG_INVALID;
+    if (genKernel == NULL) return GBE_ARG_TYPE_INVALID;
     const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
     return kernel->getArgType(argID);
   }
 
   static uint32_t kernelGetSIMDWidth(gbe_kernel genKernel) {
-    if (genKernel == NULL) return GBE_ARG_INVALID;
+    if (genKernel == NULL) return GBE_ARG_TYPE_INVALID;
     const gbe::Kernel *kernel = (const gbe::Kernel*) genKernel;
     return kernel->getSIMDWidth();
   }
diff --git a/backend/src/backend/program.h b/backend/src/backend/program.h
index e601c97..cdde2e3 100644
--- a/backend/src/backend/program.h
+++ b/backend/src/backend/program.h
@@ -47,16 +47,23 @@ typedef struct _gbe_program *gbe_program;
 /*! Opaque structure that interfaces a GBE kernel (ie one OCL function) */
 typedef struct _gbe_kernel *gbe_kernel;
 
+enum gbe_address_space_type {
+  GBE_ADDRESS_SPACE_PRIVATE = 0,
+  GBE_ADDRESS_SPACE_GLOBAL = 1,
+  GBE_ADDRESS_SPACE_CONSTANT = 2,
+  GBE_ADDRESS_SPACE_LOCAL = 3,
+  GBE_ADDRESS_SPACE_INVALID = 4,
+};
+
 /*! Argument type for each function call */
 enum gbe_arg_type {
-  GBE_ARG_VALUE = 0,            // int, float and so on
-  GBE_ARG_GLOBAL_PTR = 1,       // __global
-  GBE_ARG_CONSTANT_PTR = 2,     // __constant
-  GBE_ARG_LOCAL_PTR = 3,        // __local
-  GBE_ARG_IMAGE = 4,            // image2d_t, image3d_t
-  GBE_ARG_SAMPLER = 5,          // sampler_t
-  GBE_ARG_PIPE = 6,             // pipe
-  GBE_ARG_INVALID = 0xffffffff
+  GBE_ARG_TYPE_INVALID = 0,
+  GBE_ARG_TYPE_VALUE = 1, // int, fp, etc
+  GBE_ARG_TYPE_STRUCT = 2,
+  GBE_ARG_TYPE_POINTER = 3,
+  GBE_ARG_TYPE_IMAGE = 4,
+  GBE_ARG_TYPE_SAMPLER= 5,
+  GBE_ARG_TYPE_PIPE = 6,  // pipe
 };
 
 /*! Get argument info values */
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 1aff8b9..3898f89 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -46,6 +46,7 @@ namespace gbe {
   /*! Info for the kernel argument */
   struct KernelArgument {
     gbe_arg_type type; //!< Pointer, structure, image, regular value?
+    gbe_address_space_type arg_space_type;
     uint32_t size;     //!< Size of the argument
     uint32_t align;    //!< addr alignment of the argument
     uint8_t bti;      //!< binding table index for __global buffer
@@ -100,6 +101,9 @@ namespace gbe {
     INLINE uint32_t getArgSize(uint32_t argID) const {
       return argID >= argNum ? 0u : args[argID].size;
     }
+    INLINE gbe_address_space_type getArgAddressSpace(uint32_t argID) const {
+      return argID >= argNum ? GBE_ADDRESS_SPACE_INVALID : args[argID].arg_space_type;
+    }
     /*! Return the bti for __global buffer */
     INLINE uint8_t getArgBTI(uint32_t argID) const {
       return argID >= argNum ? 0u : args[argID].bti;
@@ -110,7 +114,7 @@ namespace gbe {
     }
     /*! Return the type of the given argument */
     INLINE gbe_arg_type getArgType(uint32_t argID) const {
-      return argID >= argNum ? GBE_ARG_INVALID : args[argID].type;
+      return argID >= argNum ? GBE_ARG_TYPE_INVALID : args[argID].type;
     }
     /*! Get the offset where to patch. Returns -1 if no patch needed */
     int32_t getCurbeOffset(gbe_curbe_type type, uint32_t subType) const;
@@ -341,6 +345,8 @@ namespace gbe {
     /*! Compile a kernel */
     virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name,
                                   bool relaxMath, int profiling) = 0;
+    /*! Generate binary format */
+    virtual void *toBinaryFormat(size_t &ret_size) = 0;
     /*! Allocate an empty kernel. */
     virtual Kernel *allocateKernel(const std::string &name) = 0;
     /*! Kernels sorted by their name */
diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp
index 64cacd9..6b14dcd 100644
--- a/backend/src/gbe_bin_interpreter.cpp
+++ b/backend/src/gbe_bin_interpreter.cpp
@@ -30,6 +30,7 @@
 #undef GBE_COMPILER_AVAILABLE
 #include "backend/program.cpp"
 #include "backend/gen_program.cpp"
+#include "backend/gen_program_elf.cpp"
 #include "ir/sampler.cpp"
 #include "ir/image.cpp"
 
-- 
2.7.4



More information about the Beignet mailing list