[Beignet] [PATCH 4/4] Add GenProgram::toBinaryFormat to generate ELF format binary.

junyan.he at inbox.com junyan.he at inbox.com
Wed Feb 8 08:13:52 UTC 2017


From: Junyan He <junyan.he at intel.com>

This patch adds GenProgram::toBinaryFormat, which generates a standard
ELF format binary. All the extra information the runtime needs is
stored in the .note.gpu_info section, so that the runtime and the
compiler can be cleanly separated.

Signed-off-by: Junyan He <junyan.he at intel.com>
---
 backend/src/backend/gen_program.hpp     |   7 +
 backend/src/backend/gen_program_elf.cpp | 357 ++++++++++++++++++++++++++++++++
 backend/src/backend/program.hpp         |   2 +
 backend/src/gbe_bin_interpreter.cpp     |   1 +
 4 files changed, 367 insertions(+)

diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp
index ff756e0..8963c38 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
 struct GenInstruction;
 namespace gbe
 {
+  class GenProgramElfContext;
+
   /*! Describe a compiled kernel */
   class GenKernel : public Kernel
   {
@@ -58,6 +60,9 @@ namespace gbe
   /*! Describe a compiled program */
   class GenProgram : public Program
   {
+  protected:
+    GenProgramElfContext* elf_ctx;
+
   public:
     /*! Create an empty program */
     GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
@@ -70,6 +75,8 @@ namespace gbe
     virtual void CleanLlvmResource(void);
     /*! Implements base class */
     virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath, int profiling);
+    /*! Generate binary format */
+    virtual void *toBinaryFormat(size_t &ret_size);
     /*! Allocate an empty kernel. */
     virtual Kernel *allocateKernel(const std::string &name) {
       return GBE_NEW(GenKernel, name, deviceID);
diff --git a/backend/src/backend/gen_program_elf.cpp b/backend/src/backend/gen_program_elf.cpp
index efd45fe..0440e81 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -22,6 +22,7 @@
 #include <algorithm>
 #include <sstream>
 #include <streambuf>
+using namespace std;
 
 namespace gbe
 {
@@ -121,4 +122,360 @@ protected:
     return count;
   }
 };
+
+using namespace ELFIO;
+
+/*! Holds the ELFIO writer, its sections and accessors while one GenProgram is turned into ELF */
+class GenProgramElfContext {
+public:
+  enum { // Note type values; 0, 1, 2 already have meanings
+    GEN_NOTE_TYPE_GPU_INFO = 3,
+    GEN_NOTE_TYPE_CL_ARG_INFO = 4,
+    GEN_NOTE_TYPE_CL_WORKGROUP_SIZE = 5,
+  };
+
+  GenProgram &genProg;                   // program passed to the constructor
+  void emitOneKernel(GenKernel &kernel); // adds one kernel's code, symbol and note
+  elfio writer;                          // the ELF image being built
+  section *text_sec;                     // ".text", receives the kernel code
+  section *sym_sec;                      // ".symtab"
+  section *strtab_sec;                   // ".strtab"
+  section *ker_info_sec;                 // ".note.gpu_info"
+  section *rodata_sec;                   // NULL here; ".rodata" is added by toBinaryFormat
+  symbol_section_accessor *syma;         // symbol writer bound to sym_sec
+  string_section_accessor *stra;         // string writer bound to strtab_sec
+  note_section_accessor *note_writer;    // note writer bound to ker_info_sec
+  Elf32_Word sym_num;                    // bumped once per emitOneKernel call
+  Elf64_Word bitcode_offset;             // offset in .text of the next kernel's code
+
+  ~GenProgramElfContext(void)
+  {
+    if (syma)
+      GBE_DELETE(syma);
+    if (stra)
+      GBE_DELETE(stra);
+    if (note_writer)
+      GBE_DELETE(note_writer);
+  }
+
+  GenProgramElfContext(GenProgram &prog) : genProg(prog), text_sec(NULL), sym_sec(NULL),
+                                           strtab_sec(NULL), ker_info_sec(NULL), rodata_sec(NULL),
+                                           syma(NULL), stra(NULL), note_writer(NULL), sym_num(0),
+                                           bitcode_offset(0)
+  {
+    writer.create(ELFCLASS64, ELFDATA2LSB);
+    writer.set_os_abi(ELFOSABI_LINUX);
+    writer.set_type(ET_REL);
+    writer.set_machine(EM_INTEL205); // TODO: Some value of Intel GPU;
+
+    // Create code section
+    text_sec = writer.sections.add(".text");
+    text_sec->set_type(SHT_PROGBITS);
+    text_sec->set_flags(SHF_ALLOC | SHF_EXECINSTR);
+    text_sec->set_addr_align(4);
+
+    // Create string table section
+    strtab_sec = writer.sections.add(".strtab");
+    strtab_sec->set_type(SHT_STRTAB);
+    strtab_sec->set_addr_align(1);
+
+    // Create symbol table section
+    sym_sec = writer.sections.add(".symtab");
+    sym_sec->set_type(SHT_SYMTAB);
+    sym_sec->set_addr_align(0x4);
+    sym_sec->set_entry_size(writer.get_default_entry_size(SHT_SYMTAB));
+    sym_sec->set_link(strtab_sec->get_index());
+    sym_sec->set_info(0x01);
+
+    // Create kernel info section
+    ker_info_sec = writer.sections.add(".note.gpu_info");
+    ker_info_sec->set_type(SHT_NOTE);
+    ker_info_sec->set_flags(SHF_ALLOC);
+    ker_info_sec->set_addr_align(0x04);
+
+    // Create string table writer
+    stra = GBE_NEW(string_section_accessor, strtab_sec);
+    // Create symbol table writer
+    syma = GBE_NEW(symbol_section_accessor, writer, sym_sec);
+    // Create note writer
+    note_writer = GBE_NEW(note_section_accessor, writer, ker_info_sec);
+  }
+};
+
+/*! Append one kernel to the ELF image: its code goes to .text, its name becomes a
+    function symbol, and its description becomes one GEN_NOTE_TYPE_GPU_INFO note.
+    The note payload layout is described in the comment above toBinaryFormat. */
+void GenProgramElfContext::emitOneKernel(GenKernel &kernel)
+{
+  assert(text_sec != NULL);
+  assert(sym_sec != NULL);
+  assert(syma != NULL);
+  assert(stra != NULL);
+  assert(note_writer != NULL);
+
+  sym_num++;
+
+  // Add the kernel's bitcode to .text section
+  text_sec->append_data(kernel.getCode(), kernel.getCodeSize());
+  // Add the kernel func as a symbol
+  syma->add_symbol(*stra, kernel.getName(), bitcode_offset, kernel.getCodeSize(),
+                   STB_GLOBAL, STT_FUNC, 0, text_sec->get_index());
+  bitcode_offset += kernel.getCodeSize();
+
+  const Elf32_Word arg_num = kernel.getArgNum();
+  const size_t sampler_data_sz = kernel.getSamplerSize();
+  const size_t image_data_sz = kernel.getImageSize();
+  const size_t image_num = image_data_sz / sizeof(ImageInfo);
+  // Size the buffer from the kernel instead of a fixed 8 KB: the fixed-size fields
+  // (6-word header, 3 counters, register map) stay well below 1 KB, every argument
+  // and every image takes 7 words, and the sampler words are copied verbatim.
+  const size_t info_cap =
+    1024 + 7 * sizeof(Elf32_Word) * (arg_num + image_num) + sampler_data_sz;
+  // Zeroed so that fields which are not filled in below are emitted as 0, not garbage
+  void *kernel_info = GBE_MALLOC(info_cap);
+  ::memset(kernel_info, 0, info_cap);
+  void *ptr = kernel_info;
+  Elf32_Word *hdr = static_cast<Elf32_Word *>(ptr);
+  hdr[0] = kernel.getSIMDWidth();   // SIMD
+  hdr[1] = kernel.getSLMSize();     // Local Mem Size
+  hdr[2] = kernel.getScratchSize(); // Scratch Size
+  hdr[3] = kernel.getStackSize();   // Stack Size
+  hdr[4] = kernel.getUseSLM();      // Barrier/SLM Used
+  hdr[5] = arg_num;                 // Arg Num
+  ptr = static_cast<char *>(ptr) + 6 * sizeof(Elf32_Word);
+
+  uint32_t *sampler_data = NULL;
+  if (sampler_data_sz) {
+    sampler_data = static_cast<uint32_t *>(GBE_MALLOC(sampler_data_sz));
+    ::memset(sampler_data, 0, sampler_data_sz);
+    kernel.getSamplerData(sampler_data);
+  }
+
+  ImageInfo *image_data = NULL;
+  if (image_data_sz) {
+    image_data = static_cast<ImageInfo *>(GBE_MALLOC(image_data_sz));
+    ::memset(image_data, 0, image_data_sz);
+    kernel.getImageData(image_data);
+  }
+
+  for (unsigned int i = 0; i < arg_num; i++) {
+    Elf32_Word *arg_index = static_cast<Elf32_Word *>(ptr);
+    Elf32_Word *arg_size = static_cast<Elf32_Word *>(ptr) + 1;
+    Elf32_Word *arg_type = static_cast<Elf32_Word *>(ptr) + 2;
+    Elf32_Word *arg_offset = static_cast<Elf32_Word *>(ptr) + 3;
+    Elf32_Word *arg_space = static_cast<Elf32_Word *>(ptr) + 4;
+    Elf32_Word *arg_align = static_cast<Elf32_Word *>(ptr) + 5;
+    Elf32_Word *arg_info = static_cast<Elf32_Word *>(ptr) + 6;
+
+    *arg_index = i;
+    *arg_size = kernel.getArgSize(i);
+    *arg_type = kernel.getArgType(i);
+    *arg_align = kernel.getArgAlign(i);
+#if 0 // NOTE(review): disabled in this patch; Addr Space and BTI/Index stay 0 meanwhile
+    *arg_space = kernel.getArgAddressSpace(i);
+
+    if (*arg_type == GBE_ARG_TYPE_POINTER && *arg_space == GBE_ADDRESS_SPACE_GLOBAL) {
+      *arg_info = kernel.getArgBTI(i);
+    } else if (*arg_type == GBE_ARG_TYPE_IMAGE) {
+      assert(image_data_sz > 0);
+      for (size_t j = 0; j < image_data_sz / sizeof(ImageInfo); j++) {
+        if (image_data[j].arg_idx == static_cast<int32_t>(i)) {
+          *arg_info = static_cast<Elf32_Word>(image_data[j].idx);
+          break;
+        }
+      }
+    } else if (*arg_type == GBE_ARG_TYPE_SAMPLER) {
+      assert(sampler_data_sz > 0);
+      for (size_t j = 0; j < sampler_data_sz / sizeof(uint32_t); j++) {
+        if (((sampler_data[j] & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE) ==
+            static_cast<uint32_t>(i)) {
+          *arg_info = static_cast<Elf32_Word>(j);
+          break;
+        }
+      }
+    }
+#endif
+    *arg_offset = kernel.getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, i);
+    ptr = static_cast<char *>(ptr) + 7 * sizeof(Elf32_Word);
+  }
+
+  /* Store all the sampler info: the sampler number, then the raw sampler words */
+  *static_cast<Elf32_Word *>(ptr) = static_cast<Elf32_Word>(sampler_data_sz / sizeof(uint32_t));
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  if (sampler_data_sz > 0) {
+    ::memcpy(ptr, sampler_data, sampler_data_sz);
+    GBE_FREE(sampler_data);
+    ptr = static_cast<char *>(ptr) + sampler_data_sz;
+  }
+
+  /* Store all the Image info: the image number, then 7 words per image */
+  *static_cast<Elf32_Word *>(ptr) = static_cast<Elf32_Word>(image_num);
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  if (image_data_sz > 0) {
+    std::sort(image_data, image_data + image_num,
+              [](const ImageInfo &a, const ImageInfo &b) { return a.idx < b.idx; });
+
+    /* Store all the image info by index */
+    for (size_t i = 0; i < image_num; i++) {
+      Elf32_Word *img = static_cast<Elf32_Word *>(ptr);
+      img[0] = image_data[i].idx;              // BTI
+      img[1] = image_data[i].wSlot;            // Width
+      img[2] = image_data[i].hSlot;            // Height
+      img[3] = image_data[i].depthSlot;        // Depth
+      img[4] = image_data[i].dataTypeSlot;     // Data Type
+      img[5] = image_data[i].channelOrderSlot; // Channel Order
+      img[6] = image_data[i].dimOrderSlot;     // Dim Order
+      ptr = static_cast<char *>(ptr) + 7 * sizeof(Elf32_Word);
+    }
+
+    GBE_FREE(image_data);
+  }
+
+  Elf32_Word *p_virt_phy_num = static_cast<Elf32_Word *>(ptr);
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  unsigned int virt_phy_num = 0;
+
+/* Store the special virtual register map: for every curbe value the kernel uses,
+   three words are written: the curbe enum, its curbe offset and its size in bytes
+   (sizeof(data_type), multiplied by the SIMD width when it is not uniform). */
+#define STORE_CURB_MAP(curbe_enum, data_type, uniform)                         \
+  if (kernel.getCurbeOffset(curbe_enum, 0) >= 0) {                             \
+    *static_cast<Elf32_Word *>(ptr) = curbe_enum;                              \
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);                       \
+    *static_cast<Elf32_Word *>(ptr) = kernel.getCurbeOffset(curbe_enum, 0);    \
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);                       \
+    *static_cast<Elf32_Word *>(ptr) =                                          \
+      uniform ? sizeof(data_type) : sizeof(data_type) * kernel.getSIMDWidth(); \
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);                       \
+    virt_phy_num++;                                                            \
+  }
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_X, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_Y, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_Z, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_WORK_DIM, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_BLOCK_IP, Elf32_Half, 0);
+  STORE_CURB_MAP(GBE_CURBE_DW_BLOCK_IP, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_THREAD_NUM, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_THREAD_ID, Elf32_Word, 1);
+  // NOTE(review): Elf64_Word is a 32-bit type in ELF, so the entries below record 4 - confirm
+  STORE_CURB_MAP(GBE_CURBE_CONSTANT_ADDRSPACE, Elf64_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_STACK_SIZE, Elf64_Word, 1);
+#undef STORE_CURB_MAP
+  if (kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER) >= 0) {
+    *static_cast<Elf32_Word *>(ptr) = GBE_CURBE_EXTRA_ARGUMENT;
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+    *static_cast<Elf32_Word *>(ptr) =
+      kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+    *static_cast<Elf32_Word *>(ptr) = sizeof(Elf64_Word);
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+    virt_phy_num++;
+  }
+  *p_virt_phy_num = virt_phy_num;
+
+  Elf_Word total_sz = static_cast<char *>(ptr) - static_cast<char *>(kernel_info);
+  assert(total_sz <= info_cap); // the sizing above must cover everything written
+  note_writer->add_note(GEN_NOTE_TYPE_GPU_INFO, kernel.getName(), kernel_info, total_sz);
+
+#if 0
+  for (int i = 0; i < (int)total_sz; i++) {
+    if (i % 16 == 0)
+      printf("\n");
+    if (i % 2 == 0)
+      printf(" ");
+    printf("%2.2x", ((unsigned char *)kernel_info)[i]);
+  }
+  printf("\n");
+  for (int i = 0; i < (int)total_sz / 4; i++) {
+    printf(" %d", ((unsigned int *)kernel_info)[i]);
+  }
+#endif
+
+  GBE_FREE(kernel_info);
+}
+
+/*! Serialize the program into an ELF image: .rodata (if there is constant data), then every
+   kernel. Returns a ::malloc'ed buffer handed over to the caller and its size in ret_size,
+   or NULL with ret_size == 0. Each Gen kernel is stored as one note with this layout:
+   ------------------------
+   | Function Name size:4 |
+   ------------------------
+   | Desc size:4  |
+   ----------------
+   | Type:4       |
+   --------------------
+   | The name(strlen) |
+   -----------------------------------------------------------------------------------------------
+   | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM Used:4 | Arg Num:4 |
+   -----------------------------------------------------------------------------------------------
+     Then the format for each argument is
+   --------------------------------------------------------------------------------------------------------------------------
+   | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr) | BTI(if buffer):4 / Index(sampler and image):4 |
+   --------------------------------------------------------------------------------------------------------------------------
+     Then all sampler info
+   ------------------------------------
+   | Number:4 | SamplerInfo:4 | ......|
+   ------------------------------------
+     Then all image info
+   ----------------------------------------------------------------------------------------------------------
+   | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel Order:4 | Dim Order:4 | .......|
+   ----------------------------------------------------------------------------------------------------------
+     Last is the map table of special virtual register and phy register
+   --------------------------------------------------------
+   | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.........|
+   --------------------------------------------------------       */
+void *GenProgram::toBinaryFormat(size_t &ret_size)
+{
+  ret_size = 0;
+  assert(elf_ctx == NULL); // NOTE(review): needs the constructor to NULL-init elf_ctx - confirm
+  elf_ctx = GBE_NEW(GenProgramElfContext, *this);
+
+  if (getGlobalConstantSize() > 0) {
+    elf_ctx->rodata_sec = elf_ctx->writer.sections.add(".rodata");
+    elf_ctx->rodata_sec->set_type(SHT_PROGBITS);
+    elf_ctx->rodata_sec->set_flags(SHF_ALLOC);
+    elf_ctx->rodata_sec->set_addr_align(1);
+
+    char *const_data = static_cast<char *>(GBE_MALLOC(getGlobalConstantSize()));
+    getGlobalConstantData(const_data);
+    elf_ctx->rodata_sec->set_data(const_data, getGlobalConstantSize());
+    GBE_FREE(const_data);
+  }
+
+  for (const auto &name_and_kernel : kernels) {
+    GenKernel *k = static_cast<GenKernel *>(name_and_kernel.second);
+    elf_ctx->emitOneKernel(*k);
+  }
+
+  wmemstreambuf membuf(4096);
+  std::ostream oss(&membuf);
+  elf_ctx->writer.save(oss);
+  GBE_DELETE(elf_ctx);
+  elf_ctx = NULL; // no dangling pointer, and the assert above holds on the next call
+
+  size_t elf_size = 0;
+  char *elf_mem = membuf.getcontent(elf_size);
+  void *p_elf_ret = (elf_size == 0) ? NULL : ::malloc(elf_size);
+  if (p_elf_ret == NULL) // empty image or allocation failure; ret_size stays 0
+    return NULL;
+  ::memcpy(p_elf_ret, elf_mem, elf_size);
+  ret_size = elf_size;
+  return p_elf_ret;
+}
+
 } /* namespace gbe */
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 1aff8b9..1eaaa11 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -341,6 +341,8 @@ namespace gbe {
     /*! Compile a kernel */
     virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name,
                                   bool relaxMath, int profiling) = 0;
+    /*! Generate binary format */
+    virtual void *toBinaryFormat(size_t &ret_size) = 0;
     /*! Allocate an empty kernel. */
     virtual Kernel *allocateKernel(const std::string &name) = 0;
     /*! Kernels sorted by their name */
diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp
index 64cacd9..6b14dcd 100644
--- a/backend/src/gbe_bin_interpreter.cpp
+++ b/backend/src/gbe_bin_interpreter.cpp
@@ -30,6 +30,7 @@
 #undef GBE_COMPILER_AVAILABLE
 #include "backend/program.cpp"
 #include "backend/gen_program.cpp"
+#include "backend/gen_program_elf.cpp"
 #include "ir/sampler.cpp"
 #include "ir/image.cpp"
 
-- 
2.7.4



More information about the Beignet mailing list