[Beignet] [PATCH 4/4] Add GenProgram::toBinaryFormat to generate ELF format binary.
junyan.he at inbox.com
junyan.he at inbox.com
Wed Feb 8 08:13:52 UTC 2017
From: Junyan He <junyan.he at intel.com>
Add this function to generate a standard ELF format binary.
All the verbose information needed at runtime is stored in the
.note.gpu_info section, so that the runtime and the compiler can
be cleanly separated.
Signed-off-by: Junyan He <junyan.he at intel.com>
---
backend/src/backend/gen_program.hpp | 7 +
backend/src/backend/gen_program_elf.cpp | 357 ++++++++++++++++++++++++++++++++
backend/src/backend/program.hpp | 2 +
backend/src/gbe_bin_interpreter.cpp | 1 +
4 files changed, 367 insertions(+)
diff --git a/backend/src/backend/gen_program.hpp b/backend/src/backend/gen_program.hpp
index ff756e0..8963c38 100644
--- a/backend/src/backend/gen_program.hpp
+++ b/backend/src/backend/gen_program.hpp
@@ -33,6 +33,8 @@
struct GenInstruction;
namespace gbe
{
+ class GenProgramElfContext;
+
/*! Describe a compiled kernel */
class GenKernel : public Kernel
{
@@ -58,6 +60,9 @@ namespace gbe
/*! Describe a compiled program */
class GenProgram : public Program
{
+ protected:
+ GenProgramElfContext* elf_ctx;
+
public:
/*! Create an empty program */
GenProgram(uint32_t deviceID, const void* mod = NULL, const void* ctx = NULL, const char* asm_fname = NULL, uint32_t fast_relaxed_math = 0) :
@@ -70,6 +75,8 @@ namespace gbe
virtual void CleanLlvmResource(void);
/*! Implements base class */
virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name, bool relaxMath, int profiling);
+ /*! Generate binary format */
+ virtual void *toBinaryFormat(size_t &ret_size);
/*! Allocate an empty kernel. */
virtual Kernel *allocateKernel(const std::string &name) {
return GBE_NEW(GenKernel, name, deviceID);
diff --git a/backend/src/backend/gen_program_elf.cpp b/backend/src/backend/gen_program_elf.cpp
index efd45fe..0440e81 100644
--- a/backend/src/backend/gen_program_elf.cpp
+++ b/backend/src/backend/gen_program_elf.cpp
@@ -22,6 +22,7 @@
#include <algorithm>
#include <sstream>
#include <streambuf>
+using namespace std;
namespace gbe
{
@@ -121,4 +122,360 @@ protected:
return count;
}
};
+
+using namespace ELFIO;
+
+/*! State needed to serialise a GenProgram into an ELF object with ELFIO.
+ *
+ *  The constructor creates the fixed sections (.text, .strtab, .symtab and
+ *  .note.gpu_info) together with the accessors used to fill them. The
+ *  optional .rodata section is left NULL here and is created by the user of
+ *  the context when needed.
+ */
+class GenProgramElfContext
+{
+public:
+  /*! Note types written to the .note.gpu_info section */
+  enum { // 0, 1, 2 already have meanings
+    GEN_NOTE_TYPE_GPU_INFO = 3,
+    GEN_NOTE_TYPE_CL_ARG_INFO = 4,
+    GEN_NOTE_TYPE_CL_WORKGROUP_SIZE = 5,
+  };
+
+  /*! Program being serialised */
+  GenProgram &genProg;
+  /*! Append one kernel (code, symbol and info note) to the ELF */
+  void emitOneKernel(GenKernel &kernel);
+  /*! ELFIO object that owns every section below */
+  elfio writer;
+  section *text_sec;     //!< .text, kernel code
+  section *sym_sec;      //!< .symtab
+  section *strtab_sec;   //!< .strtab
+  section *ker_info_sec; //!< .note.gpu_info
+  section *rodata_sec;   //!< .rodata, NULL until created by the caller
+  symbol_section_accessor *syma;      //!< writer for sym_sec
+  string_section_accessor *stra;      //!< writer for strtab_sec
+  note_section_accessor *note_writer; //!< writer for ker_info_sec
+  Elf32_Word sym_num;        //!< number of kernel symbols emitted so far
+  Elf64_Word bitcode_offset; //!< offset in .text where the next kernel starts
+
+  /*! Release the accessors; the sections themselves belong to `writer`. */
+  ~GenProgramElfContext(void)
+  {
+    if (syma)
+      GBE_DELETE(syma);
+    if (stra)
+      GBE_DELETE(stra);
+    if (note_writer)
+      GBE_DELETE(note_writer);
+  }
+
+  /*! Set up the ELF header and create the fixed sections and accessors. */
+  GenProgramElfContext(GenProgram &prog) : genProg(prog), text_sec(NULL), sym_sec(NULL),
+                                           strtab_sec(NULL), ker_info_sec(NULL), rodata_sec(NULL),
+                                           syma(NULL), stra(NULL), note_writer(NULL), sym_num(0),
+                                           bitcode_offset(0)
+  {
+    writer.create(ELFCLASS64, ELFDATA2LSB);
+    writer.set_os_abi(ELFOSABI_LINUX);
+    writer.set_type(ET_REL);
+    writer.set_machine(EM_INTEL205); // TODO: Some value of Intel GPU;
+
+    // Create code section
+    text_sec = writer.sections.add(".text");
+    text_sec->set_type(SHT_PROGBITS);
+    text_sec->set_flags(SHF_ALLOC | SHF_EXECINSTR);
+    text_sec->set_addr_align(4);
+
+    // Create string table section
+    strtab_sec = writer.sections.add(".strtab");
+    strtab_sec->set_type(SHT_STRTAB);
+    strtab_sec->set_addr_align(1);
+
+    // Create symbol table section
+    sym_sec = writer.sections.add(".symtab");
+    sym_sec->set_type(SHT_SYMTAB);
+    sym_sec->set_addr_align(0x4);
+    sym_sec->set_entry_size(writer.get_default_entry_size(SHT_SYMTAB));
+    sym_sec->set_link(strtab_sec->get_index());
+    sym_sec->set_info(0x01);
+
+    // Create kernel info section
+    ker_info_sec = writer.sections.add(".note.gpu_info");
+    ker_info_sec->set_type(SHT_NOTE);
+    // The flags belong to the note section. Setting them on text_sec here
+    // would overwrite SHF_ALLOC | SHF_EXECINSTR and leave the note unflagged.
+    ker_info_sec->set_flags(SHF_ALLOC);
+    ker_info_sec->set_addr_align(0x04);
+
+    // Create string table writer
+    stra = GBE_NEW(string_section_accessor, strtab_sec);
+    // Create symbol table writer
+    syma = GBE_NEW(symbol_section_accessor, writer, sym_sec);
+    // Create note writer
+    note_writer = GBE_NEW(note_section_accessor, writer, ker_info_sec);
+  }
+
+  // The destructor frees the accessors, so a copy would free them twice.
+  GenProgramElfContext(const GenProgramElfContext &) = delete;
+  GenProgramElfContext &operator=(const GenProgramElfContext &) = delete;
+};
+
+/*! Append one compiled kernel to the ELF being built.
+ *
+ *  The kernel code is appended to .text, a global STT_FUNC symbol named
+ *  after the kernel is added to .symtab, and one GEN_NOTE_TYPE_GPU_INFO note
+ *  is added to .note.gpu_info. The note descriptor is a sequence of 32-bit
+ *  words: a 6-word header, one 7-word record per argument, the sampler
+ *  table, the image table and the curbe offset map.
+ *
+ *  \param kernel  kernel to serialise
+ */
+void GenProgramElfContext::emitOneKernel(GenKernel &kernel)
+{
+  assert(text_sec != NULL);
+  assert(sym_sec != NULL);
+  assert(syma != NULL);
+  assert(stra != NULL);
+  assert(note_writer != NULL);
+
+  sym_num++;
+
+  // Append the kernel's code to the .text section
+  text_sec->append_data(kernel.getCode(), kernel.getCodeSize());
+  // Add the kernel func as a symbol
+  syma->add_symbol(*stra, kernel.getName(), bitcode_offset, kernel.getCodeSize(),
+                   STB_GLOBAL, STT_FUNC, 0, text_sec->get_index());
+  bitcode_offset += kernel.getCodeSize();
+
+  const Elf32_Word arg_num = kernel.getArgNum();
+  size_t sampler_data_sz = kernel.getSamplerSize();
+  size_t image_data_sz = kernel.getImageSize();
+  const size_t image_num = image_data_sz / sizeof(ImageInfo);
+
+  // Size the scratch buffer from what is actually written below instead of
+  // using a fixed amount that a kernel with many arguments could overflow:
+  //   6 header words + 7 words per argument
+  //   + 1 sampler count word + the raw sampler data
+  //   + 1 image count word + 7 words per image
+  //   + 1 map count word + 3 words per curbe map entry.
+  // Keep max_curbe_entries in sync with the STORE_CURB_MAP list below
+  // (25 entries) plus the GBE_CURBE_EXTRA_ARGUMENT entry.
+  const size_t max_curbe_entries = 26;
+  const size_t kernel_info_sz = sampler_data_sz +
+    sizeof(Elf32_Word) * (6 + 7 * static_cast<size_t>(arg_num) + 1 + 1 +
+                          7 * image_num + 1 + 3 * max_curbe_entries);
+
+  void *kernel_info = GBE_MALLOC(kernel_info_sz);
+  // Zero the buffer: fields that are not filled in (the address space and
+  // BTI/index words while the block below is disabled) must not leak
+  // uninitialised heap contents into the ELF.
+  ::memset(kernel_info, 0, kernel_info_sz);
+  void *ptr = kernel_info;
+  Elf32_Word *p_simd = static_cast<Elf32_Word *>(ptr);
+  Elf32_Word *p_slm_sz = static_cast<Elf32_Word *>(ptr) + 1;
+  Elf32_Word *p_scratch_sz = static_cast<Elf32_Word *>(ptr) + 2;
+  Elf32_Word *p_stack_sz = static_cast<Elf32_Word *>(ptr) + 3;
+  Elf32_Word *p_barrier_slm_used = static_cast<Elf32_Word *>(ptr) + 4;
+  Elf32_Word *p_arg_num = static_cast<Elf32_Word *>(ptr) + 5;
+  *p_simd = kernel.getSIMDWidth();
+  *p_slm_sz = kernel.getSLMSize();
+  *p_scratch_sz = kernel.getScratchSize();
+  *p_stack_sz = kernel.getStackSize();
+  *p_barrier_slm_used = kernel.getUseSLM();
+  *p_arg_num = arg_num;
+
+  ptr = static_cast<char *>(ptr) + 6 * sizeof(Elf32_Word);
+
+  // Temporary copy of the sampler table, released once written to the note
+  uint32_t *sampler_data = NULL;
+  if (sampler_data_sz) {
+    sampler_data = static_cast<uint32_t *>(GBE_MALLOC(sampler_data_sz));
+    ::memset(sampler_data, 0, sampler_data_sz);
+    kernel.getSamplerData(sampler_data);
+  }
+
+  // Temporary copy of the image table, released once written to the note
+  ImageInfo *image_data = NULL;
+  if (image_data_sz) {
+    image_data = static_cast<ImageInfo *>(GBE_MALLOC(image_data_sz));
+    ::memset(image_data, 0, image_data_sz);
+    kernel.getImageData(image_data);
+  }
+
+  /* Store one 7-word record per argument */
+  for (unsigned int i = 0; i < arg_num; i++) {
+    Elf32_Word *arg_index = static_cast<Elf32_Word *>(ptr);
+    Elf32_Word *arg_size = static_cast<Elf32_Word *>(ptr) + 1;
+    Elf32_Word *arg_type = static_cast<Elf32_Word *>(ptr) + 2;
+    Elf32_Word *arg_offset = static_cast<Elf32_Word *>(ptr) + 3;
+    Elf32_Word *arg_space = static_cast<Elf32_Word *>(ptr) + 4;
+    Elf32_Word *arg_align = static_cast<Elf32_Word *>(ptr) + 5;
+    Elf32_Word *arg_info = static_cast<Elf32_Word *>(ptr) + 6;
+
+    *arg_index = i;
+    *arg_size = kernel.getArgSize(i);
+    *arg_type = kernel.getArgType(i);
+    *arg_align = kernel.getArgAlign(i);
+#if 0
+    *arg_space = kernel.getArgAddressSpace(i);
+
+    if (*arg_type == GBE_ARG_TYPE_POINTER && *arg_space == GBE_ADDRESS_SPACE_GLOBAL) {
+      *arg_info = kernel.getArgBTI(i);
+    } else if (*arg_type == GBE_ARG_TYPE_IMAGE) {
+      assert(image_data_sz > 0);
+      for (size_t j = 0; j < image_data_sz / sizeof(ImageInfo); j++) {
+        if (image_data[j].arg_idx == static_cast<int32_t>(i)) {
+          *arg_info = static_cast<Elf32_Word>(image_data[j].idx);
+          break;
+        }
+      }
+    } else if (*arg_type == GBE_ARG_TYPE_SAMPLER) {
+      assert(sampler_data_sz > 0);
+      for (size_t j = 0; j < sampler_data_sz / sizeof(uint32_t); j++) {
+        if (((sampler_data[j] & __CLK_SAMPLER_ARG_MASK) >> __CLK_SAMPLER_ARG_BASE) ==
+            static_cast<uint32_t>(i)) {
+          *arg_info = static_cast<Elf32_Word>(j);
+          break;
+        }
+      }
+    }
+#endif
+    *arg_offset = kernel.getCurbeOffset(GBE_CURBE_KERNEL_ARGUMENT, i);
+    ptr = static_cast<char *>(ptr) + 7 * sizeof(Elf32_Word);
+  }
+
+  /* Store all the sampler info */
+  *(static_cast<Elf32_Word *>(ptr)) =
+    static_cast<Elf32_Word>(sampler_data_sz / sizeof(uint32_t)); // Sampler number
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  if (sampler_data_sz > 0) {
+    ::memcpy(ptr, sampler_data, sampler_data_sz);
+    GBE_FREE(sampler_data);
+    ptr = static_cast<char *>(ptr) + sampler_data_sz;
+  }
+
+  /* Store all the Image info */
+  *(static_cast<Elf32_Word *>(ptr)) =
+    static_cast<Elf32_Word>(image_data_sz / sizeof(ImageInfo)); // Image number
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  if (image_data_sz > 0) {
+    // A comparator must not modify its arguments, so take them by const ref
+    std::sort(image_data, image_data + image_data_sz / sizeof(ImageInfo),
+              [](const ImageInfo &a, const ImageInfo &b) { return a.idx < b.idx; });
+
+    /* Store all the image info by index */
+    for (size_t i = 0; i < image_data_sz / sizeof(ImageInfo); i++) {
+      Elf32_Word *bti = static_cast<Elf32_Word *>(ptr);
+      Elf32_Word *width = static_cast<Elf32_Word *>(ptr) + 1;
+      Elf32_Word *height = static_cast<Elf32_Word *>(ptr) + 2;
+      Elf32_Word *depth = static_cast<Elf32_Word *>(ptr) + 3;
+      Elf32_Word *data_type = static_cast<Elf32_Word *>(ptr) + 4;
+      Elf32_Word *channel_order = static_cast<Elf32_Word *>(ptr) + 5;
+      Elf32_Word *dim_order = static_cast<Elf32_Word *>(ptr) + 6;
+
+      *bti = image_data[i].idx;
+      *width = image_data[i].wSlot;
+      *height = image_data[i].hSlot;
+      *depth = image_data[i].depthSlot;
+      *data_type = image_data[i].dataTypeSlot;
+      *channel_order = image_data[i].channelOrderSlot;
+      *dim_order = image_data[i].dimOrderSlot;
+
+      ptr = static_cast<char *>(ptr) + 7 * sizeof(Elf32_Word);
+    }
+
+    GBE_FREE(image_data);
+  }
+
+  // The entry count is only known after the map is written; reserve its slot
+  Elf32_Word *p_virt_phy_num = static_cast<Elf32_Word *>(ptr);
+  ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+  unsigned int virt_phy_num = 0;
+
+/* Store the special virtual register map: for every curbe value that has an
+   offset (>= 0) write its enum, its offset and its size, where the size is
+   sizeof(data_type) for uniform values and sizeof(data_type) * SIMD width
+   otherwise. */
+#define STORE_CURB_MAP(curbe_enum, data_type, uniform)                         \
+  if (kernel.getCurbeOffset(curbe_enum, 0) >= 0) {                             \
+    *static_cast<Elf32_Word *>(ptr) = curbe_enum;                              \
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);                       \
+    *static_cast<Elf32_Word *>(ptr) = kernel.getCurbeOffset(curbe_enum, 0);    \
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);                       \
+    *static_cast<Elf32_Word *>(ptr) =                                          \
+      uniform ? sizeof(data_type) : sizeof(data_type) * kernel.getSIMDWidth(); \
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);                       \
+    virt_phy_num++;                                                            \
+  }
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_X, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_Y, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_ID_Z, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_LOCAL_SIZE_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_ENQUEUED_LOCAL_SIZE_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_SIZE_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GLOBAL_OFFSET_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_X, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_Y, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_GROUP_NUM_Z, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_WORK_DIM, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_BLOCK_IP, Elf32_Half, 0);
+  STORE_CURB_MAP(GBE_CURBE_DW_BLOCK_IP, Elf32_Word, 0);
+  STORE_CURB_MAP(GBE_CURBE_THREAD_NUM, Elf32_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_THREAD_ID, Elf32_Word, 1);
+  // NOTE(review): Elf64_Word is a 32-bit type in the ELF typedefs, so the
+  // size recorded for the entries using it is 4 - confirm this is intended.
+  STORE_CURB_MAP(GBE_CURBE_CONSTANT_ADDRSPACE, Elf64_Word, 1);
+  STORE_CURB_MAP(GBE_CURBE_STACK_SIZE, Elf64_Word, 1);
+#undef STORE_CURB_MAP
+  if (kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER) >= 0) {
+    *static_cast<Elf32_Word *>(ptr) = GBE_CURBE_EXTRA_ARGUMENT;
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+    *static_cast<Elf32_Word *>(ptr) =
+      kernel.getCurbeOffset(GBE_CURBE_EXTRA_ARGUMENT, GBE_STACK_BUFFER);
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+    *static_cast<Elf32_Word *>(ptr) = sizeof(Elf64_Word);
+    ptr = static_cast<char *>(ptr) + sizeof(Elf32_Word);
+    virt_phy_num++;
+  }
+  *p_virt_phy_num = virt_phy_num;
+
+  Elf_Word total_sz = static_cast<char *>(ptr) - static_cast<char *>(kernel_info);
+  // Guards the size computation at the top of the function
+  assert(static_cast<size_t>(total_sz) <= kernel_info_sz);
+  note_writer->add_note(GEN_NOTE_TYPE_GPU_INFO, kernel.getName(), kernel_info, total_sz);
+
+#if 0
+  for (int i = 0; i < (int)total_sz; i++) {
+    if (i % 16 == 0)
+      printf("\n");
+    if (i % 2 == 0)
+      printf(" ");
+    printf("%2.2x", ((unsigned char *)kernel_info)[i]);
+  }
+  printf("\n");
+  for (int i = 0; i < (int)total_sz / 4; i++) {
+    printf(" %d", ((unsigned int *)kernel_info)[i]);
+  }
+#endif
+
+  GBE_FREE(kernel_info);
+}
+
+/* Each Gen kernel function is stored as one note with the following layout
+ ------------------------
+ | Function Name size:4 |
+ ------------------------
+ | Desc size:4 |
+ ----------------
+ | Type:4 |
+ --------------------
+ | The name(strlen) |
+ -----------------------------------------------------------------------------------------------
+ | SIMD:4 | Local Mem Size:4 | Scratch Size:4 | Stack Size :4 | Barrier/SLM Used:4 | Arg Num:4 |
+ -----------------------------------------------------------------------------------------------
+ Then the format for each argument is
+ ----------------------------------------------------------------------------------------------------------------------------
+ | Index:4 | Size:4 | Type:4 | Offset:4 | Addr Space:4 | Align(if is ptr):4 | BTI(if buffer):4 / Index(sampler and image):4 |
+ ----------------------------------------------------------------------------------------------------------------------------
+ Then all sampler info
+ -----------------------------------
+ | Number:4 | SamplerInfo:4 | .....|
+ -----------------------------------
+ Then all image info
+ ----------------------------------------------------------------------------------------------------------
+ | Number:4 | BTI:4 | Width:4 | Height:4 | Depth:4 | Data Type:4 | Channel Order:4 | Dim Order:4 | .......|
+ ----------------------------------------------------------------------------------------------------------
+ Last is the map table of special virtual register and phy register
+ --------------------------------------------------------
+ | Number:4 | Virt Reg:4 | Phy Reg:4 | Size:4 |.........|
+ -------------------------------------------------------- */
+/*! Serialise this program into an in-memory ELF image.
+ *
+ *  The global constant data, if any, is stored in a .rodata section, and
+ *  every kernel in `kernels` is emitted through
+ *  GenProgramElfContext::emitOneKernel.
+ *
+ *  \param ret_size  receives the size in bytes of the returned image; it is
+ *                   0 whenever NULL is returned
+ *  \return a buffer allocated with ::malloc holding the ELF image, so it
+ *          must be released with ::free; NULL if the writer produced no
+ *          data or the allocation failed
+ */
+void *
+GenProgram::toBinaryFormat(size_t &ret_size)
+{
+  ret_size = 0;
+  // No assert on the previous value of elf_ctx: nothing visible guarantees
+  // it was initialised before the first call. The pointer is reset to NULL
+  // once the context is destroyed below, so it never dangles between calls.
+  elf_ctx = GBE_NEW(GenProgramElfContext, *this);
+
+  const size_t const_size = getGlobalConstantSize();
+  if (const_size > 0) {
+    elf_ctx->rodata_sec = elf_ctx->writer.sections.add(".rodata");
+    elf_ctx->rodata_sec->set_type(SHT_PROGBITS);
+    elf_ctx->rodata_sec->set_flags(SHF_ALLOC);
+    elf_ctx->rodata_sec->set_addr_align(1);
+
+    // Staging buffer only: it is released right after set_data
+    char *const_data = static_cast<char *>(GBE_MALLOC(const_size));
+    getGlobalConstantData(const_data);
+    elf_ctx->rodata_sec->set_data(const_data, const_size);
+    GBE_FREE(const_data);
+  }
+
+  for (const auto &entry : kernels) {
+    GenKernel *k = static_cast<GenKernel *>(entry.second);
+    elf_ctx->emitOneKernel(*k);
+  }
+
+  // Write the ELF into a growable memory stream
+  wmemstreambuf membuf(4096);
+  std::ostream oss(&membuf);
+  elf_ctx->writer.save(oss);
+  GBE_DELETE(elf_ctx);
+  elf_ctx = NULL; // do not keep a dangling pointer in the program
+
+  size_t elf_size = 0;
+  char *elf_mem = membuf.getcontent(elf_size);
+  if (elf_size == 0)
+    return NULL;
+
+  // Hand the caller its own copy; membuf goes away when this function returns
+  void *p_elf_ret = ::malloc(elf_size);
+  if (p_elf_ret == NULL)
+    return NULL;
+  ::memcpy(p_elf_ret, elf_mem, elf_size);
+  ret_size = elf_size;
+  return p_elf_ret;
+}
+
} /* namespace gbe */
diff --git a/backend/src/backend/program.hpp b/backend/src/backend/program.hpp
index 1aff8b9..1eaaa11 100644
--- a/backend/src/backend/program.hpp
+++ b/backend/src/backend/program.hpp
@@ -341,6 +341,8 @@ namespace gbe {
/*! Compile a kernel */
virtual Kernel *compileKernel(const ir::Unit &unit, const std::string &name,
bool relaxMath, int profiling) = 0;
+ /*! Generate binary format */
+ virtual void *toBinaryFormat(size_t &ret_size) = 0;
/*! Allocate an empty kernel. */
virtual Kernel *allocateKernel(const std::string &name) = 0;
/*! Kernels sorted by their name */
diff --git a/backend/src/gbe_bin_interpreter.cpp b/backend/src/gbe_bin_interpreter.cpp
index 64cacd9..6b14dcd 100644
--- a/backend/src/gbe_bin_interpreter.cpp
+++ b/backend/src/gbe_bin_interpreter.cpp
@@ -30,6 +30,7 @@
#undef GBE_COMPILER_AVAILABLE
#include "backend/program.cpp"
#include "backend/gen_program.cpp"
+#include "backend/gen_program_elf.cpp"
#include "ir/sampler.cpp"
#include "ir/image.cpp"
--
2.7.4
More information about the Beignet
mailing list