<div dir="ltr"><div class="gmail_quote"><div class="gmail_attr">I'd like the beginning of the .c or .h file to describe what this is good for, as it's not immediately obvious from the code.</div><div class="gmail_attr"><br></div><div class="gmail_attr">See also below.<br></div><div dir="ltr" class="gmail_attr"><br></div><div dir="ltr" class="gmail_attr">On Fri, May 3, 2019 at 7:19 AM Nicolai Hähnle <<a href="mailto:nhaehnle@gmail.com">nhaehnle@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0px 0px 0px 0.8ex;border-left:1px solid rgb(204,204,204);padding-left:1ex">From: Nicolai Hähnle <<a href="mailto:nicolai.haehnle@amd.com" target="_blank">nicolai.haehnle@amd.com</a>><br>
<br>
Using an explicit linker instead of just concatenating .text<br>
sections will allow us to start using .rodata sections and<br>
explicit descriptions of data on LDS that is shared between<br>
stages.<br>
---<br>
src/amd/Makefile.sources | 2 +<br>
src/amd/common/ac_binary.h | 2 +<br>
src/amd/common/ac_rtld.c | 556 +++++++++++++++++++++++++++++++++++++<br>
src/amd/common/ac_rtld.h | 87 ++++++<br>
src/amd/common/meson.build | 2 +<br>
5 files changed, 649 insertions(+)<br>
create mode 100644 src/amd/common/ac_rtld.c<br>
create mode 100644 src/amd/common/ac_rtld.h<br>
<br>
diff --git a/src/amd/Makefile.sources b/src/amd/Makefile.sources<br>
index 58e0008ee62..122fa306eb1 100644<br>
--- a/src/amd/Makefile.sources<br>
+++ b/src/amd/Makefile.sources<br>
@@ -35,20 +35,22 @@ ADDRLIB_FILES = \<br>
<br>
AMD_COMPILER_FILES = \<br>
common/ac_binary.c \<br>
common/ac_binary.h \<br>
common/ac_exp_param.h \<br>
common/ac_llvm_build.c \<br>
common/ac_llvm_build.h \<br>
common/ac_llvm_helper.cpp \<br>
common/ac_llvm_util.c \<br>
common/ac_llvm_util.h \<br>
+ common/ac_rtld.c \<br>
+ common/ac_rtld.h \<br>
common/ac_shader_abi.h \<br>
common/ac_shader_util.c \<br>
common/ac_shader_util.h<br>
<br>
<br>
AMD_NIR_FILES = \<br>
common/ac_nir_to_llvm.c \<br>
common/ac_nir_to_llvm.h<br>
<br>
AMD_COMMON_FILES = \<br>
diff --git a/src/amd/common/ac_binary.h b/src/amd/common/ac_binary.h<br>
index 8f594a9ce75..b91ecb4317b 100644<br>
--- a/src/amd/common/ac_binary.h<br>
+++ b/src/amd/common/ac_binary.h<br>
@@ -73,20 +73,22 @@ struct ac_shader_binary {<br>
struct ac_shader_config {<br>
unsigned num_sgprs;<br>
unsigned num_vgprs;<br>
unsigned spilled_sgprs;<br>
unsigned spilled_vgprs;<br>
unsigned lds_size; /* in HW allocation units; i.e 256 bytes on SI, 512 bytes on CI+ */<br>
unsigned spi_ps_input_ena;<br>
unsigned spi_ps_input_addr;<br>
unsigned float_mode;<br>
unsigned scratch_bytes_per_wave;<br>
+ unsigned rsrc1;<br>
+ unsigned rsrc2;<br>
};<br>
<br>
/*<br>
* Parse the elf binary stored in \p elf_data and create a<br>
* ac_shader_binary object.<br>
*/<br>
bool ac_elf_read(const char *elf_data, unsigned elf_size,<br>
struct ac_shader_binary *binary);<br>
<br>
/**<br>
diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c<br>
new file mode 100644<br>
index 00000000000..a79447904f3<br>
--- /dev/null<br>
+++ b/src/amd/common/ac_rtld.c<br>
@@ -0,0 +1,556 @@<br>
+/*<br>
+ * Copyright 2014-2018 Advanced Micro Devices, Inc.<br>
+ *<br>
+ * Permission is hereby granted, free of charge, to any person obtaining a<br>
+ * copy of this software and associated documentation files (the "Software"),<br>
+ * to deal in the Software without restriction, including without limitation<br>
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,<br>
+ * and/or sell copies of the Software, and to permit persons to whom the<br>
+ * Software is furnished to do so, subject to the following conditions:<br>
+ *<br>
+ * The above copyright notice and this permission notice (including the next<br>
+ * paragraph) shall be included in all copies or substantial portions of the<br>
+ * Software.<br>
+ *<br>
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR<br>
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,<br>
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL<br>
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER<br>
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,<br>
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE<br>
+ * SOFTWARE.<br>
+ */<br>
+<br>
+#include "ac_rtld.h"<br>
+<br>
+#include <gelf.h><br>
+#include <libelf.h><br>
+#include <stdarg.h><br>
+#include <stdio.h><br>
+#include <stdlib.h><br>
+#include <string.h><br>
+<br>
+#include "ac_binary.h"<br>
+#include "util/u_math.h"<br>
+<br>
+// Old distributions may not have this enum constant<br>
+#define MY_EM_AMDGPU 224<br>
+<br>
+#ifndef R_AMDGPU_NONE<br>
+#define R_AMDGPU_NONE 0<br>
+#define R_AMDGPU_ABS32_LO 1<br>
+#define R_AMDGPU_ABS32_HI 2<br>
+#define R_AMDGPU_ABS64 3<br>
+#define R_AMDGPU_REL32 4<br>
+#define R_AMDGPU_REL64 5<br>
+#define R_AMDGPU_ABS32 6<br>
+#define R_AMDGPU_GOTPCREL 7<br>
+#define R_AMDGPU_GOTPCREL32_LO 8<br>
+#define R_AMDGPU_GOTPCREL32_HI 9<br>
+#define R_AMDGPU_REL32_LO 10<br>
+#define R_AMDGPU_REL32_HI 11<br>
+#define R_AMDGPU_RELATIVE64 13<br>
+#endif<br>
+<br>
+/* For the UMR disassembler. */<br>
+#define DEBUGGER_END_OF_CODE_MARKER 0xbf9f0000 /* invalid instruction */<br>
+#define DEBUGGER_NUM_MARKERS 5<br>
+<br>
+struct ac_rtld_section {<br>
+ bool is_rx : 1;<br>
+ bool is_pasted_text : 1;<br>
+ uint64_t offset;<br>
+ const char *name;<br>
+};<br>
+<br>
+struct ac_rtld_part {<br>
+ Elf *elf;<br>
+ struct ac_rtld_section *sections;<br>
+ unsigned num_sections;<br>
+};<br>
+<br>
+static void report_erroraf(const char *fmt, va_list va)<br>
+{<br>
+ char *msg;<br>
+ int ret = asprintf(&msg, fmt, va);<br>
+ if (ret < 0)<br>
+ msg = "(asprintf failed)";<br>
+<br>
+ fprintf(stderr, "ac_rtld error: %s\n", msg);<br>
+<br>
+ if (ret >= 0)<br>
+ free(msg);<br>
+}<br>
+<br>
+static void report_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);<br>
+<br>
+static void report_errorf(const char *fmt, ...)<br>
+{<br>
+ va_list va;<br>
+ va_start(va, fmt);<br>
+ report_erroraf(fmt, va);<br>
+ va_end(va);<br>
+}<br>
+<br>
+static void report_elf_errorf(const char *fmt, ...) PRINTFLIKE(1, 2);<br>
+<br>
+static void report_elf_errorf(const char *fmt, ...)<br>
+{<br>
+ va_list va;<br>
+ va_start(va, fmt);<br>
+ report_erroraf(fmt, va);<br>
+ va_end(va);<br>
+<br>
+ fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));<br>
+}<br>
+<br>
+/**<br>
+ * Open a binary consisting of one or more shader parts.<br>
+ *<br>
+ * \param binary the uninitialized struct<br>
+ * \param num_parts number of shader parts<br>
+ * \param elf_ptrs pointers to the in-memory ELF objects for each shader part<br>
+ * \param elf_sizes sizes (in bytes) of the in-memory ELF objects<br>
+ */<br>
+bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,<br>
+ const char * const *elf_ptrs,<br>
+ const uint64_t *elf_sizes)<br>
+{<br>
+ /* One of the libelf implementations<br>
+ * (<a href="http://www.mr511.de/software/english.htm" rel="noreferrer" target="_blank">http://www.mr511.de/software/english.htm</a>) requires calling<br>
+ * elf_version() before elf_memory().<br>
+ */<br>
+ elf_version(EV_CURRENT);<br>
+<br>
+ memset(binary, 0, sizeof(*binary));<br>
+ binary->num_parts = num_parts;<br>
+ binary->parts = calloc(sizeof(*binary->parts), num_parts);<br>
+ if (!binary->parts)<br>
+ return false;<br>
+<br>
+ uint64_t pasted_text_size = 0;<br>
+ uint64_t rx_align = 1;<br>
+ uint64_t rx_size = 0;<br>
+<br>
+#define report_if(cond) \<br>
+ do { \<br>
+ if ((cond)) { \<br>
+ report_errorf(#cond); \<br>
+ goto fail; \<br>
+ } \<br>
+ } while (false)<br>
+#define report_elf_if(cond) \<br>
+ do { \<br>
+ if ((cond)) { \<br>
+ report_elf_errorf(#cond); \<br>
+ goto fail; \<br>
+ } \<br>
+ } while (false)<br>
+<br>
+ /* First pass over all parts: open ELFs and determine the placement of<br>
+ * sections in the memory image. */<br>
+ for (unsigned i = 0; i < num_parts; ++i) {<br>
+ struct ac_rtld_part *part = &binary->parts[i];<br>
+ part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]);<br>
+ report_elf_if(!part->elf);<br>
+<br>
+ const Elf64_Ehdr *ehdr = elf64_getehdr(part->elf);<br>
+ report_elf_if(!ehdr);<br>
+ report_if(ehdr->e_machine != MY_EM_AMDGPU);<br>
+<br>
+ size_t section_str_index;<br>
+ size_t num_shdrs;<br>
+ report_elf_if(elf_getshdrstrndx(part->elf, &section_str_index) < 0);<br>
+ report_elf_if(elf_getshdrnum(part->elf, &num_shdrs) < 0);<br>
+<br>
+ part->num_sections = num_shdrs;<br>
+ part->sections = calloc(sizeof(*part->sections), num_shdrs);<br>
+ report_if(!part->sections);<br>
+<br>
+ Elf_Scn *section = NULL;<br>
+ while ((section = elf_nextscn(part->elf, section))) {<br>
+ Elf64_Shdr *shdr = elf64_getshdr(section);<br>
+ struct ac_rtld_section *s = &part->sections[elf_ndxscn(section)];<br>
+ s->name = elf_strptr(part->elf, section_str_index, shdr->sh_name);<br>
+ report_elf_if(!s->name);<br>
+<br>
+ /* Cannot actually handle linked objects yet */<br>
+ report_elf_if(shdr->sh_addr != 0);<br>
+<br>
+ /* Alignment must be 0 or a power of two */<br>
+ report_elf_if(shdr->sh_addralign & (shdr->sh_addralign - 1));<br>
+ uint64_t sh_align = MAX2(shdr->sh_addralign, 1);<br>
+<br>
+ if (shdr->sh_flags & SHF_ALLOC &&<br>
+ shdr->sh_type != SHT_NOTE) {<br>
+ report_if(shdr->sh_flags & SHF_WRITE);<br>
+<br>
+ s->is_rx = true;<br>
+<br>
+ if (shdr->sh_flags & SHF_EXECINSTR) {<br>
+ report_elf_if(shdr->sh_size & 3);<br>
+<br>
+ if (!strcmp(s->name, ".text"))<br>
+ s->is_pasted_text = true;<br>
+ }<br>
+<br>
+ if (s->is_pasted_text) {<br>
+ s->offset = pasted_text_size;<br>
+ pasted_text_size += shdr->sh_size;<br>
+ } else {<br>
+ rx_align = align(rx_align, sh_align);<br>
+ rx_size = align(rx_size, sh_align);<br>
+ s->offset = rx_size;<br>
+ rx_size += shdr->sh_size;<br>
+ }<br>
+ }<br>
+ }<br>
+ }<br>
+<br>
+ binary->rx_end_markers = pasted_text_size;<br>
+ pasted_text_size += 4 * DEBUGGER_NUM_MARKERS;<br>
+<br>
+ /* Second pass: Adjust offsets of non-pasted text sections. */<br>
+ binary->rx_size = pasted_text_size;<br>
+ binary->rx_size = align(binary->rx_size, rx_align);<br>
+<br>
+ for (unsigned i = 0; i < num_parts; ++i) {<br>
+ struct ac_rtld_part *part = &binary->parts[i];<br>
+ size_t num_shdrs;<br>
+ elf_getshdrnum(part->elf, &num_shdrs);<br>
+<br>
+ for (unsigned j = 0; j < num_shdrs; ++j) {<br>
+ struct ac_rtld_section *s = &part->sections[j];<br>
+ if (s->is_rx && !s->is_pasted_text)<br>
+ s->offset += binary->rx_size;<br>
+ }<br>
+ }<br>
+<br>
+ binary->rx_size += rx_size;<br>
+<br>
+ return true;<br>
+<br>
+#undef report_if<br>
+#undef report_elf_if<br>
+<br>
+fail:<br>
+ ac_rtld_close(binary);<br>
+ return false;<br>
+}<br>
+<br>
+void ac_rtld_close(struct ac_rtld_binary *binary)<br>
+{<br>
+ for (unsigned i = 0; i < binary->num_parts; ++i) {<br>
+ struct ac_rtld_part *part = &binary->parts[i];<br>
+ free(part->sections);<br>
+ elf_end(part->elf);<br>
+ }<br>
+<br>
+ free(binary->parts);<br>
+ binary->parts = NULL;<br>
+ binary->num_parts = 0;<br>
+}<br>
+<br>
+static bool get_section_by_name(struct ac_rtld_part *part, const char *name,<br>
+ const char **data, uint64_t *nbytes)<br>
+{<br>
+ for (unsigned i = 0; i < part->num_sections; ++i) {<br>
+ struct ac_rtld_section *s = &part->sections[i];<br>
+ if (s->name && !strcmp(name, s->name)) {<br>
+ Elf_Scn *target_scn = elf_getscn(part->elf, i);<br>
+ Elf_Data *target_data = elf_getdata(target_scn, NULL);<br>
+ if (!target_data) {<br>
+ report_elf_errorf("ac_rtld: get_section_by_name: elf_getdata");<br>
+ return false;<br>
+ }<br>
+<br>
+ *data = target_data->d_buf;<br>
+ *nbytes = target_data->d_size;<br>
+ return true;<br>
+ }<br>
+ }<br>
+ return false;<br>
+}<br>
+<br>
+bool ac_rtld_get_section_by_name(struct ac_rtld_binary *binary, const char *name,<br>
+ const char **data, uint64_t *nbytes)<br>
+{<br>
+ assert(binary->num_parts == 1);<br>
+ return get_section_by_name(&binary->parts[0], name, data, nbytes);<br>
+}<br>
+<br>
+bool ac_rtld_read_config(struct ac_rtld_binary *binary,<br>
+ struct ac_shader_config *config)<br>
+{<br>
+ for (unsigned i = 0; i < binary->num_parts; ++i) {<br>
+ struct ac_rtld_part *part = &binary->parts[i];<br>
+ const char *config_data;<br>
+ uint64_t config_nbytes;<br>
+<br>
+ if (!get_section_by_name(part, ".AMDGPU.config",<br>
+ &config_data, &config_nbytes))<br>
+ return false;<br>
+<br>
+ /* TODO: be precise about scratch use? */<br>
+ struct ac_shader_config c = {};<br>
+ ac_parse_shader_binary_config(config_data, config_nbytes, true, &c);<br>
+<br>
+ config->num_sgprs = MAX2(config->num_sgprs, c.num_sgprs);<br>
+ config->num_vgprs = MAX2(config->num_vgprs, c.num_vgprs);<br>
+ config->spilled_sgprs = MAX2(config->spilled_sgprs, c.spilled_sgprs);<br>
+ config->spilled_vgprs = MAX2(config->spilled_vgprs, c.spilled_vgprs);<br>
+ config->scratch_bytes_per_wave = MAX2(config->scratch_bytes_per_wave,<br>
+ c.scratch_bytes_per_wave);<br>
+<br>
+ assert(i == 0 || config->float_mode == c.float_mode);<br>
+ config->float_mode = c.float_mode;<br>
+<br>
+ config->spi_ps_input_ena |= c.spi_ps_input_ena;<br>
+ config->spi_ps_input_addr |= c.spi_ps_input_addr;<br></blockquote><div><br></div><div>I'm pretty sure spi_ps_input_* can't be combined like that. RadeonSI only uses the values from the main part and ignores values from the other parts.</div><div><br></div><div>Marek</div></div></div>