[Mesa-dev] [PATCH 2/6] amd/rtld: layout and relocate LDS symbols
Marek Olšák
maraeo at gmail.com
Mon May 13 23:13:21 UTC 2019
On Sat, May 4, 2019 at 9:39 AM Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Upcoming changes to LLVM will emit LDS objects as symbols in the ELF
> symbol table, with relocations that will be resolved with this change.
>
> Callers will also be able to define LDS symbols that are shared between
> shader parts. This will be used by radeonsi for the ESGS ring in gfx9+
> merged shaders.
> ---
> src/amd/common/ac_rtld.c | 210 ++++++++++++++++--
> src/amd/common/ac_rtld.h | 39 +++-
> src/gallium/drivers/radeonsi/si_compute.c | 9 +-
> src/gallium/drivers/radeonsi/si_debug.c | 22 +-
> src/gallium/drivers/radeonsi/si_shader.c | 61 +++--
> src/gallium/drivers/radeonsi/si_shader.h | 5 +-
> .../drivers/radeonsi/si_state_shaders.c | 2 +-
> 7 files changed, 296 insertions(+), 52 deletions(-)
>
> diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
> index 4e0468d2062..3df7b3ba51f 100644
> --- a/src/amd/common/ac_rtld.c
> +++ b/src/amd/common/ac_rtld.c
> @@ -24,25 +24,31 @@
> #include "ac_rtld.h"
>
> #include <gelf.h>
> #include <libelf.h>
> #include <stdarg.h>
> #include <stdio.h>
> #include <stdlib.h>
> #include <string.h>
>
> #include "ac_binary.h"
> +#include "ac_gpu_info.h"
> +#include "util/u_dynarray.h"
> #include "util/u_math.h"
>
> // Old distributions may not have this enum constant
> #define MY_EM_AMDGPU 224
>
> +#ifndef STT_AMDGPU_LDS
> +#define STT_AMDGPU_LDS 13
> +#endif
> +
> #ifndef R_AMDGPU_NONE
> #define R_AMDGPU_NONE 0
> #define R_AMDGPU_ABS32_LO 1
> #define R_AMDGPU_ABS32_HI 2
> #define R_AMDGPU_ABS64 3
> #define R_AMDGPU_REL32 4
> #define R_AMDGPU_REL64 5
> #define R_AMDGPU_ABS32 6
> #define R_AMDGPU_GOTPCREL 7
> #define R_AMDGPU_GOTPCREL32_LO 8
> @@ -97,41 +103,155 @@ static void report_elf_errorf(const char *fmt, ...)
> PRINTFLIKE(1, 2);
> static void report_elf_errorf(const char *fmt, ...)
> {
> va_list va;
> va_start(va, fmt);
> report_erroraf(fmt, va);
> va_end(va);
>
> fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
> }
>
> +/**
> + * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and
> shader
> + * \p part_idx.
> + */
> +static const struct ac_rtld_symbol *find_symbol(const struct
> util_dynarray *symbols,
> + const char *name, unsigned
> part_idx)
> +{
> + util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
> + if ((symbol->part_idx == ~0u || symbol->part_idx ==
> part_idx) &&
> + !strcmp(name, symbol->name))
> + return symbol;
> + }
> + return 0;
> +}
> +
> +static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
> +{
> + const struct ac_rtld_symbol *lhs = lhsp;
> + const struct ac_rtld_symbol *rhs = rhsp;
> + if (rhs->align > lhs->align)
> + return -1;
> + if (rhs->align < lhs->align)
> + return 1;
> + return 0;
> +}
> +
> +/**
> + * Sort the given symbol list by decreasing alignment and assign offsets.
> + */
> +static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned
> num_symbols,
> + uint64_t *ptotal_size)
> +{
> + qsort(symbols, num_symbols, sizeof(*symbols),
> compare_symbol_by_align);
> +
> + uint64_t total_size = *ptotal_size;
> +
> + for (unsigned i = 0; i < num_symbols; ++i) {
> + struct ac_rtld_symbol *s = &symbols[i];
> + assert(util_is_power_of_two_nonzero(s->align));
> +
> + total_size = align64(total_size, s->align);
> + s->offset = total_size;
> +
> + if (total_size + s->size < total_size) {
> + report_errorf("%s: size overflow", __FUNCTION__);
> + return false;
> + }
> +
> + total_size += s->size;
> + }
> +
> + *ptotal_size = total_size;
> + return true;
> +}
> +
> +/**
> + * Read LDS symbols from the given \p section of the ELF of \p part and
> append
> + * them to the LDS symbols list.
> + *
> + * Shared LDS symbols are filtered out.
> + */
> +static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
> + unsigned part_idx,
> + Elf_Scn *section,
> + uint32_t *lds_end_align)
> +{
> +#define report_elf_if(cond) \
> + do { \
> + if ((cond)) { \
> + report_errorf(#cond); \
> + return false; \
> + } \
> + } while (false)
> +
> + struct ac_rtld_part *part = &binary->parts[part_idx];
> + Elf64_Shdr *shdr = elf64_getshdr(section);
> + uint32_t strtabidx = shdr->sh_link;
> + Elf_Data *symbols_data = elf_getdata(section, NULL);
> + report_elf_if(!symbols_data);
> +
> + const Elf64_Sym *symbol = symbols_data->d_buf;
> + size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
> +
> + for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
> + if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
> + continue;
> +
> + report_elf_if(symbol->st_size > 1u << 29);
> +
> + struct ac_rtld_symbol s = {};
> + s.name = elf_strptr(part->elf, strtabidx,
> symbol->st_name);
> + s.size = symbol->st_size;
> + s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
> + s.part_idx = part_idx;
> +
> + if (!strcmp(s.name, "__lds_end")) {
> + report_elf_if(s.size != 0);
> + *lds_end_align = MAX2(*lds_end_align, s.align);
> + continue;
> + }
> +
> + const struct ac_rtld_symbol *shared =
> + find_symbol(&binary->lds_symbols, s.name,
> part_idx);
> + if (shared) {
> + report_elf_if(s.align > shared->align);
> + report_elf_if(s.size > shared->size);
> + continue;
> + }
> +
> + util_dynarray_append(&binary->lds_symbols, struct
> ac_rtld_symbol, s);
> + }
> +
> + return true;
> +
> +#undef report_elf_if
> +}
> +
> /**
> * Open a binary consisting of one or more shader parts.
> *
> * \param binary the uninitialized struct
> - * \param num_parts number of shader parts
> - * \param elf_ptrs pointers to the in-memory ELF objects for each shader
> part
> - * \param elf_sizes sizes (in bytes) of the in-memory ELF objects
> + * \param i binary opening parameters
> */
> -bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
> - const char * const *elf_ptrs,
> - const uint64_t *elf_sizes)
> +bool ac_rtld_open(struct ac_rtld_binary *binary,
> + struct ac_rtld_open_info i)
> {
> /* One of the libelf implementations
> * (http://www.mr511.de/software/english.htm) requires calling
> * elf_version() before elf_memory().
> */
> elf_version(EV_CURRENT);
>
> memset(binary, 0, sizeof(*binary));
> - binary->num_parts = num_parts;
> - binary->parts = calloc(sizeof(*binary->parts), num_parts);
> + binary->num_parts = i.num_parts;
> + binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
> if (!binary->parts)
> return false;
>
> uint64_t pasted_text_size = 0;
> uint64_t rx_align = 1;
> uint64_t rx_size = 0;
>
> #define report_if(cond) \
> do { \
> if ((cond)) { \
> @@ -140,25 +260,44 @@ bool ac_rtld_open(struct ac_rtld_binary *binary,
> unsigned num_parts,
> } \
> } while (false)
> #define report_elf_if(cond) \
> do { \
> if ((cond)) { \
> report_elf_errorf(#cond); \
> goto fail; \
> } \
> } while (false)
>
> - /* First pass over all parts: open ELFs and determine the
> placement of
> - * sections in the memory image. */
> - for (unsigned i = 0; i < num_parts; ++i) {
> - struct ac_rtld_part *part = &binary->parts[i];
> - part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]);
> + /* Copy and layout shared LDS symbols. */
> + util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
> i.num_shared_lds_symbols);
> + memcpy(binary->lds_symbols.data, i.shared_lds_symbols,
> binary->lds_symbols.size);
> +
> + util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol,
> symbol)
> + symbol->part_idx = ~0u;
> +
> + unsigned max_lds_size = i.info->chip_class >= CIK ? 64 * 1024 : 32
> * 1024;
>
For the ESGS and LSHS stages, the maximum LDS size is 32 KB, because the other
half is reserved for PS. A GPU hang can occur if those stages use more LDS than that.
Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190513/432658e0/attachment-0001.html>
More information about the mesa-dev mailing list