[Mesa-dev] [PATCH 2/6] amd/rtld: layout and relocate LDS symbols

Marek Olšák maraeo at gmail.com
Mon May 13 23:13:21 UTC 2019


On Sat, May 4, 2019 at 9:39 AM Nicolai Hähnle <nhaehnle at gmail.com> wrote:

> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
>
> Upcoming changes to LLVM will emit LDS objects as symbols in the ELF
> symbol table, with relocations that will be resolved with this change.
>
> Callers will also be able to define LDS symbols that are shared between
> shader parts. This will be used by radeonsi for the ESGS ring in gfx9+
> merged shaders.
> ---
>  src/amd/common/ac_rtld.c                      | 210 ++++++++++++++++--
>  src/amd/common/ac_rtld.h                      |  39 +++-
>  src/gallium/drivers/radeonsi/si_compute.c     |   9 +-
>  src/gallium/drivers/radeonsi/si_debug.c       |  22 +-
>  src/gallium/drivers/radeonsi/si_shader.c      |  61 +++--
>  src/gallium/drivers/radeonsi/si_shader.h      |   5 +-
>  .../drivers/radeonsi/si_state_shaders.c       |   2 +-
>  7 files changed, 296 insertions(+), 52 deletions(-)
>
> diff --git a/src/amd/common/ac_rtld.c b/src/amd/common/ac_rtld.c
> index 4e0468d2062..3df7b3ba51f 100644
> --- a/src/amd/common/ac_rtld.c
> +++ b/src/amd/common/ac_rtld.c
> @@ -24,25 +24,31 @@
>  #include "ac_rtld.h"
>
>  #include <gelf.h>
>  #include <libelf.h>
>  #include <stdarg.h>
>  #include <stdio.h>
>  #include <stdlib.h>
>  #include <string.h>
>
>  #include "ac_binary.h"
> +#include "ac_gpu_info.h"
> +#include "util/u_dynarray.h"
>  #include "util/u_math.h"
>
>  // Old distributions may not have this enum constant
>  #define MY_EM_AMDGPU 224
>
> +#ifndef STT_AMDGPU_LDS
> +#define STT_AMDGPU_LDS 13
> +#endif
> +
>  #ifndef R_AMDGPU_NONE
>  #define R_AMDGPU_NONE 0
>  #define R_AMDGPU_ABS32_LO 1
>  #define R_AMDGPU_ABS32_HI 2
>  #define R_AMDGPU_ABS64 3
>  #define R_AMDGPU_REL32 4
>  #define R_AMDGPU_REL64 5
>  #define R_AMDGPU_ABS32 6
>  #define R_AMDGPU_GOTPCREL 7
>  #define R_AMDGPU_GOTPCREL32_LO 8
> @@ -97,41 +103,155 @@ static void report_elf_errorf(const char *fmt, ...)
> PRINTFLIKE(1, 2);
>  static void report_elf_errorf(const char *fmt, ...)
>  {
>         va_list va;
>         va_start(va, fmt);
>         report_erroraf(fmt, va);
>         va_end(va);
>
>         fprintf(stderr, "ELF error: %s\n", elf_errmsg(elf_errno()));
>  }
>
> +/**
> + * Find a symbol in a dynarray of struct ac_rtld_symbol by \p name and shader
> + * \p part_idx.
> + */
> +static const struct ac_rtld_symbol *find_symbol(const struct
> util_dynarray *symbols,
> +                                               const char *name, unsigned
> part_idx)
> +{
> +       util_dynarray_foreach(symbols, struct ac_rtld_symbol, symbol) {
> +               if ((symbol->part_idx == ~0u || symbol->part_idx ==
> part_idx) &&
> +                   !strcmp(name, symbol->name))
> +                       return symbol;
> +       }
> +       return 0;
> +}
> +
> +static int compare_symbol_by_align(const void *lhsp, const void *rhsp)
> +{
> +       const struct ac_rtld_symbol *lhs = lhsp;
> +       const struct ac_rtld_symbol *rhs = rhsp;
> +       if (rhs->align > lhs->align)
> +               return -1;
> +       if (rhs->align < lhs->align)
> +               return 1;
> +       return 0;
> +}
> +
> +/**
> + * Sort the given symbol list by decreasing alignment and assign offsets.
> + */
> +static bool layout_symbols(struct ac_rtld_symbol *symbols, unsigned
> num_symbols,
> +                          uint64_t *ptotal_size)
> +{
> +       qsort(symbols, num_symbols, sizeof(*symbols),
> compare_symbol_by_align);
> +
> +       uint64_t total_size = *ptotal_size;
> +
> +       for (unsigned i = 0; i < num_symbols; ++i) {
> +               struct ac_rtld_symbol *s = &symbols[i];
> +               assert(util_is_power_of_two_nonzero(s->align));
> +
> +               total_size = align64(total_size, s->align);
> +               s->offset = total_size;
> +
> +               if (total_size + s->size < total_size) {
> +                       report_errorf("%s: size overflow", __FUNCTION__);
> +                       return false;
> +               }
> +
> +               total_size += s->size;
> +       }
> +
> +       *ptotal_size = total_size;
> +       return true;
> +}
> +
> +/**
> + * Read LDS symbols from the given \p section of the ELF of \p part and append
> + * them to the LDS symbols list.
> + *
> + * Shared LDS symbols are filtered out.
> + */
> +static bool read_private_lds_symbols(struct ac_rtld_binary *binary,
> +                                    unsigned part_idx,
> +                                    Elf_Scn *section,
> +                                    uint32_t *lds_end_align)
> +{
> +#define report_elf_if(cond) \
> +       do { \
> +               if ((cond)) { \
> +                       report_errorf(#cond); \
> +                       return false; \
> +               } \
> +       } while (false)
> +
> +       struct ac_rtld_part *part = &binary->parts[part_idx];
> +       Elf64_Shdr *shdr = elf64_getshdr(section);
> +       uint32_t strtabidx = shdr->sh_link;
> +       Elf_Data *symbols_data = elf_getdata(section, NULL);
> +       report_elf_if(!symbols_data);
> +
> +       const Elf64_Sym *symbol = symbols_data->d_buf;
> +       size_t num_symbols = symbols_data->d_size / sizeof(Elf64_Sym);
> +
> +       for (size_t j = 0; j < num_symbols; ++j, ++symbol) {
> +               if (ELF64_ST_TYPE(symbol->st_info) != STT_AMDGPU_LDS)
> +                       continue;
> +
> +               report_elf_if(symbol->st_size > 1u << 29);
> +
> +               struct ac_rtld_symbol s = {};
> +               s.name = elf_strptr(part->elf, strtabidx,
> symbol->st_name);
> +               s.size = symbol->st_size;
> +               s.align = MIN2(1u << (symbol->st_other >> 3), 1u << 16);
> +               s.part_idx = part_idx;
> +
> +               if (!strcmp(s.name, "__lds_end")) {
> +                       report_elf_if(s.size != 0);
> +                       *lds_end_align = MAX2(*lds_end_align, s.align);
> +                       continue;
> +               }
> +
> +               const struct ac_rtld_symbol *shared =
> +                       find_symbol(&binary->lds_symbols, s.name,
> part_idx);
> +               if (shared) {
> +                       report_elf_if(s.align > shared->align);
> +                       report_elf_if(s.size > shared->size);
> +                       continue;
> +               }
> +
> +               util_dynarray_append(&binary->lds_symbols, struct
> ac_rtld_symbol, s);
> +       }
> +
> +       return true;
> +
> +#undef report_elf_if
> +}
> +
>  /**
>   * Open a binary consisting of one or more shader parts.
>   *
>   * \param binary the uninitialized struct
> - * \param num_parts number of shader parts
> - * \param elf_ptrs pointers to the in-memory ELF objects for each shader part
> - * \param elf_sizes sizes (in bytes) of the in-memory ELF objects
> + * \param i binary opening parameters
>   */
> -bool ac_rtld_open(struct ac_rtld_binary *binary, unsigned num_parts,
> -                 const char * const *elf_ptrs,
> -                 const uint64_t *elf_sizes)
> +bool ac_rtld_open(struct ac_rtld_binary *binary,
> +                 struct ac_rtld_open_info i)
>  {
>         /* One of the libelf implementations
>          * (http://www.mr511.de/software/english.htm) requires calling
>          * elf_version() before elf_memory().
>          */
>         elf_version(EV_CURRENT);
>
>         memset(binary, 0, sizeof(*binary));
> -       binary->num_parts = num_parts;
> -       binary->parts = calloc(sizeof(*binary->parts), num_parts);
> +       binary->num_parts = i.num_parts;
> +       binary->parts = calloc(sizeof(*binary->parts), i.num_parts);
>         if (!binary->parts)
>                 return false;
>
>         uint64_t pasted_text_size = 0;
>         uint64_t rx_align = 1;
>         uint64_t rx_size = 0;
>
>  #define report_if(cond) \
>         do { \
>                 if ((cond)) { \
> @@ -140,25 +260,44 @@ bool ac_rtld_open(struct ac_rtld_binary *binary,
> unsigned num_parts,
>                 } \
>         } while (false)
>  #define report_elf_if(cond) \
>         do { \
>                 if ((cond)) { \
>                         report_elf_errorf(#cond); \
>                         goto fail; \
>                 } \
>         } while (false)
>
> -       /* First pass over all parts: open ELFs and determine the placement of
> -        * sections in the memory image. */
> -       for (unsigned i = 0; i < num_parts; ++i) {
> -               struct ac_rtld_part *part = &binary->parts[i];
> -               part->elf = elf_memory((char *)elf_ptrs[i], elf_sizes[i]);
> +       /* Copy and layout shared LDS symbols. */
> +       util_dynarray_resize(&binary->lds_symbols, struct ac_rtld_symbol,
> i.num_shared_lds_symbols);
> +       memcpy(binary->lds_symbols.data, i.shared_lds_symbols,
> binary->lds_symbols.size);
> +
> +       util_dynarray_foreach(&binary->lds_symbols, struct ac_rtld_symbol,
> symbol)
> +               symbol->part_idx = ~0u;
> +
> +       unsigned max_lds_size = i.info->chip_class >= CIK ? 64 * 1024 : 32
> * 1024;
>

For ESGS and LSHS, the max LDS size is 32K, because the other half is
reserved for PS. A GPU hang can occur if more LDS is used for those stages.

Marek
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20190513/432658e0/attachment-0001.html>


More information about the mesa-dev mailing list