[PATCH 1/2] drm: add libdrm_amdgpu
Emil Velikov
emil.l.velikov at gmail.com
Thu Apr 23 09:23:18 PDT 2015
Hi Alex,
On 21/04/15 16:39, Alex Deucher wrote:
> This is the new ioctl wrapper used by the new admgpu driver.
> It's primarily used by xf86-video-amdgpu and mesa.
>
Glad to see amdgpu finally out :-)
Can we annotate the private symbols with drm_private, in both
declaration and definition ? It will hide the symbols, plus will make
the leap towards Solaris support a bit smaller.
> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> Makefile.am | 5 +
> amdgpu/Makefile.am | 55 ++
> amdgpu/amdgpu.h | 1278 ++++++++++++++++++++++++++++++++++++++++++++
> amdgpu/amdgpu_bo.c | 622 +++++++++++++++++++++
> amdgpu/amdgpu_cs.c | 981 ++++++++++++++++++++++++++++++++++
> amdgpu/amdgpu_device.c | 242 +++++++++
> amdgpu/amdgpu_gpu_info.c | 275 ++++++++++
> amdgpu/amdgpu_internal.h | 210 ++++++++
> amdgpu/amdgpu_vamgr.c | 169 ++++++
> amdgpu/libdrm_amdgpu.pc.in | 10 +
> amdgpu/util_double_list.h | 146 +++++
> amdgpu/util_hash.c | 382 +++++++++++++
> amdgpu/util_hash.h | 99 ++++
> amdgpu/util_hash_table.c | 257 +++++++++
> amdgpu/util_hash_table.h | 65 +++
> amdgpu/util_math.h | 32 ++
> configure.ac | 20 +
> include/drm/amdgpu_drm.h | 600 +++++++++++++++++++++
> 18 files changed, 5448 insertions(+)
> create mode 100644 amdgpu/Makefile.am
> create mode 100644 amdgpu/amdgpu.h
> create mode 100644 amdgpu/amdgpu_bo.c
> create mode 100644 amdgpu/amdgpu_cs.c
> create mode 100644 amdgpu/amdgpu_device.c
> create mode 100644 amdgpu/amdgpu_gpu_info.c
> create mode 100644 amdgpu/amdgpu_internal.h
> create mode 100644 amdgpu/amdgpu_vamgr.c
> create mode 100644 amdgpu/libdrm_amdgpu.pc.in
> create mode 100644 amdgpu/util_double_list.h
> create mode 100644 amdgpu/util_hash.c
> create mode 100644 amdgpu/util_hash.h
> create mode 100644 amdgpu/util_hash_table.c
> create mode 100644 amdgpu/util_hash_table.h
> create mode 100644 amdgpu/util_math.h
> create mode 100644 include/drm/amdgpu_drm.h
>
> --- /dev/null
> +++ b/amdgpu/Makefile.am
> +AM_CFLAGS = \
> + $(WARN_CFLAGS)
> -Wno-switch-enum \
>From a quick look you should be OK without this. If that's not the case,
it might be worth looking into ?
> + -I$(top_srcdir) \
> + -I$(top_srcdir)/amdgpu \
You can drop this line.
> + $(PTHREADSTUBS_CFLAGS) \
> + -I$(top_srcdir)/include/drm
> +
> +libdrm_amdgpu_la_LTLIBRARIES = libdrm_amdgpu.la
> +libdrm_amdgpu_ladir = $(libdir)
> +libdrm_amdgpu_la_LDFLAGS = -version-number 1:0:1 -no-undefined
Considering that this is the first public version shouldn't this be 1:0:0 ?
> +libdrm_amdgpu_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@
> +
> +libdrm_amdgpu_la_SOURCES = \
> + amdgpu_gpu_info.c \
> + amdgpu_device.c \
> + amdgpu_bo.c \
> + util_hash.c \
> + util_hash_table.c \
> + amdgpu_vamgr.c \
> + amdgpu_cs.c
> +
Please include all files, and sort them alphabetically, something like:
amdgpu_bo.c
amdgpu_cs.c
amdgpu_device.c
amdgpu_gpu_info.c
amdgpu_internal.h
amdgpu_vamgr.c
util_double_list.h
util_hash.c
util_hash.h
util_hash_table.c
util_hash_table.h
util_math.h
One might want to drop util_double_list.h if you go for my suggestion below.
> +nodist_EXTRA_libdrm_amdgpu_la_SOURCES = dummy.cxx
> +
There are no C++ sources or static libs based on such so you can drop this.
> +EXTRA_DIST = libdrm_amdgpu.pc.in
You don't need this.
> --- /dev/null
> +++ b/amdgpu/amdgpu.h
> +#ifndef _amdgpu_h_
> +#define _amdgpu_h_
> +
Capitalise the header guard name ?
> +#include <stdint.h>
> +#include <stdbool.h>
> +
Albeit not so common in libdrm, you can add make the header C++ safe.
#if defined(__cplusplus)
extern "C" {
#endif
> +struct amdgpu_bo_alloc_request {
> + /** Allocation request. It must be aligned correctly. */
> + uint64_t alloc_size;
> +
> + /**
> + * It may be required to have some specific alignment requirements
> + * for physical back-up storage (e.g. for displayable surface).
> + * If 0 there is no special alignment requirement
> + */
> + uint64_t phys_alignment;
> +
> + /**
> + * UMD should specify where to allocate memory and how it
> + * will be accessed by the CPU.
> + */
> + uint32_t preferred_heap;
> +
Worth adding a pad ?
> + /** Additional flags passed on allocation */
> + uint64_t flags;
> +};
> +struct amdgpu_bo_info {
> + /** Allocated memory size */
> + uint64_t alloc_size;
> +
> + /**
> + * It may be required to have some specific alignment requirements
> + * for physical back-up storage.
> + */
> + uint64_t phys_alignment;
> +
> + /**
> + * Assigned virtual MC Base Address.
> + * \note This information will be returned only if this buffer was
> + * allocated in the same process otherwise 0 will be returned.
> + */
> + uint64_t virtual_mc_base_address;
> +
> + /** Heap where to allocate memory. */
> + uint32_t preferred_heap;
> +
Ditto.
> + /** Additional allocation flags. */
> + uint64_t alloc_flags;
> +
> + /** Metadata associated with buffer if any. */
> + struct amdgpu_bo_metadata metadata;
> +};
> +struct amdgpu_gds_alloc_info {
> + /** Handle assigned to gds allocation */
> + amdgpu_bo_handle resource_handle;
> +
> + /** How much was really allocated */
> + uint32_t gds_memory_size;
> +
> + /** Number of GWS resources allocated */
> + uint32_t gws;
> +
> + /** Number of OA resources allocated */
> + uint32_t oa;
Ditto.
> +struct amdgpu_cs_ib_info {
> + /** Special flags */
> + uint64_t flags;
> +
> + /** Handle of command buffer */
> + amdgpu_ib_handle ib_handle;
> +
> + /**
> + * Size of Command Buffer to be submitted.
> + * - The size is in units of dwords (4 bytes).
> + * - Must be less or equal to the size of allocated IB
> + * - Could be 0
> + */
> + uint32_t size;
Ditto.
> +struct amdgpu_cs_request {
> + /** Specify flags with additional information */
> + uint64_t flags;
> +
> + /** Specify HW IP block type to which to send the IB. */
> + unsigned ip_type;
> +
> + /** IP instance index if there are several IPs of the same type. */
> + unsigned ip_instance;
> +
> + /**
> + * Specify ring index of the IP. We could have several rings
> + * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
> + */
> + uint32_t ring;
> +
> + /**
> + * Specify number of resource handles passed.
> + * Size of 'handles' array
> + *
> + */
> + uint32_t number_of_resources;
> +
> + /** Array of resources used by submission. */
> + amdgpu_bo_handle *resources;
> +
> + /** Array of resources flags. This is optional and can be NULL. */
> + uint8_t *resource_flags;
> +
> + /** Number of IBs to submit in the field ibs. */
> + uint32_t number_of_ibs;
> +
Ditto.
> + /**
> + * IBs to submit. Those IBs will be submit together as single entity
> + */
> + struct amdgpu_cs_ib_info *ibs;
> +};
> +
> +/**
> + * Structure describing request to check submission state using fence
> + *
> + * \sa amdgpu_cs_query_fence_status()
> + *
> +*/
> +struct amdgpu_cs_query_fence {
> +
> + /** In which context IB was sent to execution */
> + amdgpu_context_handle context;
> +
> + /** Timeout in nanoseconds. */
> + uint64_t timeout_ns;
> +
> + /** To which HW IP type the fence belongs */
> + unsigned ip_type;
> +
> + /** IP instance index if there are several IPs of the same type. */
> + unsigned ip_instance;
> +
> + /** Ring index of the HW IP */
> + uint32_t ring;
> +
Ditto.
> + /** Flags */
> + uint64_t flags;
> +
> + /** Specify fence for which we need to check
> + * submission status.*/
> + uint64_t fence;
> --- /dev/null
> +++ b/amdgpu/amdgpu_bo.c
> +#define _FILE_OFFSET_BITS 64
How about
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
If you feel against poluting all the C sources with it, you should
ensure that amdgpu_internal.h is included before any other header in
every source and header file.
> +int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
> +{
> + union drm_amdgpu_gem_mmap args;
> + void *ptr;
> + int r;
> +
> + pthread_mutex_lock(&bo->cpu_access_mutex);
> +
> + if (bo->cpu_ptr) {
> + /* already mapped */
> + assert(bo->cpu_map_count > 0);
> + bo->cpu_map_count++;
> + *cpu = bo->cpu_ptr;
> + pthread_mutex_unlock(&bo->cpu_access_mutex);
> + return 0;
> + }
> +
> + assert(bo->cpu_map_count == 0);
> +
> + memset(&args, 0, sizeof(args));
> +
> + /* Query the buffer address (args.addr_ptr).
> + * The kernel driver ignores the offset and size parameters. */
> + args.in.handle = bo->handle;
> +
> + r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_MMAP, &args,
> + sizeof(args));
> + if (r) {
> + pthread_mutex_unlock(&bo->cpu_access_mutex);
> + return r;
> + }
> +
> + /* Map the buffer. */
> + ptr = mmap(NULL, bo->alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED,
Please use drm_mmap/drm_munmap.
> --- /dev/null
> +++ b/amdgpu/amdgpu_cs.c
> +static int amdgpu_cs_create_ib(amdgpu_device_handle dev,
> + amdgpu_context_handle context,
> + enum amdgpu_cs_ib_size ib_size,
> + amdgpu_ib_handle *ib)
> +{
> + new_ib = malloc(sizeof(struct amdgpu_ib));
> + if (NULL == new_ib) {
Use new_ib == NULL or !new_ib ? There are a few other places that can do so.
> --- /dev/null
> +++ b/amdgpu/amdgpu_device.c
> +static unsigned fd_hash(void *key)
> +{
> + int fd = PTR_TO_UINT(key);
> + struct stat stat;
> + fstat(fd, &stat);
> +
> + if (!S_ISCHR(stat.st_mode))
> + return stat.st_dev ^ stat.st_ino;
I'm a bit puzzled, how is this possible ?
> + else
> + return stat.st_dev ^ (stat.st_rdev & RENDERNODE_MINOR_MASK);
I'm not 100% sure that there won't be any side-effects with the idea of
using primary and render node interchangeably.
Plus one cannot mask out to get from the primary device to the render
one, or vice-versa. The minor number is not reliable (rightfully so),
thus one can use drmGet(Primary|Render)DeviceNameFromFd or introduce
their fd counterpart.
> +}
> +
> +static int fd_compare(void *key1, void *key2)
> +{
> + int fd1 = PTR_TO_UINT(key1);
> + int fd2 = PTR_TO_UINT(key2);
> + struct stat stat1, stat2;
> + fstat(fd1, &stat1);
> + fstat(fd2, &stat2);
> +
> + if (!S_ISCHR(stat1.st_mode) || !S_ISCHR(stat2.st_mode))
> + return stat1.st_dev != stat2.st_dev ||
> + stat1.st_ino != stat2.st_ino;
> + else
> + return major(stat1.st_rdev) != major(stat2.st_rdev) ||
> + (minor(stat1.st_rdev) & RENDERNODE_MINOR_MASK) !=
> + (minor(stat2.st_rdev) & RENDERNODE_MINOR_MASK);
Same comments apply.
> +}
> +
> +/**
> +* Get the authenticated form fd,
> +*
> +* \param fd - \c [in] File descriptor for AMD GPU device
> +* \param auth - \c [out] Pointer to output the fd is authenticated or not
> +* A render node fd, output auth = 0
> +* A legacy fd, get the authenticated for compatibility root
> +*
> +* \return 0 on success\n
> +* >0 - AMD specific error code\n
> +* <0 - Negative POSIX Error code
> +*/
> +static int amdgpu_get_auth(int fd, int *auth)
> +{
> + int r = 0;
> + drm_client_t client;
> + struct stat stat1;
> + fstat(fd,&stat1);
> + if (minor(stat1.st_rdev) & ~RENDERNODE_MINOR_MASK)/* find a render node fd */
Please use drmGetNodeTypeFromFd().
> --- /dev/null
> +++ b/amdgpu/amdgpu_internal.h
> @@ -0,0 +1,210 @@
> +#ifndef _amdgpu_internal_h_
> +#define _amdgpu_internal_h_
> +
Capital letters for the header guard ?
> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +
> +#include <assert.h>
> +#include <pthread.h>
> +#include "xf86atomic.h"
> +#include "amdgpu.h"
> +#include "util_double_list.h"
> +
> +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
> +
Drop this and use util_math.h where needed ?
> +/**
> + * Increment src and decrement dst as if we were updating references
> + * for an assignment between 2 pointers of some objects.
> + *
> + * \return true if dst is 0
> + */
> +static inline bool update_references(atomic_t *dst, atomic_t *src)
> +{
> + if (dst != src) {
> + /* bump src first */
> + if (src) {
> + assert(atomic_read(src) > 0);
> + atomic_inc(src);
> + }
> + if (dst) {
> + assert(atomic_read(dst) > 0);
> + return atomic_dec_and_test(dst);
Am I imagining something or does the assert conditions look a bit strange ?
> diff --git a/amdgpu/util_double_list.h b/amdgpu/util_double_list.h
> new file mode 100644
> index 0000000..3f48ae2
> --- /dev/null
> +++ b/amdgpu/util_double_list.h
There are already two identical copies of this file - let's not add a
third one.
freedreno/list.h
tests/radeon/list.h
How about we move it one level up, and update the current users (incl.
Makefile.sources)
> --- /dev/null
> +++ b/amdgpu/util_hash.c
> --- /dev/null
> +++ b/amdgpu/util_hash_table.c
Can one use the existing drmHash functions like omap and freedreno ?
Would be nice to avoid going the mesa's route which iirc has four
different implementations.
> --- /dev/null
> +++ b/amdgpu/util_math.h
FWIW we can move this a level up, so that others can use it. Obviously
there is no reason to be part of this series.
> diff --git a/configure.ac b/configure.ac
> index 155d577..509f2d4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -36,6 +36,7 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
>
> # Check for programs
> AC_PROG_CC
> +AC_PROG_CXX
>
There are no C++ sources, so you don't need a C++ compiler.
> @@ -236,6 +242,9 @@ if test "x$drm_cv_atomic_primitives" = "xnone"; then
> LIBDRM_ATOMICS_NOT_FOUND_MSG($RADEON, radeon, Radeon, radeon)
> RADEON=no
>
> + LIBDRM_ATOMICS_NOT_FOUND_MSG($AMDGPU, amdgpu, AMD, amdgpu)
> + AMDGPU=no
> +
Glad to see this, unlike the original copy/paste of an insanely long
message previously :)
> +if test "x$AMDGPU" = xyes; then
> + AC_DEFINE(HAVE_AMDGPU, 1, [Have amdgpu support])
> +fi
> +
Unless you're planning to add support to libkms you can drop this.
> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> new file mode 100644
> index 0000000..d248d77
> --- /dev/null
> +++ b/include/drm/amdgpu_drm.h
> +struct drm_amdgpu_cs_in {
> + /** Rendering context id */
> + uint32_t ctx_id;
> + /** Handle of resource list associated with CS */
> + uint32_t bo_list_handle;
> + uint32_t num_chunks;
> + uint32_t pad;
> + /* this points to uint64_t * which point to cs chunks */
> + uint64_t chunks;
> +};
> +
> +struct drm_amdgpu_cs_out {
> + uint64_t handle;
> +};
> +
> +union drm_amdgpu_cs {
> + struct drm_amdgpu_cs_in in;
> + struct drm_amdgpu_cs_out out;
> +};
> +
Don't think I've seen similar contruct in any of the other drm drivers.
(Genuenly curious) What benefit does it bring ?
I think you want to add a trailing padding for the following structures.
I'm thinking of a case where, old 64bit userspace feeds in the last u32
as rubbish, to the kernel while the latter uses an updated/expanded
struct. In such case the driver will either reject the request, or worse
use the rubbish data.
struct drm_amdgpu_gem_metadata
struct drm_amdgpu_wait_cs_in
struct drm_amdgpu_info_hw_ip
struct drm_amdgpu_info_device
Cheers
Emil
P.S. Based on the coding style seems like this is the combined work of
3+ developers. Anyway glad to see that it's out :-)
More information about the dri-devel
mailing list