[PATCH 1/2] drm: add libdrm_amdgpu

Thu Apr 23 09:23:18 PDT 2015

Hi Alex,
On 21/04/15 16:39, Alex Deucher wrote:
> This is the new ioctl wrapper used by the new admgpu driver.
> It's primarily used by xf86-video-amdgpu and mesa.
> 
Glad to see amdgpu finally out :-)

Can we annotate the private symbols with drm_private, in both
declaration and definition ? It will hide the symbols, plus will make
the leap towards Solaris support a bit smaller.

> Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
> ---
>  Makefile.am                |    5 +
>  amdgpu/Makefile.am         |   55 ++
>  amdgpu/amdgpu.h            | 1278 ++++++++++++++++++++++++++++++++++++++++++++
>  amdgpu/amdgpu_bo.c         |  622 +++++++++++++++++++++
>  amdgpu/amdgpu_cs.c         |  981 ++++++++++++++++++++++++++++++++++
>  amdgpu/amdgpu_device.c     |  242 +++++++++
>  amdgpu/amdgpu_gpu_info.c   |  275 ++++++++++
>  amdgpu/amdgpu_internal.h   |  210 ++++++++
>  amdgpu/amdgpu_vamgr.c      |  169 ++++++
>  amdgpu/libdrm_amdgpu.pc.in |   10 +
>  amdgpu/util_double_list.h  |  146 +++++
>  amdgpu/util_hash.c         |  382 +++++++++++++
>  amdgpu/util_hash.h         |   99 ++++
>  amdgpu/util_hash_table.c   |  257 +++++++++
>  amdgpu/util_hash_table.h   |   65 +++
>  amdgpu/util_math.h         |   32 ++
>  configure.ac               |   20 +
>  include/drm/amdgpu_drm.h   |  600 +++++++++++++++++++++
>  18 files changed, 5448 insertions(+)
>  create mode 100644 amdgpu/Makefile.am
>  create mode 100644 amdgpu/amdgpu.h
>  create mode 100644 amdgpu/amdgpu_bo.c
>  create mode 100644 amdgpu/amdgpu_cs.c
>  create mode 100644 amdgpu/amdgpu_device.c
>  create mode 100644 amdgpu/amdgpu_gpu_info.c
>  create mode 100644 amdgpu/amdgpu_internal.h
>  create mode 100644 amdgpu/amdgpu_vamgr.c
>  create mode 100644 amdgpu/libdrm_amdgpu.pc.in
>  create mode 100644 amdgpu/util_double_list.h
>  create mode 100644 amdgpu/util_hash.c
>  create mode 100644 amdgpu/util_hash.h
>  create mode 100644 amdgpu/util_hash_table.c
>  create mode 100644 amdgpu/util_hash_table.h
>  create mode 100644 amdgpu/util_math.h
>  create mode 100644 include/drm/amdgpu_drm.h
> 

> --- /dev/null
> +++ b/amdgpu/Makefile.am

> +AM_CFLAGS = \
> +	$(WARN_CFLAGS)

> -Wno-switch-enum \
>From a quick look you should be OK without this. If that's not the case,
it might be worth looking into ?

> +	-I$(top_srcdir) \
> +	-I$(top_srcdir)/amdgpu \
You can drop this line.

> +	$(PTHREADSTUBS_CFLAGS) \
> +	-I$(top_srcdir)/include/drm
> +
> +libdrm_amdgpu_la_LTLIBRARIES = libdrm_amdgpu.la
> +libdrm_amdgpu_ladir = $(libdir)
> +libdrm_amdgpu_la_LDFLAGS = -version-number 1:0:1 -no-undefined
Considering that this is the first public version shouldn't this be 1:0:0 ?

> +libdrm_amdgpu_la_LIBADD = ../libdrm.la @PTHREADSTUBS_LIBS@
> +
> +libdrm_amdgpu_la_SOURCES = \
> +	amdgpu_gpu_info.c \
> +	amdgpu_device.c \
> +	amdgpu_bo.c \
> +	util_hash.c \
> +	util_hash_table.c \
> +	amdgpu_vamgr.c \
> +	amdgpu_cs.c
> +
Please include all files, and sort them alphabetically, something like:

amdgpu_bo.c
amdgpu_cs.c
amdgpu_device.c
amdgpu_gpu_info.c
amdgpu_internal.h
amdgpu_vamgr.c
util_double_list.h
util_hash.c
util_hash.h
util_hash_table.c
util_hash_table.h
util_math.h

One might want to drop util_double_list.h if you go for my suggestion below.

> +nodist_EXTRA_libdrm_amdgpu_la_SOURCES = dummy.cxx
> +
There are no C++ sources or static libs based on such so you can drop this.

> +EXTRA_DIST = libdrm_amdgpu.pc.in
You don't need this.

> --- /dev/null
> +++ b/amdgpu/amdgpu.h

> +#ifndef _amdgpu_h_
> +#define _amdgpu_h_
> +
Capitalise the header guard name ?

> +#include <stdint.h>
> +#include <stdbool.h>
> +

Albeit not so common in libdrm, you can add make the header C++ safe.

#if defined(__cplusplus)
extern "C" {
#endif

> +struct amdgpu_bo_alloc_request {
> +	/** Allocation request. It must be aligned correctly. */
> +	uint64_t alloc_size;
> +
> +	/**
> +	 * It may be required to have some specific alignment requirements
> +	 * for physical back-up storage (e.g. for displayable surface).
> +	 * If 0 there is no special alignment requirement
> +	 */
> +	uint64_t phys_alignment;
> +
> +	/**
> +	 * UMD should specify where to allocate memory and how it
> +	 * will be accessed by the CPU.
> +	 */
> +	uint32_t preferred_heap;
> +
Worth adding a pad ?

> +	/** Additional flags passed on allocation */
> +	uint64_t flags;
> +};

> +struct amdgpu_bo_info {
> +	/** Allocated memory size */
> +	uint64_t alloc_size;
> +
> +	/**
> +	 * It may be required to have some specific alignment requirements
> +	 * for physical back-up storage.
> +	 */
> +	uint64_t phys_alignment;
> +
> +	/**
> +	 * Assigned virtual MC Base Address.
> +	 * \note  This information will be returned only if this buffer was
> +	 * allocated in the same process otherwise 0 will be returned.
> +	*/
> +	uint64_t virtual_mc_base_address;
> +
> +	/** Heap where to allocate memory. */
> +	uint32_t preferred_heap;
> +
Ditto.

> +	/** Additional allocation flags. */
> +	uint64_t alloc_flags;
> +
> +	/** Metadata associated with buffer if any. */
> +	struct amdgpu_bo_metadata metadata;
> +};

> +struct amdgpu_gds_alloc_info {
> +	/** Handle assigned to gds allocation */
> +	amdgpu_bo_handle resource_handle;
> +
> +	/** How much was really allocated */
> +	uint32_t   gds_memory_size;
> +
> +	/** Number of GWS resources allocated */
> +	uint32_t   gws;
> +
> +	/** Number of OA resources allocated */
> +	uint32_t   oa;
Ditto.

> +struct amdgpu_cs_ib_info {
> +	/** Special flags */
> +	uint64_t      flags;
> +
> +	/** Handle of command buffer */
> +	amdgpu_ib_handle ib_handle;
> +
> +	/**
> +	 * Size of Command Buffer to be submitted.
> +	 *   - The size is in units of dwords (4 bytes).
> +	 *   - Must be less or equal to the size of allocated IB
> +	 *   - Could be 0
> +	 */
> +	uint32_t       size;
Ditto.

> +struct amdgpu_cs_request {
> +	/** Specify flags with additional information */
> +	uint64_t	flags;
> +
> +	/** Specify HW IP block type to which to send the IB. */
> +	unsigned	ip_type;
> +
> +	/** IP instance index if there are several IPs of the same type. */
> +	unsigned	ip_instance;
> +
> +	/**
> +	 * Specify ring index of the IP. We could have several rings
> +	 * in the same IP. E.g. 0 for SDMA0 and 1 for SDMA1.
> +	 */
> +	uint32_t           ring;
> +
> +	/**
> +	 * Specify number of resource handles passed.
> +	 * Size of 'handles' array
> +	 *
> +	 */
> +	uint32_t number_of_resources;
> +
> +	/** Array of resources used by submission. */
> +	amdgpu_bo_handle   *resources;
> +
> +	/** Array of resources flags. This is optional and can be NULL. */
> +	uint8_t *resource_flags;
> +
> +	/** Number of IBs to submit in the field ibs. */
> +	uint32_t number_of_ibs;
> +
Ditto.

> +	/**
> +	 * IBs to submit. Those IBs will be submit together as single entity
> +	 */
> +	struct amdgpu_cs_ib_info *ibs;
> +};
> +
> +/**
> + * Structure describing request to check submission state using fence
> + *
> + * \sa amdgpu_cs_query_fence_status()
> + *
> +*/
> +struct amdgpu_cs_query_fence {
> +
> +	/** In which context IB was sent to execution */
> +	amdgpu_context_handle  context;
> +
> +	/** Timeout in nanoseconds. */
> +	uint64_t  timeout_ns;
> +
> +	/** To which HW IP type the fence belongs */
> +	unsigned  ip_type;
> +
> +	/** IP instance index if there are several IPs of the same type. */
> +	unsigned ip_instance;
> +
> +	/** Ring index of the HW IP */
> +	uint32_t      ring;
> +
Ditto.

> +	/** Flags */
> +	uint64_t  flags;
> +
> +	/** Specify fence for which we need to check
> +	 * submission status.*/
> +	uint64_t	fence;

> --- /dev/null
> +++ b/amdgpu/amdgpu_bo.c

> +#define _FILE_OFFSET_BITS 64
How about

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

If you feel against poluting all the C sources with it, you should
ensure that amdgpu_internal.h is included before any other header in
every source and header file.

> +int amdgpu_bo_cpu_map(amdgpu_bo_handle bo, void **cpu)
> +{
> +	union drm_amdgpu_gem_mmap args;
> +	void *ptr;
> +	int r;
> +
> +	pthread_mutex_lock(&bo->cpu_access_mutex);
> +
> +	if (bo->cpu_ptr) {
> +		/* already mapped */
> +		assert(bo->cpu_map_count > 0);
> +		bo->cpu_map_count++;
> +		*cpu = bo->cpu_ptr;
> +		pthread_mutex_unlock(&bo->cpu_access_mutex);
> +		return 0;
> +	}
> +
> +	assert(bo->cpu_map_count == 0);
> +
> +	memset(&args, 0, sizeof(args));
> +
> +	/* Query the buffer address (args.addr_ptr).
> +	 * The kernel driver ignores the offset and size parameters. */
> +	args.in.handle = bo->handle;
> +
> +	r = drmCommandWriteRead(bo->dev->fd, DRM_AMDGPU_GEM_MMAP, &args,
> +				sizeof(args));
> +	if (r) {
> +		pthread_mutex_unlock(&bo->cpu_access_mutex);
> +		return r;
> +	}
> +
> +	/* Map the buffer. */
> +	ptr = mmap(NULL, bo->alloc_size, PROT_READ | PROT_WRITE, MAP_SHARED,
Please use drm_mmap/drm_munmap.

> --- /dev/null
> +++ b/amdgpu/amdgpu_cs.c

> +static int amdgpu_cs_create_ib(amdgpu_device_handle dev,
> +			       amdgpu_context_handle context,
> +			       enum amdgpu_cs_ib_size ib_size,
> +			       amdgpu_ib_handle *ib)
> +{

> +	new_ib = malloc(sizeof(struct amdgpu_ib));
> +	if (NULL == new_ib) {
Use new_ib == NULL or !new_ib ? There are a few other places that can do so.

> --- /dev/null
> +++ b/amdgpu/amdgpu_device.c

> +static unsigned fd_hash(void *key)
> +{
> +	int fd = PTR_TO_UINT(key);
> +	struct stat stat;
> +	fstat(fd, &stat);
> +
> +	if (!S_ISCHR(stat.st_mode))
> +		return stat.st_dev ^ stat.st_ino;
I'm a bit puzzled, how is this possible ?

> +	else
> +		return stat.st_dev ^ (stat.st_rdev & RENDERNODE_MINOR_MASK);
I'm not 100% sure that there won't be any side-effects with the idea of
using primary and render node interchangeably.

Plus one cannot mask out to get from the primary device to the render
one, or vice-versa. The minor number is not reliable (rightfully so),
thus one can use drmGet(Primary|Render)DeviceNameFromFd or introduce
their fd counterpart.

> +}
> +
> +static int fd_compare(void *key1, void *key2)
> +{
> +	int fd1 = PTR_TO_UINT(key1);
> +	int fd2 = PTR_TO_UINT(key2);
> +	struct stat stat1, stat2;
> +	fstat(fd1, &stat1);
> +	fstat(fd2, &stat2);
> +
> +	if (!S_ISCHR(stat1.st_mode) || !S_ISCHR(stat2.st_mode))
> +		return stat1.st_dev != stat2.st_dev ||
> +			stat1.st_ino != stat2.st_ino;
> +        else
> +		return major(stat1.st_rdev) != major(stat2.st_rdev) ||
> +			(minor(stat1.st_rdev) & RENDERNODE_MINOR_MASK) !=
> +			(minor(stat2.st_rdev) & RENDERNODE_MINOR_MASK);
Same comments apply.

> +}
> +
> +/**
> +* Get the authenticated form fd,
> +*
> +* \param   fd   - \c [in]  File descriptor for AMD GPU device
> +* \param   auth - \c [out] Pointer to output the fd is authenticated or not
> +*                          A render node fd, output auth = 0
> +*                          A legacy fd, get the authenticated for compatibility root
> +*
> +* \return   0 on success\n
> +*          >0 - AMD specific error code\n
> +*          <0 - Negative POSIX Error code
> +*/
> +static int amdgpu_get_auth(int fd, int *auth)
> +{
> +	int r = 0;
> +	drm_client_t client;
> +	struct stat stat1;
> +	fstat(fd,&stat1);
> +	if (minor(stat1.st_rdev) & ~RENDERNODE_MINOR_MASK)/* find a render node fd */
Please use drmGetNodeTypeFromFd().

> --- /dev/null
> +++ b/amdgpu/amdgpu_internal.h
> @@ -0,0 +1,210 @@

> +#ifndef _amdgpu_internal_h_
> +#define _amdgpu_internal_h_
> +
Capital letters for the header guard ?

> +#ifdef HAVE_CONFIG_H
> +#include "config.h"
> +#endif
> +
> +#include <assert.h>
> +#include <pthread.h>
> +#include "xf86atomic.h"
> +#include "amdgpu.h"
> +#include "util_double_list.h"
> +
> +#define ARRAY_SIZE(a) (sizeof(a) / sizeof((a)[0]))
> +
Drop this and use util_math.h where needed ?

> +/**
> + * Increment src and decrement dst as if we were updating references
> + * for an assignment between 2 pointers of some objects.
> + *
> + * \return  true if dst is 0
> + */
> +static inline bool update_references(atomic_t *dst, atomic_t *src)
> +{
> +	if (dst != src) {
> +		/* bump src first */
> +		if (src) {
> +			assert(atomic_read(src) > 0);
> +			atomic_inc(src);
> +		}
> +		if (dst) {
> +			assert(atomic_read(dst) > 0);
> +			return atomic_dec_and_test(dst);
Am I imagining something or does the assert conditions look a bit strange ?

> diff --git a/amdgpu/util_double_list.h b/amdgpu/util_double_list.h
> new file mode 100644
> index 0000000..3f48ae2
> --- /dev/null
> +++ b/amdgpu/util_double_list.h
There are already two identical copies of this file - let's not add a
third one.

freedreno/list.h
tests/radeon/list.h

How about we move it one level up, and update the current users (incl.
Makefile.sources)

> --- /dev/null
> +++ b/amdgpu/util_hash.c

> --- /dev/null
> +++ b/amdgpu/util_hash_table.c

Can one use the existing drmHash functions like omap and freedreno ?
Would be nice to avoid going the mesa's route which iirc has four
different implementations.

> --- /dev/null
> +++ b/amdgpu/util_math.h
FWIW we can move this a level up, so that others can use it. Obviously
there is no reason to be part of this series.

> diff --git a/configure.ac b/configure.ac
> index 155d577..509f2d4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -36,6 +36,7 @@ m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
>  
>  # Check for programs
>  AC_PROG_CC
> +AC_PROG_CXX
>  
There are no C++ sources, so you don't need a C++ compiler.

> @@ -236,6 +242,9 @@ if test "x$drm_cv_atomic_primitives" = "xnone"; then
>  	LIBDRM_ATOMICS_NOT_FOUND_MSG($RADEON, radeon, Radeon, radeon)
>  	RADEON=no
>  
> +	LIBDRM_ATOMICS_NOT_FOUND_MSG($AMDGPU, amdgpu, AMD, amdgpu)
> +	AMDGPU=no
> +
Glad to see this, unlike the original copy/paste of an insanely long
message previously :)

> +if test "x$AMDGPU" = xyes; then
> +	AC_DEFINE(HAVE_AMDGPU, 1, [Have amdgpu support])
> +fi
> +
Unless you're planning to add support to libkms you can drop this.

> diff --git a/include/drm/amdgpu_drm.h b/include/drm/amdgpu_drm.h
> new file mode 100644
> index 0000000..d248d77
> --- /dev/null
> +++ b/include/drm/amdgpu_drm.h

> +struct drm_amdgpu_cs_in {
> +	/** Rendering context id */
> +	uint32_t		ctx_id;
> +	/**  Handle of resource list associated with CS */
> +	uint32_t		bo_list_handle;
> +	uint32_t		num_chunks;
> +	uint32_t		pad;
> +	/* this points to uint64_t * which point to cs chunks */
> +	uint64_t		chunks;
> +};
> +
> +struct drm_amdgpu_cs_out {
> +	uint64_t handle;
> +};
> +
> +union drm_amdgpu_cs {
> +       struct drm_amdgpu_cs_in in;
> +       struct drm_amdgpu_cs_out out;
> +};
> +
Don't think I've seen similar contruct in any of the other drm drivers.
(Genuenly curious) What benefit does it bring ?

I think you want to add a trailing padding for the following structures.
I'm thinking of a case where, old 64bit userspace feeds in the last u32
as rubbish, to the kernel while the latter uses an updated/expanded
struct. In such case the driver will either reject the request, or worse
use the rubbish data.

struct drm_amdgpu_gem_metadata
struct drm_amdgpu_wait_cs_in

struct drm_amdgpu_info_hw_ip
struct drm_amdgpu_info_device

Cheers
Emil

P.S. Based on the coding style seems like this is the combined work of
3+ developers. Anyway glad to see that it's out :-)