[Intel-gfx] [RFC] libdrm_intel: Add support for userptr objects

Fri May 2 12:27:45 CEST 2014

On 05/01/2014 07:47 PM, Ben Widawsky wrote:
> On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> Allow userptr objects to be created and used via libdrm_intel.
>>
>> At the moment tiling and mapping to GTT aperture is not supported
>> due hardware limitations across different generations and uncertainty
>> about its usefulness.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> ---
>>   include/drm/i915_drm.h    |  16 +++++
>>   intel/intel_bufmgr.c      |  13 ++++
>>   intel/intel_bufmgr.h      |   5 ++
>>   intel/intel_bufmgr_gem.c  | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>>   intel/intel_bufmgr_priv.h |  12 +++-
>>   5 files changed, 198 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index 2f4eb8c..d32ef99 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_I915_GEM_GET_CACHING	0x30
>>   #define DRM_I915_REG_READ		0x31
>>   #define DRM_I915_GET_RESET_STATS	0x32
>> +#define DRM_I915_GEM_USERPTR		0x34
>>
>>   #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>>   #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
>> @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
>>   #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY	DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>>   #define DRM_IOCTL_I915_REG_READ			DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>>   #define DRM_IOCTL_I915_GET_RESET_STATS		DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
>> +#define DRM_IOCTL_I915_GEM_USERPTR	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR,  struct drm_i915_gem_userptr)
>>
>>   /* Allow drivers to submit batchbuffers directly to hardware, relying
>>    * on the security mechanisms provided by hardware.
>> @@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
>>   	__u64 offset;
>>   };
>>
>> +struct drm_i915_gem_userptr {
>> +	__u64 user_ptr;
>> +	__u64 user_size;
>
> Adding alignment might be a safe bet.

Hmmmm, at first I thought you are raising a good point. But then I don't 
understand why I don't see any aligned types in 
linux/include/uapi/drm/i915_drm.h ?

>> +	__u32 flags;
>> +#define I915_USERPTR_READ_ONLY 0x1
>
> I'll eventually want something like:
> #define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */

Plenty of free flags. :)

>> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
>> +	/**
>> +	* Returned handle for the object.
>> +	*
>> +	* Object handles are nonzero.
>> +	*/
>> +	__u32 handle;
>> +};
>> +
>>   struct drm_i915_gem_set_domain {
>>   	/** Handle for the object */
>>   	__u32 handle;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 905556f..7f3d795 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>>   	return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
>>   }
>>
>> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> +					  const char *name, void *addr,
>> +					  uint32_t tiling_mode,
>> +					  uint32_t stride,
>> +					  unsigned long size,
>> +					  unsigned long flags)
>> +{
>> +	if (bufmgr->bo_alloc_userptr)
>> +		return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
>> +						stride, size, flags);
>> +	return NULL;
>> +}
>> +
>>   drm_intel_bo *
>>   drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>>                           int x, int y, int cpp, uint32_t *tiling_mode,
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 9383c72..be83a56 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>>   					    const char *name,
>>   					    unsigned long size,
>>   					    unsigned int alignment);
>> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> +					const char *name,
>> +					void *addr, uint32_t tiling_mode,
>> +					uint32_t stride, unsigned long size,
>> +					unsigned long flags);
>>   drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
>>   				       const char *name,
>>   				       int x, int y, int cpp,
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index 007a6d8..7cad945 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -182,6 +182,11 @@ struct _drm_intel_bo_gem {
>>   	void *mem_virtual;
>>   	/** GTT virtual address for the buffer, saved across map/unmap cycles */
>>   	void *gtt_virtual;
>> +	/**
>> +	 * Virtual address of the buffer allocated by user, used for userptr
>> +	 * objects only.
>> +	 */
>> +	void *user_virtual;
>
> Can we not just re-use mem_virtual? Do you intend to provide the ability
> to have two distinct CPU mappings to the same object?

Haven't heard anything like that. By quick look at the code seems that 
reusing mem_virtual would require special casing some parts which touch 
it. So it may be it is actually cleaner as it is.

>>   	int map_count;
>>   	drmMMListHead vma_list;
>>
>> @@ -221,6 +226,11 @@ struct _drm_intel_bo_gem {
>>   	bool idle;
>>
>>   	/**
>> +	 * Boolean of whether this buffer was allocated with userptr
>> +	 */
>> +	bool is_userptr;
>> +
>> +	/**
>>   	 * Size in bytes of this buffer and its relocation descendents.
>>   	 *
>>   	 * Used to avoid costly tree walking in
>> @@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>>   					       tiling, stride);
>>   }
>>
>> +static drm_intel_bo *
>> +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> +				const char *name,
>> +				void *addr,
>> +				uint32_t tiling_mode,
>> +				uint32_t stride,
>> +				unsigned long size,
>> +				unsigned long flags)
>
> I'd vote for dropping tiling_mode, stride. Make size and flags
> explicitly sized types. Not doing this has bitten us already.

Some people expressed interest in tiling so I thought to preserve it in 
the API.

Kernel even allows it, and it is just because Chris found bspec 
references saying it will break badly on some platforms, that I (or 
maybe it was both of us) decided to disable it on this level for time 
being.

So I think it is just a matter of coming up with a blacklist and it 
would be possible to use it then.

>> +{
>> +	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
>> +	drm_intel_bo_gem *bo_gem;
>> +	int ret;
>> +	struct drm_i915_gem_userptr userptr;
>> +
>> +	/* Tiling with userptr surfaces is not supported
>> +	 * on all hardware so refuse it for time being.
>> +	 */
>> +	if (tiling_mode != I915_TILING_NONE)
>> +		return NULL;
>> +
>> +	bo_gem = calloc(1, sizeof(*bo_gem));
>> +	if (!bo_gem)
>> +		return NULL;
>> +
>> +	bo_gem->bo.size = size;
>> +
>> +	VG_CLEAR(userptr);
>> +	userptr.user_ptr = (__u64)((unsigned long)addr);
>
> How are we getting away with non page aligned addresses? I'd vote for
> moving some of the memalign checks into here if all cases require page
> aligned.

Kernel will reject any unaligned address or size in the ioctl.

>> +	userptr.user_size = size;
>> +	userptr.flags = flags;
>> +
>> +	ret = drmIoctl(bufmgr_gem->fd,
>> +			DRM_IOCTL_I915_GEM_USERPTR,
>> +			&userptr);
>> +	if (ret != 0) {
>> +		DBG("bo_create_userptr: "
>> +		    "ioctl failed with user ptr %p size 0x%lx, "
>> +		    "user flags 0x%lx\n", addr, size, flags);
>> +		free(bo_gem);
>> +		return NULL;
>> +	}
>> +
>> +	bo_gem->gem_handle = userptr.handle;
>> +	bo_gem->bo.handle = bo_gem->gem_handle;
>> +	bo_gem->bo.bufmgr    = bufmgr;
>> +	bo_gem->is_userptr   = true;
>> +	bo_gem->bo.virtual   = addr;
>> +	/* Save the address provided by user */
>> +	bo_gem->user_virtual = addr;
>> +	bo_gem->tiling_mode  = I915_TILING_NONE;
>> +	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
>> +	bo_gem->stride       = 0;
>> +
>> +	DRMINITLISTHEAD(&bo_gem->name_list);
>> +	DRMINITLISTHEAD(&bo_gem->vma_list);
>> +
>> +	bo_gem->name = name;
>> +	atomic_set(&bo_gem->refcount, 1);
>> +	bo_gem->validate_index = -1;
>> +	bo_gem->reloc_tree_fences = 0;
>> +	bo_gem->used_as_reloc_target = false;
>> +	bo_gem->has_error = false;
>> +	bo_gem->reusable = false;
>
> TODO for someone: Some room to consolidate this with the other create
> functions.
>
>> +
>> +	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
>
> I'm looking at the code and trying to figure out how this is relevant.
> It's not introduced by you directly. Just looking at the math it all
> seems to kind of mix-up aperture and non-aperture usages. Just talking
> to myself...
>
>> +
>> +	DBG("bo_create_userptr: "
>> +	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
>> +		addr, bo_gem->gem_handle, bo_gem->name,
>> +		size, stride, tiling_mode);
>> +
>> +	return &bo_gem->bo;
>> +}
>> +
>>   /**
>>    * Returns a drm_intel_bo wrapping the given buffer object handle.
>>    *
>> @@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>>   	struct drm_i915_gem_set_domain set_domain;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr) {
>> +		/* Return the same user ptr */
>> +		bo->virtual = bo_gem->user_virtual;
>> +		return 0;
>> +	}
>> +
>>   	pthread_mutex_lock(&bufmgr_gem->lock);
>>
>>   	if (bo_gem->map_count++ == 0)
>> @@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo)
>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	if (bo_gem->map_count++ == 0)
>>   		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
>>
>> @@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
>>
>>   static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
>>   {
>> -	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +	drm_intel_bufmgr_gem *bufmgr_gem;
>
> I'm missing why this hunk was changed. But, okay.

Just to ensure NULL bo is handled nicely I guess.

>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>>   	int ret = 0;
>>
>>   	if (bo == NULL)
>>   		return 0;
>>
>> +	if (bo_gem->is_userptr)
>> +		return 0;
>> +
>> +	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +
>>   	pthread_mutex_lock(&bufmgr_gem->lock);
>>
>>   	if (bo_gem->map_count <= 0) {
>> @@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
>>   	struct drm_i915_gem_pwrite pwrite;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	VG_CLEAR(pwrite);
>>   	pwrite.handle = bo_gem->gem_handle;
>>   	pwrite.offset = offset;
>> @@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
>>   	struct drm_i915_gem_pread pread;
>>   	int ret;
>>
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	VG_CLEAR(pread);
>>   	pread.handle = bo_gem->gem_handle;
>>   	pread.offset = offset;
>> @@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
>>   	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>>   	int ret;
>>
>> +	/* Tiling with userptr surfaces is not supported
>> +	 * on all hardware so refuse it for time being.
>> +	 */
>> +	if (bo_gem->is_userptr)
>> +		return -EINVAL;
>> +
>>   	/* Linear buffers have no stride. By ensuring that we only ever use
>>   	 * stride 0 with linear buffers, we simplify our code.
>>   	 */
>> @@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
>>   	bo_gem->aub_annotation_count = count;
>>   }
>>
>> +static bool
>> +has_userptr(int fd)
>> +{
>> +	int ret;
>> +	void *ptr;
>> +	long pgsz;
>> +	struct drm_i915_gem_userptr userptr;
>> +	struct drm_gem_close close_bo;
>> +
>> +	pgsz = sysconf(_SC_PAGESIZE);
>> +	assert(ret > 0);
>> +
>> +	ret = posix_memalign(&ptr, pgsz, pgsz);
>> +	assert(ret == 0);
>> +
>> +	memset(&userptr, 0, sizeof(userptr));
>> +	userptr.user_ptr = (__u64)(unsigned long)ptr;
>> +	userptr.user_size = pgsz;
>> +
>> +retry:
>> +	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
>> +	if (ret) {
>> +		if (errno == ENODEV && userptr.flags == 0) {
>> +			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
>> +			goto retry;
>> +		}
>> +		free(ptr);
>> +		return false;
>> +	}
>> +
>> +	close_bo.handle = userptr.handle;
>> +	ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
>> +	assert(ret == 0);
>> +	free(ptr);
>> +
>> +	return true;
>> +}
>
> I think a param for USERPTR is warranted. Or did we decide not to do
> this already?

I asked for it, but people with authority said no. It is all very weak, 
fragile and dangerous anyway - param or feature detect like the above.

We would really need a proper feature detect mechanism, like text based 
in sysfs or something.

>> +
>>   /**
>>    * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
>>    * and manage map buffer objections.
>> @@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>>   	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>>   	bufmgr_gem->has_relaxed_fencing = ret == 0;
>>
>> +	if (has_userptr(fd))
>> +		bufmgr_gem->bufmgr.bo_alloc_userptr =
>> +			drm_intel_gem_bo_alloc_userptr;
>> +
>>   	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
>>   	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>>   	bufmgr_gem->has_wait_timeout = ret == 0;
>> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
>> index 2592d42..3aa1abb 100644
>> --- a/intel/intel_bufmgr_priv.h
>> +++ b/intel/intel_bufmgr_priv.h
>> @@ -60,7 +60,17 @@ struct _drm_intel_bufmgr {
>>   					      const char *name,
>>   					      unsigned long size,
>>   					      unsigned int alignment);
>> -
>> +	/**
>> +	 * Allocate a buffer object from an existing user accessible
>> +	 * address malloc'd with the provided size.
>> +	 * Alignment is used when mapping to the gtt.
>> +	 * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
>> +	 */
>> +	drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
>> +					  const char *name, void *addr,
>> +					  uint32_t tiling_mode, uint32_t stride,
>> +					  unsigned long size,
>> +					  unsigned long flags);
>>   	/**
>>   	 * Allocate a tiled buffer object.
>>   	 *
>
> Probably don't need the special function pointer yet since I don't think
> we can yet envision use cases which will require any kind of special
> handling. A simple has_userptr in bufmgr_gem will probably suffice. But
> I don't care too much either way.

What do you mean?

> I couldn't spot any real bugs...

Cool, I am glad there is some interest for this.

Regards,

Tvrtko