[Intel-gfx] [RFC] libdrm_intel: Add support for userptr objects
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri May 2 12:27:45 CEST 2014
On 05/01/2014 07:47 PM, Ben Widawsky wrote:
> On Wed, Feb 26, 2014 at 04:41:41PM +0000, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> Allow userptr objects to be created and used via libdrm_intel.
>>
>> At the moment tiling and mapping to GTT aperture is not supported
>> due to hardware limitations across different generations and uncertainty
>> about its usefulness.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> ---
>> include/drm/i915_drm.h | 16 +++++
>> intel/intel_bufmgr.c | 13 ++++
>> intel/intel_bufmgr.h | 5 ++
>> intel/intel_bufmgr_gem.c | 154 +++++++++++++++++++++++++++++++++++++++++++++-
>> intel/intel_bufmgr_priv.h | 12 +++-
>> 5 files changed, 198 insertions(+), 2 deletions(-)
>>
>> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
>> index 2f4eb8c..d32ef99 100644
>> --- a/include/drm/i915_drm.h
>> +++ b/include/drm/i915_drm.h
>> @@ -223,6 +223,7 @@ typedef struct _drm_i915_sarea {
>> #define DRM_I915_GEM_GET_CACHING 0x30
>> #define DRM_I915_REG_READ 0x31
>> #define DRM_I915_GET_RESET_STATS 0x32
>> +#define DRM_I915_GEM_USERPTR 0x34
>>
>> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
>> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
>> @@ -273,6 +274,7 @@ typedef struct _drm_i915_sarea {
>> #define DRM_IOCTL_I915_GEM_CONTEXT_DESTROY DRM_IOW (DRM_COMMAND_BASE + DRM_I915_GEM_CONTEXT_DESTROY, struct drm_i915_gem_context_destroy)
>> #define DRM_IOCTL_I915_REG_READ DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_REG_READ, struct drm_i915_reg_read)
>> #define DRM_IOCTL_I915_GET_RESET_STATS DRM_IOWR (DRM_COMMAND_BASE + DRM_I915_GET_RESET_STATS, struct drm_i915_reset_stats)
>> +#define DRM_IOCTL_I915_GEM_USERPTR DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_USERPTR, struct drm_i915_gem_userptr)
>>
>> /* Allow drivers to submit batchbuffers directly to hardware, relying
>> * on the security mechanisms provided by hardware.
>> @@ -498,6 +500,20 @@ struct drm_i915_gem_mmap_gtt {
>> __u64 offset;
>> };
>>
>> +struct drm_i915_gem_userptr {
>> + __u64 user_ptr;
>> + __u64 user_size;
>
> Adding alignment might be a safe bet.
Hmmmm, at first I thought you were raising a good point. But then I don't
understand why I don't see any aligned types in
linux/include/uapi/drm/i915_drm.h?
>> + __u32 flags;
>> +#define I915_USERPTR_READ_ONLY 0x1
>
> I'll eventually want something like:
> #define I915_MIRRORED_GVA 0x2 /* Request the same GPU address */
Plenty of free flags. :)
>> +#define I915_USERPTR_UNSYNCHRONIZED 0x80000000
>> + /**
>> + * Returned handle for the object.
>> + *
>> + * Object handles are nonzero.
>> + */
>> + __u32 handle;
>> +};
>> +
>> struct drm_i915_gem_set_domain {
>> /** Handle for the object */
>> __u32 handle;
>> diff --git a/intel/intel_bufmgr.c b/intel/intel_bufmgr.c
>> index 905556f..7f3d795 100644
>> --- a/intel/intel_bufmgr.c
>> +++ b/intel/intel_bufmgr.c
>> @@ -60,6 +60,19 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>> return bufmgr->bo_alloc_for_render(bufmgr, name, size, alignment);
>> }
>>
>> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> + const char *name, void *addr,
>> + uint32_t tiling_mode,
>> + uint32_t stride,
>> + unsigned long size,
>> + unsigned long flags)
>> +{
>> + if (bufmgr->bo_alloc_userptr)
>> + return bufmgr->bo_alloc_userptr(bufmgr, name, addr, tiling_mode,
>> + stride, size, flags);
>> + return NULL;
>> +}
>> +
>> drm_intel_bo *
>> drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>> int x, int y, int cpp, uint32_t *tiling_mode,
>> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
>> index 9383c72..be83a56 100644
>> --- a/intel/intel_bufmgr.h
>> +++ b/intel/intel_bufmgr.h
>> @@ -113,6 +113,11 @@ drm_intel_bo *drm_intel_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
>> const char *name,
>> unsigned long size,
>> unsigned int alignment);
>> +drm_intel_bo *drm_intel_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> + const char *name,
>> + void *addr, uint32_t tiling_mode,
>> + uint32_t stride, unsigned long size,
>> + unsigned long flags);
>> drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
>> const char *name,
>> int x, int y, int cpp,
>> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
>> index 007a6d8..7cad945 100644
>> --- a/intel/intel_bufmgr_gem.c
>> +++ b/intel/intel_bufmgr_gem.c
>> @@ -182,6 +182,11 @@ struct _drm_intel_bo_gem {
>> void *mem_virtual;
>> /** GTT virtual address for the buffer, saved across map/unmap cycles */
>> void *gtt_virtual;
>> + /**
>> + * Virtual address of the buffer allocated by user, used for userptr
>> + * objects only.
>> + */
>> + void *user_virtual;
>
> Can we not just re-use mem_virtual? Do you intend to provide the ability
> to have two distinct CPU mappings to the same object?
Haven't heard anything like that. From a quick look at the code it seems
that reusing mem_virtual would require special-casing some parts which
touch it. So it may be that it is actually cleaner as it is.
>> int map_count;
>> drmMMListHead vma_list;
>>
>> @@ -221,6 +226,11 @@ struct _drm_intel_bo_gem {
>> bool idle;
>>
>> /**
>> + * Boolean of whether this buffer was allocated with userptr
>> + */
>> + bool is_userptr;
>> +
>> + /**
>> * Size in bytes of this buffer and its relocation descendents.
>> *
>> * Used to avoid costly tree walking in
>> @@ -847,6 +857,80 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
>> tiling, stride);
>> }
>>
>> +static drm_intel_bo *
>> +drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
>> + const char *name,
>> + void *addr,
>> + uint32_t tiling_mode,
>> + uint32_t stride,
>> + unsigned long size,
>> + unsigned long flags)
>
> I'd vote for dropping tiling_mode, stride. Make size and flags
> explicitly sized types. Not doing this has bitten us already.
Some people expressed interest in tiling so I thought to preserve it in
the API.
The kernel even allows it, and it is just because Chris found bspec
references saying it will break badly on some platforms that I (or
maybe it was both of us) decided to disable it at this level for the
time being.
So I think it is just a matter of coming up with a blacklist and it
would be possible to use it then.
>> +{
>> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
>> + drm_intel_bo_gem *bo_gem;
>> + int ret;
>> + struct drm_i915_gem_userptr userptr;
>> +
>> + /* Tiling with userptr surfaces is not supported
>> + * on all hardware so refuse it for time being.
>> + */
>> + if (tiling_mode != I915_TILING_NONE)
>> + return NULL;
>> +
>> + bo_gem = calloc(1, sizeof(*bo_gem));
>> + if (!bo_gem)
>> + return NULL;
>> +
>> + bo_gem->bo.size = size;
>> +
>> + VG_CLEAR(userptr);
>> + userptr.user_ptr = (__u64)((unsigned long)addr);
>
> How are we getting away with non page aligned addresses? I'd vote for
> moving some of the memalign checks into here if all cases require page
> aligned.
Kernel will reject any unaligned address or size in the ioctl.
>> + userptr.user_size = size;
>> + userptr.flags = flags;
>> +
>> + ret = drmIoctl(bufmgr_gem->fd,
>> + DRM_IOCTL_I915_GEM_USERPTR,
>> + &userptr);
>> + if (ret != 0) {
>> + DBG("bo_create_userptr: "
>> + "ioctl failed with user ptr %p size 0x%lx, "
>> + "user flags 0x%lx\n", addr, size, flags);
>> + free(bo_gem);
>> + return NULL;
>> + }
>> +
>> + bo_gem->gem_handle = userptr.handle;
>> + bo_gem->bo.handle = bo_gem->gem_handle;
>> + bo_gem->bo.bufmgr = bufmgr;
>> + bo_gem->is_userptr = true;
>> + bo_gem->bo.virtual = addr;
>> + /* Save the address provided by user */
>> + bo_gem->user_virtual = addr;
>> + bo_gem->tiling_mode = I915_TILING_NONE;
>> + bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
>> + bo_gem->stride = 0;
>> +
>> + DRMINITLISTHEAD(&bo_gem->name_list);
>> + DRMINITLISTHEAD(&bo_gem->vma_list);
>> +
>> + bo_gem->name = name;
>> + atomic_set(&bo_gem->refcount, 1);
>> + bo_gem->validate_index = -1;
>> + bo_gem->reloc_tree_fences = 0;
>> + bo_gem->used_as_reloc_target = false;
>> + bo_gem->has_error = false;
>> + bo_gem->reusable = false;
>
> TODO for someone: Some room to consolidate this with the other create
> functions.
>
>> +
>> + drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
>
> I'm looking at the code and trying to figure out how this is relevant.
> It's not introduced by you directly. Just looking at the math it all
> seems to kind of mix-up aperture and non-aperture usages. Just talking
> to myself...
>
>> +
>> + DBG("bo_create_userptr: "
>> + "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
>> + addr, bo_gem->gem_handle, bo_gem->name,
>> + size, stride, tiling_mode);
>> +
>> + return &bo_gem->bo;
>> +}
>> +
>> /**
>> * Returns a drm_intel_bo wrapping the given buffer object handle.
>> *
>> @@ -1173,6 +1257,12 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>> struct drm_i915_gem_set_domain set_domain;
>> int ret;
>>
>> + if (bo_gem->is_userptr) {
>> + /* Return the same user ptr */
>> + bo->virtual = bo_gem->user_virtual;
>> + return 0;
>> + }
>> +
>> pthread_mutex_lock(&bufmgr_gem->lock);
>>
>> if (bo_gem->map_count++ == 0)
>> @@ -1241,6 +1331,9 @@ map_gtt(drm_intel_bo *bo)
>> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> int ret;
>>
>> + if (bo_gem->is_userptr)
>> + return -EINVAL;
>> +
>> if (bo_gem->map_count++ == 0)
>> drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
>>
>> @@ -1385,13 +1478,18 @@ int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
>>
>> static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
>> {
>> - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> + drm_intel_bufmgr_gem *bufmgr_gem;
>
> I'm missing why this hunk was changed. But, okay.
Just to ensure a NULL bo is handled nicely, I guess.
>> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> int ret = 0;
>>
>> if (bo == NULL)
>> return 0;
>>
>> + if (bo_gem->is_userptr)
>> + return 0;
>> +
>> + bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
>> +
>> pthread_mutex_lock(&bufmgr_gem->lock);
>>
>> if (bo_gem->map_count <= 0) {
>> @@ -1449,6 +1547,9 @@ drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
>> struct drm_i915_gem_pwrite pwrite;
>> int ret;
>>
>> + if (bo_gem->is_userptr)
>> + return -EINVAL;
>> +
>> VG_CLEAR(pwrite);
>> pwrite.handle = bo_gem->gem_handle;
>> pwrite.offset = offset;
>> @@ -1501,6 +1602,9 @@ drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
>> struct drm_i915_gem_pread pread;
>> int ret;
>>
>> + if (bo_gem->is_userptr)
>> + return -EINVAL;
>> +
>> VG_CLEAR(pread);
>> pread.handle = bo_gem->gem_handle;
>> pread.offset = offset;
>> @@ -2460,6 +2564,12 @@ drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
>> drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
>> int ret;
>>
>> + /* Tiling with userptr surfaces is not supported
>> + * on all hardware so refuse it for time being.
>> + */
>> + if (bo_gem->is_userptr)
>> + return -EINVAL;
>> +
>> /* Linear buffers have no stride. By ensuring that we only ever use
>> * stride 0 with linear buffers, we simplify our code.
>> */
>> @@ -3181,6 +3291,44 @@ drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
>> bo_gem->aub_annotation_count = count;
>> }
>>
>> +static bool
>> +has_userptr(int fd)
>> +{
>> + int ret;
>> + void *ptr;
>> + long pgsz;
>> + struct drm_i915_gem_userptr userptr;
>> + struct drm_gem_close close_bo;
>> +
>> + pgsz = sysconf(_SC_PAGESIZE);
>> + assert(ret > 0);
>> +
>> + ret = posix_memalign(&ptr, pgsz, pgsz);
>> + assert(ret == 0);
>> +
>> + memset(&userptr, 0, sizeof(userptr));
>> + userptr.user_ptr = (__u64)(unsigned long)ptr;
>> + userptr.user_size = pgsz;
>> +
>> +retry:
>> + ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
>> + if (ret) {
>> + if (errno == ENODEV && userptr.flags == 0) {
>> + userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
>> + goto retry;
>> + }
>> + free(ptr);
>> + return false;
>> + }
>> +
>> + close_bo.handle = userptr.handle;
>> + ret = drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
>> + assert(ret == 0);
>> + free(ptr);
>> +
>> + return true;
>> +}
>
> I think a param for USERPTR is warranted. Or did we decide not to do
> this already?
I asked for it, but people with authority said no. It is all very weak,
fragile and dangerous anyway - param or feature detect like the above.
We would really need a proper feature detect mechanism, like text based
in sysfs or something.
>> +
>> /**
>> * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
>> * and manage map buffer objections.
>> @@ -3273,6 +3421,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
>> ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>> bufmgr_gem->has_relaxed_fencing = ret == 0;
>>
>> + if (has_userptr(fd))
>> + bufmgr_gem->bufmgr.bo_alloc_userptr =
>> + drm_intel_gem_bo_alloc_userptr;
>> +
>> gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
>> ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
>> bufmgr_gem->has_wait_timeout = ret == 0;
>> diff --git a/intel/intel_bufmgr_priv.h b/intel/intel_bufmgr_priv.h
>> index 2592d42..3aa1abb 100644
>> --- a/intel/intel_bufmgr_priv.h
>> +++ b/intel/intel_bufmgr_priv.h
>> @@ -60,7 +60,17 @@ struct _drm_intel_bufmgr {
>> const char *name,
>> unsigned long size,
>> unsigned int alignment);
>> -
>> + /**
>> + * Allocate a buffer object from an existing user accessible
>> + * address malloc'd with the provided size.
>> + * Alignment is used when mapping to the gtt.
>> + * Flags may be I915_VMAP_READ_ONLY or I915_USERPTR_UNSYNCHRONIZED
>> + */
>> + drm_intel_bo *(*bo_alloc_userptr)(drm_intel_bufmgr *bufmgr,
>> + const char *name, void *addr,
>> + uint32_t tiling_mode, uint32_t stride,
>> + unsigned long size,
>> + unsigned long flags);
>> /**
>> * Allocate a tiled buffer object.
>> *
>
> Probably don't need the special function pointer yet since I don't think
> we can yet envision use cases which will require any kind of special
> handling. A simple has_userptr in bufmgr_gem will probably suffice. But
> I don't care too much either way.
What do you mean?
> I couldn't spot any real bugs...
Cool, I am glad there is some interest for this.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list