[Intel-gfx] [PATCH] intel: non-blocking mmaps on the cheap
Daniel Vetter
daniel at ffwll.ch
Fri Sep 23 10:52:01 CEST 2011
On Thu, Sep 22, 2011 at 04:35:20PM -0700, Ben Widawsky wrote:
> From: Daniel Vetter <daniel.vetter at ffwll.ch>
>
> This adds a new mode to map gem objects in a non-blocking way.
>
> The new kernel interface required to get the caching level/coherency
> is not yet wired up. All the code to transparently choose between gtt
> mappings or (if coherent) cpu mappings is already in place, though.
>
> To make this usable for mesa, we need to allow other buffer access in
> between non-blocking modes. It's not a big problem if this accidentally
> blocks once when switching back to non-blocking maps, but domains need
> to be correctly tracked.
>
> I've gone through historical kernel versions and luckily the only
> place we move a bo from the gtt domain to the cpu domain is in the
> set_ioctl. pwrite/pread don't move objects into the cpu domain as long
> as they're untiled or llc cached.
>
> So track whether an object is in the gtt domain and invalidate this
> only on access to cpu mappings.
>
> [patch reworked by Ben]
>
> Cc: Eric Anholt <eric at anholt.net>
> Cc: "Kilarski, Bernard R" <bernard.r.kilarski at intel.com>
> Signed-off-by: Daniel Vetter <daniel.vetter at ffwll.ch>
> Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
A bit of bikeshedding below ;-)
> ---
> include/drm/i915_drm.h | 7 ++
> intel/intel_bufmgr.h | 3 +
> intel/intel_bufmgr_gem.c | 181 +++++++++++++++++++++++++++++++++++++++++-----
> 3 files changed, 174 insertions(+), 17 deletions(-)
>
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index adc2392..4b62222 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -189,6 +189,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_I915_OVERLAY_PUT_IMAGE 0x27
> #define DRM_I915_OVERLAY_ATTRS 0x28
> #define DRM_I915_GEM_EXECBUFFER2 0x29
> +#define DRM_I915_GEM_GET_CACHE_TYPE 0x2a
>
> #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)
> #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH)
> @@ -230,6 +231,7 @@ typedef struct _drm_i915_sarea {
> #define DRM_IOCTL_I915_GEM_MADVISE DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_MADVISE, struct drm_i915_gem_madvise)
> #define DRM_IOCTL_I915_OVERLAY_PUT_IMAGE DRM_IOW(DRM_COMMAND_BASE + DRM_IOCTL_I915_OVERLAY_ATTRS, struct drm_intel_overlay_put_image)
> #define DRM_IOCTL_I915_OVERLAY_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_OVERLAY_ATTRS, struct drm_intel_overlay_attrs)
> +#define DRM_IOCTL_I915_GEM_GET_CACHE_TYPE DRM_IOR (DRM_COMMAND_BASE + DRM_I915_GEM_GET_CACHE_TYPE, struct drm_i915_gem_get_cache_type)
>
> /* Allow drivers to submit batchbuffers directly to hardware, relying
> * on the security mechanisms provided by hardware.
> @@ -835,4 +837,9 @@ struct drm_intel_overlay_attrs {
> __u32 gamma5;
> };
>
> +struct drm_i915_gem_get_cache_type {
> + __u32 handle;
> + __u32 cache_level;
> +};
> +
> #endif /* _I915_DRM_H_ */
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index 889ef46..116a9d7 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -149,6 +149,9 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
> int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
> void drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable);
>
> +int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped);
> +int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo);
> +
> int drm_intel_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id);
>
> int drm_intel_get_aperture_sizes(int fd, size_t *mappable, size_t *total);
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index 4f4de92..5b7e053 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -141,6 +141,8 @@ struct _drm_intel_bo_gem {
> uint32_t swizzle_mode;
> unsigned long stride;
>
> + unsigned cache_coherent : 1;
I prefer my in_gtt_domain - it spells clearly what it tracks, whereas
with cache_coherent I momentarily confused it with LLC coherency and went:
wtf, why do we need to call set_domain(gtt) for llc objects?
> time_t free_time;
>
> /** Array passed to the DRM containing relocation information. */
> @@ -998,15 +1000,11 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
> }
> }
>
> -static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> +static int do_mmap_cpu(drm_intel_bufmgr_gem *bufmgr_gem,
> + drm_intel_bo_gem *bo_gem)
> {
> - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> - struct drm_i915_gem_set_domain set_domain;
> int ret;
>
> - pthread_mutex_lock(&bufmgr_gem->lock);
> -
> /* Allow recursive mapping. Mesa may recursively map buffers with
> * nested display loops.
> */
> @@ -1018,7 +1016,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> memset(&mmap_arg, 0, sizeof(mmap_arg));
> mmap_arg.handle = bo_gem->gem_handle;
> mmap_arg.offset = 0;
> - mmap_arg.size = bo->size;
> + mmap_arg.size = bo_gem->bo.size;
> ret = drmIoctl(bufmgr_gem->fd,
> DRM_IOCTL_I915_GEM_MMAP,
> &mmap_arg);
> @@ -1027,11 +1025,52 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
> __FILE__, __LINE__, bo_gem->gem_handle,
> bo_gem->name, strerror(errno));
> - pthread_mutex_unlock(&bufmgr_gem->lock);
> return ret;
> }
> bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
> }
> + return 0;
> +}
> +
> +enum i915_cache_level {
> + I915_CACHE_NONE,
> + I915_CACHE_LLC,
> + I915_CACHE_LLC_MLC, /* gen6+ */
> +};
> +
> +static int get_cache_type(drm_intel_bo *bo)
> +{
> + struct drm_i915_gem_get_cache_type cache = {0, 0};
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + int ret = 0;
> +
> + cache.handle = bo_gem->gem_handle;
> + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_CACHE_TYPE,
> + &cache);
> +
> + /* This should maintain old behavior */
> + if (ret)
> + return I915_CACHE_NONE;
> +
> +#define CACHE_LEVEL 0xff
> + return cache.cache_level & CACHE_LEVEL;
> +}
> +
> +static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + struct drm_i915_gem_set_domain set_domain;
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> +
> + ret = do_mmap_cpu(bufmgr_gem, bo_gem);
> + if (ret != 0) {
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> bo_gem->mem_virtual);
> bo->virtual = bo_gem->mem_virtual;
> @@ -1051,20 +1090,19 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> strerror(errno));
> }
>
> + if (get_cache_type(bo) != I915_CACHE_LLC)
> + bo_gem->cache_coherent = 0;
> +
> pthread_mutex_unlock(&bufmgr_gem->lock);
>
> return 0;
> }
>
> -int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> +static int do_mmap_gtt(drm_intel_bufmgr_gem *bufmgr_gem,
> + drm_intel_bo_gem *bo_gem)
> {
> - drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> - drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> - struct drm_i915_gem_set_domain set_domain;
> int ret;
>
> - pthread_mutex_lock(&bufmgr_gem->lock);
> -
> /* Get a mapping of the buffer if we haven't before. */
> if (bo_gem->gtt_virtual == NULL) {
> struct drm_i915_gem_mmap_gtt mmap_arg;
> @@ -1085,12 +1123,11 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> __FILE__, __LINE__,
> bo_gem->gem_handle, bo_gem->name,
> strerror(errno));
> - pthread_mutex_unlock(&bufmgr_gem->lock);
> return ret;
> }
>
> /* and mmap it */
> - bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
> + bo_gem->gtt_virtual = mmap(0, bo_gem->bo.size, PROT_READ | PROT_WRITE,
> MAP_SHARED, bufmgr_gem->fd,
> mmap_arg.offset);
> if (bo_gem->gtt_virtual == MAP_FAILED) {
> @@ -1100,11 +1137,28 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> __FILE__, __LINE__,
> bo_gem->gem_handle, bo_gem->name,
> strerror(errno));
> - pthread_mutex_unlock(&bufmgr_gem->lock);
> return ret;
> }
> }
>
> + return 0;
> +}
> +
> +int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + struct drm_i915_gem_set_domain set_domain;
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> +
> + ret = do_mmap_gtt(bufmgr_gem, bo_gem);
> + if (ret != 0) {
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> +
> bo->virtual = bo_gem->gtt_virtual;
>
> DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> @@ -1123,6 +1177,8 @@ int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
> strerror(errno));
> }
>
> + bo_gem->cache_coherent = 1;
> +
> pthread_mutex_unlock(&bufmgr_gem->lock);
>
> return 0;
> @@ -1282,6 +1338,97 @@ drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
> set_domain.read_domains, set_domain.write_domain,
> strerror(errno));
> }
> +
> + bo_gem->cache_coherent = 1;
> +}
> +
> +/**
> + * Map an object without blocking on the GPU if possible.
> + *
> + * This automatically chooses either the GTT mapping or if coherent and faster,
> + * the CPU mapping.
> + *
> + * Not allowed on tiled buffers (to prevent headaches with swizzling and
> + * tracking the gem domain) or to share these buffers with flink, though that's
> + * not currently tracked.
> + */
> +int drm_intel_gem_bo_map_nonblocking(drm_intel_bo *bo, int *gtt_mapped)
I don't quite see the need for gtt_mapped. mesa only uses this to call the
correct unmap, but
- they're all the same anyway
- you still add an unmap_nonblocking.
Also, I think map_nonblocking should only use cpu maps when they're
actually equivalent (coherency-wise) to gtt maps.
So I think the right thing to do is to kill this and teach mesa to keep
track of 3 different mappings.
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + struct drm_i915_gem_set_domain set_domain;
> + int gpu_coherent_cpu_map = 0;
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> + assert(bo_gem->tiling_mode == I915_TILING_NONE);
> +
> + if (bufmgr_gem->gen >= 6)
> + gpu_coherent_cpu_map = 1;
> +
> + if (gpu_coherent_cpu_map && get_cache_type(bo) == I915_CACHE_LLC) {
> + ret = do_mmap_cpu(bufmgr_gem, bo_gem);
> + if (ret != 0) {
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> +
> + bo->virtual = bo_gem->mem_virtual;
> + *gtt_mapped = 1;
> + } else {
> + ret = do_mmap_gtt(bufmgr_gem, bo_gem);
> + if (ret != 0) {
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> +
> + bo->virtual = bo_gem->gtt_virtual;
> + *gtt_mapped = 0;
> + }
> +
> + DBG("bo_map_nonblocking: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> + bo_gem->gtt_virtual);
> +
> + /* Move it to the GTT domain in case it isn't there yet
> + * In the coherent buffers case, below variable is modified at map time.
> + */
> + if (!bo_gem->cache_coherent) {
> + set_domain.handle = bo_gem->gem_handle;
> + set_domain.read_domains = I915_GEM_DOMAIN_GTT;
> + set_domain.write_domain = I915_GEM_DOMAIN_GTT;
> + ret = drmIoctl(bufmgr_gem->fd,
> + DRM_IOCTL_I915_GEM_SET_DOMAIN,
> + &set_domain);
> + if (ret != 0) {
> + DBG("%s:%d: Error setting domain %d: %s\n",
> + __FILE__, __LINE__, bo_gem->gem_handle,
> + strerror(errno));
> + }
> +
> + bo_gem->cache_coherent = 1;
> + }
> +
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return 0;
> +}
> +
> +/**
> + * unmap an object in the non-blocking mode
> + */
> +int drm_intel_gem_bo_unmap_nonblocking(drm_intel_bo *bo)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + int ret = 0;
> +
> + if (bo == NULL)
> + return 0;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> + bo->virtual = NULL;
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return ret;
> }
>
> static void
> --
> 1.7.6.3
>
--
Daniel Vetter
Mail: daniel at ffwll.ch
Mobile: +41 (0)79 365 57 48
More information about the Intel-gfx
mailing list