[Intel-gfx] [PATCH] intel: New libdrm interface to create unbound wc user mappings for objects
Damien Lespiau
damien.lespiau at intel.com
Sat Oct 25 14:45:05 CEST 2014
On Sat, Oct 25, 2014 at 05:21:53PM +0530, akash.goel at intel.com wrote:
> From: Akash Goel <akash.goel at intel.com>
>
> A new libdrm interface 'drm_intel_gem_bo_map_wc' is provided by this
> patch. Through this interface Gfx clients can create write combining
> virtual mappings of the Gem object. It will provide the same funtionality
> of 'mmap_gtt' interface without the constraints of limited aperture space,
> but provided clients handles the linear to tile conversion on their own.
> This patch is intended for improving the CPU write operation performance,
> as with such mapping, writes are almost 50% faster than with mmap_gtt.
> Also it avoids the Cache flush after update from CPU side, when object is
> passed on to GPU, which will be the case if regular mmap interface is used.
> This type of mapping is specially useful in case of sub-region
> update, i.e. when only a portion of the object is to be updated.
> Also there is a support for the unsynchronized version of this interface
> named 'drm_intel_gem_bo_map_wc_unsynchronized', through which wc virtual
> mappings, but unsynchronized one, can be created of the Gem object.
> To ensure the cache coherency, before using this mapping, the GTT domain has
> been reused here. This provides the required Cache flush if the object is in
> CPU domain or synchronization against the concurrent rendering
>
> The drm_i915_gem_mmap structure (for the DRM_I915_GEM_MMAP_IOCTL) has been
> extended with a new flags field (defaulting to 0 for existent users). In
> order for userspace to detect the extended ioctl, a new parameter
> I915_PARAM_HAS_EXT_MMAP has been added for versioning the ioctl interface.
>
> v2: Aligned with the v2 of the corresponding kernel patch (Chris)
>
> Signed-off-by: Akash Goel <akash.goel at intel.com>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>From a quick glance at the patch:
It looks like you copy/pasted the map() implementation and changed a few
things here and there instead of adding flag to drm_intel_gem_bo_map()
and reusing the current code.
Can we expose another version of map that takes flags (_WC,
_UNSYNCHRONIZED, ...) instead of starting to have every single
combination possible?
Do we really need a separate mem_wc_virtual? Using _map() or _map_wc()
in an exclusively way makes some sense to me.
With the introduction of mem_wc_virtual, you left aside the vma cache
handling and so we'll never end up unmapping it, drm_intel_gem_bo_free()
doesn't unmap it either, ...
I'd just expect users to use _unmap().
--
Damien
> ---
> include/drm/i915_drm.h | 9 ++++
> intel/intel_bufmgr.h | 3 ++
> intel/intel_bufmgr_gem.c | 124 +++++++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 136 insertions(+)
>
> diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
> index 15dd01d..a91a1d0 100644
> --- a/include/drm/i915_drm.h
> +++ b/include/drm/i915_drm.h
> @@ -340,6 +340,7 @@ typedef struct drm_i915_irq_wait {
> #define I915_PARAM_HAS_EXEC_HANDLE_LUT 26
> #define I915_PARAM_HAS_WT 27
> #define I915_PARAM_CMD_PARSER_VERSION 28
> +#define I915_PARAM_MMAP_VERSION 29
>
> typedef struct drm_i915_getparam {
> int param;
> @@ -487,6 +488,14 @@ struct drm_i915_gem_mmap {
> * This is a fixed-size type for 32/64 compatibility.
> */
> __u64 addr_ptr;
> +
> + /**
> + * Flags for extended behaviour.
> + *
> + * Added in version 2.
> + */
> + __u64 flags;
> +#define I915_MMAP_WC 0x1
> };
>
> struct drm_i915_gem_mmap_gtt {
> diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
> index be83a56..bda4115 100644
> --- a/intel/intel_bufmgr.h
> +++ b/intel/intel_bufmgr.h
> @@ -177,6 +177,9 @@ void drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr,
> int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo);
> int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo);
> int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo);
> +int drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo);
> +int drm_intel_gem_bo_map_wc(drm_intel_bo *bo);
> +int drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo);
>
> int drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo);
> void drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start);
> diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
> index f2f4fea..95c588f 100644
> --- a/intel/intel_bufmgr_gem.c
> +++ b/intel/intel_bufmgr_gem.c
> @@ -184,6 +184,8 @@ struct _drm_intel_bo_gem {
> int reloc_count;
> /** Mapped address for the buffer, saved across map/unmap cycles */
> void *mem_virtual;
> + /** Uncached Mapped address for the buffer, saved across map/unmap cycles */
> + void *mem_wc_virtual;
> /** GTT virtual address for the buffer, saved across map/unmap cycles */
> void *gtt_virtual;
> /**
> @@ -1267,6 +1269,121 @@ static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
> }
> }
>
> +static int
> +map_wc(drm_intel_bo *bo)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + int ret;
> +
> + if (bo_gem->is_userptr)
> + return -EINVAL;
> +
> + if (bo_gem->map_count++ == 0)
> + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
> +
> + /* Get a mapping of the buffer if we haven't before. */
> + if (bo_gem->mem_wc_virtual == NULL) {
> + struct drm_i915_gem_mmap mmap_arg;
> +
> + DBG("bo_map_wc: mmap %d (%s), map_count=%d\n",
> + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
> +
> + VG_CLEAR(mmap_arg);
> + mmap_arg.handle = bo_gem->gem_handle;
> + /* To indicate the uncached virtual mapping to KMD */
> + mmap_arg.flags = I915_MMAP_WC;
> + mmap_arg.offset = 0;
> + mmap_arg.size = bo->size;
> + ret = drmIoctl(bufmgr_gem->fd,
> + DRM_IOCTL_I915_GEM_MMAP,
> + &mmap_arg);
> + if (ret != 0) {
> + ret = -errno;
> + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
> + __FILE__, __LINE__, bo_gem->gem_handle,
> + bo_gem->name, strerror(errno));
> + if (--bo_gem->map_count == 0)
> + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
> + bo_gem->mem_wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
> + }
> +
> + bo->virtual = bo_gem->mem_wc_virtual;
> +
> + DBG("bo_map_wc: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> + bo_gem->mem_wc_virtual);
> +
> + return 0;
> +}
> +
> +/* To be used in a similar way to mmap_gtt */
> +drm_public int
> +drm_intel_gem_bo_map_wc(drm_intel_bo *bo) {
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + struct drm_i915_gem_set_domain set_domain;
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> +
> + ret = map_wc(bo);
> + if (ret) {
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> +
> + /* Now move it to the GTT domain so that the GPU and CPU
> + * caches are flushed and the GPU isn't actively using the
> + * buffer.
> + *
> + * The domain change is done even for the objects which
> + * are not bounded. For them first the pages are acquired,
> + * before the domain change.
> + */
> + VG_CLEAR(set_domain);
> + set_domain.handle = bo_gem->gem_handle;
> + set_domain.read_domains = I915_GEM_DOMAIN_GTT;
> + set_domain.write_domain = I915_GEM_DOMAIN_GTT;
> + ret = drmIoctl(bufmgr_gem->fd,
> + DRM_IOCTL_I915_GEM_SET_DOMAIN,
> + &set_domain);
> + if (ret != 0) {
> + DBG("%s:%d: Error setting domain %d: %s\n",
> + __FILE__, __LINE__, bo_gem->gem_handle,
> + strerror(errno));
> + }
> + drm_intel_gem_bo_mark_mmaps_incoherent(bo);
> + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return 0;
> +}
> +
> +drm_public int
> +drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo) {
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +#ifdef HAVE_VALGRIND
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +#endif
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> +
> + ret = map_wc(bo);
> + if (ret == 0) {
> + drm_intel_gem_bo_mark_mmaps_incoherent(bo);
> + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
> + }
> +
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return ret;
> +}
> +
> static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> {
> drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> @@ -1293,6 +1410,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>
> VG_CLEAR(mmap_arg);
> mmap_arg.handle = bo_gem->gem_handle;
> + mmap_arg.flags = 0;
> mmap_arg.offset = 0;
> mmap_arg.size = bo->size;
> ret = drmIoctl(bufmgr_gem->fd,
> @@ -1553,6 +1671,12 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
> }
>
> drm_public int
> +drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo)
> +{
> + return drm_intel_gem_bo_unmap(bo);
> +}
> +
> +drm_public int
> drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
> {
> return drm_intel_gem_bo_unmap(bo);
> --
> 1.9.2
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list