[Intel-gfx] [PATCH v3] intel: New libdrm interface to create unbound wc user mappings for objects
Damien Lespiau
damien.lespiau at intel.com
Wed Dec 3 06:13:58 PST 2014
On Tue, Oct 28, 2014 at 06:39:27PM +0530, akash.goel at intel.com wrote:
> @@ -1126,6 +1136,8 @@ static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
>
> /* We may need to evict a few entries in order to create new mmaps */
> limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
> + if (bufmgr_gem->has_ext_mmap)
> + limit -= bufmgr_gem->vma_open;
> if (limit < 0)
> limit = 0;
>
Aren't we being overly aggressive in purging the vma cache with this new
code? I guess it depends on the workload but I don't think the WC
mapping is likely to be used in addition to the other two often enough
to warrant a max - 3 * open;
> +static int
> +map_wc(drm_intel_bo *bo)
> +{
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + int ret;
> +
> + if (bo_gem->is_userptr)
> + return -EINVAL;
> +
> + if (!bufmgr_gem->has_ext_mmap)
> + return -EINVAL;
> +
> + if (bo_gem->map_count++ == 0)
> + drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
> +
> + /* Get a mapping of the buffer if we haven't before. */
> + if (bo_gem->mem_wc_virtual == NULL) {
> + struct drm_i915_gem_mmap mmap_arg;
> +
> + DBG("bo_map_wc: mmap %d (%s), map_count=%d\n",
> + bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
> +
> + VG_CLEAR(mmap_arg);
> + mmap_arg.handle = bo_gem->gem_handle;
> + /* To indicate the uncached virtual mapping to KMD */
> + mmap_arg.flags = I915_MMAP_WC;
> + mmap_arg.offset = 0;
> + mmap_arg.size = bo->size;
> + ret = drmIoctl(bufmgr_gem->fd,
> + DRM_IOCTL_I915_GEM_MMAP,
> + &mmap_arg);
> + if (ret != 0) {
> + ret = -errno;
> + DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
> + __FILE__, __LINE__, bo_gem->gem_handle,
> + bo_gem->name, strerror(errno));
> + if (--bo_gem->map_count == 0)
> + drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
> + pthread_mutex_unlock(&bufmgr_gem->lock);
There's an unlock() here (and no lock()!), but the locking is hanled by
the higher level functions.
> + return ret;
> + }
> + VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
> + bo_gem->mem_wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
> + }
> +
> + bo->virtual = bo_gem->mem_wc_virtual;
> +
> + DBG("bo_map_wc: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
> + bo_gem->mem_wc_virtual);
> +
> + return 0;
> +}
> +
> +/* To be used in a similar way to mmap_gtt */
> +drm_public int
> +drm_intel_gem_bo_map_wc(drm_intel_bo *bo) {
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> + struct drm_i915_gem_set_domain set_domain;
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> +
> + ret = map_wc(bo);
> + if (ret) {
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> + return ret;
> + }
> +
> + /* Now move it to the GTT domain so that the GPU and CPU
> + * caches are flushed and the GPU isn't actively using the
> + * buffer.
> + *
> + * The domain change is done even for the objects which
> + * are not bounded. For them first the pages are acquired,
> + * before the domain change.
> + */
> + VG_CLEAR(set_domain);
> + set_domain.handle = bo_gem->gem_handle;
> + set_domain.read_domains = I915_GEM_DOMAIN_GTT;
> + set_domain.write_domain = I915_GEM_DOMAIN_GTT;
> + ret = drmIoctl(bufmgr_gem->fd,
> + DRM_IOCTL_I915_GEM_SET_DOMAIN,
> + &set_domain);
> + if (ret != 0) {
> + DBG("%s:%d: Error setting domain %d: %s\n",
> + __FILE__, __LINE__, bo_gem->gem_handle,
> + strerror(errno));
> + }
Why move the buffer to the GTT domain and not the CPU domain here?
> + drm_intel_gem_bo_mark_mmaps_incoherent(bo);
> + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_wc_virtual, bo->size));
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return 0;
> +}
> +
> +drm_public int
> +drm_intel_gem_bo_map_wc_unsynchronized(drm_intel_bo *bo) {
> + drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> +#ifdef HAVE_VALGRIND
> + drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
> +#endif
> + int ret;
> +
> + pthread_mutex_lock(&bufmgr_gem->lock);
> +
> + ret = map_wc(bo);
> + if (ret == 0) {
> + drm_intel_gem_bo_mark_mmaps_incoherent(bo);
> + VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_wc_virtual, bo->size));
> + }
> +
> + pthread_mutex_unlock(&bufmgr_gem->lock);
> +
> + return ret;
> +}
> +
> static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
> {
> drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
> @@ -1293,6 +1432,7 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
>
> VG_CLEAR(mmap_arg);
> mmap_arg.handle = bo_gem->gem_handle;
> + mmap_arg.flags = 0;
> mmap_arg.offset = 0;
> mmap_arg.size = bo->size;
> ret = drmIoctl(bufmgr_gem->fd,
> @@ -1553,6 +1693,12 @@ static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
> }
>
> drm_public int
> +drm_intel_gem_bo_unmap_wc(drm_intel_bo *bo)
> +{
> + return drm_intel_gem_bo_unmap(bo);
> +}
> +
> +drm_public int
> drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
> {
> return drm_intel_gem_bo_unmap(bo);
> @@ -3538,6 +3684,10 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
> ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
>
> + gp.param = I915_PARAM_MMAP_VERSION;
> + ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
> + bufmgr_gem->has_ext_mmap = (ret == 0) & (*gp.value > 0);
It looks like this works, but can we have && instead?
--
Damien
More information about the Intel-gfx
mailing list