[Intel-gfx] [PATCH 2/2] intel: Add prelocation support
Ben Widawsky
benjamin.widawsky at intel.com
Fri Aug 22 05:12:33 CEST 2014
Allow userspace to pre-assign ("prelocate") a buffer object's GPU virtual
address by reserving a matching range of CPU address space (mmap or
aligned_alloc) at allocation time. Relocations whose target is a prelocated
object can then be skipped entirely: the target is simply added to the
validation list and callers consume offset64 directly.
Signed-off-by: Ben Widawsky <ben at bwidawsk.net>
---
intel/intel_bufmgr.h | 8 ++++
intel/intel_bufmgr_gem.c | 102 +++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 102 insertions(+), 8 deletions(-)
diff --git a/intel/intel_bufmgr.h b/intel/intel_bufmgr.h
index 9383c72..e4ecc44 100644
--- a/intel/intel_bufmgr.h
+++ b/intel/intel_bufmgr.h
@@ -88,6 +88,8 @@ struct _drm_intel_bo {
* Last seen card virtual address (offset from the beginning of the
* aperture) for the object. This should be used to fill relocation
* entries when calling drm_intel_bo_emit_reloc()
+ *
+ * This is also useful when prelocating an object.
*/
uint64_t offset64;
};
@@ -106,6 +108,8 @@ typedef struct _drm_intel_aub_annotation {
} drm_intel_aub_annotation;
#define BO_ALLOC_FOR_RENDER (1<<0)
+#define BO_ALLOC_PRELOCATE (1<<1)
+#define BO_ALLOC_PRELOCATE_32 (1<<2)
drm_intel_bo *drm_intel_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
unsigned long size, unsigned int alignment);
@@ -119,6 +123,10 @@ drm_intel_bo *drm_intel_bo_alloc_tiled(drm_intel_bufmgr *bufmgr,
uint32_t *tiling_mode,
unsigned long *pitch,
unsigned long flags);
+drm_intel_bo *drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+ const char *name,
+ unsigned long size,
+ int low);
void drm_intel_bo_reference(drm_intel_bo *bo);
void drm_intel_bo_unreference(drm_intel_bo *bo);
int drm_intel_bo_map(drm_intel_bo *bo, int write_enable);
diff --git a/intel/intel_bufmgr_gem.c b/intel/intel_bufmgr_gem.c
index d7d3769..5a2a9bd 100644
--- a/intel/intel_bufmgr_gem.c
+++ b/intel/intel_bufmgr_gem.c
@@ -221,6 +221,11 @@ struct _drm_intel_bo_gem {
*/
bool idle;
+ /**
+ * Nonzero if this object was prelocated, i.e. its GPU virtual address
+ * was chosen by userspace at allocation time. The value records which
+ * allocator reserved the backing CPU address range (PRELOCATE_MMAP or
+ * PRELOCATE_MALLOC) so the matching release call can be made at free.
+ */
+ #define PRELOCATE_MMAP 1
+ #define PRELOCATE_MALLOC 2
+ int prelocated;
+
/**
* Size in bytes of this buffer and its relocation descendents.
*
@@ -489,7 +494,10 @@ drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
bufmgr_gem->exec2_objects[index].alignment = 0;
- bufmgr_gem->exec2_objects[index].offset = 0;
+ if (bo_gem->prelocated)
+ bufmgr_gem->exec2_objects[index].offset = bo->offset64;
+ else
+ bufmgr_gem->exec2_objects[index].offset = 0;
bufmgr_gem->exec_bos[index] = bo;
bufmgr_gem->exec2_objects[index].flags = 0;
bufmgr_gem->exec2_objects[index].rsvd1 = 0;
@@ -637,9 +645,10 @@ drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
}
static drm_intel_bo_gem *
-__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
+__bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, bool prelocate, bool low32)
{
struct drm_i915_gem_create create;
+ drm_intel_bo *bo;
drm_intel_bo_gem *bo_gem;
int ret;
@@ -647,10 +656,35 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
if (!bo_gem)
return NULL;
+ bo = (drm_intel_bo *)bo_gem;
+
bo_gem->bo.size = size;
VG_CLEAR(create);
create.size = size;
+ /* FIXME: This is a gross hack to repurpose the create args */
+ if (prelocate) {
+ create.size |= (1ULL << 63);
+ if (low32) {
+ bo->offset64 = (uint64_t)mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE | MAP_32BIT,
+ -1, 0);
+ bo_gem->prelocated = PRELOCATE_MMAP;
+ } else {
+ bo->offset64 = (uint64_t)aligned_alloc(getpagesize(), size);
+ bo_gem->prelocated = PRELOCATE_MALLOC;
+ }
+ /* mmap reports failure as MAP_FAILED ((void *)-1), not NULL */
+ if (!bo->offset64 ||
+     (bo_gem->prelocated == PRELOCATE_MMAP &&
+      (void *)bo->offset64 == MAP_FAILED)) {
+ DBG("Couldn't allocate %ld address space for object. %s\n",
+ size, strerror(errno));
+ free(bo_gem);
+ return NULL;
+ }
+ create.handle = bo->offset64 >> 32;
+ create.pad = bo->offset64;
+ } else
+ bo->offset64 = 0x1;
ret = drmIoctl(bufmgr_gem->fd,
DRM_IOCTL_I915_GEM_CREATE,
@@ -658,6 +692,10 @@ __bo_alloc(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size)
bo_gem->gem_handle = create.handle;
bo_gem->bo.handle = bo_gem->gem_handle;
if (ret != 0) {
+ if (prelocate && low32)
+ munmap((void *)bo->offset64, size);
+ else if (prelocate)
+ free((void *)bo->offset64);
free(bo_gem);
return NULL;
}
@@ -687,10 +725,17 @@ drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
struct drm_intel_gem_bo_bucket *bucket;
bool alloc_from_cache;
unsigned long bo_size;
- bool for_render = false;
+ bool for_render = false, prelocate = false, low = false;
if (flags & BO_ALLOC_FOR_RENDER)
for_render = true;
+ if (flags & BO_ALLOC_PRELOCATE) {
+ if (flags & BO_ALLOC_PRELOCATE_32)
+ low = true;
+ prelocate = true;
+ bo_size = size;
+ goto skip_cache;
+ }
/* Round the allocated size up to a power of two number of pages. */
bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
@@ -756,7 +801,8 @@ retry:
pthread_mutex_unlock(&bufmgr_gem->lock);
if (!alloc_from_cache) {
- bo_gem = __bo_alloc(bufmgr_gem, bo_size);
+skip_cache:
+ bo_gem = __bo_alloc(bufmgr_gem, bo_size, prelocate, low);
if (!bo_gem)
return NULL;
@@ -774,7 +820,7 @@ retry:
bo_gem->reloc_tree_fences = 0;
bo_gem->used_as_reloc_target = false;
bo_gem->has_error = false;
- bo_gem->reusable = true;
+ bo_gem->reusable = !prelocate;
bo_gem->aub_annotations = NULL;
bo_gem->aub_annotation_count = 0;
@@ -859,6 +905,25 @@ drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
tiling, stride);
}
+drm_public drm_intel_bo *
+drm_intel_bo_alloc_prelocated(drm_intel_bufmgr *bufmgr,
+ const char *name,
+ unsigned long size,
+ int low)
+{
+ drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
+ int flag = BO_ALLOC_PRELOCATE;
+ /* FIXME: Need to replace this with a paramcheck */
+ if (bufmgr_gem->gen < 8 || !bufmgr_gem->has_llc)
+ return NULL;
+
+ if (low)
+ flag |= BO_ALLOC_PRELOCATE_32;
+
+ return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
+ flag, I915_TILING_NONE, 0);
+}
+
/**
* Returns a drm_intel_bo wrapping the given buffer object handle.
*
@@ -964,7 +1029,7 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
int ret;
DRMLISTDEL(&bo_gem->vma_list);
- if (bo_gem->mem_virtual) {
+ if (bo_gem->mem_virtual && !bo_gem->prelocated) {
VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
munmap(bo_gem->mem_virtual, bo_gem->bo.size);
bufmgr_gem->vma_count--;
@@ -982,6 +1047,12 @@ drm_intel_gem_bo_free(drm_intel_bo *bo)
DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
bo_gem->gem_handle, bo_gem->name, strerror(errno));
}
+
+ if (bo_gem->prelocated == PRELOCATE_MMAP)
+ munmap((void *)bo->offset64, bo->size);
+ else if (bo_gem->prelocated == PRELOCATE_MALLOC)
+ free((void *)bo->offset64);
+
free(bo_gem->aub_annotations);
free(bo);
}
@@ -1190,7 +1261,9 @@ static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
if (bo_gem->map_count++ == 0)
drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
- if (!bo_gem->mem_virtual) {
+ if (bo_gem->prelocated) {
+ bo_gem->mem_virtual = (void *)bo->offset64;
+ } else if (!bo_gem->mem_virtual) {
struct drm_i915_gem_mmap mmap_arg;
DBG("bo_map: %d (%s), map_count=%d\n",
@@ -1683,6 +1756,17 @@ do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
return -ENOMEM;
}
+ /* If the target we're trying point to was a prelocated target, then we
+ * can skip actually telling the kernel about the relocation. Userspace
+ * is expected to use offset64 */
+ if (target_bo_gem->prelocated) {
+ assert(target_bo->offset64 != 0x1);
+ assert(target_bo->offset64 != 0); // temp hack
+ if (bo_gem->validate_index == -1)
+ drm_intel_add_validate_buffer2(target_bo, false);
+ return 0;
+ }
+
/* We never use HW fences for rendering on 965+ */
if (bufmgr_gem->gen >= 4)
need_fence = false;
@@ -1863,7 +1947,6 @@ drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
}
}
-
static void
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
{
@@ -1894,6 +1977,9 @@ drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
+ if (bo_gem->prelocated)
+ continue;
+
/* Update the buffer offset */
if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
--
2.0.4
More information about the Intel-gfx
mailing list