[Intel-gfx] [PATCH v2 17/18] drm/i915: Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset
Michel Thierry
michel.thierry at intel.com
Wed Jun 10 09:46:54 PDT 2015
There are some allocations that must be only referenced by 32bit
offsets. To limit the chances of having the first 4GB already full,
objects not requiring this workaround use DRM_MM_SEARCH_BELOW/
DRM_MM_CREATE_TOP flags
User must pass I915_EXEC_SUPPORTS_48BADDRESS flag to indicate it can
be allocated above the 32b address range.
The flag is ignored in 32b PPGTT.
v2: Changed flag logic from neeeds_32b, to supports_48b.
Signed-off-by: Michel Thierry <michel.thierry at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 1 +
drivers/gpu/drm/i915/i915_gem.c | 19 ++++++++++++++--
drivers/gpu/drm/i915/i915_gem_execbuffer.c | 36 +++++++++++++++++++++---------
include/uapi/drm/i915_drm.h | 4 +++-
4 files changed, 47 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 95f59d3..d73dddb 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2714,6 +2714,7 @@ void i915_gem_vma_destroy(struct i915_vma *vma);
#define PIN_OFFSET_BIAS (1<<3)
#define PIN_USER (1<<4)
#define PIN_UPDATE (1<<5)
+#define PIN_FULL_RANGE (1<<6)
#define PIN_OFFSET_MASK (~4095)
int __must_check
i915_gem_object_pin(struct drm_i915_gem_object *obj,
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 35690ef..e6325a4a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -3672,6 +3672,8 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
struct drm_i915_private *dev_priv = dev->dev_private;
u32 fence_alignment, unfenced_alignment;
u64 size, fence_size;
+ u32 search_flag = DRM_MM_SEARCH_DEFAULT;
+ u32 alloc_flag = DRM_MM_CREATE_DEFAULT;
u64 start =
flags & PIN_OFFSET_BIAS ? flags & PIN_OFFSET_MASK : 0;
u64 end =
@@ -3713,6 +3715,19 @@ i915_gem_object_bind_to_vm(struct drm_i915_gem_object *obj,
obj->tiling_mode,
false);
size = flags & PIN_MAPPABLE ? fence_size : obj->base.size;
+
+ /* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
+ * limit address to 4GB-1 for objects requiring this wa; for
+ * others, set alloc flag to TOP.
+ */
+ if (USES_FULL_48BIT_PPGTT(dev)) {
+ if (flags & PIN_FULL_RANGE) {
+ search_flag = DRM_MM_SEARCH_BELOW;
+ alloc_flag = DRM_MM_CREATE_TOP;
+ } else {
+ end = ((4ULL << GEN8_PDPE_SHIFT) - 1);
+ }
+ }
}
if (alignment == 0)
@@ -3755,8 +3770,8 @@ search_free:
size, alignment,
obj->cache_level,
start, end,
- DRM_MM_SEARCH_DEFAULT,
- DRM_MM_CREATE_DEFAULT);
+ search_flag,
+ alloc_flag);
if (ret) {
ret = i915_gem_evict_something(dev, vm, size, alignment,
obj->cache_level,
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index bd0e4bd..04af62b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -577,7 +577,8 @@ static bool only_mappable_for_reloc(unsigned int flags)
static int
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
struct intel_engine_cs *ring,
- bool *need_reloc)
+ bool *need_reloc,
+ bool support_48baddr)
{
struct drm_i915_gem_object *obj = vma->obj;
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
@@ -588,6 +589,9 @@ i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
flags |= PIN_GLOBAL;
+ if (support_48baddr)
+ flags |= PIN_FULL_RANGE;
+
if (!drm_mm_node_allocated(&vma->node)) {
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
flags |= PIN_GLOBAL | PIN_MAPPABLE;
@@ -650,7 +654,7 @@ need_reloc_mappable(struct i915_vma *vma)
}
static bool
-eb_vma_misplaced(struct i915_vma *vma)
+eb_vma_misplaced(struct i915_vma *vma, bool support_48baddr)
{
struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
struct drm_i915_gem_object *obj = vma->obj;
@@ -670,13 +674,18 @@ eb_vma_misplaced(struct i915_vma *vma)
if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
return !only_mappable_for_reloc(entry->flags);
+ if (!support_48baddr &&
+ vma->node.start >= (1ULL << 32))
+ return true;
+
return false;
}
static int
i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
struct list_head *vmas,
- bool *need_relocs)
+ bool *need_relocs,
+ bool support_48baddr)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
@@ -737,10 +746,11 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
if (!drm_mm_node_allocated(&vma->node))
continue;
- if (eb_vma_misplaced(vma))
+ if (eb_vma_misplaced(vma, support_48baddr))
ret = i915_vma_unbind(vma);
else
- ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
+ ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs,
+ support_48baddr);
if (ret)
goto err;
}
@@ -750,7 +760,9 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *ring,
if (drm_mm_node_allocated(&vma->node))
continue;
- ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
+ ret = i915_gem_execbuffer_reserve_vma(vma, ring,
+ need_relocs,
+ support_48baddr);
if (ret)
goto err;
}
@@ -780,7 +792,7 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
struct drm_i915_gem_relocation_entry *reloc;
struct i915_address_space *vm;
struct i915_vma *vma;
- bool need_relocs;
+ bool need_relocs, support_48baddr;
int *reloc_offset;
int i, total, ret;
unsigned count = args->buffer_count;
@@ -861,7 +873,9 @@ i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
goto err;
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
- ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
+ support_48baddr = (args->flags & I915_EXEC_SUPPORT_48BADDRESS);
+ ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs,
+ support_48baddr);
if (ret)
goto err;
@@ -1411,7 +1425,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
u64 exec_start = args->batch_start_offset;
u32 dispatch_flags;
int ret;
- bool need_relocs;
+ bool need_relocs, support_48baddr;
if (!i915_gem_check_execbuffer(args))
return -EINVAL;
@@ -1519,7 +1533,9 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
/* Move the objects en-masse into the GTT, evicting if necessary. */
need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
- ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
+ support_48baddr = (args->flags & I915_EXEC_SUPPORT_48BADDRESS);
+ ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs,
+ support_48baddr);
if (ret)
goto err;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 4851d66..18df34a 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -760,7 +760,9 @@ struct drm_i915_gem_execbuffer2 {
#define I915_EXEC_BSD_RING1 (1<<13)
#define I915_EXEC_BSD_RING2 (2<<13)
-#define __I915_EXEC_UNKNOWN_FLAGS -(1<<15)
+#define I915_EXEC_SUPPORT_48BADDRESS (1<<15)
+
+#define __I915_EXEC_UNKNOWN_FLAGS -(1<<16)
#define I915_EXEC_CONTEXT_ID_MASK (0xffffffff)
#define i915_execbuffer2_set_context_id(eb2, context) \
--
2.4.0
More information about the Intel-gfx
mailing list