[Intel-gfx] [PATCH v2 7/7] drm/i915: fix gtt space allocated for tiled objects
Imre Deak
imre.deak at intel.com
Mon Jan 7 20:47:39 CET 2013
The GTT space needed for tiled objects might be bigger than the linear
size programmed into the correpsonding fence register. For example for
the following buffer on a Gen5+ HW:
- allocation size: 4 * 4096 bytes
- tiling mode: X tiled
- stride: 1536
This needs 6 * 4096 bytes of GTT space to cover all the pixels in the
buffer, but at the moment we allocate only 4 * 4096. This means that any
buffer following this tiled buffer in the GTT space will be corrupted if
pixels belonging to the last two tiles are written.
Fix this by limiting the linear size to the object's last full tile row
offset (3 * 4096 bytes in the above example). Beyond this offset we
don't have enough backing storage to hold even a single linear pixel
line, so tiling wouldn't work properly anyway. For old HW with
power-of-two fence regions we might have to also overallocate GTT space
in case the POT offset happens to be not aligned to the tile row size.
The page frames for the overallocated area will be backed by the single
GTT scratch page used already for padding objects with a POT fence
region.
Note that this is more of a security/robustness problem and not fixing any
reported issue that I know of. This is because applications will normally
access only the part of the buffer that is tile row size aligned.
In v2:
- Instead of always overallocating for unaligned objects, limit the
linear size - and overallocate only when necessary for POT fence
sizes. Discussed this with Chris Wilson.
- Force buffer-rebind if the GTT size changes after a tiling change.
(Chris Wilson)
Signed-off-by: Imre Deak <imre.deak at intel.com>
---
drivers/gpu/drm/i915/i915_drv.h | 5 +-
drivers/gpu/drm/i915/i915_gem.c | 85 ++++++++++++++++++++++++++------
drivers/gpu/drm/i915/i915_gem_tiling.c | 18 ++++---
3 files changed, 84 insertions(+), 24 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index e67332f..22ade39 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1566,10 +1566,11 @@ void i915_gem_free_all_phys_object(struct drm_device *dev);
void i915_gem_release(struct drm_device *dev, struct drm_file *file);
uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode);
+i915_gem_get_gtt_physical_size(struct drm_device *dev, uint32_t size,
+ int tiling_mode, int stride);
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
- int tiling_mode, bool fenced);
+ int tiling_mode, int stride, bool fenced);
size_t
i915_gem_get_tile_row_size(struct drm_device *dev, int tiling_mode, int stride);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index e51a4cd..22a2108 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1453,16 +1453,39 @@ i915_gem_get_tile_row_size(struct drm_device *dev, int tiling_mode, int stride)
return stride / i915_gem_get_tile_width(dev, tiling_mode) * PAGE_SIZE;
}
-uint32_t
-i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
+static uint32_t
+i915_gem_get_gtt_linear_size(struct drm_device *dev, uint32_t size,
+ int tiling_mode, int stride)
{
uint32_t gtt_size;
- if (INTEL_INFO(dev)->gen >= 4 ||
- tiling_mode == I915_TILING_NONE)
+ if (tiling_mode == I915_TILING_NONE)
return size;
- /* Previous chips need a power-of-two fence region when tiling */
+ /*
+ * Applications can set tiling for objects with sizes not aligned to
+ * the object's tile row size. This is a valid use case for example if
+ * the application wants to reuse the same object with different tile
+ * settings. In this case we have to limit the linear range
+ * (programmed to the fence register) to the last full tile row
+ * offset, otherwise users of this object would have access to the
+ * object following it in the GTT space by reading/writing to the
+ * unaligned part. For the unaligned part we don't have backing storage
+ * even for a single linear pixel line, so tiling wouldn't work
+ * properly anyway.
+ */
+ size = rounddown(size, i915_gem_get_tile_row_size(dev, tiling_mode,
+ stride));
+ if (INTEL_INFO(dev)->gen >= 4)
+ return size;
+
+ /*
+ * Previous chips need a power-of-two fence region when tiling. This
+ * might set an unaligned object's linear size between its last full
+ * tile row offset and the next tile row offset, which is a problem as
+ * pointed out above. For these objects we'll overallocate GTT space
+ * up to the next tile row offset, see i915_gem_get_gtt_physical_size().
+ */
if (INTEL_INFO(dev)->gen == 3)
gtt_size = 1024*1024;
else
@@ -1474,6 +1497,28 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
return gtt_size;
}
+uint32_t
+i915_gem_get_gtt_physical_size(struct drm_device *dev, uint32_t size,
+ int tiling_mode, int stride)
+{
+ uint32_t linear_size;
+
+ if (INTEL_INFO(dev)->gen >= 4 || tiling_mode == I915_TILING_NONE)
+ return size;
+
+ linear_size = i915_gem_get_gtt_linear_size(dev, size, tiling_mode,
+ stride);
+ /*
+ * Overallocate in case linear_size is greater than the object's last
+ * full tile row offset and smaller than the tile row offset after
+ * that.
+ */
+ size = roundup(size,
+ i915_gem_get_tile_row_size(dev, tiling_mode, stride));
+
+ return max(size, linear_size);
+}
+
/**
* i915_gem_get_gtt_alignment - return required GTT alignment for an object
* @obj: object to check
@@ -1483,7 +1528,7 @@ i915_gem_get_gtt_size(struct drm_device *dev, uint32_t size, int tiling_mode)
*/
uint32_t
i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
- int tiling_mode, bool fenced)
+ int tiling_mode, int stride, bool fenced)
{
/*
* Minimum alignment is 4k (GTT page size), but might be greater
@@ -1497,7 +1542,7 @@ i915_gem_get_gtt_alignment(struct drm_device *dev, uint32_t size,
* Previous chips need to be aligned to the size of the smallest
* fence register that can contain the object.
*/
- return i915_gem_get_gtt_size(dev, size, tiling_mode);
+ return i915_gem_get_gtt_linear_size(dev, size, tiling_mode, stride);
}
static int i915_gem_object_create_mmap_offset(struct drm_i915_gem_object *obj)
@@ -2542,8 +2587,11 @@ static void i965_write_fence_reg(struct drm_device *dev, int reg,
}
if (obj) {
- u32 size = obj->gtt_space->size;
+ u32 size;
+ size = i915_gem_get_gtt_linear_size(dev, obj->base.size,
+ obj->tiling_mode,
+ obj->stride);
val = (uint64_t)((obj->gtt_offset + size - 4096) &
0xfffff000) << 32;
val |= obj->gtt_offset & 0xfffff000;
@@ -2566,10 +2614,13 @@ static void i915_write_fence_reg(struct drm_device *dev, int reg,
u32 val;
if (obj) {
- u32 size = obj->gtt_space->size;
+ u32 size;
int pitch_val;
int tile_width;
+ size = i915_gem_get_gtt_linear_size(dev, obj->base.size,
+ obj->tiling_mode,
+ obj->stride);
WARN((obj->gtt_offset & ~I915_FENCE_START_MASK) ||
(size & -size) != size ||
(obj->gtt_offset & (size - 1)),
@@ -2610,9 +2661,12 @@ static void i830_write_fence_reg(struct drm_device *dev, int reg,
uint32_t val;
if (obj) {
- u32 size = obj->gtt_space->size;
+ u32 size;
uint32_t pitch_val;
+ size = i915_gem_get_gtt_linear_size(dev, obj->base.size,
+ obj->tiling_mode,
+ obj->stride);
WARN((obj->gtt_offset & ~I830_FENCE_START_MASK) ||
(size & -size) != size ||
(obj->gtt_offset & (size - 1)),
@@ -2903,16 +2957,17 @@ i915_gem_object_bind_to_gtt(struct drm_i915_gem_object *obj,
return -EINVAL;
}
- fence_size = i915_gem_get_gtt_size(dev,
- obj->base.size,
- obj->tiling_mode);
+ fence_size = i915_gem_get_gtt_physical_size(dev, obj->base.size,
+ obj->tiling_mode, obj->stride);
fence_alignment = i915_gem_get_gtt_alignment(dev,
obj->base.size,
- obj->tiling_mode, true);
+ obj->tiling_mode,
+ obj->stride, true);
unfenced_alignment =
i915_gem_get_gtt_alignment(dev,
obj->base.size,
- obj->tiling_mode, false);
+ obj->tiling_mode,
+ obj->stride, false);
if (alignment == 0)
alignment = map_and_fenceable ? fence_alignment :
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 6d69d06..1af6f60 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -253,7 +253,8 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode)
/* Is the current GTT allocation valid for the change in tiling? */
static bool
-i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
+i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode,
+ int stride)
{
u32 size;
u32 align;
@@ -261,6 +262,11 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
if (tiling_mode == I915_TILING_NONE)
return true;
+ size = i915_gem_get_gtt_physical_size(obj->base.dev, obj->base.size,
+ tiling_mode, stride);
+ if (obj->gtt_space->size != size)
+ return false;
+
if (INTEL_INFO(obj->base.dev)->gen >= 4)
return true;
@@ -272,12 +278,8 @@ i915_gem_object_fence_ok(struct drm_i915_gem_object *obj, int tiling_mode)
return false;
}
- size = i915_gem_get_gtt_size(obj->base.dev, obj->base.size, tiling_mode);
- if (obj->gtt_space->size != size)
- return false;
-
align = i915_gem_get_gtt_alignment(obj->base.dev, obj->base.size,
- tiling_mode, true);
+ tiling_mode, stride, true);
if (obj->gtt_offset & (align - 1))
return false;
@@ -361,13 +363,15 @@ i915_gem_set_tiling(struct drm_device *dev, void *data,
obj->map_and_fenceable =
obj->gtt_space == NULL ||
(obj->gtt_offset + obj->base.size <= dev_priv->mm.gtt_mappable_end &&
- i915_gem_object_fence_ok(obj, args->tiling_mode));
+ i915_gem_object_fence_ok(obj, args->tiling_mode,
+ args->stride));
/* Rebind if we need a change of alignment */
if (!obj->map_and_fenceable) {
u32 unfenced_alignment =
i915_gem_get_gtt_alignment(dev, obj->base.size,
args->tiling_mode,
+ args->stride,
false);
if (obj->gtt_offset & (unfenced_alignment - 1))
ret = i915_gem_object_unbind(obj);
--
1.7.10.4
More information about the Intel-gfx
mailing list