xf86-video-intel: 7 commits - src/sna/blt.c src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_blt.c src/sna/sna_composite.c src/sna/sna.h src/sna/sna_io.c
Chris Wilson
ickle at kemper.freedesktop.org
Sat Jun 22 00:34:14 PDT 2013
src/sna/blt.c | 324 ++++++++++++++++++++++++++++++++++++------------
src/sna/kgem.c | 48 +++++--
src/sna/kgem.h | 24 +++
src/sna/sna.h | 7 -
src/sna/sna_accel.c | 163 ++++++++++++++++++++----
src/sna/sna_blt.c | 80 +++++++----
src/sna/sna_composite.c | 2
src/sna/sna_io.c | 15 --
8 files changed, 495 insertions(+), 168 deletions(-)
New commits:
commit 62e42de300275a668a326357d454062221714fe8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jun 21 21:00:23 2013 +0100
sna: Determine swizzling once during initialisation and choose memcpy_to_tiled_x
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
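The four specialised copiers introduced below differ only in how they perturb bit 6 of each 64-byte chunk's offset, which is why the swizzle switch can be hoisted out of the inner loop entirely. For reference, the three transforms consolidated into one sketch (the helper name swizzle_offset is hypothetical; the I915_BIT_6_SWIZZLE_* constants are the kernel's):

    /* Bit-6 swizzling: the memory controller XORs physical address
     * bit 6 with bit 9 (and possibly bit 10 or bit 11), so a CPU copy
     * into an X-tiled buffer must apply the same XOR per 64-byte
     * chunk.  "(offset >> 3) & 64" moves bit 9 down to the bit-6
     * position.
     */
    static uint32_t swizzle_offset(uint32_t offset, int mode)
    {
            switch (mode) {
            case I915_BIT_6_SWIZZLE_9:      /* bit6 ^= bit9 */
                    return offset ^ ((offset >> 3) & 64);
            case I915_BIT_6_SWIZZLE_9_10:   /* bit6 ^= bit9 ^ bit10 */
                    return offset ^ (((offset ^ (offset >> 1)) >> 3) & 64);
            case I915_BIT_6_SWIZZLE_9_11:   /* bit6 ^= bit9 ^ bit11 */
                    return offset ^ (((offset ^ (offset >> 2)) >> 3) & 64);
            default:                        /* I915_BIT_6_SWIZZLE_NONE */
                    return offset;
            }
    }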
diff --git a/src/sna/blt.c b/src/sna/blt.c
index af87667..4dbd9e8 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -213,12 +213,12 @@ memcpy_blt(const void *src, void *dst, int bpp,
}
}
-fast_memcpy void
-memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
- int32_t src_stride, int32_t dst_stride,
- int16_t src_x, int16_t src_y,
- int16_t dst_x, int16_t dst_y,
- uint16_t width, uint16_t height)
+static fast_memcpy void
+memcpy_to_tiled_x__swizzle_0(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
{
const unsigned tile_width = 512;
const unsigned tile_height = 8;
@@ -226,14 +226,14 @@ memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
const unsigned cpp = bpp / 8;
const unsigned stride_tiles = dst_stride / tile_width;
- const unsigned swizzle_pixels = (swizzling ? 64 : tile_width) / cpp;
+ const unsigned swizzle_pixels = tile_width / cpp;
const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
const unsigned tile_mask = (1 << tile_pixels) - 1;
unsigned x, y;
- DBG(("%s(bpp=%d, swizzling=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
- __FUNCTION__, bpp, swizzling, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
@@ -252,19 +252,71 @@ memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
offset = tile_row +
(dx >> tile_pixels) * tile_size +
(dx & tile_mask) * cpp;
- switch (swizzling) {
- case I915_BIT_6_SWIZZLE_NONE:
- break;
- case I915_BIT_6_SWIZZLE_9:
- offset ^= (offset >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
- break;
- }
+ memcpy((char *)dst + offset, src_row, length * cpp);
+
+ src_row += length * cpp;
+ x -= length * cpp;
+ dx += length;
+ }
+ while (x >= 512) {
+ assert((dx & tile_mask) == 0);
+ offset = tile_row + (dx >> tile_pixels) * tile_size;
+
+ memcpy((char *)dst + offset, src_row, 512);
+
+ src_row += 512;
+ x -= 512;
+ dx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ memcpy((char *)dst + offset, src_row, x);
+ }
+ }
+}
+
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = dst_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t dy = y + dst_y;
+ const uint32_t tile_row =
+ (dy / tile_height * stride_tiles * tile_size +
+ (dy & (tile_height-1)) * tile_width);
+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+ uint32_t dx = dst_x, offset;
+
+ x = width * cpp;
+ if (dx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
memcpy((char *)dst + offset, src_row, length * cpp);
@@ -272,64 +324,184 @@ memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
x -= length * cpp;
dx += length;
}
- if (swizzling) {
- while (x >= 64) {
- offset = tile_row +
- (dx >> tile_pixels) * tile_size +
- (dx & tile_mask) * cpp;
- switch (swizzling) {
- case I915_BIT_6_SWIZZLE_9:
- offset ^= (offset >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
- break;
- }
-
- memcpy((char *)dst + offset, src_row, 64);
-
- src_row += 64;
- x -= 64;
- dx += swizzle_pixels;
- }
- } else {
- while (x >= 512) {
- assert((dx & tile_mask) == 0);
- offset = tile_row + (dx >> tile_pixels) * tile_size;
+ while (x >= 64) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= (offset >> 3) & 64;
- memcpy((char *)dst + offset, src_row, 512);
+ memcpy((char *)dst + offset, src_row, 64);
- src_row += 512;
- x -= 512;
- dx += swizzle_pixels;
- }
+ src_row += 64;
+ x -= 64;
+ dx += swizzle_pixels;
}
if (x) {
offset = tile_row +
(dx >> tile_pixels) * tile_size +
(dx & tile_mask) * cpp;
- switch (swizzling) {
- case I915_BIT_6_SWIZZLE_NONE:
- break;
- case I915_BIT_6_SWIZZLE_9:
- offset ^= (offset >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_10:
- offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
- break;
- case I915_BIT_6_SWIZZLE_9_11:
- offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
- break;
- }
+ offset ^= (offset >> 3) & 64;
+ memcpy((char *)dst + offset, src_row, x);
+ }
+ }
+}
+
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9_10(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = dst_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+ for (y = 0; y < height; ++y) {
+ const uint32_t dy = y + dst_y;
+ const uint32_t tile_row =
+ (dy / tile_height * stride_tiles * tile_size +
+ (dy & (tile_height-1)) * tile_width);
+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+ uint32_t dx = dst_x, offset;
+
+ x = width * cpp;
+ if (dx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+
+ memcpy((char *)dst + offset, src_row, length * cpp);
+
+ src_row += length * cpp;
+ x -= length * cpp;
+ dx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
+
+ memcpy((char *)dst + offset, src_row, 64);
+
+ src_row += 64;
+ x -= 64;
+ dx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 1)) >> 3) & 64;
memcpy((char *)dst + offset, src_row, x);
}
}
}
+fast_memcpy static void
+memcpy_to_tiled_x__swizzle_9_11(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ const unsigned tile_width = 512;
+ const unsigned tile_height = 8;
+ const unsigned tile_size = 4096;
+
+ const unsigned cpp = bpp / 8;
+ const unsigned stride_tiles = dst_stride / tile_width;
+ const unsigned swizzle_pixels = 64 / cpp;
+ const unsigned tile_pixels = ffs(tile_width / cpp) - 1;
+ const unsigned tile_mask = (1 << tile_pixels) - 1;
+
+ unsigned x, y;
+
+ DBG(("%s(bpp=%d): src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n",
+ __FUNCTION__, bpp, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride));
+
+ src = (const uint8_t *)src + src_y * src_stride + src_x * cpp;
+
+ for (y = 0; y < height; ++y) {
+ const uint32_t dy = y + dst_y;
+ const uint32_t tile_row =
+ (dy / tile_height * stride_tiles * tile_size +
+ (dy & (tile_height-1)) * tile_width);
+ const uint8_t *src_row = (const uint8_t *)src + src_stride * y;
+ uint32_t dx = dst_x, offset;
+
+ x = width * cpp;
+ if (dx & (swizzle_pixels - 1)) {
+ const uint32_t swizzle_bound_pixels = ALIGN(dx + 1, swizzle_pixels);
+ const uint32_t length = min(dst_x + width, swizzle_bound_pixels) - dx;
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+ memcpy((char *)dst + offset, src_row, length * cpp);
+
+ src_row += length * cpp;
+ x -= length * cpp;
+ dx += length;
+ }
+ while (x >= 64) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+
+ memcpy((char *)dst + offset, src_row, 64);
+
+ src_row += 64;
+ x -= 64;
+ dx += swizzle_pixels;
+ }
+ if (x) {
+ offset = tile_row +
+ (dx >> tile_pixels) * tile_size +
+ (dx & tile_mask) * cpp;
+ offset ^= ((offset ^ (offset >> 2)) >> 3) & 64;
+ memcpy((char *)dst + offset, src_row, x);
+ }
+ }
+}
+
+void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling)
+{
+ switch (swizzling) {
+ default:
+ case I915_BIT_6_SWIZZLE_NONE:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_0;
+ break;
+ case I915_BIT_6_SWIZZLE_9:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9;
+ break;
+ case I915_BIT_6_SWIZZLE_9_10:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_10;
+ break;
+ case I915_BIT_6_SWIZZLE_9_11:
+ kgem->memcpy_to_tiled_x = memcpy_to_tiled_x__swizzle_9_11;
+ break;
+ }
+}
+
void
memmove_box(const void *src, void *dst,
int bpp, int32_t stride,
@@ -561,10 +733,10 @@ memcpy_xor(const void *src, void *dst, int bpp,
while (i >= 16) {
__m128i xmm1, xmm2, xmm3, xmm4;
- xmm1 = xmm_load_128u((__m128i*)s + 0);
- xmm2 = xmm_load_128u((__m128i*)s + 1);
- xmm3 = xmm_load_128u((__m128i*)s + 2);
- xmm4 = xmm_load_128u((__m128i*)s + 3);
+ xmm1 = xmm_load_128u((const __m128i*)s + 0);
+ xmm2 = xmm_load_128u((const __m128i*)s + 1);
+ xmm3 = xmm_load_128u((const __m128i*)s + 2);
+ xmm4 = xmm_load_128u((const __m128i*)s + 3);
xmm_save_128((__m128i*)d + 0,
_mm_or_si128(xmm1, mask));
@@ -583,8 +755,8 @@ memcpy_xor(const void *src, void *dst, int bpp,
if (i & 8) {
__m128i xmm1, xmm2;
- xmm1 = xmm_load_128u((__m128i*)s + 0);
- xmm2 = xmm_load_128u((__m128i*)s + 1);
+ xmm1 = xmm_load_128u((const __m128i*)s + 0);
+ xmm2 = xmm_load_128u((const __m128i*)s + 1);
xmm_save_128((__m128i*)d + 0,
_mm_or_si128(xmm1, mask));
@@ -597,7 +769,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
if (i & 4) {
xmm_save_128((__m128i*)d,
- _mm_or_si128(xmm_load_128u((__m128i*)s),
+ _mm_or_si128(xmm_load_128u((const __m128i*)s),
mask));
d += 4;
@@ -643,7 +815,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
case 2:
do {
uint16_t *d = (uint16_t *)dst_bytes;
- uint16_t *s = (uint16_t *)src_bytes;
+ const uint16_t *s = (const uint16_t *)src_bytes;
for (i = 0; i < width; i++)
d[i] = (s[i] & and) | or;
@@ -656,7 +828,7 @@ memcpy_xor(const void *src, void *dst, int bpp,
case 4:
do {
uint32_t *d = (uint32_t *)dst_bytes;
- uint32_t *s = (uint32_t *)src_bytes;
+ const uint32_t *s = (const uint32_t *)src_bytes;
for (i = 0; i < width; i++)
d[i] = (s[i] & and) | or;
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 66dce47..b32ceee 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -964,6 +964,39 @@ err:
return false;
}
+static void kgem_init_swizzling(struct kgem *kgem)
+{
+ struct drm_i915_gem_get_tiling tiling;
+
+#ifndef __x86_64__
+ /* Between a register starved compiler emitting attrocious code
+ * and the extra overhead in the kernel for managing the tight
+ * 32-bit address space, unless we have a 64-bit system,
+ * using memcpy_to_tiled_x() is extremely slow.
+ */
+ return;
+#endif
+
+ if (kgem->gen < 050) /* bit17 swizzling :( */
+ return;
+
+ VG_CLEAR(tiling);
+ tiling.handle = gem_create(kgem->fd, 1);
+ if (!tiling.handle)
+ return;
+
+ if (!gem_set_tiling(kgem->fd, tiling.handle, I915_TILING_X, 512))
+ goto out;
+
+ if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
+ goto out;
+
+ choose_memcpy_to_tiled_x(kgem, tiling.swizzle_mode);
+out:
+ gem_close(kgem->fd, tiling.handle);
+}
+
+
void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
{
struct drm_i915_gem_get_aperture aperture;
@@ -1212,6 +1245,8 @@ void kgem_init(struct kgem *kgem, int fd, struct pci_device *dev, unsigned gen)
kgem->batch_flags_base |= LOCAL_I915_EXEC_HANDLE_LUT;
if (kgem->has_pinned_batches)
kgem->batch_flags_base |= LOCAL_I915_EXEC_IS_PINNED;
+
+ kgem_init_swizzling(kgem);
}
/* XXX hopefully a good approximation */
@@ -5797,19 +5832,6 @@ void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset)
}
}
-int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo)
-{
- struct drm_i915_gem_get_tiling tiling;
-
- VG_CLEAR(tiling);
- tiling.handle = bo->handle;
- if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &tiling))
- return 0;
-
- assert(bo->tiling == tiling.tiling_mode);
- return tiling.swizzle_mode;
-}
-
struct kgem_bo *
kgem_replace_bo(struct kgem *kgem,
struct kgem_bo *src,
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 33a4db0..91a38f7 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -196,6 +196,12 @@ struct kgem {
void (*retire)(struct kgem *kgem);
void (*expire)(struct kgem *kgem);
+ void (*memcpy_to_tiled_x)(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height);
+
uint16_t reloc__self[256];
uint32_t batch[64*1024-8] page_aligned;
struct drm_i915_gem_exec_object2 exec[256] page_aligned;
@@ -286,7 +292,6 @@ struct kgem_bo *kgem_create_cpu_2d(struct kgem *kgem,
uint32_t kgem_bo_get_binding(struct kgem_bo *bo, uint32_t format);
void kgem_bo_set_binding(struct kgem_bo *bo, uint32_t format, uint16_t offset);
-int kgem_bo_get_swizzling(struct kgem *kgem, struct kgem_bo *bo);
bool kgem_retire(struct kgem *kgem);
@@ -693,4 +698,21 @@ static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch)
}
#endif
+static inline void
+memcpy_to_tiled_x(struct kgem *kgem,
+ const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height)
+{
+ return kgem->memcpy_to_tiled_x(src, dst, bpp,
+ src_stride, dst_stride,
+ src_x, src_y,
+ dst_x, dst_y,
+ width, height);
+}
+
+void choose_memcpy_to_tiled_x(struct kgem *kgem, int swizzling);
+
#endif /* KGEM_H */
diff --git a/src/sna/sna.h b/src/sna/sna.h
index da5d8af..f720c64 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -848,12 +848,7 @@ memcpy_blt(const void *src, void *dst, int bpp,
int16_t src_x, int16_t src_y,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
-void
-memcpy_to_tiled_x(const void *src, void *dst, int bpp, int swizzling,
- int32_t src_stride, int32_t dst_stride,
- int16_t src_x, int16_t src_y,
- int16_t dst_x, int16_t dst_y,
- uint16_t width, uint16_t height);
+
void
memmove_box(const void *src, void *dst,
int bpp, int32_t stride,
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 599cfc1..44b87cd 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3868,15 +3868,7 @@ static inline void box32_add_rect(Box32Rec *box, const xRectangle *r)
static bool can_upload_tiled_x(struct kgem *kgem, struct kgem_bo *bo)
{
-#ifndef __x86_64__
- /* Between a register starved compiler emitting attrocious code
- * and the extra overhead in the kernel for managing the tight
- * 32-bit address space, unless we have a 64-bit system,
- * using memcpy_to_tiled_x() is extremely slow.
- */
- return false;
-#endif
- if (kgem->gen < 050) /* bit17 swizzling :( */
+ if (!kgem->memcpy_to_tiled_x)
return false;
if (bo->tiling != I915_TILING_X)
@@ -3896,7 +3888,6 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
struct sna_pixmap *priv = sna_pixmap(pixmap);
BoxRec *box;
uint8_t *dst;
- int swizzle;
int n;
DBG(("%s: bo? %d, can tile? %d\n", __FUNCTION__,
@@ -3919,10 +3910,9 @@ try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
DBG(("%s: upload(%d, %d, %d, %d) x %d\n", __FUNCTION__, x, y, w, h, n));
kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
- swizzle = kgem_bo_get_swizzling(&sna->kgem, priv->gpu_bo);
do {
- memcpy_to_tiled_x(bits, dst,
- pixmap->drawable.bitsPerPixel, swizzle,
+ memcpy_to_tiled_x(&sna->kgem, bits, dst,
+ pixmap->drawable.bitsPerPixel,
stride, priv->gpu_bo->pitch,
box->x1 - x, box->y1 - y,
box->x1, box->y1,
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 1ec1a60..e51c033 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -477,16 +477,7 @@ fallback:
static bool upload_inplace__tiled(struct kgem *kgem, struct kgem_bo *bo)
{
-#ifndef __x86_64__
- /* Between a register starved compiler emitting attrocious code
- * and the extra overhead in the kernel for managing the tight
- * 32-bit address space, unless we have a 64-bit system,
- * using memcpy_to_tiled_x() is extremely slow.
- */
- return false;
-#endif
-
- if (kgem->gen < 050) /* bit17 swizzling :( */
+ if (!kgem->memcpy_to_tiled_x)
return false;
if (bo->tiling != I915_TILING_X)
@@ -505,7 +496,6 @@ write_boxes_inplace__tiled(struct kgem *kgem,
const BoxRec *box, int n)
{
uint8_t *dst;
- int swizzle;
assert(bo->tiling == I915_TILING_X);
@@ -514,9 +504,8 @@ write_boxes_inplace__tiled(struct kgem *kgem,
return false;
kgem_bo_sync__cpu(kgem, bo);
- swizzle = kgem_bo_get_swizzling(kgem, bo);
do {
- memcpy_to_tiled_x(src, dst, bpp, swizzle, stride, bo->pitch,
+ memcpy_to_tiled_x(kgem, src, dst, bpp, stride, bo->pitch,
box->x1 + src_dx, box->y1 + src_dy,
box->x1 + dst_dx, box->y1 + dst_dy,
box->x2 - box->x1, box->y2 - box->y1);
commit 53c113c3cc2f8527debc185f0819139ca8637637
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jun 21 19:27:24 2013 +0100
sna: Allow PutImage to write inplace using manual tiling
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
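The ordering in sna_put_zpixmap_blt matters here: the inplace attempt has to run before the move-to-CPU fallback, because migrating the region would pull the damage off the GPU bo that the fast path wants to write into. A sketch of the reordered entry point, with the reasoning as comments (names as in the diff below):

    /* Try writing straight into the tiled GPU bo first... */
    if (try_upload_tiled_x(pixmap, region, x, y, w, h, bits, stride))
            return true;

    /* ...and only migrate the region to the CPU if that fails. */
    if (!sna_drawable_move_region_to_cpu(&pixmap->drawable,
                                         region, MOVE_WRITE))
            return false;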
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 277bab6..599cfc1 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3866,6 +3866,74 @@ static inline void box32_add_rect(Box32Rec *box, const xRectangle *r)
box->y2 = v;
}
+static bool can_upload_tiled_x(struct kgem *kgem, struct kgem_bo *bo)
+{
+#ifndef __x86_64__
+ /* Between a register starved compiler emitting attrocious code
+ * and the extra overhead in the kernel for managing the tight
+ * 32-bit address space, unless we have a 64-bit system,
+ * using memcpy_to_tiled_x() is extremely slow.
+ */
+ return false;
+#endif
+ if (kgem->gen < 050) /* bit17 swizzling :( */
+ return false;
+
+ if (bo->tiling != I915_TILING_X)
+ return false;
+
+ if (bo->scanout)
+ return false;
+
+ return bo->domain == DOMAIN_CPU || kgem->has_llc;
+}
+
+static bool
+try_upload_tiled_x(PixmapPtr pixmap, RegionRec *region,
+ int x, int y, int w, int h, char *bits, int stride)
+{
+ struct sna *sna = to_sna_from_pixmap(pixmap);
+ struct sna_pixmap *priv = sna_pixmap(pixmap);
+ BoxRec *box;
+ uint8_t *dst;
+ int swizzle;
+ int n;
+
+ DBG(("%s: bo? %d, can tile? %d\n", __FUNCTION__,
+ priv->gpu_bo != NULL,
+ priv->gpu_bo ? can_upload_tiled(&sna->kgem, priv->gpu_bo) : 0));
+
+ if (!DAMAGE_IS_ALL(priv->gpu_damage) ||
+ !can_upload_tiled_x(&sna->kgem, priv->gpu_bo))
+ return false;
+
+ assert(priv->gpu_bo->tiling == I915_TILING_X);
+
+ dst = __kgem_bo_map__cpu(&sna->kgem, priv->gpu_bo);
+ if (dst == NULL)
+ return false;
+
+ box = RegionRects(region);
+ n = RegionNumRects(region);
+
+ DBG(("%s: upload(%d, %d, %d, %d) x %d\n", __FUNCTION__, x, y, w, h, n));
+
+ kgem_bo_sync__cpu(&sna->kgem, priv->gpu_bo);
+ swizzle = kgem_bo_get_swizzling(&sna->kgem, priv->gpu_bo);
+ do {
+ memcpy_to_tiled_x(bits, dst,
+ pixmap->drawable.bitsPerPixel, swizzle,
+ stride, priv->gpu_bo->pitch,
+ box->x1 - x, box->y1 - y,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1);
+ box++;
+ } while (--n);
+ __kgem_bo_unmap__cpu(&sna->kgem, priv->gpu_bo, dst);
+
+ return true;
+}
+
static bool
sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
int x, int y, int w, int h, char *bits, int stride)
@@ -3883,14 +3951,17 @@ sna_put_zpixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
if (drawable->depth < 8)
return false;
- if (!sna_drawable_move_region_to_cpu(&pixmap->drawable,
- region, MOVE_WRITE))
- return false;
-
get_drawable_deltas(drawable, pixmap, &dx, &dy);
x += dx + drawable->x;
y += dy + drawable->y;
+ if (try_upload_tiled_x(pixmap, region, x, y, w, h, bits, stride))
+ return true;
+
+ if (!sna_drawable_move_region_to_cpu(&pixmap->drawable,
+ region, MOVE_WRITE))
+ return false;
+
DBG(("%s: upload(%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h));
/* Region is pre-clipped and translated into pixmap space */
@@ -4330,7 +4401,7 @@ source_contains_region(struct sna_damage *damage,
static bool
move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
- const RegionRec *region, int16_t dx, int16_t dy,
+ RegionRec *region, int16_t dx, int16_t dy,
uint8_t alu, bool dst_is_gpu)
{
int w = region->extents.x2 - region->extents.x1;
@@ -14488,7 +14559,7 @@ static void sna_accel_post_damage(struct sna *sna)
DBG(("%s: slave: ((%d, %d), (%d, %d))x%d\n", __FUNCTION__,
region.extents.x1, region.extents.y1,
region.extents.x2, region.extents.y2,
- RegionNumRects(&region.extents)));
+ RegionNumRects(&region)));
box = RegionRects(&region);
n = RegionNumRects(&region);
commit 48028a7c923fa0d66b01e8e94d4f0742866f78ec
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jun 21 14:29:43 2013 +0100
sna: Inspect availability of render before preferring to use the GPU
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
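The change replaces the single up-front sna_drawable_use_bo() call with per-path destination selection, so each of the three call sites (clear, fill, copy) now builds its hint the same way. The recurring pattern, with the flags spelled out (a sketch; the rationale comments are an interpretation, not from the commit):

    hint = 0;
    if (can_render(sna))            /* is the render engine usable, or
                                     * would GPU work have to fall back? */
            hint |= PREFER_GPU;
    if (dst->pCompositeClip->data == NULL)
            hint |= IGNORE_CPU;     /* single clip box: the write covers
                                     * its full extents */
    tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
                                      &dst_box, &tmp->damage);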
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 7002638..08960fc 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -1981,21 +1981,6 @@ is_clear(PixmapPtr pixmap)
return priv && priv->clear;
}
-static struct kgem_bo *
-peek_bo(DrawablePtr draw)
-{
- struct sna_pixmap *priv;
-
- if (draw == NULL)
- return NULL;
-
- priv = sna_pixmap(get_drawable_pixmap(draw));
- if (priv == NULL)
- return NULL;
-
- return priv->gpu_bo;
-}
-
bool
sna_blt_composite(struct sna *sna,
uint32_t op,
@@ -2013,6 +1998,7 @@ sna_blt_composite(struct sna *sna,
int16_t tx, ty;
BoxRec dst_box, src_box;
uint32_t alpha_fixup;
+ uint32_t color, hint;
bool was_clear;
bool ret;
@@ -2045,28 +2031,30 @@ sna_blt_composite(struct sna *sna,
} else
sna_render_picture_extents(dst, &dst_box);
- bo = sna_pixmap(tmp->dst.pixmap)->gpu_bo;
- if (bo == NULL || bo != peek_bo(src->pDrawable))
- bo = sna_drawable_use_bo(dst->pDrawable, PREFER_GPU,
- &dst_box, &tmp->damage);
- if (bo && !kgem_bo_can_blt(&sna->kgem, bo)) {
- DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
- __FUNCTION__, bo->tiling, bo->pitch));
- return false;
- }
-
tmp->dst.format = dst->format;
tmp->dst.width = tmp->dst.pixmap->drawable.width;
tmp->dst.height = tmp->dst.pixmap->drawable.height;
get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap,
&tmp->dst.x, &tmp->dst.y);
- tmp->dst.bo = bo;
if (op == PictOpClear) {
clear:
if (was_clear)
return prepare_blt_nop(sna, tmp);
+ hint = 0;
+ if (can_render(sna))
+ hint |= PREFER_GPU;
+ if (dst->pCompositeClip->data == NULL)
+ hint |= IGNORE_CPU;
+ tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
+ &dst_box, &tmp->damage);
+ if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
+ DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
+ __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
+ return false;
+ }
+
if (!tmp->dst.bo) {
RegionRec region;
@@ -2096,6 +2084,21 @@ clear:
return false;
}
+ color = get_solid_color(src, tmp->dst.format);
+fill:
+ hint = 0;
+ if (can_render(sna))
+ hint |= PREFER_GPU;
+ if (dst->pCompositeClip->data == NULL)
+ hint |= IGNORE_CPU;
+ tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
+ &dst_box, &tmp->damage);
+ if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
+ DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
+ __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
+ return false;
+ }
+
if (!tmp->dst.bo) {
RegionRec region;
@@ -2107,7 +2110,7 @@ clear:
return false;
}
- return prepare_blt_fill(sna, tmp, get_solid_color(src, tmp->dst.format));
+ return prepare_blt_fill(sna, tmp, color);
}
if (!src->pDrawable) {
@@ -2151,9 +2154,9 @@ clear:
src_pixmap = get_drawable_pixmap(src->pDrawable);
if (is_clear(src_pixmap)) {
- return prepare_blt_fill(sna, tmp,
- color_convert(sna_pixmap(src_pixmap)->clear_color,
- src->format, tmp->dst.format));
+ color = color_convert(sna_pixmap(src_pixmap)->clear_color,
+ src->format, tmp->dst.format);
+ goto fill;
}
alpha_fixup = 0;
@@ -2214,6 +2217,23 @@ clear:
src_box.x2 = x + width;
src_box.y2 = y + height;
bo = NULL;
+
+ hint = 0;
+ if (can_render(sna))
+ hint |= PREFER_GPU;
+ if (dst->pCompositeClip->data == NULL)
+ hint |= IGNORE_CPU;
+ if (source_is_gpu(src_pixmap, &src_box))
+ hint |= FORCE_GPU;
+
+ tmp->dst.bo = sna_drawable_use_bo(dst->pDrawable, hint,
+ &dst_box, &tmp->damage);
+ if (tmp->dst.bo && !kgem_bo_can_blt(&sna->kgem, tmp->dst.bo)) {
+ DBG(("%s: can not blit to dst, tiling? %d, pitch? %d\n",
+ __FUNCTION__, tmp->dst.bo->tiling, tmp->dst.bo->pitch));
+ return false;
+ }
+
if (tmp->dst.bo || source_is_gpu(src_pixmap, &src_box))
bo = __sna_render_pixmap_bo(sna, src_pixmap, &src_box, true);
if (bo) {
diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c
index 17cc68c..da3fd62 100644
--- a/src/sna/sna_composite.c
+++ b/src/sna/sna_composite.c
@@ -920,7 +920,7 @@ sna_composite_rectangles(CARD8 op,
* operation, then we may as well delete it without moving it
* first to the GPU.
*/
- hint = PREFER_GPU;
+ hint = can_render(sna) ? PREFER_GPU : 0;
if (op <= PictOpSrc) {
if (priv->cpu_damage &&
region_subsumes_damage(&region, priv->cpu_damage)) {
commit 71fc83401ec8c560a89a284805e849954ea18ee4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jun 21 14:28:38 2013 +0100
sna: Check if we may want to simply upload for a CopyArea
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 7d03023..277bab6 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4312,9 +4312,26 @@ out:
}
static bool
+source_contains_region(struct sna_damage *damage,
+ const RegionRec *region, int16_t dx, int16_t dy)
+{
+ BoxRec box;
+
+ if (DAMAGE_IS_ALL(damage))
+ return true;
+
+ box = region->extents;
+ box.x1 += dx;
+ box.x2 += dx;
+ box.y1 += dy;
+ box.y2 += dy;
+ return sna_damage_contains_box__no_reduce(damage, &box);
+}
+
+static bool
move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
const RegionRec *region, int16_t dx, int16_t dy,
- uint8_t alu)
+ uint8_t alu, bool dst_is_gpu)
{
int w = region->extents.x2 - region->extents.x1;
int h = region->extents.y2 - region->extents.y1;
@@ -4326,7 +4343,26 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
return true;
}
+ if (dst_is_gpu && priv->cpu_bo && priv->cpu_damage) {
+ DBG(("%s: can use CPU bo? cpu_damage=%d, gpu_damage=%d, cpu hint=%d\n",
+ __FUNCTION__,
+ priv->cpu_damage ? DAMAGE_IS_ALL(priv->cpu_damage) ? -1 : 1 : 0,
+ priv->gpu_damage ? DAMAGE_IS_ALL(priv->gpu_damage) ? -1 : 1 : 0,
+ priv->cpu));
+ if (DAMAGE_IS_ALL(priv->cpu_damage) || priv->gpu_damage == NULL)
+ return false;
+
+ if (priv->cpu &&
+ source_contains_region(priv->cpu_damage, region, dx, dy))
+ return false;
+ }
+
if (priv->gpu_bo) {
+ DBG(("%s: has gpu bo (cpu damage?=%d, cpu=%d, gpu tiling=%d)\n",
+ __FUNCTION__,
+ priv->cpu_damage ? DAMAGE_IS_ALL(priv->cpu_damage) ? -1 : 1 : 0,
+ priv->cpu, priv->gpu_bo->tiling));
+
if (priv->cpu_damage == NULL)
return true;
@@ -4548,7 +4584,8 @@ sna_pixmap_is_gpu(PixmapPtr pixmap)
}
static int
-source_prefer_gpu(struct sna *sna, struct sna_pixmap *priv)
+source_prefer_gpu(struct sna *sna, struct sna_pixmap *priv,
+ RegionRec *region, int16_t dx, int16_t dy)
{
if (priv == NULL) {
DBG(("%s: source unattached, use cpu\n", __FUNCTION__));
@@ -4560,10 +4597,13 @@ source_prefer_gpu(struct sna *sna, struct sna_pixmap *priv)
return 0;
}
- if (priv->gpu_damage) {
- DBG(("%s: source has gpu damage, force gpu\n", __FUNCTION__));
+ if (priv->gpu_damage &&
+ (priv->cpu_damage == NULL ||
+ !source_contains_region(priv->cpu_damage, region, dx, dy))) {
+ DBG(("%s: source has gpu damage, force gpu? %d\n",
+ __FUNCTION__, priv->cpu_damage == NULL));
assert(priv->gpu_bo);
- return PREFER_GPU | FORCE_GPU;
+ return priv->cpu_damage ? PREFER_GPU : PREFER_GPU | FORCE_GPU;
}
if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) {
@@ -4589,7 +4629,7 @@ static bool use_shm_bo(struct sna *sna,
return false;
}
- if (!priv->shm) {
+ if (!priv->shm && !priv->cpu) {
DBG(("%s: yes, ordinary CPU bo\n", __FUNCTION__));
return true;
}
@@ -4685,7 +4725,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
if (dst_priv == NULL)
goto fallback;
- hint = source_prefer_gpu(sna, src_priv) ?:
+ hint = source_prefer_gpu(sna, src_priv, region, src_dx, src_dy) ?:
region_inplace(sna, dst_pixmap, region,
dst_priv, alu_overwrites(alu));
if (dst_priv->cpu_damage && alu_overwrites(alu)) {
@@ -4765,7 +4805,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
}
if (src_priv &&
- move_to_gpu(src_pixmap, src_priv, region, src_dx, src_dy, alu) &&
+ move_to_gpu(src_pixmap, src_priv, region, src_dx, src_dy, alu, bo == dst_priv->gpu_bo) &&
sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ | MOVE_ASYNC_HINT)) {
DBG(("%s: move whole src_pixmap to GPU and copy\n",
__FUNCTION__));
commit 7e90e522199c4d6b479554073acb33e9d82fb8cc
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Fri Jun 21 14:27:42 2013 +0100
sna: Fix inspection of transfer extents for deciding transport
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
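Previously the heuristic was handed a single enclosing box (whichever damage or region extents the call site had at hand); now it measures the box array actually queued for download. A minimal illustration of the measurement (standalone type and hypothetical helper, not driver code):

    struct box { int y1, y2; };

    /* The boxes arrive sorted by y (see the new assert), so the
     * transfer spans box[0].y1 .. box[nbox-1].y2.  The download is
     * treated as "tiny" -- read inplace rather than detiled via the
     * CPU bo -- when (height - 1) * pitch < 4096, i.e. less than a
     * page of data.
     */
    static int transfer_height(const struct box *box, int nbox)
    {
            return box[nbox-1].y2 - box[0].y1;
    }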
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index d26d613..7d03023 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1505,7 +1505,7 @@ sna_pixmap_create_mappable_gpu(PixmapPtr pixmap,
static inline bool use_cpu_bo_for_download(struct sna *sna,
struct sna_pixmap *priv,
- const BoxRec *box)
+ int nbox, const BoxRec *box)
{
if (DBG_NO_CPU_DOWNLOAD)
return false;
@@ -1523,10 +1523,11 @@ static inline bool use_cpu_bo_for_download(struct sna *sna,
}
/* Is it worth detiling? */
+ assert(box[0].y1 < box[nbox-1].y2);
if (kgem_bo_is_mappable(&sna->kgem, priv->gpu_bo) &&
- (box->y2 - box->y1 - 1) * priv->gpu_bo->pitch < 4096) {
- DBG(("%s: no, tiny transfer, expect to read inplace\n",
- __FUNCTION__));
+ (box[nbox-1].y2 - box[0].y1 - 1) * priv->gpu_bo->pitch < 4096) {
+ DBG(("%s: no, tiny transfer (height=%d, pitch=%d) expect to read inplace\n",
+ __FUNCTION__, box[nbox-1].y2-box[0].y1, priv->gpu_bo->pitch));
return false;
}
@@ -2020,7 +2021,7 @@ skip_inplace_map:
if (n) {
bool ok = false;
- if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
+ if (use_cpu_bo_for_download(sna, priv, n, box)) {
DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
@@ -2406,7 +2407,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
assert(priv->gpu_bo);
ok = false;
- if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
+ if (use_cpu_bo_for_download(sna, priv, n, box)) {
DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
@@ -2522,7 +2523,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
if (n) {
bool ok = false;
- if (use_cpu_bo_for_download(sna, priv, &priv->gpu_damage->extents)) {
+ if (use_cpu_bo_for_download(sna, priv, n, box)) {
DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
@@ -2550,7 +2551,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
DBG(("%s: region wholly inside damage\n",
__FUNCTION__));
- if (use_cpu_bo_for_download(sna, priv, &r->extents)) {
+ if (use_cpu_bo_for_download(sna, priv, n, box)) {
DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
@@ -2578,7 +2579,7 @@ sna_drawable_move_region_to_cpu(DrawablePtr drawable,
DBG(("%s: region intersects damage\n",
__FUNCTION__));
- if (use_cpu_bo_for_download(sna, priv, &need.extents)) {
+ if (use_cpu_bo_for_download(sna, priv, n, box)) {
DBG(("%s: using CPU bo for download from GPU\n", __FUNCTION__));
ok = sna->render.copy_boxes(sna, GXcopy,
pixmap, priv->gpu_bo, 0, 0,
commit 94cbe7bf7b7acc9a7f2cb56c5e275af028f3cdc8
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 20 19:40:44 2013 +0100
sna: Mark overwriting CopyArea as not needing the dst damage
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
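The only change to alu_overwrites() here is the pure attribute (letting the compiler treat repeated calls as common subexpressions); its body is not in the hunk, so for context a sketch of what it is assumed to test:

    /* An alu "overwrites" when the result is independent of the
     * current dst pixels, so pending destination damage can simply be
     * discarded.  (Assumed to match the driver's implementation.)
     */
    static bool alu_overwrites(uint8_t alu)
    {
            switch (alu) {
            case GXclear:        /* dst = 0 */
            case GXcopy:         /* dst = src */
            case GXcopyInverted: /* dst = ~src */
            case GXset:          /* dst = 1 */
                    return true;
            default:
                    return false;
            }
    }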
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index a481388..d26d613 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -2693,7 +2693,7 @@ sna_drawable_move_to_cpu(DrawablePtr drawable, unsigned flags)
return sna_drawable_move_region_to_cpu(&pixmap->drawable, &region, flags);
}
-static bool alu_overwrites(uint8_t alu)
+pure static bool alu_overwrites(uint8_t alu)
{
switch (alu) {
case GXclear:
@@ -4700,10 +4700,8 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
list_del(&dst_priv->flush_list);
dst_priv->cpu = false;
}
- if (region->data == NULL)
- hint |= IGNORE_CPU;
}
- if (replaces)
+ if (alu_overwrites(alu))
hint |= IGNORE_CPU;
/* XXX hack for firefox -- subsequent uses of src will be corrupt! */
commit b3d1118bbee1172f72c946163a37ca4ad5feecce
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date: Thu Jun 20 19:34:18 2013 +0100
sna: Promote the CopyArea to the GPU if it subsumes the CPU damage
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 9cbecfe..a481388 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4312,10 +4312,11 @@ out:
static bool
move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
- const BoxRec *box, uint8_t alu)
+ const RegionRec *region, int16_t dx, int16_t dy,
+ uint8_t alu)
{
- int w = box->x2 - box->x1;
- int h = box->y2 - box->y1;
+ int w = region->extents.x2 - region->extents.x1;
+ int h = region->extents.y2 - region->extents.y1;
int count;
if (DAMAGE_IS_ALL(priv->gpu_damage)) {
@@ -4336,6 +4337,12 @@ move_to_gpu(PixmapPtr pixmap, struct sna_pixmap *priv,
if (priv->gpu_bo->tiling)
return true;
+
+ RegionTranslate(region, dx, dy);
+ count = region_subsumes_damage(region, priv->cpu_damage);
+ RegionTranslate(region, -dx, -dy);
+ if (count)
+ return true;
} else {
if ((priv->create & KGEM_CAN_CREATE_GPU) == 0)
return false;
@@ -4759,7 +4766,7 @@ sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc,
}
if (src_priv &&
- move_to_gpu(src_pixmap, src_priv, &region->extents, alu) &&
+ move_to_gpu(src_pixmap, src_priv, region, src_dx, src_dy, alu) &&
sna_pixmap_move_to_gpu(src_pixmap, MOVE_READ | MOVE_ASYNC_HINT)) {
DBG(("%s: move whole src_pixmap to GPU and copy\n",
__FUNCTION__));