xf86-video-intel: 4 commits - src/sna/kgem.c src/sna/kgem.h src/sna/sna_accel.c src/sna/sna_tiling.c

Chris Wilson ickle at kemper.freedesktop.org
Wed Nov 6 10:01:28 CET 2013


 src/sna/kgem.c       |   28 ++++++++++++++++++----------
 src/sna/kgem.h       |    2 +-
 src/sna/sna_accel.c  |    7 ++++++-
 src/sna/sna_tiling.c |    4 ++--
 4 files changed, 27 insertions(+), 14 deletions(-)

New commits:
commit ef842d2ceee4d1ccf8a0f8a81530dc8be8e18b44
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Wed Nov 6 08:56:01 2013 +0000

    sna: Be more pessimistic for tiling sizes on older gen
    
    On the older generations, we pay severe alignment penalties for fenced
    regions, which dramatically reduce the amount of space we can
    effectively use in a batch. To accommodate this, reduce the tiling
    step size.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c
index b0a48dd..d23fb00 100644
--- a/src/sna/sna_tiling.c
+++ b/src/sna/sna_tiling.c
@@ -718,8 +718,6 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 	}
 	if (max_size > sna->kgem.max_copy_tile_size)
 		max_size = sna->kgem.max_copy_tile_size;
-	if (sna->kgem.gen < 033)
-		max_size /= 2; /* accommodate fence alignment */
 
 	pixman_region_init_rects(&region, box, nbox);
 
@@ -729,6 +727,8 @@ bool sna_tiling_blt_copy_boxes(struct sna *sna, uint8_t alu,
 		step /= 2;
 	while (step * step * 4 > max_size)
 		step /= 2;
+	if (sna->kgem.gen < 033)
+		step /= 2; /* accommodate severe fence restrictions */
 	if (step == 0) {
 		DBG(("%s: tiles cannot fit into aperture\n", __FUNCTION__));
 		return false;
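
A minimal C sketch of the step-size selection this commit adjusts, using
illustrative names rather than the driver's actual helpers: the copy-tile
side length is halved until a 32bpp tile fits the size budget, and halved
once more on the older generations instead of pre-halving the whole budget
as the removed hunk did.

#include <stdint.h>

/* Stand-in for the selection logic in sna_tiling_blt_copy_boxes(). */
static int choose_copy_tile_step(int step, uint32_t max_size, unsigned gen)
{
	/* Halve the square tile until a 32bpp tile fits the size budget. */
	while ((uint32_t)step * step * 4 > max_size)
		step /= 2;

	/* Fences on the older parts impose coarse alignment, so a tiled
	 * region can consume far more aperture than its nominal footprint;
	 * shrinking the step leaves headroom for that rounding. */
	if (gen < 033)
		step /= 2;

	return step;	/* 0 => the tiles cannot fit into the aperture */
}
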
commit f2f9019bae5f6f03b5e23da759d3871fc18dd9f4
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 5 22:41:06 2013 +0000

    sna: Only operate inplace if no existing CPU damage for a read
    
    With a large object, we try harder to operate in place (to avoid
    creating a second large CPU bo). This introduced an issue where we
    tried to read from the GPU bo when there was already existing CPU
    damage, triggering an assertion.
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 02b319d..3176a77 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1848,6 +1848,11 @@ static inline bool operate_inplace(struct sna_pixmap *priv, unsigned flags)
 		return false;
 	}
 
+	if (priv->cpu_damage && flags & MOVE_READ) {
+		DBG(("%s: no, has CPU damage and requires readback\n", __FUNCTION__));
+		return false;
+	}
+
 	if (priv->cpu_bo && kgem_bo_is_busy(priv->cpu_bo)) {
 		DBG(("%s: yes, CPU is busy\n", __FUNCTION__));
 		return true;
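
A short sketch of the decision the new check enforces, with illustrative
flag values and a boolean stand-in for the driver's damage tracking: a
read request must not be satisfied in place from the GPU bo while CPU
damage is still pending, since the unflushed CPU-side writes would be
missed.

#include <stdbool.h>

#define MOVE_WRITE	0x1	/* illustrative values, not the driver's */
#define MOVE_READ	0x2

struct pixmap_hint {
	bool cpu_damage;	/* unflushed CPU-side writes exist */
	bool cpu_bo_busy;	/* CPU bo still in use by the GPU */
};

static bool may_operate_inplace(const struct pixmap_hint *p, unsigned flags)
{
	/* Reading through the GPU bo with CPU damage pending would return
	 * stale pixels - the case that tripped the assertion. */
	if (p->cpu_damage && (flags & MOVE_READ))
		return false;

	/* Otherwise prefer the GPU bo when waiting on the CPU bo would stall. */
	return p->cpu_bo_busy;
}
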
commit 7f901495cdef0ae3b4a328bb98a6bc0ff03ea362
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 5 21:59:37 2013 +0000

    sna: Trim the overestimate of required aperture space for fence alignment
    
    We can optimistically require only that we waste the size of the
    largest fence region in a batch, as all other fences will then be
    naturally aligned as well, provided the kernel succeeds in
    defragmenting the aperture...
    
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index fda92c1..0f9b443 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -2749,6 +2749,7 @@ void kgem_reset(struct kgem *kgem)
 	kgem->nreloc__self = 0;
 	kgem->aperture = 0;
 	kgem->aperture_fenced = 0;
+	kgem->aperture_max_fence = 0;
 	kgem->nbatch = 0;
 	kgem->surface = kgem->batch_size;
 	kgem->mode = KGEM_NONE;
@@ -4724,8 +4725,8 @@ static bool aperture_check(struct kgem *kgem, unsigned num_pages)
 		/* Leave some space in case of alignment issues */
 		aperture.aper_available_size -= 1024 * 1024;
 		aperture.aper_available_size -= kgem->aperture_mappable * PAGE_SIZE / 2;
-		if (kgem->gen < 040)
-			aperture.aper_available_size -= kgem->aperture_fenced * PAGE_SIZE;
+		if (kgem->gen < 033)
+			aperture.aper_available_size -= kgem->aperture_max_fence * PAGE_SIZE;
 		if (!kgem->has_llc)
 			aperture.aper_available_size -= 2 * kgem->nexec * PAGE_SIZE;
 
@@ -4841,10 +4842,12 @@ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
 				}
 			}
 
-			size = kgem->aperture_fenced;
-			size += kgem_bo_fenced_size(kgem, bo);
+			size = kgem_bo_fenced_size(kgem, bo);
+			if (size > kgem->aperture_max_fence)
+				kgem->aperture_max_fence = size;
+			size += kgem->aperture_fenced;
 			if (kgem->gen < 033)
-				size *= 2;
+				size += kgem->aperture_max_fence;
 			if (kgem->aperture_total == kgem->aperture_mappable)
 				size += kgem->aperture;
 			if (size > kgem->aperture_mappable) {
@@ -4885,10 +4888,12 @@ bool kgem_check_bo_fenced(struct kgem *kgem, struct kgem_bo *bo)
 			}
 		}
 
-		size = kgem->aperture_fenced;
-		size += kgem_bo_fenced_size(kgem, bo);
+		size = kgem_bo_fenced_size(kgem, bo);
+		if (size > kgem->aperture_max_fence)
+			kgem->aperture_max_fence = size;
+		size += kgem->aperture_fenced;
 		if (kgem->gen < 033)
-			size *= 2;
+			size += kgem->aperture_max_fence;
 		if (kgem->aperture_total == kgem->aperture_mappable)
 			size += kgem->aperture;
 		if (size > kgem->aperture_mappable) {
@@ -4949,7 +4954,10 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
 		num_pages += num_pages(bo);
 		num_exec++;
 		if (kgem->gen < 040 && bo->tiling) {
-			fenced_size += kgem_bo_fenced_size(kgem, bo);
+			uint32_t size = kgem_bo_fenced_size(kgem, bo);
+			if (size > kgem->aperture_max_fence)
+				kgem->aperture_max_fence = size;
+			fenced_size += size;
 			num_fence++;
 		}
 
@@ -4978,7 +4986,7 @@ bool kgem_check_many_bo_fenced(struct kgem *kgem, ...)
 		size = kgem->aperture_fenced;
 		size += fenced_size;
 		if (kgem->gen < 033)
-			size *= 2;
+			size += kgem->aperture_max_fence;
 		if (kgem->aperture_total == kgem->aperture_mappable)
 			size += kgem->aperture;
 		if (size > kgem->aperture_mappable) {
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 6abab08..13c5b42 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -188,7 +188,7 @@ struct kgem {
 	uint16_t fence_max;
 	uint16_t half_cpu_cache_pages;
 	uint32_t aperture_total, aperture_high, aperture_low, aperture_mappable;
-	uint32_t aperture, aperture_fenced;
+	uint32_t aperture, aperture_fenced, aperture_max_fence;
 	uint32_t max_upload_tile_size, max_copy_tile_size;
 	uint32_t max_gpu_size, max_cpu_size;
 	uint32_t large_object_size, max_object_size;
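
A worked example of the change in accounting, as a self-contained C
snippet with made-up page counts: the old estimate doubled the entire
fenced total on the affected generations, whereas the new one adds only
the single largest fence, assuming the kernel can pack the rest once the
largest region is aligned.

#include <stdio.h>

int main(void)
{
	unsigned fenced_pages[] = { 512, 256, 128 };	/* three tiled bos */
	unsigned total = 0, max_fence = 0;

	for (unsigned i = 0; i < 3; i++) {
		total += fenced_pages[i];
		if (fenced_pages[i] > max_fence)
			max_fence = fenced_pages[i];
	}

	printf("old estimate: %u pages\n", total * 2);		/* 1792 */
	printf("new estimate: %u pages\n", total + max_fence);	/* 1408 */
	return 0;
}
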
commit 736b496b458d666416ea94f157c05ce78f98a600
Author: Chris Wilson <chris at chris-wilson.co.uk>
Date:   Tue Nov 5 21:33:18 2013 +0000

    sna: Mark partial move_area_to_gpu with MOVE_READ on promotion to move_to_gpu
    
    When promoting a partial move_area_to_gpu to a full move_to_gpu, we have
    to disable certain optimisations that we try to use if MOVE_READ==0.
    
    Reported-and-tested-by: Matti Hamalainen <ccr at tnsp.org>
    Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=71198
    Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>

diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 2aae0e2..02b319d 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -3035,7 +3035,7 @@ sna_pixmap_move_area_to_gpu(PixmapPtr pixmap, const BoxRec *box, unsigned int fl
 
 	if (priv->cpu_damage == NULL) {
 		list_del(&priv->flush_list);
-		return sna_pixmap_move_to_gpu(pixmap, flags);
+		return sna_pixmap_move_to_gpu(pixmap, MOVE_READ | flags);
 	}
 
 	if (priv->gpu_bo == NULL) {
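
A one-line sketch of the promotion, with illustrative flag values: forcing
MOVE_READ into the request keeps move_to_gpu() from applying its
write-only shortcuts when it is reached from a partial area move.

#define MOVE_WRITE	0x1	/* illustrative values, not the driver's */
#define MOVE_READ	0x2

static unsigned promote_area_move_flags(unsigned flags)
{
	/* Preserve existing content: the request originated as a
	 * partial-area move, so nothing outside the box may be discarded. */
	return MOVE_READ | flags;
}
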

