[PATCH 046/131] drm/i915: Allow DMA pagetables to use highmem

Sat Aug 6 07:36:13 UTC 2016

As we never need to directly access the pages we allocate for scratch and
the pagetables, and always remap them into the GTT through the dma
remapper, we do not need to limit the allocations to lowmem i.e. we can
pass in the __GFP_HIGHMEM flag to the page allocation.

For backwards compatibility, e.g. certain old GPUs not liking highmem
for certain functions that may be accidentally mapped to the scratch
page by userspace, keep the GMCH probe as only allocating form DMA32.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_drv.h         |  1 +
 drivers/gpu/drm/i915/i915_gem_gtt.c     | 18 ++++++++++++------
 drivers/gpu/drm/i915/i915_gem_request.c | 13 +++++++++----
 drivers/gpu/drm/i915/i915_gpu_error.c   |  4 +++-
 4 files changed, 25 insertions(+), 11 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 99be655750fd..2294d08a8c44 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -570,6 +570,7 @@ struct drm_i915_error_state {
 			long jiffies;
 			pid_t pid;
 			u32 seqno;
+			u32 head;
 			u32 tail;
 		} *requests;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index dbba0b1602d8..655962e74614 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -32,6 +32,8 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)
+
 /**
  * DOC: Global GTT views
  *
@@ -343,7 +345,7 @@ static int __setup_page_dma(struct drm_device *dev,
 
 static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
 {
-	return __setup_page_dma(dev, p, GFP_KERNEL);
+	return __setup_page_dma(dev, p, I915_GFP_DMA);
 }
 
 static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
@@ -406,9 +408,11 @@ static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
 }
 
 static int
-setup_scratch_page(struct drm_device *dev, struct i915_page_dma *scratch)
+setup_scratch_page(struct drm_device *dev,
+		   struct i915_page_dma *scratch,
+		   gfp_t gfp)
 {
-	return __setup_page_dma(dev, scratch, GFP_DMA32 | __GFP_ZERO);
+	return __setup_page_dma(dev, scratch, gfp | __GFP_ZERO);
 }
 
 static void cleanup_scratch_page(struct drm_device *dev,
@@ -863,7 +867,7 @@ static int gen8_init_scratch(struct i915_address_space *vm)
 	struct drm_device *dev = vm->dev;
 	int ret;
 
-	ret = setup_scratch_page(dev, &vm->scratch_page);
+	ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
 	if (ret)
 		return ret;
 
@@ -1930,7 +1934,7 @@ static int gen6_init_scratch(struct i915_address_space *vm)
 	struct drm_device *dev = vm->dev;
 	int ret;
 
-	ret = setup_scratch_page(dev, &vm->scratch_page);
+	ret = setup_scratch_page(dev, &vm->scratch_page, I915_GFP_DMA);
 	if (ret)
 		return ret;
 
@@ -2936,7 +2940,9 @@ static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
 		return -ENOMEM;
 	}
 
-	ret = setup_scratch_page(ggtt->base.dev, &ggtt->base.scratch_page);
+	ret = setup_scratch_page(ggtt->base.dev,
+				 &ggtt->base.scratch_page,
+				 GFP_DMA32);
 	if (ret) {
 		DRM_ERROR("Scratch setup failed\n");
 		/* iounmap will also get called at remove, but meh */
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 8fdd313248f9..afe28112e8f6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -421,6 +421,13 @@ i915_gem_request_alloc(struct intel_engine_cs *engine,
 	if (ret)
 		goto err_ctx;
 
+	/* Record the position of the start of the request so that
+	 * should we detect the updated seqno part-way through the
+	 * GPU processing the request, we never over-estimate the
+	 * position of the head.
+	 */
+	req->head = req->ring->tail;
+
 	return req;
 
 err_ctx:
@@ -489,8 +496,6 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 
 	trace_i915_gem_request_add(request);
 
-	request->head = request_start;
-
 	/* Seal the request and mark it as pending execution. Note that
 	 * we may inspect this state, without holding any locks, during
 	 * hangcheck. Hence we apply the barrier to ensure that we do not
@@ -503,10 +508,10 @@ void __i915_add_request(struct drm_i915_gem_request *request, bool flush_caches)
 	list_add_tail(&request->link, &engine->request_list);
 	list_add_tail(&request->ring_link, &ring->request_list);
 
-	/* Record the position of the start of the request so that
+	/* Record the position of the start of the breadcrumb so that
 	 * should we detect the updated seqno part-way through the
 	 * GPU processing the request, we never over-estimate the
-	 * position of the head.
+	 * position of the ring's HEAD.
 	 */
 	request->postfix = ring->tail;
 
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index f4de2746e445..9135d64f985c 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -495,10 +495,11 @@ int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
 				   dev_priv->engine[i].name,
 				   ee->num_requests);
 			for (j = 0; j < ee->num_requests; j++) {
-				err_printf(m, "  pid %d, seqno 0x%08x, emitted %ld, tail 0x%08x\n",
+				err_printf(m, "  pid %d, seqno 0x%08x, emitted %ld, head 0x%08x tail 0x%08x\n",
 					   ee->requests[j].pid,
 					   ee->requests[j].seqno,
 					   ee->requests[j].jiffies,
+					   ee->requests[j].head,
 					   ee->requests[j].tail);
 			}
 		}
@@ -1073,6 +1074,7 @@ static void engine_record_requests(struct intel_engine_cs *engine,
 		erq = &ee->requests[count++];
 		erq->seqno = request->fence.seqno;
 		erq->jiffies = request->emitted_jiffies;
+		erq->head = request->head;
 		erq->tail = request->tail;
 
 		rcu_read_lock();
-- 
2.8.1