[Intel-gfx] [PATCH 08/42] drm/i915: Rearrange i915_wait_request() accounting with callers
Chris Wilson
chris at chris-wilson.co.uk
Fri Oct 7 09:46:01 UTC 2016
Our low-level wait routine has evolved from our generic wait interface
that handled unlocked, RPS boosting, waits with time tracking. If we
push our GEM fence tracking to use reservation_objects (required for
handling multiple timelines), we lose the ability to pass the required
information down to i915_wait_request(). However, if we push the extra
functionality from i915_wait_request() to the individual callsites
(i915_gem_object_wait_rendering and i915_gem_wait_ioctl) that make use
of those extras, we can both simplify our low level wait and prepare for
extending the GEM interface for use of reservation_objects.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
drivers/gpu/drm/i915/i915_drv.h | 7 +-
drivers/gpu/drm/i915/i915_gem.c | 301 +++++++++++++++++++++++---------
drivers/gpu/drm/i915/i915_gem_request.c | 119 ++-----------
drivers/gpu/drm/i915/i915_gem_request.h | 32 ++--
drivers/gpu/drm/i915/i915_gem_userptr.c | 12 +-
drivers/gpu/drm/i915/intel_display.c | 27 +--
drivers/gpu/drm/i915/intel_ringbuffer.c | 14 +-
drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +-
8 files changed, 283 insertions(+), 232 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 54b1b931573c..ee25e265416f 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -3301,9 +3301,10 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
int __must_check i915_gem_suspend(struct drm_device *dev);
void i915_gem_resume(struct drm_device *dev);
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
-int __must_check
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly);
+int i915_gem_object_wait(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps);
int __must_check
i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj,
bool write);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a89a88922448..7fa5cb764739 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -292,7 +292,12 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
* must wait for all rendering to complete to the object (as unbinding
* must anyway), and retire the requests.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -311,88 +316,171 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj)
return ret;
}
-/**
- * Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT or access from the CPU.
- * @obj: i915 gem object
- * @readonly: waiting for just read access or read-write access
- */
-int
-i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
- bool readonly)
+static long
+i915_gem_object_wait_fence(struct fence *fence,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
{
- struct reservation_object *resv;
- struct i915_gem_active *active;
- unsigned long active_mask;
- int idx;
+ struct drm_i915_gem_request *rq;
- lockdep_assert_held(&obj->base.dev->struct_mutex);
+ BUILD_BUG_ON(I915_WAIT_INTERRUPTIBLE != 0x1);
- if (!readonly) {
- active = obj->last_read;
- active_mask = i915_gem_object_get_active(obj);
- } else {
- active_mask = 1;
- active = &obj->last_write;
+ if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+ return timeout;
+
+ if (!fence_is_i915(fence))
+ return fence_wait_timeout(fence,
+ flags & I915_WAIT_INTERRUPTIBLE,
+ timeout);
+
+ rq = to_request(fence);
+ if (i915_gem_request_completed(rq))
+ goto out;
+
+ /* This client is about to stall waiting for the GPU. In many cases
+ * this is undesirable and limits the throughput of the system, as
+ * many clients cannot continue processing user input/output whilst
+ * blocked. RPS autotuning may take tens of milliseconds to respond
+ * to the GPU load and thus incurs additional latency for the client.
+ * We can circumvent that by promoting the GPU frequency to maximum
+ * before we wait. This makes the GPU throttle up much more quickly
+ * (good for benchmarks and user experience, e.g. window animations),
+ * but at a cost of spending more power processing the workload
+ * (bad for battery). Not all clients even want their results
+ * immediately and for them we should just let the GPU select its own
+ * frequency to maximise efficiency. To prevent a single client from
+ * forcing the clocks too high for the whole system, we only allow
+ * each client to waitboost once in a busy period.
+ */
+ if (rps) {
+ if (INTEL_GEN(rq->i915) >= 6)
+ gen6_rps_boost(rq->i915, rps, rq->emitted_jiffies);
+ else
+ rps = NULL;
}
- for_each_active(active_mask, idx) {
+ timeout = i915_wait_request(rq, flags, timeout);
+
+out:
+ if (flags & I915_WAIT_LOCKED && i915_gem_request_completed(rq))
+ i915_gem_request_retire_upto(rq);
+
+ if (rps && rq->fence.seqno == rq->engine->last_submitted_seqno) {
+ /* The GPU is now idle and this client has stalled.
+ * Since no other client has submitted a request in the
+ * meantime, assume that this client is the only one
+ * supplying work to the GPU but is unable to keep that
+ * work supplied because it is waiting. Since the GPU is
+ * then never kept fully busy, RPS autoclocking will
+ * keep the clocks relatively low, causing further delays.
+ * Compensate by giving the synchronous client credit for
+ * a waitboost next time.
+ */
+ spin_lock(&rq->i915->rps.client_lock);
+ list_del_init(&rps->link);
+ spin_unlock(&rq->i915->rps.client_lock);
+ }
+
+ return timeout;
+}
+
+static long
+i915_gem_object_wait_reservation(struct reservation_object *resv,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
+{
+ struct fence *excl;
+
+ if (flags & I915_WAIT_ALL) {
+ struct fence **shared;
+ unsigned int count, i;
int ret;
- ret = i915_gem_active_wait(&active[idx],
- &obj->base.dev->struct_mutex);
+ ret = reservation_object_get_fences_rcu(resv,
+ &excl, &count, &shared);
if (ret)
return ret;
- }
- resv = i915_gem_object_get_dmabuf_resv(obj);
- if (resv) {
- long err;
+ for (i = 0; i < count; i++) {
+ timeout = i915_gem_object_wait_fence(shared[i],
+ flags, timeout,
+ rps);
+ if (timeout <= 0)
+ break;
- err = reservation_object_wait_timeout_rcu(resv, !readonly, true,
- MAX_SCHEDULE_TIMEOUT);
- if (err < 0)
- return err;
+ fence_put(shared[i]);
+ }
+
+ for (; i < count; i++)
+ fence_put(shared[i]);
+ kfree(shared);
+ } else {
+ excl = reservation_object_get_excl_rcu(resv);
}
- return 0;
+ if (excl && timeout > 0)
+ timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
+
+ fence_put(excl);
+
+ return timeout;
}
-/* A nonblocking variant of the above wait. Must be called prior to
- * acquiring the mutex for the object, as the object state may change
- * during this call. A reference must be held by the caller for the object.
+/**
+ * Waits for rendering to the object to be completed
+ * @obj: i915 gem object
+ * @flags: how to wait (under a lock, for all rendering or just for writes etc)
+ * @timeout: how long to wait
+ * @rps: client (user process) to charge for any waitboosting
*/
-static __must_check int
-__unsafe_wait_rendering(struct drm_i915_gem_object *obj,
- struct intel_rps_client *rps,
- bool readonly)
+int
+i915_gem_object_wait(struct drm_i915_gem_object *obj,
+ unsigned int flags,
+ long timeout,
+ struct intel_rps_client *rps)
{
+ struct reservation_object *resv;
struct i915_gem_active *active;
unsigned long active_mask;
int idx;
- active_mask = __I915_BO_ACTIVE(obj);
- if (!active_mask)
- return 0;
+ might_sleep();
+#if IS_ENABLED(CONFIG_LOCKDEP)
+ GEM_BUG_ON(!!lockdep_is_held(&obj->base.dev->struct_mutex) !=
+ !!(flags & I915_WAIT_LOCKED));
+#endif
+ GEM_BUG_ON(timeout < 0);
- if (!readonly) {
+ if (flags & I915_WAIT_ALL) {
active = obj->last_read;
+ active_mask = i915_gem_object_get_active(obj);
} else {
active_mask = 1;
active = &obj->last_write;
}
for_each_active(active_mask, idx) {
- int ret;
-
- ret = i915_gem_active_wait_unlocked(&active[idx],
- I915_WAIT_INTERRUPTIBLE,
- NULL, rps);
- if (ret)
- return ret;
+ struct drm_i915_gem_request *request;
+
+ request = i915_gem_active_get_unlocked(&active[idx]);
+ if (request) {
+ timeout = i915_gem_object_wait_fence(&request->fence,
+ flags, timeout,
+ rps);
+ i915_gem_request_put(request);
+ }
+ if (timeout < 0)
+ return timeout;
}
- return 0;
+ resv = i915_gem_object_get_dmabuf_resv(obj);
+ if (resv)
+ timeout = i915_gem_object_wait_reservation(resv,
+ flags, timeout,
+ rps);
+ return timeout < 0 ? timeout : 0;
}
static struct intel_rps_client *to_rps_client(struct drm_file *file)
@@ -449,12 +537,18 @@ i915_gem_phys_pwrite(struct drm_i915_gem_object *obj,
struct drm_device *dev = obj->base.dev;
void *vaddr = obj->phys_handle->vaddr + args->offset;
char __user *user_data = u64_to_user_ptr(args->data_ptr);
- int ret = 0;
+ int ret;
/* We manually control the domain here and pretend that it
* remains coherent i.e. in the GTT domain, like shmem_pwrite.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file_priv));
if (ret)
return ret;
@@ -614,12 +708,17 @@ int i915_gem_obj_prepare_shmem_read(struct drm_i915_gem_object *obj,
{
int ret;
- *needs_clflush = 0;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ *needs_clflush = 0;
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_wait_rendering(obj, true);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -661,11 +760,18 @@ int i915_gem_obj_prepare_shmem_write(struct drm_i915_gem_object *obj,
{
int ret;
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+
*needs_clflush = 0;
if (!i915_gem_object_has_struct_page(obj))
return -ENODEV;
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -1050,7 +1156,10 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pread(obj, args->offset, args->size);
- ret = __unsafe_wait_rendering(obj, to_rps_client(file), true);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file));
if (ret)
goto err;
@@ -1450,7 +1559,11 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
trace_i915_gem_object_pwrite(obj, args->offset, args->size);
- ret = __unsafe_wait_rendering(obj, to_rps_client(file), false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file));
if (ret)
goto err;
@@ -1537,7 +1650,11 @@ i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
* We will repeat the flush holding the lock in the normal manner
* to catch cases where we are gazumped.
*/
- ret = __unsafe_wait_rendering(obj, to_rps_client(file), !write_domain);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ (write_domain ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT,
+ to_rps_client(file));
if (ret)
goto err;
@@ -1773,7 +1890,10 @@ int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
* repeat the flush holding the lock in the normal manner to catch cases
* where we are gazumped.
*/
- ret = __unsafe_wait_rendering(obj, NULL, !write);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
goto err;
@@ -2800,10 +2920,9 @@ int
i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
{
struct drm_i915_gem_wait *args = data;
- struct intel_rps_client *rps = to_rps_client(file);
struct drm_i915_gem_object *obj;
- unsigned long active;
- int idx, ret = 0;
+ ktime_t start;
+ long ret;
if (args->flags != 0)
return -EINVAL;
@@ -2812,14 +2931,17 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (!obj)
return -ENOENT;
- active = __I915_BO_ACTIVE(obj);
- for_each_active(active, idx) {
- s64 *timeout = args->timeout_ns >= 0 ? &args->timeout_ns : NULL;
- ret = i915_gem_active_wait_unlocked(&obj->last_read[idx],
- I915_WAIT_INTERRUPTIBLE,
- timeout, rps);
- if (ret)
- break;
+ start = ktime_get();
+
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_ALL,
+ args->timeout_ns < 0 ? MAX_SCHEDULE_TIMEOUT : nsecs_to_jiffies(args->timeout_ns),
+ to_rps_client(file));
+
+ if (args->timeout_ns > 0) {
+ args->timeout_ns -= ktime_to_ns(ktime_sub(ktime_get(), start));
+ if (args->timeout_ns < 0)
+ args->timeout_ns = 0;
}
i915_gem_object_put_unlocked(obj);
@@ -3234,7 +3356,13 @@ i915_gem_object_set_to_gtt_domain(struct drm_i915_gem_object *obj, bool write)
uint32_t old_write_domain, old_read_domains;
int ret;
- ret = i915_gem_object_wait_rendering(obj, !write);
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -3351,7 +3479,12 @@ restart:
* If we wait upon the object, we know that all the bound
* VMA are no longer active.
*/
- ret = i915_gem_object_wait_rendering(obj, false);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ I915_WAIT_ALL,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -3606,7 +3739,13 @@ i915_gem_object_set_to_cpu_domain(struct drm_i915_gem_object *obj, bool write)
uint32_t old_write_domain, old_read_domains;
int ret;
- ret = i915_gem_object_wait_rendering(obj, !write);
+ lockdep_assert_held(&obj->base.dev->struct_mutex);
+ ret = i915_gem_object_wait(obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED |
+ (write ? I915_WAIT_ALL : 0),
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret)
return ret;
@@ -3662,11 +3801,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
struct drm_i915_file_private *file_priv = file->driver_priv;
unsigned long recent_enough = jiffies - DRM_I915_THROTTLE_JIFFIES;
struct drm_i915_gem_request *request, *target = NULL;
- int ret;
-
- ret = i915_gem_wait_for_error(&dev_priv->gpu_error);
- if (ret)
- return ret;
+ long ret;
/* ABI: return -EIO if already wedged */
if (i915_terminally_wedged(&dev_priv->gpu_error))
@@ -3693,10 +3828,12 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file)
if (target == NULL)
return 0;
- ret = i915_wait_request(target, I915_WAIT_INTERRUPTIBLE, NULL, NULL);
+ ret = i915_wait_request(target,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
i915_gem_request_put(target);
- return ret;
+ return ret < 0 ? ret : 0;
}
static bool
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index e1f7a32d4080..9b2ac082d5da 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -59,31 +59,9 @@ static bool i915_fence_enable_signaling(struct fence *fence)
static signed long i915_fence_wait(struct fence *fence,
bool interruptible,
- signed long timeout_jiffies)
+ signed long timeout)
{
- s64 timeout_ns, *timeout;
- int ret;
-
- if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
- timeout_ns = jiffies_to_nsecs(timeout_jiffies);
- timeout = &timeout_ns;
- } else {
- timeout = NULL;
- }
-
- ret = i915_wait_request(to_request(fence),
- interruptible, timeout,
- NO_WAITBOOST);
- if (ret == -ETIME)
- return 0;
-
- if (ret < 0)
- return ret;
-
- if (timeout_jiffies != MAX_SCHEDULE_TIMEOUT)
- timeout_jiffies = nsecs_to_jiffies(timeout_ns);
-
- return timeout_jiffies;
+ return i915_wait_request(to_request(fence), interruptible, timeout);
}
static void i915_fence_value_str(struct fence *fence, char *str, int size)
@@ -166,7 +144,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
struct i915_gem_active *active, *next;
trace_i915_gem_request_retire(request);
- list_del(&request->link);
+ list_del_init(&request->link);
/* We know the GPU must have read the request to have
* sent us the seqno + interrupt, so use the position
@@ -224,7 +202,8 @@ void i915_gem_request_retire_upto(struct drm_i915_gem_request *req)
struct drm_i915_gem_request *tmp;
lockdep_assert_held(&req->i915->drm.struct_mutex);
- GEM_BUG_ON(list_empty(&req->link));
+ if (list_empty(&req->link))
+ return;
do {
tmp = list_first_entry(&engine->request_list,
@@ -786,60 +765,30 @@ bool __i915_spin_request(const struct drm_i915_gem_request *req,
* Returns 0 if the request was found within the alloted time. Else returns the
* errno with remaining time filled in timeout argument.
*/
-int i915_wait_request(struct drm_i915_gem_request *req,
- unsigned int flags,
- s64 *timeout,
- struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+ unsigned int flags,
+ long timeout)
{
const int state = flags & I915_WAIT_INTERRUPTIBLE ?
TASK_INTERRUPTIBLE : TASK_UNINTERRUPTIBLE;
DEFINE_WAIT(reset);
struct intel_wait wait;
- unsigned long timeout_remain;
- int ret = 0;
might_sleep();
#if IS_ENABLED(CONFIG_LOCKDEP)
GEM_BUG_ON(!!lockdep_is_held(&req->i915->drm.struct_mutex) !=
!!(flags & I915_WAIT_LOCKED));
#endif
+ GEM_BUG_ON(timeout < 0);
if (i915_gem_request_completed(req))
- return 0;
-
- timeout_remain = MAX_SCHEDULE_TIMEOUT;
- if (timeout) {
- if (WARN_ON(*timeout < 0))
- return -EINVAL;
+ return timeout;
- if (*timeout == 0)
- return -ETIME;
-
- /* Record current time in case interrupted, or wedged */
- timeout_remain = nsecs_to_jiffies_timeout(*timeout);
- *timeout += ktime_get_raw_ns();
- }
+ if (!timeout)
+ return -ETIME;
trace_i915_gem_request_wait_begin(req);
- /* This client is about to stall waiting for the GPU. In many cases
- * this is undesirable and limits the throughput of the system, as
- * many clients cannot continue processing user input/output whilst
- * blocked. RPS autotuning may take tens of milliseconds to respond
- * to the GPU load and thus incurs additional latency for the client.
- * We can circumvent that by promoting the GPU frequency to maximum
- * before we wait. This makes the GPU throttle up much more quickly
- * (good for benchmarks and user experience, e.g. window animations),
- * but at a cost of spending more power processing the workload
- * (bad for battery). Not all clients even want their results
- * immediately and for them we should just let the GPU select its own
- * frequency to maximise efficiency. To prevent a single client from
- * forcing the clocks too high for the whole system, we only allow
- * each client to waitboost once in a busy period.
- */
- if (IS_RPS_CLIENT(rps) && INTEL_GEN(req->i915) >= 6)
- gen6_rps_boost(req->i915, rps, req->emitted_jiffies);
-
/* Optimistic short spin before touching IRQs */
if (i915_spin_request(req, state, 5))
goto complete;
@@ -858,16 +807,17 @@ int i915_wait_request(struct drm_i915_gem_request *req,
for (;;) {
if (signal_pending_state(state, current)) {
- ret = -ERESTARTSYS;
+ timeout = -ERESTARTSYS;
break;
}
- timeout_remain = io_schedule_timeout(timeout_remain);
- if (timeout_remain == 0) {
- ret = -ETIME;
+ if (!timeout) {
+ timeout = -ETIME;
break;
}
+ timeout = io_schedule_timeout(timeout);
+
if (intel_wait_complete(&wait))
break;
@@ -914,40 +864,7 @@ wakeup:
complete:
trace_i915_gem_request_wait_end(req);
- if (timeout) {
- *timeout -= ktime_get_raw_ns();
- if (*timeout < 0)
- *timeout = 0;
-
- /*
- * Apparently ktime isn't accurate enough and occasionally has a
- * bit of mismatch in the jiffies<->nsecs<->ktime loop. So patch
- * things up to make the test happy. We allow up to 1 jiffy.
- *
- * This is a regrssion from the timespec->ktime conversion.
- */
- if (ret == -ETIME && *timeout < jiffies_to_usecs(1)*1000)
- *timeout = 0;
- }
-
- if (IS_RPS_USER(rps) &&
- req->fence.seqno == req->engine->last_submitted_seqno) {
- /* The GPU is now idle and this client has stalled.
- * Since no other client has submitted a request in the
- * meantime, assume that this client is the only one
- * supplying work to the GPU but is unable to keep that
- * work supplied because it is waiting. Since the GPU is
- * then never kept fully busy, RPS autoclocking will
- * keep the clocks relatively low, causing further delays.
- * Compensate by giving the synchronous client credit for
- * a waitboost next time.
- */
- spin_lock(&req->i915->rps.client_lock);
- list_del_init(&rps->link);
- spin_unlock(&req->i915->rps.client_lock);
- }
-
- return ret;
+ return timeout;
}
static bool engine_retire_requests(struct intel_engine_cs *engine)
diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
index c85a3d82febf..f1ce390c9244 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.h
+++ b/drivers/gpu/drm/i915/i915_gem_request.h
@@ -228,13 +228,13 @@ struct intel_rps_client;
#define IS_RPS_CLIENT(p) (!IS_ERR(p))
#define IS_RPS_USER(p) (!IS_ERR_OR_NULL(p))
-int i915_wait_request(struct drm_i915_gem_request *req,
- unsigned int flags,
- s64 *timeout,
- struct intel_rps_client *rps)
+long i915_wait_request(struct drm_i915_gem_request *req,
+ unsigned int flags,
+ long timeout)
__attribute__((nonnull(1)));
#define I915_WAIT_INTERRUPTIBLE BIT(0)
#define I915_WAIT_LOCKED BIT(1) /* struct_mutex held, handle GPU reset */
+#define I915_WAIT_ALL BIT(2) /* used by i915_gem_object_wait() */
static inline u32 intel_engine_get_seqno(struct intel_engine_cs *engine);
@@ -583,14 +583,16 @@ static inline int __must_check
i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
{
struct drm_i915_gem_request *request;
+ long ret;
request = i915_gem_active_peek(active, mutex);
if (!request)
return 0;
- return i915_wait_request(request,
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
- NULL, NULL);
+ ret = i915_wait_request(request,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ return ret < 0 ? ret : 0;
}
/**
@@ -617,20 +619,18 @@ i915_gem_active_wait(const struct i915_gem_active *active, struct mutex *mutex)
*/
static inline int
i915_gem_active_wait_unlocked(const struct i915_gem_active *active,
- unsigned int flags,
- s64 *timeout,
- struct intel_rps_client *rps)
+ unsigned int flags)
{
struct drm_i915_gem_request *request;
- int ret = 0;
+ long ret = 0;
request = i915_gem_active_get_unlocked(active);
if (request) {
- ret = i915_wait_request(request, flags, timeout, rps);
+ ret = i915_wait_request(request, flags, MAX_SCHEDULE_TIMEOUT);
i915_gem_request_put(request);
}
- return ret;
+ return ret < 0 ? ret : 0;
}
/**
@@ -647,7 +647,7 @@ i915_gem_active_retire(struct i915_gem_active *active,
struct mutex *mutex)
{
struct drm_i915_gem_request *request;
- int ret;
+ long ret;
request = i915_gem_active_raw(active, mutex);
if (!request)
@@ -655,8 +655,8 @@ i915_gem_active_retire(struct i915_gem_active *active,
ret = i915_wait_request(request,
I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
- NULL, NULL);
- if (ret)
+ MAX_SCHEDULE_TIMEOUT);
+ if (ret < 0)
return ret;
list_del_init(&active->link);
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index e537930c64b5..1c891b92ac80 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -61,23 +61,13 @@ struct i915_mmu_object {
bool attached;
};
-static void wait_rendering(struct drm_i915_gem_object *obj)
-{
- unsigned long active = __I915_BO_ACTIVE(obj);
- int idx;
-
- for_each_active(active, idx)
- i915_gem_active_wait_unlocked(&obj->last_read[idx],
- 0, NULL, NULL);
-}
-
static void cancel_userptr(struct work_struct *work)
{
struct i915_mmu_object *mo = container_of(work, typeof(*mo), work);
struct drm_i915_gem_object *obj = mo->obj;
struct drm_device *dev = obj->base.dev;
- wait_rendering(obj);
+ i915_gem_object_wait(obj, I915_WAIT_ALL, MAX_SCHEDULE_TIMEOUT, NULL);
mutex_lock(&dev->struct_mutex);
/* Cancel any active worker and force us to re-evaluate gup */
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4b3d315c5254..5ee7bab6e560 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -12042,7 +12042,7 @@ static void intel_mmio_flip_work_func(struct work_struct *w)
if (work->flip_queued_req)
WARN_ON(i915_wait_request(work->flip_queued_req,
- 0, NULL, NO_WAITBOOST));
+ 0, MAX_SCHEDULE_TIMEOUT) < 0);
/* For framebuffer backed by dmabuf, wait for fence */
resv = i915_gem_object_get_dmabuf_resv(obj);
@@ -14089,19 +14089,21 @@ static int intel_atomic_prepare_commit(struct drm_device *dev,
for_each_plane_in_state(state, plane, plane_state, i) {
struct intel_plane_state *intel_plane_state =
to_intel_plane_state(plane_state);
+ long timeout;
if (!intel_plane_state->wait_req)
continue;
- ret = i915_wait_request(intel_plane_state->wait_req,
- I915_WAIT_INTERRUPTIBLE,
- NULL, NULL);
- if (ret) {
+ timeout = i915_wait_request(intel_plane_state->wait_req,
+ I915_WAIT_INTERRUPTIBLE,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0) {
/* Any hang should be swallowed by the wait */
- WARN_ON(ret == -EIO);
+ WARN_ON(timeout == -EIO);
mutex_lock(&dev->struct_mutex);
drm_atomic_helper_cleanup_planes(dev, state);
mutex_unlock(&dev->struct_mutex);
+ ret = timeout;
break;
}
}
@@ -14303,7 +14305,7 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
bool hw_check = intel_state->modeset;
unsigned long put_domains[I915_MAX_PIPES] = {};
unsigned crtc_vblank_mask = 0;
- int i, ret;
+ int i;
for_each_plane_in_state(state, plane, plane_state, i) {
struct intel_plane_state *intel_plane_state =
@@ -14312,11 +14314,10 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state)
if (!intel_plane_state->wait_req)
continue;
- ret = i915_wait_request(intel_plane_state->wait_req,
- 0, NULL, NULL);
/* EIO should be eaten, and we can't get interrupted in the
* worker, and blocking commits have waited already. */
- WARN_ON(ret);
+ WARN_ON(i915_wait_request(intel_plane_state->wait_req,
+ 0, MAX_SCHEDULE_TIMEOUT) < 0);
}
drm_atomic_helper_wait_for_dependencies(state);
@@ -14679,7 +14680,11 @@ intel_prepare_plane_fb(struct drm_plane *plane,
* can safely continue.
*/
if (needs_modeset(crtc_state))
- ret = i915_gem_object_wait_rendering(old_obj, true);
+ ret = i915_gem_object_wait(old_obj,
+ I915_WAIT_INTERRUPTIBLE |
+ I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT,
+ NULL);
if (ret) {
/* GPU hangs should have been swallowed by the wait */
WARN_ON(ret == -EIO);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 729f373782e2..b60c6f09fbfd 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -2156,7 +2156,9 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
{
struct intel_ring *ring = req->ring;
struct drm_i915_gem_request *target;
- int ret;
+ long timeout;
+
+ lockdep_assert_held(&req->i915->drm.struct_mutex);
intel_ring_update_space(ring);
if (ring->space >= bytes)
@@ -2186,11 +2188,11 @@ static int wait_for_space(struct drm_i915_gem_request *req, int bytes)
if (WARN_ON(&target->ring_link == &ring->request_list))
return -ENOSPC;
- ret = i915_wait_request(target,
- I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
- NULL, NO_WAITBOOST);
- if (ret)
- return ret;
+ timeout = i915_wait_request(target,
+ I915_WAIT_INTERRUPTIBLE | I915_WAIT_LOCKED,
+ MAX_SCHEDULE_TIMEOUT);
+ if (timeout < 0)
+ return timeout;
i915_gem_request_retire_upto(target);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 124f4646958d..dbb7176534af 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -530,8 +530,7 @@ static inline int intel_engine_idle(struct intel_engine_cs *engine,
unsigned int flags)
{
/* Wait upon the last request to be completed */
- return i915_gem_active_wait_unlocked(&engine->last_request,
- flags, NULL, NULL);
+ return i915_gem_active_wait_unlocked(&engine->last_request, flags);
}
int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
--
2.9.3
More information about the Intel-gfx
mailing list