[Intel-gfx] [RFC 22/21] drm/i915: Cache request completion status
John Harrison
John.C.Harrison at Intel.com
Tue Oct 28 16:36:29 CET 2014
On 19/10/2014 15:14, Daniel Vetter wrote:
> On Tue, Oct 07, 2014 at 05:47:29PM +0100, John.C.Harrison at Intel.com wrote:
>> From: John Harrison <John.C.Harrison at Intel.com>
>>
>> For: VIZ-4377
>> Signed-off-by: John.C.Harrison at Intel.com
> Why? If it's just for performance I think we should do this as part of the
> switch to struct fence, which already has this.
For performance and also as part of getting rid of all the
i915_seqno_passed() calls.
>
>> ---
>> drivers/gpu/drm/i915/i915_drv.h | 34 ++++++++++++++++---------------
>> drivers/gpu/drm/i915/i915_gem.c | 21 +++++++++++++++++++
>> drivers/gpu/drm/i915/intel_lrc.c | 1 +
>> drivers/gpu/drm/i915/intel_ringbuffer.c | 2 ++
>> drivers/gpu/drm/i915/intel_ringbuffer.h | 3 +++
>> 5 files changed, 45 insertions(+), 16 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
>> index cdbbdeb..4ab3b23 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.h
>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>> @@ -1913,6 +1913,9 @@ void i915_gem_track_fb(struct drm_i915_gem_object *old,
>> struct drm_i915_gem_request {
>> struct kref ref;
>>
>> + /** Is this request known to be complete? */
>> + bool complete;
>> +
>> /** On Which ring this request was generated */
>> struct intel_engine_cs *ring;
>>
>> @@ -1943,6 +1946,8 @@ struct drm_i915_gem_request {
>> };
>>
>> void i915_gem_request_free(struct kref *req_ref);
>> +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring,
>> + bool lazy_coherency);
>>
>> static inline uint32_t
>> i915_gem_request_get_seqno(struct drm_i915_gem_request *req)
>> @@ -1968,7 +1973,19 @@ i915_gem_request_unreference(struct drm_i915_gem_request *req)
>> kref_put(&req->ref, i915_gem_request_free);
>> }
>>
>> -/* ??? i915_gem_request_completed should be here ??? */
>> +static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
>> + bool lazy_coherency)
>> +{
>> + if (req->complete)
>> + return true;
>> +
>> + if (req->ring == NULL)
>> + return false;
>> +
>> + i915_gem_complete_requests_ring(req->ring, lazy_coherency);
>> +
>> + return req->complete;
>> +}
> Also, this is looking way too big now I think ;-) If you have a full
> non-inline function call in your inline it's always a net loss.
> -Daniel
That depends how you define gain/loss. In terms of performance, it can
still be a gain because the function call is not always taken. Whereas
the alternative is at least one function call and possibly two. Either
way, as noted already, the final intention is for this to become simply
'return req->complete' and not have any function calls at all.
>
>>
>> struct drm_i915_file_private {
>> struct drm_i915_private *dev_priv;
>> @@ -3019,19 +3036,4 @@ wait_remaining_ms_from_jiffies(unsigned long timestamp_jiffies, int to_wait_ms)
>> }
>> }
>>
>> -static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req,
>> - bool lazy_coherency)
>> -{
>> - u32 seqno;
>> -
>> - BUG_ON(req == NULL);
>> -
>> - if (req->ring == NULL)
>> - return false;
>> -
>> - seqno = req->ring->get_seqno(req->ring, lazy_coherency);
>> -
>> - return i915_seqno_passed(seqno, req->seqno);
>> -}
>> -
>> #endif
>> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
>> index 0f14333..0a9b29e 100644
>> --- a/drivers/gpu/drm/i915/i915_gem.c
>> +++ b/drivers/gpu/drm/i915/i915_gem.c
>> @@ -2641,6 +2641,27 @@ void i915_gem_reset(struct drm_device *dev)
>> i915_gem_restore_fences(dev);
>> }
>>
>> +void i915_gem_complete_requests_ring(struct intel_engine_cs *ring,
>> + bool lazy_coherency)
>> +{
>> + struct drm_i915_gem_request *req;
>> + u32 seqno;
>> +
>> + seqno = ring->get_seqno(ring, lazy_coherency);
>> + if (seqno == ring->last_read_seqno)
>> + return;
>> +
>> + list_for_each_entry(req, &ring->request_list, list) {
>> + if (req->complete)
>> + continue;
>> +
>> + if (i915_seqno_passed(seqno, req->seqno))
>> + req->complete = true;
>> + }
>> +
>> + ring->last_read_seqno = seqno;
>> +}
>> +
>> /**
>> * This function clears the request list as sequence numbers are passed.
>> */
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
>> index 744684a..57acd2a 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -808,6 +808,7 @@ static int logical_ring_alloc_seqno(struct intel_engine_cs *ring,
>>
>> kref_init(&request->ref);
>> request->ring = NULL;
>> + request->complete = false;
>>
>> ret = i915_gem_get_seqno(ring->dev, &request->seqno);
>> if (ret) {
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> index 0a3c24a..392dc25 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
>> @@ -2023,6 +2023,7 @@ intel_ring_alloc_seqno(struct intel_engine_cs *ring)
>>
>> kref_init(&request->ref);
>> request->ring = NULL;
>> + request->complete = false;
>>
>> ret = i915_gem_get_seqno(ring->dev, &request->seqno);
>> if (ret) {
>> @@ -2115,6 +2116,7 @@ void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
>> I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
>> }
>>
>> + ring->last_read_seqno = 0;
>> ring->set_seqno(ring, seqno);
>> ring->hangcheck.seqno = seqno;
>> }
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index 64a4346..40394d3 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -269,6 +269,9 @@ struct intel_engine_cs {
>> bool gpu_caches_dirty;
>> bool fbc_dirty;
>>
>> + /* For optimising request completion events */
>> + u32 last_read_seqno;
>> +
>> wait_queue_head_t irq_queue;
>>
>> struct intel_context *default_context;
>> --
>> 1.7.9.5
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
More information about the Intel-gfx
mailing list