[Intel-gfx] [PATCH 073/190] drm/i915: Introduce i915_gem_active for request tracking
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Mon Jan 11 09:32:27 PST 2016
On 11/01/16 09:17, Chris Wilson wrote:
> In the next patch, request tracking is made more generic and for that we
> need a new expanded struct and to separate out the logic changes from
> the mechanical churn, we split out the structure renaming into this
> patch.
>
> v2: Writer's block. Add some spiel about why we track requests.
> v3: Now i915_gem_active.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> ---
> drivers/gpu/drm/i915/i915_debugfs.c | 10 +++---
> drivers/gpu/drm/i915/i915_drv.h | 9 +++--
> drivers/gpu/drm/i915/i915_gem.c | 56 +++++++++++++++---------------
> drivers/gpu/drm/i915/i915_gem_execbuffer.c | 4 +--
> drivers/gpu/drm/i915/i915_gem_fence.c | 6 ++--
> drivers/gpu/drm/i915/i915_gem_request.h | 38 ++++++++++++++++++++
> drivers/gpu/drm/i915/i915_gem_tiling.c | 2 +-
> drivers/gpu/drm/i915/i915_gpu_error.c | 6 ++--
> drivers/gpu/drm/i915/intel_display.c | 10 +++---
> 9 files changed, 89 insertions(+), 52 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 8de944ed3369..65cb1d6a5d64 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -146,10 +146,10 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> obj->base.write_domain);
> for_each_ring(ring, dev_priv, i)
> seq_printf(m, "%x ",
> - i915_gem_request_get_seqno(obj->last_read_req[i]));
> + i915_gem_request_get_seqno(obj->last_read[i].request));
> seq_printf(m, "] %x %x%s%s%s",
> - i915_gem_request_get_seqno(obj->last_write_req),
> - i915_gem_request_get_seqno(obj->last_fenced_req),
> + i915_gem_request_get_seqno(obj->last_write.request),
> + i915_gem_request_get_seqno(obj->last_fence.request),
> i915_cache_level_str(to_i915(obj->base.dev), obj->cache_level),
> obj->dirty ? " dirty" : "",
> obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
> @@ -184,8 +184,8 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
> *t = '\0';
> seq_printf(m, " (%s mappable)", s);
> }
> - if (obj->last_write_req != NULL)
> - seq_printf(m, " (%s)", obj->last_write_req->engine->name);
> + if (obj->last_write.request != NULL)
> + seq_printf(m, " (%s)", obj->last_write.request->engine->name);
> if (obj->frontbuffer_bits)
> seq_printf(m, " (frontbuffer: 0x%03x)", obj->frontbuffer_bits);
> }
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index cae448e238ca..c577f86d94f8 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -2110,11 +2110,10 @@ struct drm_i915_gem_object {
> * requests on one ring where the write request is older than the
> * read request. This allows for the CPU to read from an active
> * buffer by only waiting for the write to complete.
> - * */
> - struct drm_i915_gem_request *last_read_req[I915_NUM_RINGS];
> - struct drm_i915_gem_request *last_write_req;
> - /** Breadcrumb of last fenced GPU access to the buffer. */
> - struct drm_i915_gem_request *last_fenced_req;
> + */
> + struct i915_gem_active last_read[I915_NUM_RINGS];
> + struct i915_gem_active last_write;
> + struct i915_gem_active last_fence;
>
> /** Current tiling stride for the object, if it's tiled. */
> uint32_t stride;
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index b0230e7151ce..77c253ddf060 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -1117,23 +1117,23 @@ i915_gem_object_wait_rendering(struct drm_i915_gem_object *obj,
> return 0;
>
> if (readonly) {
> - if (obj->last_write_req != NULL) {
> - ret = i915_wait_request(obj->last_write_req);
> + if (obj->last_write.request != NULL) {
> + ret = i915_wait_request(obj->last_write.request);
> if (ret)
> return ret;
>
> - i = obj->last_write_req->engine->id;
> - if (obj->last_read_req[i] == obj->last_write_req)
> + i = obj->last_write.request->engine->id;
> + if (obj->last_read[i].request == obj->last_write.request)
> i915_gem_object_retire__read(obj, i);
> else
> i915_gem_object_retire__write(obj);
> }
> } else {
> for (i = 0; i < I915_NUM_RINGS; i++) {
> - if (obj->last_read_req[i] == NULL)
> + if (obj->last_read[i].request == NULL)
> continue;
>
> - ret = i915_wait_request(obj->last_read_req[i]);
> + ret = i915_wait_request(obj->last_read[i].request);
> if (ret)
> return ret;
>
> @@ -1151,9 +1151,9 @@ i915_gem_object_retire_request(struct drm_i915_gem_object *obj,
> {
> int ring = req->engine->id;
>
> - if (obj->last_read_req[ring] == req)
> + if (obj->last_read[ring].request == req)
> i915_gem_object_retire__read(obj, ring);
> - else if (obj->last_write_req == req)
> + else if (obj->last_write.request == req)
> i915_gem_object_retire__write(obj);
>
> i915_gem_request_retire_upto(req);
> @@ -1181,7 +1181,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
> if (readonly) {
> struct drm_i915_gem_request *req;
>
> - req = obj->last_write_req;
> + req = obj->last_write.request;
> if (req == NULL)
> return 0;
>
> @@ -1190,7 +1190,7 @@ i915_gem_object_wait_rendering__nonblocking(struct drm_i915_gem_object *obj,
> for (i = 0; i < I915_NUM_RINGS; i++) {
> struct drm_i915_gem_request *req;
>
> - req = obj->last_read_req[i];
> + req = obj->last_read[i].request;
> if (req == NULL)
> continue;
>
> @@ -2070,7 +2070,7 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> obj->active |= intel_engine_flag(engine);
>
> list_move_tail(&obj->ring_list[engine->id], &engine->active_list);
> - i915_gem_request_assign(&obj->last_read_req[engine->id], req);
> + i915_gem_request_mark_active(req, &obj->last_read[engine->id]);
>
> list_move_tail(&vma->mm_list, &vma->vm->active_list);
> }
> @@ -2078,10 +2078,10 @@ void i915_vma_move_to_active(struct i915_vma *vma,
> static void
> i915_gem_object_retire__write(struct drm_i915_gem_object *obj)
> {
> - GEM_BUG_ON(obj->last_write_req == NULL);
> - GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write_req->engine)));
> + GEM_BUG_ON(obj->last_write.request == NULL);
> + GEM_BUG_ON(!(obj->active & intel_engine_flag(obj->last_write.request->engine)));
>
> - i915_gem_request_assign(&obj->last_write_req, NULL);
> + i915_gem_request_assign(&obj->last_write.request, NULL);
> intel_fb_obj_flush(obj, true, ORIGIN_CS);
> }
>
> @@ -2090,13 +2090,13 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
> {
> struct i915_vma *vma;
>
> - GEM_BUG_ON(obj->last_read_req[ring] == NULL);
> + GEM_BUG_ON(obj->last_read[ring].request == NULL);
> GEM_BUG_ON(!(obj->active & (1 << ring)));
>
> list_del_init(&obj->ring_list[ring]);
> - i915_gem_request_assign(&obj->last_read_req[ring], NULL);
> + i915_gem_request_assign(&obj->last_read[ring].request, NULL);
>
> - if (obj->last_write_req && obj->last_write_req->engine->id == ring)
> + if (obj->last_write.request && obj->last_write.request->engine->id == ring)
> i915_gem_object_retire__write(obj);
>
> obj->active &= ~(1 << ring);
> @@ -2115,7 +2115,7 @@ i915_gem_object_retire__read(struct drm_i915_gem_object *obj, int ring)
> list_move_tail(&vma->mm_list, &vma->vm->inactive_list);
> }
>
> - i915_gem_request_assign(&obj->last_fenced_req, NULL);
> + i915_gem_request_assign(&obj->last_fence.request, NULL);
> drm_gem_object_unreference(&obj->base);
> }
>
> @@ -2336,7 +2336,7 @@ i915_gem_retire_requests_ring(struct intel_engine_cs *ring)
> struct drm_i915_gem_object,
> ring_list[ring->id]);
>
> - if (!list_empty(&obj->last_read_req[ring->id]->list))
> + if (!list_empty(&obj->last_read[ring->id].request->list))
> break;
>
> i915_gem_object_retire__read(obj, ring->id);
> @@ -2445,7 +2445,7 @@ i915_gem_object_flush_active(struct drm_i915_gem_object *obj)
> for (i = 0; i < I915_NUM_RINGS; i++) {
> struct drm_i915_gem_request *req;
>
> - req = obj->last_read_req[i];
> + req = obj->last_read[i].request;
> if (req == NULL)
> continue;
>
> @@ -2525,10 +2525,10 @@ i915_gem_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> drm_gem_object_unreference(&obj->base);
>
> for (i = 0; i < I915_NUM_RINGS; i++) {
> - if (obj->last_read_req[i] == NULL)
> + if (obj->last_read[i].request == NULL)
> continue;
>
> - req[n++] = i915_gem_request_get(obj->last_read_req[i]);
> + req[n++] = i915_gem_request_get(obj->last_read[i].request);
> }
>
> mutex_unlock(&dev->struct_mutex);
> @@ -2619,12 +2619,12 @@ i915_gem_object_sync(struct drm_i915_gem_object *obj,
>
> n = 0;
> if (readonly) {
> - if (obj->last_write_req)
> - req[n++] = obj->last_write_req;
> + if (obj->last_write.request)
> + req[n++] = obj->last_write.request;
> } else {
> for (i = 0; i < I915_NUM_RINGS; i++)
> - if (obj->last_read_req[i])
> - req[n++] = obj->last_read_req[i];
> + if (obj->last_read[i].request)
> + req[n++] = obj->last_read[i].request;
> }
> for (i = 0; i < n; i++) {
> ret = __i915_gem_object_sync(obj, to, req[i]);
> @@ -3695,8 +3695,8 @@ i915_gem_busy_ioctl(struct drm_device *dev, void *data,
>
> BUILD_BUG_ON(I915_NUM_RINGS > 16);
> args->busy = obj->active << 16;
> - if (obj->last_write_req)
> - args->busy |= obj->last_write_req->engine->id;
> + if (obj->last_write.request)
> + args->busy |= obj->last_write.request->engine->id;
>
> unref:
> drm_gem_object_unreference(&obj->base);
> diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> index 6dee27224ddb..56d6b5dbb121 100644
> --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
> @@ -1125,7 +1125,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
>
> i915_vma_move_to_active(vma, req);
> if (obj->base.write_domain) {
> - i915_gem_request_assign(&obj->last_write_req, req);
> + i915_gem_request_mark_active(req, &obj->last_write);
>
> intel_fb_obj_invalidate(obj, ORIGIN_CS);
>
> @@ -1133,7 +1133,7 @@ i915_gem_execbuffer_move_to_active(struct list_head *vmas,
> obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
> }
> if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
> - i915_gem_request_assign(&obj->last_fenced_req, req);
> + i915_gem_request_mark_active(req, &obj->last_fence);
> if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
> struct drm_i915_private *dev_priv = req->i915;
> list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
> diff --git a/drivers/gpu/drm/i915/i915_gem_fence.c b/drivers/gpu/drm/i915/i915_gem_fence.c
> index 598198543dcd..ab29c237ffa9 100644
> --- a/drivers/gpu/drm/i915/i915_gem_fence.c
> +++ b/drivers/gpu/drm/i915/i915_gem_fence.c
> @@ -261,12 +261,12 @@ static inline void i915_gem_object_fence_lost(struct drm_i915_gem_object *obj)
> static int
> i915_gem_object_wait_fence(struct drm_i915_gem_object *obj)
> {
> - if (obj->last_fenced_req) {
> - int ret = i915_wait_request(obj->last_fenced_req);
> + if (obj->last_fence.request) {
> + int ret = i915_wait_request(obj->last_fence.request);
> if (ret)
> return ret;
>
> - i915_gem_request_assign(&obj->last_fenced_req, NULL);
> + i915_gem_request_assign(&obj->last_fence.request, NULL);
> }
>
> return 0;
> diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h
> index 2da9e0b5dfc7..0a21986c332b 100644
> --- a/drivers/gpu/drm/i915/i915_gem_request.h
> +++ b/drivers/gpu/drm/i915/i915_gem_request.h
> @@ -208,4 +208,42 @@ static inline bool i915_gem_request_completed(struct drm_i915_gem_request *req)
> req->fence.seqno);
> }
>
> +/* We treat requests as fences. This is not be to confused with our
> + * "fence registers" but pipeline synchronisation objects ala GL_ARB_sync.
> + * We use the fences to synchronize access from the CPU with activity on the
> + * GPU, for example, we should not rewrite an object's PTE whilst the GPU
> + * is reading them. We also track fences at a higher level to provide
> + * implicit synchronisation around GEM objects, e.g. set-domain will wait
> + * for outstanding GPU rendering before marking the object ready for CPU
> + * access, or a pageflip will wait until the GPU is complete before showing
> + * the frame on the scanout.
> + *
> + * In order to use a fence, the object must track the fence it needs to
> + * serialise with. For example, GEM objects want to track both read and
> + * write access so that we can perform concurrent read operations between
> + * the CPU and GPU engines, as well as waiting for all rendering to
> + * complete, or waiting for the last GPU user of a "fence register". The
> + * object then embeds a @i915_gem_active to track the most recent (in
> + * retirment order) request relevant for the desired mode of access.
> + * The @i915_gem_active is updated with i915_gem_request_mark_active() to
> + * track the most recent fence request, typically this is done as part of
> + * i915_vma_move_to_active().
> + *
> + * When the @i915_gem_active completes (is retired), it will
> + * signal its completion to the owner through a callback as well as mark
> + * itself as idle (i915_gem_active.request == NULL). The owner
> + * can then perform any action, such as delayed freeing of an active
> + * resource including itself.
> + */
> +struct i915_gem_active {
> + struct drm_i915_gem_request *request;
> +};
> +
> +static inline void
> +i915_gem_request_mark_active(struct drm_i915_gem_request *request,
> + struct i915_gem_active *active)
> +{
> + i915_gem_request_assign(&active->request, request);
> +}
This function name bothers me since I think the name is misleading and
unintuitive. It is not marking a request as active but associating it
with the second data structure.
Maybe i915_gem_request_move_to_active to keep the mental association
with the well established vma_move_to_active ?
Maybe struct i915_gem_active could also be better called
i915_gem_active_list ?
It is not ideal because of the future little reversal of who is in who's
list, so maybe there is something even better. But I think an intuitive
name is really important for code clarity and maintainability.
Regards,
Tvrtko
More information about the Intel-gfx
mailing list