[Intel-gfx] [PATCH 4/8] drm/i915: Allocate a common scratch page
Mika Kuoppala
mika.kuoppala at linux.intel.com
Mon Dec 3 15:28:22 UTC 2018
Chris Wilson <chris at chris-wilson.co.uk> writes:
> Currently we allocate a scratch page for each engine, but since we only
> ever write into it for post-sync operations, it is not exposed to
> userspace nor do we care for coherency. As we then do not care about its
> contents, we can use one page for all, reducing our allocations and
> avoid complications by not assuming per-engine isolation.
>
> For later use, it simplifies engine initialisation (by removing the
> allocation that required struct_mutex!) and means that we can always rely
> on there being a scratch page.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> ---
> drivers/gpu/drm/i915/i915_drv.h | 7 ++++
> drivers/gpu/drm/i915/i915_gem.c | 50 ++++++++++++++++++++++++-
> drivers/gpu/drm/i915/i915_gpu_error.c | 2 +-
> drivers/gpu/drm/i915/intel_engine_cs.c | 42 ---------------------
> drivers/gpu/drm/i915/intel_lrc.c | 17 +++------
> drivers/gpu/drm/i915/intel_ringbuffer.c | 33 +++++-----------
> drivers/gpu/drm/i915/intel_ringbuffer.h | 5 ---
> 7 files changed, 71 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index d45475287130..0ec65cc48b5a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -1996,6 +1996,8 @@ struct drm_i915_private {
> struct delayed_work idle_work;
>
> ktime_t last_init_time;
> +
> + struct i915_vma *scratch;
> } gt;
>
> /* perform PHY state sanity checks? */
> @@ -3724,4 +3726,9 @@ static inline int intel_hws_csb_write_index(struct drm_i915_private *i915)
> return I915_HWS_CSB_WRITE_INDEX;
> }
>
> +static inline u32 i915_scratch_offset(const struct drm_i915_private *i915)
> +{
> + return i915_ggtt_offset(i915->gt.scratch);
> +}
> +
> #endif
> diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
> index 834240a9b262..cca4285e3329 100644
> --- a/drivers/gpu/drm/i915/i915_gem.c
> +++ b/drivers/gpu/drm/i915/i915_gem.c
> @@ -5495,6 +5495,44 @@ static int __intel_engines_record_defaults(struct drm_i915_private *i915)
> goto out_ctx;
> }
>
> +static int
> +i915_gem_init_scratch(struct drm_i915_private *i915, unsigned int size)
> +{
> + struct drm_i915_gem_object *obj;
> + struct i915_vma *vma;
> + int ret;
> +
> + obj = i915_gem_object_create_stolen(i915, size);
> + if (!obj)
> + obj = i915_gem_object_create_internal(i915, size);
> + if (IS_ERR(obj)) {
> + DRM_ERROR("Failed to allocate scratch page\n");
> + return PTR_ERR(obj);
> + }
> +
> + vma = i915_vma_instance(obj, &i915->ggtt.vm, NULL);
> + if (IS_ERR(vma)) {
> + ret = PTR_ERR(vma);
> + goto err_unref;
> + }
> +
> + ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> + if (ret)
> + goto err_unref;
> +
> + i915->gt.scratch = vma;
> + return 0;
> +
> +err_unref:
> + i915_gem_object_put(obj);
> + return ret;
> +}
> +
> +static void i915_gem_fini_scratch(struct drm_i915_private *i915)
> +{
> + i915_vma_unpin_and_release(&i915->gt.scratch, 0);
> +}
> +
> int i915_gem_init(struct drm_i915_private *dev_priv)
> {
> int ret;
> @@ -5541,12 +5579,19 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
> goto err_unlock;
> }
>
> - ret = i915_gem_contexts_init(dev_priv);
> + ret = i915_gem_init_scratch(dev_priv,
> + IS_GEN2(dev_priv) ? SZ_256K : PAGE_SIZE);
> if (ret) {
> GEM_BUG_ON(ret == -EIO);
> goto err_ggtt;
> }
>
> + ret = i915_gem_contexts_init(dev_priv);
> + if (ret) {
> + GEM_BUG_ON(ret == -EIO);
> + goto err_scratch;
> + }
> +
> ret = intel_engines_init(dev_priv);
> if (ret) {
> GEM_BUG_ON(ret == -EIO);
> @@ -5619,6 +5664,8 @@ int i915_gem_init(struct drm_i915_private *dev_priv)
> err_context:
> if (ret != -EIO)
> i915_gem_contexts_fini(dev_priv);
> +err_scratch:
> + i915_gem_fini_scratch(dev_priv);
> err_ggtt:
> err_unlock:
> intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
> @@ -5670,6 +5717,7 @@ void i915_gem_fini(struct drm_i915_private *dev_priv)
> intel_uc_fini(dev_priv);
> i915_gem_cleanup_engines(dev_priv);
> i915_gem_contexts_fini(dev_priv);
> + i915_gem_fini_scratch(dev_priv);
> mutex_unlock(&dev_priv->drm.struct_mutex);
>
> intel_cleanup_gt_powersave(dev_priv);
> diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
> index a6885a59568b..07465123c166 100644
> --- a/drivers/gpu/drm/i915/i915_gpu_error.c
> +++ b/drivers/gpu/drm/i915/i915_gpu_error.c
> @@ -1571,7 +1571,7 @@ static void gem_record_rings(struct i915_gpu_state *error)
> if (HAS_BROKEN_CS_TLB(i915))
> ee->wa_batchbuffer =
> i915_error_object_create(i915,
> - engine->scratch);
> + i915->gt.scratch);
> request_record_user_bo(request, ee);
>
> ee->ctx =
> diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
> index 759c0fd58f8c..2390985384d6 100644
> --- a/drivers/gpu/drm/i915/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/intel_engine_cs.c
> @@ -493,46 +493,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
> intel_engine_init_cmd_parser(engine);
> }
>
> -int intel_engine_create_scratch(struct intel_engine_cs *engine,
> - unsigned int size)
> -{
> - struct drm_i915_gem_object *obj;
> - struct i915_vma *vma;
> - int ret;
> -
> - WARN_ON(engine->scratch);
> -
> - obj = i915_gem_object_create_stolen(engine->i915, size);
> - if (!obj)
> - obj = i915_gem_object_create_internal(engine->i915, size);
> - if (IS_ERR(obj)) {
> - DRM_ERROR("Failed to allocate scratch page\n");
> - return PTR_ERR(obj);
> - }
> -
> - vma = i915_vma_instance(obj, &engine->i915->ggtt.vm, NULL);
> - if (IS_ERR(vma)) {
> - ret = PTR_ERR(vma);
> - goto err_unref;
> - }
> -
> - ret = i915_vma_pin(vma, 0, 0, PIN_GLOBAL | PIN_HIGH);
> - if (ret)
> - goto err_unref;
> -
> - engine->scratch = vma;
> - return 0;
> -
> -err_unref:
> - i915_gem_object_put(obj);
> - return ret;
> -}
> -
> -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine)
> -{
> - i915_vma_unpin_and_release(&engine->scratch, 0);
> -}
> -
> static void cleanup_status_page(struct intel_engine_cs *engine)
> {
> if (HWS_NEEDS_PHYSICAL(engine->i915)) {
> @@ -707,8 +667,6 @@ void intel_engine_cleanup_common(struct intel_engine_cs *engine)
> {
> struct drm_i915_private *i915 = engine->i915;
>
> - intel_engine_cleanup_scratch(engine);
> -
> cleanup_status_page(engine);
>
> intel_engine_fini_breadcrumbs(engine);
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index b5511a054f30..de070dca4033 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -1286,9 +1286,10 @@ static int execlists_request_alloc(struct i915_request *request)
> static u32 *
> gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
> {
> + /* NB no one else is allowed to scribble over scratch + 256! */
> *batch++ = MI_STORE_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
> *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> - *batch++ = i915_ggtt_offset(engine->scratch) + 256;
> + *batch++ = i915_scratch_offset(engine->i915) + 256;
> *batch++ = 0;
>
> *batch++ = MI_LOAD_REGISTER_IMM(1);
> @@ -1302,7 +1303,7 @@ gen8_emit_flush_coherentl3_wa(struct intel_engine_cs *engine, u32 *batch)
>
> *batch++ = MI_LOAD_REGISTER_MEM_GEN8 | MI_SRM_LRM_GLOBAL_GTT;
> *batch++ = i915_mmio_reg_offset(GEN8_L3SQCREG4);
> - *batch++ = i915_ggtt_offset(engine->scratch) + 256;
> + *batch++ = i915_scratch_offset(engine->i915) + 256;
> *batch++ = 0;
>
> return batch;
> @@ -1339,7 +1340,7 @@ static u32 *gen8_init_indirectctx_bb(struct intel_engine_cs *engine, u32 *batch)
> PIPE_CONTROL_GLOBAL_GTT_IVB |
> PIPE_CONTROL_CS_STALL |
> PIPE_CONTROL_QW_WRITE,
> - i915_ggtt_offset(engine->scratch) +
> + i915_scratch_offset(engine->i915) +
> 2 * CACHELINE_BYTES);
>
> *batch++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> @@ -1969,7 +1970,7 @@ static int gen8_emit_flush_render(struct i915_request *request,
> {
> struct intel_engine_cs *engine = request->engine;
> u32 scratch_addr =
> - i915_ggtt_offset(engine->scratch) + 2 * CACHELINE_BYTES;
> + i915_scratch_offset(engine->i915) + 2 * CACHELINE_BYTES;
> bool vf_flush_wa = false, dc_flush_wa = false;
> u32 *cs, flags = 0;
> int len;
> @@ -2306,10 +2307,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
> if (ret)
> return ret;
>
> - ret = intel_engine_create_scratch(engine, PAGE_SIZE);
> - if (ret)
> - goto err_cleanup_common;
> -
> ret = intel_init_workaround_bb(engine);
> if (ret) {
> /*
> @@ -2322,10 +2319,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
> }
>
> return 0;
> -
> -err_cleanup_common:
> - intel_engine_cleanup_common(engine);
> - return ret;
> }
>
> int logical_xcs_ring_init(struct intel_engine_cs *engine)
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
> index 81b10d85b738..a3d3126a3938 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.c
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
> @@ -150,8 +150,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
> */
> if (mode & EMIT_INVALIDATE) {
> *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
> - *cs++ = i915_ggtt_offset(rq->engine->scratch) |
> - PIPE_CONTROL_GLOBAL_GTT;
> + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
> *cs++ = 0;
> *cs++ = 0;
>
> @@ -159,8 +158,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
> *cs++ = MI_FLUSH;
>
> *cs++ = GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE;
> - *cs++ = i915_ggtt_offset(rq->engine->scratch) |
> - PIPE_CONTROL_GLOBAL_GTT;
> + *cs++ = i915_scratch_offset(rq->i915) | PIPE_CONTROL_GLOBAL_GTT;
> *cs++ = 0;
> *cs++ = 0;
> }
> @@ -212,8 +210,7 @@ gen4_render_ring_flush(struct i915_request *rq, u32 mode)
> static int
> intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
> {
> - u32 scratch_addr =
> - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
> u32 *cs;
>
> cs = intel_ring_begin(rq, 6);
> @@ -246,8 +243,7 @@ intel_emit_post_sync_nonzero_flush(struct i915_request *rq)
> static int
> gen6_render_ring_flush(struct i915_request *rq, u32 mode)
> {
> - u32 scratch_addr =
> - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
> u32 *cs, flags = 0;
> int ret;
>
> @@ -316,8 +312,7 @@ gen7_render_ring_cs_stall_wa(struct i915_request *rq)
> static int
> gen7_render_ring_flush(struct i915_request *rq, u32 mode)
> {
> - u32 scratch_addr =
> - i915_ggtt_offset(rq->engine->scratch) + 2 * CACHELINE_BYTES;
> + u32 scratch_addr = i915_scratch_offset(rq->i915) + 2 * CACHELINE_BYTES;
> u32 *cs, flags = 0;
>
> /*
> @@ -1002,7 +997,7 @@ i830_emit_bb_start(struct i915_request *rq,
> u64 offset, u32 len,
> unsigned int dispatch_flags)
> {
> - u32 *cs, cs_offset = i915_ggtt_offset(rq->engine->scratch);
> + u32 *cs, cs_offset = i915_scratch_offset(rq->i915);
>
> cs = intel_ring_begin(rq, 6);
> if (IS_ERR(cs))
> @@ -1459,7 +1454,6 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> {
> struct i915_timeline *timeline;
> struct intel_ring *ring;
> - unsigned int size;
> int err;
>
> intel_engine_setup_common(engine);
> @@ -1484,21 +1478,12 @@ static int intel_init_ring_buffer(struct intel_engine_cs *engine)
> GEM_BUG_ON(engine->buffer);
> engine->buffer = ring;
>
> - size = PAGE_SIZE;
> - if (HAS_BROKEN_CS_TLB(engine->i915))
> - size = I830_WA_SIZE;
Hmm, this disappeared. I don't know this wa details
but apparently we do need 2 pages for this kind of
machines.
-Mika
> - err = intel_engine_create_scratch(engine, size);
> - if (err)
> - goto err_unpin;
> -
> err = intel_engine_init_common(engine);
> if (err)
> - goto err_scratch;
> + goto err_unpin;
>
> return 0;
>
> -err_scratch:
> - intel_engine_cleanup_scratch(engine);
> err_unpin:
> intel_ring_unpin(ring);
> err_ring:
> @@ -1572,7 +1557,7 @@ static int flush_pd_dir(struct i915_request *rq)
> /* Stall until the page table load is complete */
> *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> *cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
> - *cs++ = i915_ggtt_offset(engine->scratch);
> + *cs++ = i915_scratch_offset(rq->i915);
> *cs++ = MI_NOOP;
>
> intel_ring_advance(rq, cs);
> @@ -1681,7 +1666,7 @@ static inline int mi_set_context(struct i915_request *rq, u32 flags)
> /* Insert a delay before the next switch! */
> *cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
> *cs++ = i915_mmio_reg_offset(last_reg);
> - *cs++ = i915_ggtt_offset(engine->scratch);
> + *cs++ = i915_scratch_offset(rq->i915);
> *cs++ = MI_NOOP;
> }
> *cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index 8a2270b209b0..970fb5c05c36 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -451,7 +451,6 @@ struct intel_engine_cs {
>
> struct intel_hw_status_page status_page;
> struct i915_ctx_workarounds wa_ctx;
> - struct i915_vma *scratch;
>
> u32 irq_keep_mask; /* always keep these interrupts */
> u32 irq_enable_mask; /* bitmask to enable ring interrupt */
> @@ -908,10 +907,6 @@ void intel_engine_setup_common(struct intel_engine_cs *engine);
> int intel_engine_init_common(struct intel_engine_cs *engine);
> void intel_engine_cleanup_common(struct intel_engine_cs *engine);
>
> -int intel_engine_create_scratch(struct intel_engine_cs *engine,
> - unsigned int size);
> -void intel_engine_cleanup_scratch(struct intel_engine_cs *engine);
> -
> int intel_init_render_ring_buffer(struct intel_engine_cs *engine);
> int intel_init_bsd_ring_buffer(struct intel_engine_cs *engine);
> int intel_init_blt_ring_buffer(struct intel_engine_cs *engine);
> --
> 2.20.0.rc1
More information about the Intel-gfx
mailing list