[Intel-gfx] [PATCH 12/23] drm/i915: Always allocate an object/vma for the HWSP
Chris Wilson
chris at chris-wilson.co.uk
Thu Jan 17 14:34:53 UTC 2019
Currently we only allocate an object and vma if we are using a GGTT
virtual HWSP, and a plain struct page for a physical HWSP. For
convenience later on with global timelines, it will be useful to always
have the status page being tracked by a struct i915_vma. Make it so.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Reviewed-by: Matthew Auld <matthew.auld at intel.com>
---
drivers/gpu/drm/i915/intel_engine_cs.c | 109 ++++++++++---------
drivers/gpu/drm/i915/intel_guc_submission.c | 5 +
drivers/gpu/drm/i915/intel_lrc.c | 11 +-
drivers/gpu/drm/i915/intel_ringbuffer.c | 20 +++-
drivers/gpu/drm/i915/intel_ringbuffer.h | 23 +---
drivers/gpu/drm/i915/selftests/mock_engine.c | 2 +-
6 files changed, 90 insertions(+), 80 deletions(-)
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index fc52737751e7..4b4b7358c482 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -506,27 +506,61 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
static void cleanup_status_page(struct intel_engine_cs *engine)
{
+ struct i915_vma *vma;
+
/* Prevent writes into HWSP after returning the page to the system */
intel_engine_set_hwsp_writemask(engine, ~0u);
- if (HWS_NEEDS_PHYSICAL(engine->i915)) {
- void *addr = fetch_and_zero(&engine->status_page.page_addr);
+ vma = fetch_and_zero(&engine->status_page.vma);
+ if (!vma)
+ return;
- __free_page(virt_to_page(addr));
- }
+ if (!HWS_NEEDS_PHYSICAL(engine->i915))
+ i915_vma_unpin(vma);
+
+ i915_gem_object_unpin_map(vma->obj);
+ __i915_gem_object_release_unless_active(vma->obj);
+}
+
+static int pin_ggtt_status_page(struct intel_engine_cs *engine,
+ struct i915_vma *vma)
+{
+ unsigned int flags;
+
+ flags = PIN_GLOBAL;
+ if (!HAS_LLC(engine->i915))
+ /*
+ * On g33, we cannot place HWS above 256MiB, so
+ * restrict its pinning to the low mappable arena.
+ * Though this restriction is not documented for
+ * gen4, gen5, or byt, they also behave similarly
+ * and hang if the HWS is placed at the top of the
+ * GTT. To generalise, it appears that all !llc
+ * platforms have issues with us placing the HWS
+ * above the mappable region (even though we never
+ * actually map it).
+ */
+ flags |= PIN_MAPPABLE;
+ else
+ flags |= PIN_HIGH;
- i915_vma_unpin_and_release(&engine->status_page.vma,
- I915_VMA_RELEASE_MAP);
+ return i915_vma_pin(vma, 0, 0, flags);
}
static int init_status_page(struct intel_engine_cs *engine)
{
struct drm_i915_gem_object *obj;
struct i915_vma *vma;
- unsigned int flags;
void *vaddr;
int ret;
+ /*
+ * Though the HWS register does support 36bit addresses, historically
+ * we have had hangs and corruption reported due to wild writes if
+ * the HWS is placed above 4G. We only allow objects to be allocated
+ * in GFP_DMA32 for i965, and no earlier physical address users had
+ * access to more than 4G.
+ */
obj = i915_gem_object_create_internal(engine->i915, PAGE_SIZE);
if (IS_ERR(obj)) {
DRM_ERROR("Failed to allocate status page\n");
@@ -543,61 +577,30 @@ static int init_status_page(struct intel_engine_cs *engine)
goto err;
}
- flags = PIN_GLOBAL;
- if (!HAS_LLC(engine->i915))
- /* On g33, we cannot place HWS above 256MiB, so
- * restrict its pinning to the low mappable arena.
- * Though this restriction is not documented for
- * gen4, gen5, or byt, they also behave similarly
- * and hang if the HWS is placed at the top of the
- * GTT. To generalise, it appears that all !llc
- * platforms have issues with us placing the HWS
- * above the mappable region (even though we never
- * actually map it).
- */
- flags |= PIN_MAPPABLE;
- else
- flags |= PIN_HIGH;
- ret = i915_vma_pin(vma, 0, 0, flags);
- if (ret)
- goto err;
-
vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
if (IS_ERR(vaddr)) {
ret = PTR_ERR(vaddr);
- goto err_unpin;
+ goto err;
}
+ engine->status_page.addr = memset(vaddr, 0, PAGE_SIZE);
engine->status_page.vma = vma;
- engine->status_page.ggtt_offset = i915_ggtt_offset(vma);
- engine->status_page.page_addr = memset(vaddr, 0, PAGE_SIZE);
+
+ if (!HWS_NEEDS_PHYSICAL(engine->i915)) {
+ ret = pin_ggtt_status_page(engine, vma);
+ if (ret)
+ goto err_unpin;
+ }
+
return 0;
err_unpin:
- i915_vma_unpin(vma);
+ i915_gem_object_unpin_map(obj);
err:
i915_gem_object_put(obj);
return ret;
}
-static int init_phys_status_page(struct intel_engine_cs *engine)
-{
- struct page *page;
-
- /*
- * Though the HWS register does support 36bit addresses, historically
- * we have had hangs and corruption reported due to wild writes if
- * the HWS is placed above 4G.
- */
- page = alloc_page(GFP_KERNEL | __GFP_DMA32 | __GFP_ZERO);
- if (!page)
- return -ENOMEM;
-
- engine->status_page.page_addr = page_address(page);
-
- return 0;
-}
-
static void __intel_context_unpin(struct i915_gem_context *ctx,
struct intel_engine_cs *engine)
{
@@ -650,10 +653,7 @@ int intel_engine_init_common(struct intel_engine_cs *engine)
if (ret)
goto err_unpin_preempt;
- if (HWS_NEEDS_PHYSICAL(i915))
- ret = init_phys_status_page(engine);
- else
- ret = init_status_page(engine);
+ ret = init_status_page(engine);
if (ret)
goto err_breadcrumbs;
@@ -1318,7 +1318,8 @@ static void intel_engine_print_registers(const struct intel_engine_cs *engine,
}
if (HAS_EXECLISTS(dev_priv)) {
- const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ const u32 *hws =
+ &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int idx;
u8 read, write;
@@ -1501,7 +1502,7 @@ void intel_engine_dump(struct intel_engine_cs *engine,
spin_unlock_irqrestore(&b->rb_lock, flags);
drm_printf(m, "HWSP:\n");
- hexdump(m, engine->status_page.page_addr, PAGE_SIZE);
+ hexdump(m, engine->status_page.addr, PAGE_SIZE);
drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine)));
}
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 7217c7e3ee8d..b044162a41d3 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -81,6 +81,11 @@
*
*/
+static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
+{
+ return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_PREEMPT_ADDR;
+}
+
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 7b56f2f17203..edb26f69d864 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -172,6 +172,11 @@ static void execlists_init_reg_state(u32 *reg_state,
struct intel_engine_cs *engine,
struct intel_ring *ring);
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+ return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR;
+}
+
static inline struct i915_priolist *to_priolist(struct rb_node *rb)
{
return rb_entry(rb, struct i915_priolist, node);
@@ -1679,7 +1684,7 @@ static void enable_execlists(struct intel_engine_cs *engine)
_MASKED_BIT_DISABLE(STOP_RING));
I915_WRITE(RING_HWS_PGA(engine->mmio_base),
- engine->status_page.ggtt_offset);
+ i915_ggtt_offset(engine->status_page.vma));
POSTING_READ(RING_HWS_PGA(engine->mmio_base));
}
@@ -2226,10 +2231,10 @@ static int logical_ring_init(struct intel_engine_cs *engine)
}
execlists->csb_status =
- &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
+ &engine->status_page.addr[I915_HWS_CSB_BUF0_INDEX];
execlists->csb_write =
- &engine->status_page.page_addr[intel_hws_csb_write_index(i915)];
+ &engine->status_page.addr[intel_hws_csb_write_index(i915)];
reset_csb_pointers(execlists);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 662907e1a286..d72012b42f20 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -43,6 +43,11 @@
*/
#define LEGACY_REQUEST_SIZE 200
+static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
+{
+ return i915_ggtt_offset(engine->status_page.vma) + I915_GEM_HWS_INDEX_ADDR;
+}
+
static unsigned int __intel_ring_space(unsigned int head,
unsigned int tail,
unsigned int size)
@@ -499,12 +504,17 @@ static void set_hws_pga(struct intel_engine_cs *engine, phys_addr_t phys)
I915_WRITE(HWS_PGA, addr);
}
-static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+static struct page *status_page(struct intel_engine_cs *engine)
{
- struct page *page = virt_to_page(engine->status_page.page_addr);
- phys_addr_t phys = PFN_PHYS(page_to_pfn(page));
+ struct drm_i915_gem_object *obj = engine->status_page.vma->obj;
- set_hws_pga(engine, phys);
+ GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
+ return sg_page(obj->mm.pages->sgl);
+}
+
+static void ring_setup_phys_status_page(struct intel_engine_cs *engine)
+{
+ set_hws_pga(engine, PFN_PHYS(page_to_pfn(status_page(engine))));
set_hwstam(engine, ~0u);
}
@@ -571,7 +581,7 @@ static void flush_cs_tlb(struct intel_engine_cs *engine)
static void ring_setup_status_page(struct intel_engine_cs *engine)
{
- set_hwsp(engine, engine->status_page.ggtt_offset);
+ set_hwsp(engine, i915_ggtt_offset(engine->status_page.vma));
set_hwstam(engine, ~0u);
flush_cs_tlb(engine);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 17e05d11ee34..9972c9016445 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -32,8 +32,7 @@ struct i915_sched_attr;
struct intel_hw_status_page {
struct i915_vma *vma;
- u32 *page_addr;
- u32 ggtt_offset;
+ u32 *addr;
};
#define I915_READ_TAIL(engine) I915_READ(RING_TAIL((engine)->mmio_base))
@@ -675,7 +674,7 @@ static inline u32
intel_read_status_page(const struct intel_engine_cs *engine, int reg)
{
/* Ensure that the compiler doesn't optimize away the load. */
- return READ_ONCE(engine->status_page.page_addr[reg]);
+ return READ_ONCE(engine->status_page.addr[reg]);
}
static inline void
@@ -688,12 +687,12 @@ intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
*/
if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
mb();
- clflush(&engine->status_page.page_addr[reg]);
- engine->status_page.page_addr[reg] = value;
- clflush(&engine->status_page.page_addr[reg]);
+ clflush(&engine->status_page.addr[reg]);
+ engine->status_page.addr[reg] = value;
+ clflush(&engine->status_page.addr[reg]);
mb();
} else {
- WRITE_ONCE(engine->status_page.page_addr[reg], value);
+ WRITE_ONCE(engine->status_page.addr[reg], value);
}
}
@@ -881,16 +880,6 @@ static inline bool intel_engine_has_started(struct intel_engine_cs *engine,
void intel_engine_get_instdone(struct intel_engine_cs *engine,
struct intel_instdone *instdone);
-static inline u32 intel_hws_seqno_address(struct intel_engine_cs *engine)
-{
- return engine->status_page.ggtt_offset + I915_GEM_HWS_INDEX_ADDR;
-}
-
-static inline u32 intel_hws_preempt_done_address(struct intel_engine_cs *engine)
-{
- return engine->status_page.ggtt_offset + I915_GEM_HWS_PREEMPT_ADDR;
-}
-
/* intel_breadcrumbs.c -- user interrupt bottom-half for waiters */
int intel_engine_init_breadcrumbs(struct intel_engine_cs *engine);
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 8b8d51af7d6a..968a7e139a67 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -201,7 +201,7 @@ struct intel_engine_cs *mock_engine(struct drm_i915_private *i915,
engine->base.i915 = i915;
snprintf(engine->base.name, sizeof(engine->base.name), "%s", name);
engine->base.id = id;
- engine->base.status_page.page_addr = (void *)(engine + 1);
+ engine->base.status_page.addr = (void *)(engine + 1);
engine->base.context_pin = mock_context_pin;
engine->base.request_alloc = mock_request_alloc;
--
2.20.1
More information about the Intel-gfx
mailing list