[Mesa-dev] [PATCH 07/70] i965: Combine the multiple pipelined register detection into one round-trip
Chris Wilson
chris at chris-wilson.co.uk
Fri Aug 7 13:13:11 PDT 2015
Combining the multiple access checks into a few batches and a single
serialising read can reduce detection times from around 100us to 70us on
a fast Haswell system.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/intel_screen.c | 165 +++++++++++++++++++------------
1 file changed, 101 insertions(+), 64 deletions(-)
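
[Note for reviewers, not part of the commit message.] The idea is to gather all the register probes into a small array first and then flush them through the kernel/GPU in one pass. Below is a rough standalone sketch of that gather-then-collect pattern; the names fill_so/fill_oacontrol, the register offsets and the flag bit positions are stand-ins for illustration only, not the real i965/libdrm entry points used in the patch.

/* Illustration only: stand-in names and values, not the real i965/libdrm API. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct detect_pipelined_register {
   uint32_t reg;
   uint32_t expected_value;
   unsigned result;        /* flag to set in hw_has_pipelined_register */
   bool reset;             /* clear the register again after probing */
};

/* Each detector fills at most one slot and returns how many slots it used. */
static int fill_so(struct detect_pipelined_register *d)
{
   d->reg = 0x5280;               /* stand-in for GEN7_SO_WRITE_OFFSET(0) */
   d->expected_value = 0x1337d0d0;
   d->result = 1u << 0;           /* stand-in for HW_HAS_PIPELINED_SOL_OFFSET */
   d->reset = false;
   return 1;
}

static int fill_oacontrol(struct detect_pipelined_register *d)
{
   d->reg = 0x2360;               /* stand-in for OACONTROL */
   d->expected_value = 0x31337000;
   d->result = 1u << 1;           /* stand-in for HW_HAS_PIPELINED_OACONTROL */
   d->reset = true;
   return 1;
}

int main(void)
{
   struct detect_pipelined_register regs[2], *r = regs;
   unsigned hw_has_pipelined_register = 0;

   /* Gather every probe first... */
   r += fill_so(r);
   r += fill_oacontrol(r);

   /* ...then one execute-and-read-back pass ORs in the flags for the
    * registers whose writes stuck.  Here we pretend they all did. */
   for (int i = 0; i < (int)(r - regs); i++)
      hw_has_pipelined_register |= regs[i].result;

   printf("hw_has_pipelined_register = 0x%x\n", hw_has_pipelined_register);
   return 0;
}
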
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index 36c7bb2..0b60f13 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1185,6 +1185,13 @@ intel_detect_timestamp(struct intel_screen *screen)
return 0;
}
+struct detect_pipelined_register {
+ uint32_t reg;
+ uint32_t expected_value;
+ unsigned result;
+ bool reset;
+};
+
/**
* Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
*
@@ -1192,102 +1199,134 @@ intel_detect_timestamp(struct intel_screen *screen)
* while others don't. Instead of trying to enumerate every case, just
* try and write a register and see if it works.
*/
-static bool
-intel_detect_pipelined_register(struct intel_screen *screen,
- int reg, uint32_t expected_value, bool reset)
+static void
+__intel_detect_pipelined_registers(struct intel_screen *screen,
+ struct detect_pipelined_register *r,
+ int count)
{
- drm_intel_bo *results, *bo;
- uint32_t *batch;
- uint32_t offset = 0;
- bool success = false;
+ drm_intel_bo *results;
+ int i;
+
+ if (count == 0)
+ return;
/* Create a zero'ed temporary buffer for reading our results */
results = drm_intel_bo_alloc(screen->bufmgr, "registers", 4096, 0);
if (results == NULL)
- goto err;
-
- bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
- if (bo == NULL)
- goto err_results;
+ return;
- if (drm_intel_bo_map(bo, 1))
- goto err_batch;
+ /* Emit each access in a separate batch buffer so that if the kernel
+ * rejects an individual access attempt, we don't incorrectly assume
+ * all the register accesses are invalid.
+ */
+ for (i = 0; i < count; i++) {
+ drm_intel_bo *bo;
+ uint32_t *batch;
- batch = bo->virtual;
+ bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
+ if (bo == NULL)
+ continue;
- /* Write the register. */
- *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
- *batch++ = reg;
- *batch++ = expected_value;
+ if (drm_intel_bo_map(bo, 1))
+ goto err_batch;
- /* Save the register's value back to the buffer. */
- *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
- *batch++ = reg;
- drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)bo->virtual,
- results, offset*sizeof(uint32_t),
- I915_GEM_DOMAIN_INSTRUCTION,
- I915_GEM_DOMAIN_INSTRUCTION);
- *batch++ = results->offset + offset*sizeof(uint32_t);
+ batch = bo->virtual;
- /* And afterwards clear the register */
- if (reset) {
+ /* Write the register. */
*batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
- *batch++ = reg;
- *batch++ = 0;
- }
+ *batch++ = r[i].reg;
+ *batch++ = r[i].expected_value;
+
+ /* Save the register's value back to the buffer. */
+ *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
+ *batch++ = r[i].reg;
+ drm_intel_bo_emit_reloc(bo, (char *)batch - (char *)bo->virtual,
+ results, i*sizeof(uint32_t),
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION);
+ *batch++ = results->offset + i*sizeof(uint32_t);
+
+ /* And afterwards clear the register */
+ if (r[i].reset) {
+ *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
+ *batch++ = r[i].reg;
+ *batch++ = 0;
+ }
- *batch++ = MI_BATCH_BUFFER_END;
+ *batch++ = MI_BATCH_BUFFER_END;
- drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8),
- NULL, 0, 0,
- I915_EXEC_RENDER);
+ drm_intel_bo_mrb_exec(bo, ALIGN((char *)batch - (char *)bo->virtual, 8),
+ NULL, 0, 0,
+ I915_EXEC_RENDER);
- /* Check whether the value got written. */
+err_batch:
+ drm_intel_bo_unreference(bo);
+ }
+
+ /* Check whether the values got written. */
if (drm_intel_bo_map(results, false) == 0) {
- success = *((uint32_t *)results->virtual + offset) == expected_value;
+ uint32_t *data = results->virtual;
+ for (i = 0; i < count; i++)
+ if (data[i] == r[i].expected_value)
+ screen->hw_has_pipelined_register |= r[i].result;
drm_intel_bo_unmap(results);
}
-err_batch:
- drm_intel_bo_unreference(bo);
-err_results:
drm_intel_bo_unreference(results);
-err:
- return success;
}
-static bool
+static int
-intel_detect_pipelined_so(struct intel_screen *screen)
+intel_detect_pipelined_so(struct intel_screen *screen,
+ struct detect_pipelined_register *detect)
{
- /* Supposedly, Broadwell just works. */
- if (screen->devinfo->gen >= 8)
- return true;
-
if (screen->devinfo->gen <= 6)
- return false;
+ return 0;
+
+ /* Supposedly, Broadwell just works. */
+ if (screen->devinfo->gen >= 8) {
+ screen->hw_has_pipelined_register |= HW_HAS_PIPELINED_SOL_OFFSET;
+ return 0;
+ }
/* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the
* statistics registers), and we already reset it to zero before using it.
*/
- return intel_detect_pipelined_register(screen,
- GEN7_SO_WRITE_OFFSET(0),
- 0x1337d0d0,
- false);
+ detect->reg = GEN7_SO_WRITE_OFFSET(0);
+ detect->expected_value = 0x1337d0d0;
+ detect->result = HW_HAS_PIPELINED_SOL_OFFSET;
+ detect->reset = false;
+ return 1;
}
-static bool
-intel_detect_pipelined_oacontrol(struct intel_screen *screen)
+static int
+intel_detect_pipelined_oacontrol(struct intel_screen *screen,
+ struct detect_pipelined_register *detect)
{
if (screen->devinfo->gen < 6 || screen->devinfo->gen >= 8)
- return false;
+ return 0;
/* Set "Select Context ID" to a particular address (which is likely not a
* context), but leave all counting disabled. This should be harmless.
*/
- return intel_detect_pipelined_register(screen,
- OACONTROL,
- 0x31337000,
- true);
+ detect->reg = OACONTROL;
+ detect->expected_value = 0x31337000;
+ detect->result = HW_HAS_PIPELINED_OACONTROL;
+ detect->reset = true;
+ return 1;
+}
+
+static void
+intel_detect_pipelined_register_access(struct intel_screen *screen)
+{
+ struct detect_pipelined_register regs[2], *r = regs;
+
+ /* Combine the multiple register access validation into a single
+ * round trip through the kernel + GPU.
+ */
+ r += intel_detect_pipelined_so(screen, r);
+ r += intel_detect_pipelined_oacontrol(screen, r);
+
+ __intel_detect_pipelined_registers(screen, regs, r - regs);
}
/**
@@ -1549,10 +1588,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
- if (intel_detect_pipelined_so(intelScreen))
- intelScreen->hw_has_pipelined_register |= HW_HAS_PIPELINED_SOL_OFFSET;
- if (intel_detect_pipelined_oacontrol(intelScreen))
- intelScreen->hw_has_pipelined_register |= HW_HAS_PIPELINED_OACONTROL;
+
+ intel_detect_pipelined_register_access(intelScreen);
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
--
2.5.0