[Mesa-dev] [PATCH 5/5] i965: Combine the multiple pipelined register detections into one round-trip
Chris Wilson
chris at chris-wilson.co.uk
Wed Jul 8 06:48:42 PDT 2015
Combining the multiple access checks into a few batches and a single
serialising read can reduce detection times from around 100us to 70us on
a fast Haswell system.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/intel_screen.c | 177 +++++++++++++++++++------------
1 file changed, 109 insertions(+), 68 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/intel_screen.c b/src/mesa/drivers/dri/i965/intel_screen.c
index cb49e9a..595d2dc 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -1156,6 +1156,12 @@ intel_detect_timestamp(struct intel_screen *screen)
return loop > 0;
}
+struct detect_pipelined_register {
+ uint32_t reg;
+ uint32_t expected_value;
+ bool *result;
+};
+
/**
* Test if we can use MI_LOAD_REGISTER_MEM from an untrusted batchbuffer.
*
@@ -1163,107 +1169,143 @@ intel_detect_timestamp(struct intel_screen *screen)
* while others don't. Instead of trying to enumerate every case, just
* try and write a register and see if works.
*/
-static bool
-intel_detect_pipelined_register(struct intel_screen *screen,
- int reg, uint32_t expected_value)
+static void
+__intel_detect_pipelined_registers(struct intel_screen *screen,
+ struct detect_pipelined_register *r,
+ int count)
{
const int offset = 100;
-
- drm_intel_bo *bo;
- uint32_t buf[100];
- uint32_t *batch = buf;
+ int i;
uint32_t *data;
+
+ if (count == 0)
+ return;
+
+ if (drm_intel_bo_map(screen->workaround_bo, true))
+ return;
+
/* Set a value in a BO to a known quantity. The workaround BO already
* exists and doesn't contain anything important, so we may as well use it.
*/
- if (drm_intel_bo_map(screen->workaround_bo, true))
- return false;
-
data = screen->workaround_bo->virtual;
- data[offset] = 0xffffffff;
+ for (i = 0; i < count; i++)
+ data[offset+i] = 0xffffffff;
drm_intel_bo_unmap(screen->workaround_bo);
- bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
- if (bo == NULL)
- return false;
+ /* Emit each access in a separate batch buffer so that if the kernel
+ * rejects an individual access attempt, we don't incorrectly assume
+ * all the register accesses are invalid.
+ */
+ for (i = 0; i < count; i++) {
+ drm_intel_bo *bo;
+ uint32_t buf[100];
+ uint32_t *batch = buf;
+
+ bo = drm_intel_bo_alloc(screen->bufmgr, "batchbuffer", 4096, 0);
+ if (bo == NULL)
+ continue;
+
+ /* Write the register. */
+ *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
+ *batch++ = r[i].reg;
+ *batch++ = r[i].expected_value;
+
+ /* Force a command barrier between the write and the read */
+ *batch++ = _3DSTATE_PIPE_CONTROL | (5 - 2);
+ *batch++ = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL;
+ *batch++ = 0;
+ *batch++ = 0;
+ *batch++ = 0;
- /* Write the register. */
- *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
- *batch++ = reg;
- *batch++ = expected_value;
-
- /* Force a command barrier between the write then read */
- *batch++ = _3DSTATE_PIPE_CONTROL | (5 - 2);
- *batch++ = PIPE_CONTROL_NO_WRITE | PIPE_CONTROL_CS_STALL;
- *batch++ = 0;
- *batch++ = 0;
- *batch++ = 0;
-
- /* Save the register's value back to the buffer. */
- *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
- *batch++ = reg;
- drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)buf,
- screen->workaround_bo, offset*sizeof(uint32_t),
- I915_GEM_DOMAIN_INSTRUCTION,
- I915_GEM_DOMAIN_INSTRUCTION);
- *batch++ = screen->workaround_bo->offset + offset*sizeof(uint32_t);
-
- /* And afterwards clear the register */
- *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
- *batch++ = reg;
- *batch++ = 0;
-
- *batch++ = MI_BATCH_BUFFER_END;
- if ((batch - buf) & 1)
+ /* Save the register's value back to the buffer. */
+ *batch++ = MI_STORE_REGISTER_MEM | (3 - 2);
+ *batch++ = r[i].reg;
+ drm_intel_bo_emit_reloc(bo, (char *)batch -(char *)buf,
+ screen->workaround_bo,
+ (offset+i)*sizeof(uint32_t),
+ I915_GEM_DOMAIN_INSTRUCTION,
+ I915_GEM_DOMAIN_INSTRUCTION);
+ *batch++ = screen->workaround_bo->offset + (offset+i)*sizeof(uint32_t);
+
+ /* And afterwards clear the register */
+ *batch++ = MI_LOAD_REGISTER_IMM | (3 - 2);
+ *batch++ = r[i].reg;
*batch++ = 0;
- if (drm_intel_bo_subdata(bo, 0, (char *)batch - (char *)buf, buf) == 0)
- drm_intel_bo_mrb_exec(bo, (char *)batch - (char *)buf,
- NULL, 0, 0,
- I915_EXEC_RENDER);
+ *batch++ = MI_BATCH_BUFFER_END;
+ if ((batch - buf) & 1)
+ *batch++ = 0;
- drm_intel_bo_unreference(bo);
+ if (drm_intel_bo_subdata(bo, 0, (char *)batch - (char *)buf, buf) == 0)
+ drm_intel_bo_mrb_exec(bo, (char *)batch - (char *)buf,
+ NULL, 0, 0,
+ I915_EXEC_RENDER);
- /* Check whether the value got written. */
- bool success = false;
+ drm_intel_bo_unreference(bo);
+ }
+
+ /* Check whether the values got written. */
if (drm_intel_bo_map(screen->workaround_bo, false) == 0) {
data = screen->workaround_bo->virtual;
- success = data[offset] == expected_value;
+ for (i = 0; i < count; i++)
+ *r[i].result = data[offset+i] == r[i].expected_value;
drm_intel_bo_unmap(screen->workaround_bo);
}
-
- return success;
}
static bool
-intel_detect_pipelined_so(struct intel_screen *screen)
+intel_detect_pipelined_so(struct intel_screen *screen,
+ struct detect_pipelined_register *detect)
{
- /* Supposedly, Broadwell just works. */
- if (screen->devinfo->gen >= 8)
- return true;
-
+ screen->hw_has_pipelined_so = false;
if (screen->devinfo->gen <= 6)
- return false;
+ return 0;
+
+ /* Supposedly, Broadwell just works. */
+ if (screen->devinfo->gen >= 8) {
+ screen->hw_has_pipelined_so = true;
+ return 0;
+ }
/* We use SO_WRITE_OFFSET0 since you're supposed to write it (unlike the
* statistics registers), and we already reset it to zero before using it.
*/
- return intel_detect_pipelined_register(screen,
- GEN7_SO_WRITE_OFFSET(0),
- 0x1337d0d0);
+ detect->reg = GEN7_SO_WRITE_OFFSET(0);
+ detect->expected_value = 0x1337d0d0;
+ detect->result = &screen->hw_has_pipelined_so;
+ return 1;
}
-static bool
-intel_detect_pipelined_oacontrol(struct intel_screen *screen)
+static int
+intel_detect_pipelined_oacontrol(struct intel_screen *screen,
+ struct detect_pipelined_register *reg)
{
+ screen->hw_has_pipelined_oacontrol = false;
if (screen->devinfo->gen < 6 || screen->devinfo->gen >= 8)
- return false;
+ return 0;
/* Set "Select Context ID" to a particular address (which is likely not a
* context), but leave all counting disabled. This should be harmless.
*/
- return intel_detect_pipelined_register(screen, OACONTROL, 0x31337000);
+ reg->reg = OACONTROL;
+ reg->expected_value = 0x31337000;
+ reg->result = &screen->hw_has_pipelined_oacontrol;
+ return 1;
+}
+
+static void
+intel_detect_pipelined_register_access(struct intel_screen *screen)
+{
+ struct detect_pipelined_register regs[2], *r =regs;
+
+ /* Validate all the register accesses together: one batch per register,
+ * but only a single serialising readback of the results from the GPU.
+ */
+ r += intel_detect_pipelined_so(screen, r);
+ r += intel_detect_pipelined_oacontrol(screen, r);
+
+ __intel_detect_pipelined_registers(screen, regs, r-regs);
}
/**
@@ -1525,9 +1567,8 @@ __DRIconfig **intelInitScreen2(__DRIscreen *psp)
intelScreen->hw_has_swizzling = intel_detect_swizzling(intelScreen);
intelScreen->hw_has_timestamp = intel_detect_timestamp(intelScreen);
- intelScreen->hw_has_pipelined_so = intel_detect_pipelined_so(intelScreen);
- intelScreen->hw_has_pipelined_oacontrol =
- intel_detect_pipelined_oacontrol(intelScreen);
+
+ intel_detect_pipelined_register_access(intelScreen);
const char *force_msaa = getenv("INTEL_FORCE_MSAA");
if (force_msaa) {
--
2.1.4
More information about the mesa-dev
mailing list