[Mesa-dev] [PATCH 09/11] i965/eu: Refactor jump distance scaling to use a helper function.
Kenneth Graunke
kenneth at whitecape.org
Sat Aug 9 14:28:49 PDT 2014
Different generations of hardware measure jump distances in different
units. Previously, every function that needed to set a jump target open
coded this scaling, or made a hardcoded assumption (i.e. just used 2).
Most functions start with the number of instructions to jump, and scale
up to the hardware-specific value. So, I made the function match that.
Others start with a byte offset, and divide by a constant (8) to obtain
the jump distance. This is actually 16 / 2 (the jump scale for Gen5-7).
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/brw_eu.h | 2 ++
src/mesa/drivers/dri/i965/brw_eu_emit.c | 42 +++++++++++++++++---------
src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 8 +++--
3 files changed, 35 insertions(+), 17 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 7efc028..fdf6e4c 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -316,6 +316,8 @@ void brw_shader_time_add(struct brw_compile *p,
struct brw_reg payload,
uint32_t surf_index);
+unsigned brw_jump_scale(const struct brw_context *brw);
+
/* If/else/endif. Works by manipulating the execution flags on each
* channel.
*/
diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c
index 665fc07..3d9c96a 100644
--- a/src/mesa/drivers/dri/i965/brw_eu_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c
@@ -1170,6 +1170,27 @@ void brw_NOP(struct brw_compile *p)
* Comparisons, if/else/endif
*/
+/**
+ * Return the generation-specific jump distance scaling factor.
+ *
+ * Given the number of instructions to jump, we need to scale by
+ * some number to obtain the actual jump distance to program in an
+ * instruction.
+ */
+unsigned
+brw_jump_scale(const struct brw_context *brw)
+{
+ /* Ironlake and later measure jump targets in 64-bit data chunks (in order
+ * (to support compaction), so each 128-bit instruction requires 2 chunks.
+ */
+ if (brw->gen >= 5)
+ return 2;
+
+ /* Gen4 simply uses the number of 128-bit instructions. */
+ return 1;
+}
+
+
brw_inst *
brw_JMPI(struct brw_compile *p, struct brw_reg index,
unsigned predicate_control)
@@ -1376,12 +1397,7 @@ patch_IF_ELSE(struct brw_compile *p,
assert(endif_inst != NULL);
assert(else_inst == NULL || brw_inst_opcode(brw, else_inst) == BRW_OPCODE_ELSE);
- unsigned br = 1;
- /* Jump count is for 64bit data chunk each, so one 128bit instruction
- * requires 2 chunks.
- */
- if (brw->gen >= 5)
- br = 2;
+ unsigned br = brw_jump_scale(brw);
assert(brw_inst_opcode(brw, endif_inst) == BRW_OPCODE_ENDIF);
brw_inst_set_exec_size(brw, endif_inst, brw_inst_exec_size(brw, if_inst));
@@ -1678,7 +1694,7 @@ brw_patch_break_cont(struct brw_compile *p, brw_inst *while_inst)
struct brw_context *brw = p->brw;
brw_inst *do_inst = get_inner_do_insn(p);
brw_inst *inst;
- int br = (brw->gen == 5) ? 2 : 1;
+ unsigned br = brw_jump_scale(brw);
assert(brw->gen < 6);
@@ -1702,10 +1718,7 @@ brw_WHILE(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
brw_inst *insn, *do_insn;
- unsigned br = 1;
-
- if (brw->gen >= 5)
- br = 2;
+ unsigned br = brw_jump_scale(brw);
if (brw->gen >= 7) {
insn = next_insn(p, BRW_OPCODE_WHILE);
@@ -2389,7 +2402,7 @@ brw_find_loop_end(struct brw_compile *p, int start_offset)
{
struct brw_context *brw = p->brw;
int offset;
- int scale = 8;
+ int scale = 16 / brw_jump_scale(brw);
void *store = p->store;
assert(brw->gen >= 6);
@@ -2421,7 +2434,8 @@ brw_set_uip_jip(struct brw_compile *p)
{
struct brw_context *brw = p->brw;
int offset;
- int scale = 8;
+ int br = brw_jump_scale(brw);
+ int scale = 16 / br;
void *store = p->store;
if (brw->gen < 6)
@@ -2461,7 +2475,7 @@ brw_set_uip_jip(struct brw_compile *p)
case BRW_OPCODE_ENDIF:
if (block_end_offset == 0)
- brw_inst_set_jip(brw, insn, 2);
+ brw_inst_set_jip(brw, insn, 1 * br);
else
brw_inst_set_jip(brw, insn, (block_end_offset - offset) / scale);
break;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
index cec2e82..d3509a0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp
@@ -67,6 +67,8 @@ fs_generator::patch_discard_jumps_to_fb_writes()
if (brw->gen < 6 || this->discard_halt_patches.is_empty())
return false;
+ int scale = brw_jump_scale(brw);
+
/* There is a somewhat strange undocumented requirement of using
* HALT, according to the simulator. If some channel has HALTed to
* a particular UIP, then by the end of the program, every channel
@@ -79,8 +81,8 @@ fs_generator::patch_discard_jumps_to_fb_writes()
* tests.
*/
brw_inst *last_halt = gen6_HALT(p);
- brw_inst_set_uip(brw, last_halt, 2);
- brw_inst_set_jip(brw, last_halt, 2);
+ brw_inst_set_uip(brw, last_halt, 1 * scale);
+ brw_inst_set_jip(brw, last_halt, 1 * scale);
int ip = p->nr_insn;
@@ -89,7 +91,7 @@ fs_generator::patch_discard_jumps_to_fb_writes()
assert(brw_inst_opcode(brw, patch) == BRW_OPCODE_HALT);
/* HALT takes a half-instruction distance from the pre-incremented IP. */
- brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * 2);
+ brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * scale);
}
this->discard_halt_patches.make_empty();
--
2.0.0
More information about the mesa-dev
mailing list