[Mesa-dev] [PATCH 3/3] i965/vec4: Pass const references to instruction functions.
Matt Turner
mattst88 at gmail.com
Sat Jun 28 16:49:03 PDT 2014
text data bss dec hex filename
4231165 123200 39648 4394013 430c1d i965_dri.so
4186277 123200 39648 4349125 425cc5 i965_dri.so
Cuts 43k of .text (the two rows of `size` output above are before and after this patch, respectively) and saves a bunch of useless struct copies.
---
src/mesa/drivers/dri/i965/brw_vec4.h | 92 ++++++++++++++++----------
src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 +++--
2 files changed, 67 insertions(+), 41 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5702d06..6ac35d7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -448,45 +448,67 @@ public:
vec4_instruction *emit_before(vec4_instruction *inst,
vec4_instruction *new_inst);
- vec4_instruction *MOV(dst_reg dst, src_reg src0);
- vec4_instruction *NOT(dst_reg dst, src_reg src0);
- vec4_instruction *RNDD(dst_reg dst, src_reg src0);
- vec4_instruction *RNDE(dst_reg dst, src_reg src0);
- vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
- vec4_instruction *FRC(dst_reg dst, src_reg src0);
- vec4_instruction *F32TO16(dst_reg dst, src_reg src0);
- vec4_instruction *F16TO32(dst_reg dst, src_reg src0);
- vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *DPH(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *SHL(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *SHR(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *ASR(dst_reg dst, src_reg src0, src_reg src1);
+ vec4_instruction *MOV(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *NOT(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *RNDD(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *RNDE(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *RNDZ(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *FRC(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *F32TO16(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *F16TO32(const dst_reg &dst, const src_reg &src0);
+ vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *MUL(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *MACH(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *MAC(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *AND(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *OR(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *XOR(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *DP3(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *DP4(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *DPH(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *SHL(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *SHR(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *ASR(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
uint32_t condition);
vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
vec4_instruction *IF(uint32_t predicate);
- vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
- vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
- vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
- vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
- vec4_instruction *BFREV(dst_reg dst, src_reg value);
- vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value);
- vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset);
- vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base);
- vec4_instruction *FBH(dst_reg dst, src_reg value);
- vec4_instruction *FBL(dst_reg dst, src_reg value);
- vec4_instruction *CBIT(dst_reg dst, src_reg value);
- vec4_instruction *MAD(dst_reg dst, src_reg c, src_reg b, src_reg a);
- vec4_instruction *ADDC(dst_reg dst, src_reg src0, src_reg src1);
- vec4_instruction *SUBB(dst_reg dst, src_reg src0, src_reg src1);
+ vec4_instruction *PULL_CONSTANT_LOAD(const dst_reg &dst,
+ const src_reg &index);
+ vec4_instruction *SCRATCH_READ(const dst_reg &dst, const src_reg &index);
+ vec4_instruction *SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
+ const src_reg &index);
+ vec4_instruction *LRP(const dst_reg &dst, const src_reg &a,
+ const src_reg &y, const src_reg &x);
+ vec4_instruction *BFREV(const dst_reg &dst, const src_reg &value);
+ vec4_instruction *BFE(const dst_reg &dst, const src_reg &bits,
+ const src_reg &offset, const src_reg &value);
+ vec4_instruction *BFI1(const dst_reg &dst, const src_reg &bits,
+ const src_reg &offset);
+ vec4_instruction *BFI2(const dst_reg &dst, const src_reg &bfi1_dst,
+ const src_reg &insert, const src_reg &base);
+ vec4_instruction *FBH(const dst_reg &dst, const src_reg &value);
+ vec4_instruction *FBL(const dst_reg &dst, const src_reg &value);
+ vec4_instruction *CBIT(const dst_reg &dst, const src_reg &value);
+ vec4_instruction *MAD(const dst_reg &dst, const src_reg &c,
+ const src_reg &b, const src_reg &a);
+ vec4_instruction *ADDC(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
+ vec4_instruction *SUBB(const dst_reg &dst, const src_reg &src0,
+ const src_reg &src1);
int implied_mrf_writes(vec4_instruction *inst);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c732c90..219515a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -112,7 +112,7 @@ vec4_visitor::emit(enum opcode opcode)
#define ALU1(op) \
vec4_instruction * \
- vec4_visitor::op(dst_reg dst, src_reg src0) \
+ vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
{ \
return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
src0); \
@@ -120,7 +120,8 @@ vec4_visitor::emit(enum opcode opcode)
#define ALU2(op) \
vec4_instruction * \
- vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
+ vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
+ const src_reg &src1) \
{ \
return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
src0, src1); \
@@ -128,7 +129,8 @@ vec4_visitor::emit(enum opcode opcode)
#define ALU2_ACC(op) \
vec4_instruction * \
- vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1) \
+ vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
+ const src_reg &src1) \
{ \
vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, \
BRW_OPCODE_##op, dst, src0, src1); \
@@ -138,7 +140,8 @@ vec4_visitor::emit(enum opcode opcode)
#define ALU3(op) \
vec4_instruction * \
- vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
+ vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
+ const src_reg &src1, const src_reg &src2) \
{ \
assert(brw->gen >= 6); \
return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
@@ -238,7 +241,7 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
}
vec4_instruction *
-vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
+vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
{
vec4_instruction *inst;
@@ -251,7 +254,8 @@ vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
}
vec4_instruction *
-vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
+vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
+ const src_reg &index)
{
vec4_instruction *inst;
--
1.8.3.2
More information about the mesa-dev
mailing list