[Mesa-dev] [PATCH 3/3] i965/vec4: Pass const references to instruction functions.

Matt Turner mattst88 at gmail.com
Sat Jun 28 16:49:03 PDT 2014


   text	   data	    bss	    dec	    hex	filename
4231165	 123200	  39648	4394013	 430c1d	i965_dri.so
4186277	 123200	  39648	4349125	 425cc5	i965_dri.so

Cuts 43k of .text (first row is before this patch, second row is after:
4231165 -> 4186277 bytes) and saves a bunch of useless struct copies.
---
 src/mesa/drivers/dri/i965/brw_vec4.h           | 92 ++++++++++++++++----------
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp | 16 +++--
 2 files changed, 67 insertions(+), 41 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 5702d06..6ac35d7 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -448,45 +448,67 @@ public:
    vec4_instruction *emit_before(vec4_instruction *inst,
 				 vec4_instruction *new_inst);
 
-   vec4_instruction *MOV(dst_reg dst, src_reg src0);
-   vec4_instruction *NOT(dst_reg dst, src_reg src0);
-   vec4_instruction *RNDD(dst_reg dst, src_reg src0);
-   vec4_instruction *RNDE(dst_reg dst, src_reg src0);
-   vec4_instruction *RNDZ(dst_reg dst, src_reg src0);
-   vec4_instruction *FRC(dst_reg dst, src_reg src0);
-   vec4_instruction *F32TO16(dst_reg dst, src_reg src0);
-   vec4_instruction *F16TO32(dst_reg dst, src_reg src0);
-   vec4_instruction *ADD(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *MUL(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *MACH(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *MAC(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *AND(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *OR(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *XOR(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *DP3(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *DP4(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *DPH(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *SHL(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *SHR(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *ASR(dst_reg dst, src_reg src0, src_reg src1);
+   vec4_instruction *MOV(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *NOT(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *RNDD(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *RNDE(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *RNDZ(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *FRC(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *F32TO16(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *F16TO32(const dst_reg &dst, const src_reg &src0);
+   vec4_instruction *ADD(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *MUL(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *MACH(const dst_reg &dst, const src_reg &src0,
+                          const src_reg &src1);
+   vec4_instruction *MAC(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *AND(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *OR(const dst_reg &dst, const src_reg &src0,
+                        const src_reg &src1);
+   vec4_instruction *XOR(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *DP3(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *DP4(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *DPH(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *SHL(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *SHR(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
+   vec4_instruction *ASR(const dst_reg &dst, const src_reg &src0,
+                         const src_reg &src1);
    vec4_instruction *CMP(dst_reg dst, src_reg src0, src_reg src1,
 			 uint32_t condition);
    vec4_instruction *IF(src_reg src0, src_reg src1, uint32_t condition);
    vec4_instruction *IF(uint32_t predicate);
-   vec4_instruction *PULL_CONSTANT_LOAD(dst_reg dst, src_reg index);
-   vec4_instruction *SCRATCH_READ(dst_reg dst, src_reg index);
-   vec4_instruction *SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index);
-   vec4_instruction *LRP(dst_reg dst, src_reg a, src_reg y, src_reg x);
-   vec4_instruction *BFREV(dst_reg dst, src_reg value);
-   vec4_instruction *BFE(dst_reg dst, src_reg bits, src_reg offset, src_reg value);
-   vec4_instruction *BFI1(dst_reg dst, src_reg bits, src_reg offset);
-   vec4_instruction *BFI2(dst_reg dst, src_reg bfi1_dst, src_reg insert, src_reg base);
-   vec4_instruction *FBH(dst_reg dst, src_reg value);
-   vec4_instruction *FBL(dst_reg dst, src_reg value);
-   vec4_instruction *CBIT(dst_reg dst, src_reg value);
-   vec4_instruction *MAD(dst_reg dst, src_reg c, src_reg b, src_reg a);
-   vec4_instruction *ADDC(dst_reg dst, src_reg src0, src_reg src1);
-   vec4_instruction *SUBB(dst_reg dst, src_reg src0, src_reg src1);
+   vec4_instruction *PULL_CONSTANT_LOAD(const dst_reg &dst,
+                                        const src_reg &index);
+   vec4_instruction *SCRATCH_READ(const dst_reg &dst, const src_reg &index);
+   vec4_instruction *SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
+                                   const src_reg &index);
+   vec4_instruction *LRP(const dst_reg &dst, const src_reg &a,
+                         const src_reg &y, const src_reg &x);
+   vec4_instruction *BFREV(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *BFE(const dst_reg &dst, const src_reg &bits,
+                         const src_reg &offset, const src_reg &value);
+   vec4_instruction *BFI1(const dst_reg &dst, const src_reg &bits,
+                          const src_reg &offset);
+   vec4_instruction *BFI2(const dst_reg &dst, const src_reg &bfi1_dst,
+                          const src_reg &insert, const src_reg &base);
+   vec4_instruction *FBH(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *FBL(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *CBIT(const dst_reg &dst, const src_reg &value);
+   vec4_instruction *MAD(const dst_reg &dst, const src_reg &c,
+                         const src_reg &b, const src_reg &a);
+   vec4_instruction *ADDC(const dst_reg &dst, const src_reg &src0,
+                          const src_reg &src1);
+   vec4_instruction *SUBB(const dst_reg &dst, const src_reg &src0,
+                          const src_reg &src1);
 
    int implied_mrf_writes(vec4_instruction *inst);
 
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c732c90..219515a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -112,7 +112,7 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU1(op)							\
    vec4_instruction *							\
-   vec4_visitor::op(dst_reg dst, src_reg src0)				\
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0)		\
    {									\
       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,	\
 					   src0);			\
@@ -120,7 +120,8 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU2(op)							\
    vec4_instruction *							\
-   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)		\
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,		\
+                    const src_reg &src1)				\
    {									\
       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,	\
 					   src0, src1);			\
@@ -128,7 +129,8 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU2_ACC(op)							\
    vec4_instruction *							\
-   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1)		\
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,		\
+                    const src_reg &src1)				\
    {									\
       vec4_instruction *inst = new(mem_ctx) vec4_instruction(this,     \
                        BRW_OPCODE_##op, dst, src0, src1);		\
@@ -138,7 +140,8 @@ vec4_visitor::emit(enum opcode opcode)
 
 #define ALU3(op)							\
    vec4_instruction *							\
-   vec4_visitor::op(dst_reg dst, src_reg src0, src_reg src1, src_reg src2)\
+   vec4_visitor::op(const dst_reg &dst, const src_reg &src0,		\
+                    const src_reg &src1, const src_reg &src2)		\
    {									\
       assert(brw->gen >= 6);						\
       return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst,	\
@@ -238,7 +241,7 @@ vec4_visitor::CMP(dst_reg dst, src_reg src0, src_reg src1, uint32_t condition)
 }
 
 vec4_instruction *
-vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
+vec4_visitor::SCRATCH_READ(const dst_reg &dst, const src_reg &index)
 {
    vec4_instruction *inst;
 
@@ -251,7 +254,8 @@ vec4_visitor::SCRATCH_READ(dst_reg dst, src_reg index)
 }
 
 vec4_instruction *
-vec4_visitor::SCRATCH_WRITE(dst_reg dst, src_reg src, src_reg index)
+vec4_visitor::SCRATCH_WRITE(const dst_reg &dst, const src_reg &src,
+                            const src_reg &index)
 {
    vec4_instruction *inst;
 
-- 
1.8.3.2



More information about the mesa-dev mailing list