[Beignet] [PATCH] Workgroup reduce add optimization

Grigore Lupescu grigore.lupescu at intel.com
Wed Dec 23 07:32:19 PST 2015


Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
 backend/src/backend/gen_context.cpp | 48 ++++++++++++++++++++++++-------------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index a2e11a4..52e988e 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2943,21 +2943,38 @@ namespace gbe
           }
         }
       }
-    } else if (wg_op == ir::WORKGROUP_OP_REDUCE_ADD) {
-      GBE_ASSERT(tmp.type == theVal.type);
-      GenRegister v = GenRegister::toUniform(tmp, theVal.type);
-      for (uint32_t i = 0; i < simd; i++) {
-        p->ADD(threadData, threadData, v);
-        v.subnr += typeSize(theVal.type);
-        if (v.subnr == 32) {
-          v.subnr = 0;
-          v.nr++;
-        }
-      }
-    }
-
-    p->pop();
-  }
+    } else if (wg_op == ir::WORKGROUP_OP_REDUCE_ADD){
+
+		tmp.hstride = GEN_HORIZONTAL_STRIDE_1;
+		tmp.vstride = GEN_VERTICAL_STRIDE_4;
+		tmp.width = GEN_WIDTH_4;
+
+		GBE_ASSERT(tmp.type == theVal.type);
+		GenRegister partialSum = tmp;
+
+		/* step tmp forward 4 elements per iteration and accumulate it into partialSum with a 4-wide ADD */
+		for (uint32_t i = 1; i < simd/4; i++){
+			tmp = tmp.suboffset(tmp, 4);
+			GenNativeInstruction* insnQ1 = p->next(GEN_OPCODE_ADD);
+			p->setHeader(insnQ1);
+			p->setDst(insnQ1, partialSum);
+			p->setSrc0(insnQ1, partialSum);
+			p->setSrc1(insnQ1, tmp);
+			insnQ1->header.execution_size = GEN_WIDTH_4;
+		}
+
+		partialSum = GenRegister::toUniform(partialSum, theVal.type);
+		for (uint32_t i = 0; i < 4; i++){
+			p->ADD(threadData, threadData, partialSum);
+			partialSum.subnr += typeSize(theVal.type);
+			if (partialSum.subnr == 32) {
+				partialSum.subnr = 0;
+				partialSum.nr++;
+			}
+		}
+	}
+	p->pop();
+}
 
 #define SEND_RESULT_MSG() \
 do { \
@@ -3123,7 +3140,6 @@ do { \
         p->curr.predicate = GEN_PREDICATE_NONE;
         p->WAIT(2);
         p->patchJMPI(jip, (p->n_instruction() - jip), 0);
-    
         /* Do something when get the msg. */
         p->curr.execWidth = simd;
         p->MOV(dst, msgData);
-- 
2.1.4



More information about the Beignet mailing list