[Beignet] [PATCH] Add workgroup reduce add optimization
Grigore Lupescu
grigore.lupescu at intel.com
Tue Jan 5 08:35:53 PST 2016
Signed-off-by: Grigore Lupescu <grigore.lupescu at intel.com>
---
backend/src/backend/gen_context.cpp | 33 +++++++++++++++++++++------------
1 file changed, 21 insertions(+), 12 deletions(-)
diff --git a/backend/src/backend/gen_context.cpp b/backend/src/backend/gen_context.cpp
index 0ea0dd0..3370a7d 100644
--- a/backend/src/backend/gen_context.cpp
+++ b/backend/src/backend/gen_context.cpp
@@ -2943,21 +2943,31 @@ namespace gbe
}
}
}
- } else if (wg_op == ir::WORKGROUP_OP_REDUCE_ADD) {
+ }
+ else if (wg_op == ir::WORKGROUP_OP_REDUCE_ADD){
+ tmp.hstride = GEN_HORIZONTAL_STRIDE_1;
+ tmp.vstride = GEN_VERTICAL_STRIDE_4;
+ tmp.width = GEN_WIDTH_4;
+
GBE_ASSERT(tmp.type == theVal.type);
- GenRegister v = GenRegister::toUniform(tmp, theVal.type);
- for (uint32_t i = 0; i < simd; i++) {
- p->ADD(threadData, threadData, v);
- v.subnr += typeSize(theVal.type);
- if (v.subnr == 32) {
- v.subnr = 0;
- v.nr++;
- }
+ GenRegister partialSum = tmp;
+
+ /* adjust offset, compute add with ADD4/ADD */
+ for (uint32_t i = 1; i < simd/4; i++){
+ tmp = tmp.suboffset(tmp, 4);
+ p->push();
+ p->curr.execWidth = GEN_WIDTH_16;
+ p->ADD(partialSum, partialSum, tmp);
+ p->pop();
}
- }
+ for (uint32_t i = 0; i < 4; i++){
+ p->ADD(threadData, threadData, partialSum);
+ partialSum = GenRegister::suboffset(partialSum, 1);
+ }
+ }
p->pop();
- }
+}
#define SEND_RESULT_MSG() \
do { \
@@ -3123,7 +3133,6 @@ do { \
p->curr.predicate = GEN_PREDICATE_NONE;
p->WAIT(2);
p->patchJMPI(jip, (p->n_instruction() - jip), 0);
-
/* Do something when get the msg. */
p->curr.execWidth = simd;
p->MOV(dst, msgData);
--
2.1.4
More information about the Beignet mailing list