[Beignet] [PATCH] enlarge buf size to avoid memory out of range written by GPU (kernel)
Guo, Yejun
yejun.guo at intel.com
Tue Jun 28 07:25:06 UTC 2016
ping for review, thanks.
-----Original Message-----
From: Guo, Yejun
Sent: Wednesday, June 15, 2016 10:36 AM
To: beignet at lists.freedesktop.org
Cc: Guo, Yejun
Subject: [PATCH] enlarge buf size to avoid memory out of range written by GPU (kernel)
Pseudocode of the change (before --> after):
float input[] = {...}; --> float input[] = {..., ... more};
global_size = input_len --> global_size = input_len / vector
where the value of vector is 1, 2, ... or 16.
The OpenCL kernel looks like this (for the case of vector=8):
int i = get_global_id(0);
dst[i * (*vector) + 0] = ret[0];
dst[i * (*vector) + 1] = ret[1];
dst[i * (*vector) + 2] = ret[2];
dst[i * (*vector) + 3] = ret[3];
dst[i * (*vector) + 4] = ret[4];
dst[i * (*vector) + 5] = ret[5];
dst[i * (*vector) + 6] = ret[6];
dst[i * (*vector) + 7] = ret[7];
Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
utests/utest_generator.py | 17 ++++++++++++-----
1 file changed, 12 insertions(+), 5 deletions(-)
diff --git a/utests/utest_generator.py b/utests/utest_generator.py
index cde2dbe..3591095 100644
--- a/utests/utest_generator.py
+++ b/utests/utest_generator.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
from __future__ import print_function
-import os,sys,re
+import os,sys,re,string
FLT_MAX_POSI='0x1.fffffep127f'
FLT_MIN_NEGA='-0x1.fffffep127f'
@@ -247,7 +247,7 @@ which can print more values and information to assist debuging the issue.
def argvector(self,paraN,index):
vector=re.findall(r"[0-9]+",self.inputtype[paraN][index])
if vector:
- vector=vector[0]
+ vector=string.atoi(vector[0])
else:
vector=1
return vector
@@ -272,10 +272,17 @@ which can print more values and information to assist debuging the issue.
#####Cpu values analyse
def GenInputValues(self,index):
#namesuffix=self.inputtype[0][index]
+ vlen = self.argvector(self.inputtype.__len__()-1,index)
for i in range(0,self.values.__len__()):
- self.cpplines += [ "const %s input_data%d[] = {%s};" %(self.argtype(i,index),i+1,str(self.values[i]).strip('[]').replace('\'','')) ]
+ vals = []
+ for j in range(0, vlen):
+ if (len(vals) >= 128): #avoid too many data
+ vals = vals[0:128]
+ break
+ vals += self.values[i]
+ self.cpplines += [ "const %s input_data%d[] = {%s};" %(self.argtype(i,index),i+1,str(vals).strip('[]').replace('\'','')) ]
self.cpplines += [ "const int count_input = sizeof(input_data1) / sizeof(input_data1[0]);" ]
- self.cpplines += [ "const int vector = %s;\n"%(self.argvector(self.inputtype.__len__()-1,index)) ]
+ self.cpplines += [ "const int vector = %s;\n"%(vlen) ]
#####Cpu Function
def GenCpuCompilerMath(self,index):
@@ -340,7 +347,7 @@ static void %s_%s(void)
OCL_CREATE_KERNEL(\"%s_%s\");
OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * sizeof(%s), NULL);
- globals[0] = count_input;
+ globals[0] = count_input / vector;
locals[0] = 1;
'''%(self.fileName,namesuffix,\
self.retType(index),\
--
1.9.1
More information about the Beignet
mailing list