[Beignet] [PATCH] enlarge buf size to avoid memory out of range written by GPU (kernel)

Guo, Yejun yejun.guo at intel.com
Tue Jun 28 07:25:06 UTC 2016


ping for review, thanks.

-----Original Message-----
From: Guo, Yejun 
Sent: Wednesday, June 15, 2016 10:36 AM
To: beignet at lists.freedesktop.org
Cc: Guo, Yejun
Subject: [PATCH] enlarge buf size to avoid memory out of range written by GPU (kernel)

Pseudocode of the change to the generated test:
float input[] = {...};  -->  float input[] = {..., ... repeated up to 'vector' times}
global_size = input_len -->  global_size = input_len / vector
				where vector is the vector width of the tested type: 1, 2, ... or 16.

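Each work item writes 'vector' elements of dst, so running input_len work items
writes past the end of the buffer. The OpenCL kernel looks like (for the case of vector=8):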
  int i = get_global_id(0);
  dst[i * (*vector) + 0] = ret[0];
  dst[i * (*vector) + 1] = ret[1];
  dst[i * (*vector) + 2] = ret[2];
  dst[i * (*vector) + 3] = ret[3];
  dst[i * (*vector) + 4] = ret[4];
  dst[i * (*vector) + 5] = ret[5];
  dst[i * (*vector) + 6] = ret[6];
  dst[i * (*vector) + 7] = ret[7];

Signed-off-by: Guo Yejun <yejun.guo at intel.com>
---
 utests/utest_generator.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/utests/utest_generator.py b/utests/utest_generator.py
index cde2dbe..3591095 100644
--- a/utests/utest_generator.py
+++ b/utests/utest_generator.py
@@ -1,6 +1,6 @@
 #!/usr/bin/python
 from __future__ import print_function
-import os,sys,re
+import os,sys,re,string
 
 FLT_MAX_POSI='0x1.fffffep127f'
 FLT_MIN_NEGA='-0x1.fffffep127f'
@@ -247,7 +247,7 @@ which can print more values and information to assist debuging the issue.
   def argvector(self,paraN,index):
     vector=re.findall(r"[0-9]+",self.inputtype[paraN][index])
     if vector:
-      vector=vector[0]
+      vector=string.atoi(vector[0])
     else:
       vector=1
     return vector
@@ -272,10 +272,17 @@ which can print more values and information to assist debuging the issue.
 #####Cpu values analyse
   def GenInputValues(self,index):
     #namesuffix=self.inputtype[0][index]
+    vlen = self.argvector(self.inputtype.__len__()-1,index)
     for i in range(0,self.values.__len__()):
-      self.cpplines += [ "const %s input_data%d[] = {%s};" %(self.argtype(i,index),i+1,str(self.values[i]).strip('[]').replace('\'','')) ]
+        vals = []
+        for j in range(0, vlen):
+            if (len(vals) >= 128):	#avoid too many data
+                vals = vals[0:128]
+                break
+            vals += self.values[i]
+        self.cpplines += [ "const %s input_data%d[] = {%s};" %(self.argtype(i,index),i+1,str(vals).strip('[]').replace('\'','')) ]
     self.cpplines += [ "const int count_input = sizeof(input_data1) / sizeof(input_data1[0]);" ]
-    self.cpplines += [ "const int vector = %s;\n"%(self.argvector(self.inputtype.__len__()-1,index)) ]
+    self.cpplines += [ "const int vector = %s;\n"%(vlen) ]
 
 #####Cpu Function
   def GenCpuCompilerMath(self,index):
@@ -340,7 +347,7 @@ static void %s_%s(void)
   OCL_CREATE_KERNEL(\"%s_%s\");
   OCL_CREATE_BUFFER(buf[0], CL_MEM_READ_WRITE, count_input * sizeof(%s), NULL); 
 
-  globals[0] = count_input;
+  globals[0] = count_input / vector;
   locals[0] = 1;
  '''%(self.fileName,namesuffix,\
      self.retType(index),\
-- 
1.9.1



More information about the Beignet mailing list