[Beignet] [PATCH] Fix the same kernel name issue of OCL_OUTPUT_KERNEL_PERF

Zhigang Gong zhigang.gong at linux.intel.com
Wed Jun 11 21:58:48 PDT 2014


One comment, you may need to use strncpy and strncmp to replace
strcpy and strcmp in this function to avoid buffer overflow.

On Sun, Jun 22, 2014 at 05:39:09PM +0800, Yongjia Zhang wrote:
> Now it treats kernels with same kernel name and different build
> options separately. When OCL_OUTPUT_KERNEL_PERF==1, it outputs the
> time summary as before, but if OCL_OUTPUT_KERNEL_PERF==2, it will
> output the time details including the kernel build options and
> kernels with same kernel name but different build options will
> output separately.
> 
> Signed-off-by: Yongjia Zhang <yongjia.zhang at intel.com>
> ---
>  src/cl_api.c      | 17 ++++++----
>  src/performance.c | 95 +++++++++++++++++++++++++++++++++++++++----------------
>  src/performance.h |  2 +-
>  3 files changed, 79 insertions(+), 35 deletions(-)
> 
> diff --git a/src/cl_api.c b/src/cl_api.c
> index 6206e10..8856f5f 100644
> --- a/src/cl_api.c
> +++ b/src/cl_api.c
> @@ -1681,7 +1681,7 @@ clEnqueueCopyBuffer(cl_command_queue     command_queue,
>    }
>  
>    if(b_output_kernel_perf)
> -	  time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy", command_queue);
> +	  time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy", "\0", command_queue);
>  
>    return 0;
>  
> @@ -1785,7 +1785,7 @@ clEnqueueCopyBufferRect(cl_command_queue     command_queue,
>    }
>  
>    if(b_output_kernel_perf)
> -    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_rect", command_queue);
> +    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_rect", "\0", command_queue);
>  
>  error:
>    return err;
> @@ -2027,7 +2027,7 @@ clEnqueueCopyImage(cl_command_queue      command_queue,
>    }
>  
>    if(b_output_kernel_perf)
> -    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_kernel_copy_image", command_queue);
> +    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_kernel_copy_image", "\0", command_queue);
>  
>  error:
>    return err;
> @@ -2091,7 +2091,7 @@ clEnqueueCopyImageToBuffer(cl_command_queue  command_queue,
>    }
>  
>    if(b_output_kernel_perf)
> -    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_image_to_buffer", command_queue);
> +    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_image_to_buffer", "\0", command_queue);
>  
>  error:
>    return err;
> @@ -2155,7 +2155,7 @@ clEnqueueCopyBufferToImage(cl_command_queue  command_queue,
>    }
>  
>    if(b_output_kernel_perf)
> -    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_to_image", command_queue);
> +    time_end(command_queue->ctx, "beignet internal kernel : cl_mem_copy_buffer_to_image", "\0", command_queue);
>  
>  error:
>    return err;
> @@ -2556,7 +2556,12 @@ clEnqueueNDRangeKernel(cl_command_queue  command_queue,
>    }
>  
>    if(b_output_kernel_perf)
> -    time_end(command_queue->ctx, cl_kernel_get_name(kernel), command_queue);
> +  {
> +    if(kernel->program->build_opts != NULL)
> +      time_end(command_queue->ctx, cl_kernel_get_name(kernel), kernel->program->build_opts, command_queue);
> +    else
> +      time_end(command_queue->ctx, cl_kernel_get_name(kernel), "\0", command_queue);
> +  }
>  error:
>    return err;
>  }
> diff --git a/src/performance.c b/src/performance.c
> index a785460..e095e02 100644
> --- a/src/performance.c
> +++ b/src/performance.c
> @@ -8,11 +8,12 @@
>  
>  #define MAX_KERNEL_NAME_LENGTH 100
>  #define MAX_KERNEL_EXECUTION_COUNT 100000
> -
> +#define MAX_KERNEL_BUILD_OPT 1000
>  typedef struct kernel_storage_node
>  {
>    char kernel_name[MAX_KERNEL_NAME_LENGTH];
>    float kernel_times[MAX_KERNEL_EXECUTION_COUNT];
> +  char build_option[MAX_KERNEL_BUILD_OPT];
>    int current_count;
>    float kernel_sum_time;
>    struct kernel_storage_node *next;
> @@ -81,11 +82,12 @@ static context_storage_node * find_context(cl_context context)
>    return pre;
>  }
>  
> -static kernel_storage_node * find_kernel(context_storage_node *p_context, const char *kernel_name)
> +static kernel_storage_node * find_kernel(context_storage_node *p_context, const char *kernel_name, const char *build_opt)
>  {
>    if(NULL != prev_kernel_pointer && NULL != prev_context_pointer &&
>       p_context == prev_context_pointer &&
> -     !strcmp(kernel_name, prev_kernel_pointer->kernel_name))
> +     !strcmp(kernel_name, prev_kernel_pointer->kernel_name) &&
> +     !strcmp(build_opt, prev_kernel_pointer->build_option))
>      return prev_kernel_pointer;
>  
>    if(NULL == p_context)
> @@ -96,22 +98,23 @@ static kernel_storage_node * find_kernel(context_storage_node *p_context, const
>      p_context->kernels_storage = (kernel_storage_node *)malloc(sizeof(kernel_storage_node));
>      p_context->kernel_count++;
>      strcpy(p_context->kernels_storage->kernel_name,kernel_name);
> +    strcpy(p_context->kernels_storage->build_option, build_opt);
       use strncpy should be safe here.
>      p_context->kernels_storage->current_count = 0;
>      p_context->kernels_storage->kernel_sum_time = 0.0f;
>      p_context->kernels_storage->next = NULL;
>      return p_context->kernels_storage;
>    }
> +
>    kernel_storage_node *pre = p_context->kernels_storage;
>    kernel_storage_node *cur = p_context->kernels_storage;
> -  while(NULL != cur && strcmp(cur->kernel_name, kernel_name))
> +  while(NULL != cur && (strcmp(cur->kernel_name, kernel_name) || strcmp(cur->build_option, build_opt)))
>    {
>      pre = cur;
>      cur = cur->next;
>    }
>    if(NULL != cur)
> -  {
>      return cur;
> -  }
> +
>    p_context->kernel_count++;
>    pre->next = (kernel_storage_node *)malloc(sizeof(kernel_storage_node));
>    pre = pre->next;
> @@ -119,6 +122,7 @@ static kernel_storage_node * find_kernel(context_storage_node *p_context, const
>    pre->kernel_sum_time = 0.0f;
>    pre->next = NULL;
>    strcpy(pre->kernel_name, kernel_name);
> +  strcpy(pre->build_option, build_opt);
>    return pre;
>  }
>  
> @@ -146,6 +150,8 @@ typedef struct time_element
>    float kernel_sum_time;
>    int kernel_execute_count;
>    double dev;
> +  float kernel_times[MAX_KERNEL_EXECUTION_COUNT];
> +  uint32_t time_index;
>  } time_element;
>  
>  static int cmp(const void *a, const void *b)
> @@ -172,40 +178,61 @@ static void print_time_info()
>    {
>      printf("[------------ CONTEXT %4d ------------]\n", tmp_context_id++);
>      printf("  ->>>> KERNELS TIME SUMMARY <<<<-\n");
> +
>      kernel_storage_node *p_kernel = p_context->kernels_storage;
>      kernel_storage_node *p_tmp_kernel = p_kernel;
>      time_element *te = (time_element *)malloc(sizeof(time_element)*p_context->kernel_count);
> -    int i = 0, j = 0;
> +    memset(te, 0, sizeof(time_element)*p_context->kernel_count);
> +    int i = -1, j = 0, k = 0;
>      while(NULL != p_tmp_kernel)
>      {
> -      te[i].kernel_execute_count = p_tmp_kernel->current_count;
> -      strcpy(te[i].kernel_name, p_tmp_kernel->kernel_name);
> -      te[i].kernel_sum_time = p_tmp_kernel->kernel_sum_time;
> -      float average = p_tmp_kernel->kernel_sum_time / p_tmp_kernel->current_count;
> -      float sumsquare = 0.0f;
> +      for(k=0; k<=i; k++)
> +      {
> +        if(!strcmp(te[k].kernel_name, p_tmp_kernel->kernel_name))
> +          break;
> +      }
> +      if(k == i+1)
> +      {
> +        i++;
> +        k = i;
> +      }
> +      te[k].kernel_execute_count += p_tmp_kernel->current_count;
> +      strcpy(te[k].kernel_name, p_tmp_kernel->kernel_name);
> +      te[k].kernel_sum_time += p_tmp_kernel->kernel_sum_time;
>        for(j=0; j != p_tmp_kernel->current_count; ++j)
> -        sumsquare += pow((p_tmp_kernel->kernel_times[j] - average), 2.0 );
> -      te[i++].dev = sqrt(sumsquare/p_tmp_kernel->current_count);
> +        te[k].kernel_times[te[k].time_index++] = p_tmp_kernel->kernel_times[j];
>        p_tmp_kernel = p_tmp_kernel->next;
>      }
> +
> +    for(k=0; k<=i; k++)
> +    {
> +      float average = te[k].kernel_sum_time / te[k].kernel_execute_count;
> +      double sumsquare = 0.0;
> +      for(j=0; j<te[k].time_index; ++j)
> +        sumsquare += pow((te[k].kernel_times[j] - average), 2.0);
> +      te[k].dev = sqrt(sumsquare / te[k].kernel_execute_count);
> +    }
> +
>      float sum_time = 0.0f;
>      qsort((void *)te, p_context->kernel_count, sizeof(time_element), cmp);
> -    for(i=0; i<p_context->kernel_count; ++i)
> -      sum_time += te[i].kernel_sum_time;
> -    for(i=0; i<p_context->kernel_count; ++i)
> +    for(j=0; j<=i; ++j)
> +      sum_time += te[j].kernel_sum_time;
> +
> +    for(j=0; j<=i; ++j)
>      {
> -      printf("    [Kernel Name: %-30s Time(ms): (%4.1f%%) %9.2f  Count: %-7d  Ave(ms): %7.2f  Dev: %.1lf%%] \n",
> -             te[i].kernel_name,
> -             te[i].kernel_sum_time / sum_time * 100,
> -             te[i].kernel_sum_time,
> -             te[i].kernel_execute_count,
> -             te[i].kernel_sum_time / te[i].kernel_execute_count,
> -             te[i].dev / te[i].kernel_sum_time * te[i].kernel_execute_count * 100);
> +      printf("    [Kernel Name: %-30s Time(ms): (%4.1f%%) %9.2f  Count: %-7d  Ave(ms): %7.2f  Dev: %.1lf%%]\n",
> +             te[j].kernel_name,
> +             te[j].kernel_sum_time / sum_time * 100,
> +             te[j].kernel_sum_time,
> +             te[j].kernel_execute_count,
> +             te[j].kernel_sum_time / te[j].kernel_execute_count,
> +             te[j].dev / te[j].kernel_sum_time * te[j].kernel_execute_count * 100);
>      }
>      free(te);
>      printf("    Total : %.2f\n", sum_time);
>      if(2 != b_output_kernel_perf)
>      {
> +      printf("[------------  CONTEXT ENDS------------]\n\n");
>        p_context = p_context->next;
>        continue;
>      }
> @@ -214,6 +241,18 @@ static void print_time_info()
>      while(NULL != p_kernel)
>      {
>        printf("    [Kernel Name : %30s   Time(ms): %.2f]\n", p_kernel->kernel_name, p_kernel->kernel_sum_time);
> +      if(*p_kernel->build_option != '\0')
> +      {
> +        int count = 0;
> +        printf("      ->Build Options : ");
> +        while(p_kernel->build_option[count] != '\0' )
> +        {
> +          printf("%c", p_kernel->build_option[count++]);
> +          if(count % 100 == 0)
> +            printf("\n                         ");
> +        }
> +        printf("\n");
> +      }
>        for(i=0; i!=p_kernel->current_count; ++i)
>          printf("      Execution Round%5d : %.2f (ms)\n", i+1, p_kernel->kernel_times[i]);
>        p_kernel = p_kernel->next;
> @@ -225,7 +264,7 @@ static void print_time_info()
>  }
>  
>  
> -static void insert(cl_context context, const char *kernel_name, float time)
> +static void insert(cl_context context, const char *kernel_name, const char *build_opt, float time)
>  {
>    if(!atexit_registered)
>    {
> @@ -233,7 +272,7 @@ static void insert(cl_context context, const char *kernel_name, float time)
>      atexit(print_time_info);
>    }
>    context_storage_node *p_context = find_context(context);
> -  kernel_storage_node *p_kernel = find_kernel(p_context, kernel_name);
> +  kernel_storage_node *p_kernel = find_kernel(p_context, kernel_name, build_opt);
>    prev_context_pointer = p_context;
>    prev_kernel_pointer = p_kernel;
>    p_kernel->kernel_times[p_kernel->current_count++] = time;
> @@ -267,11 +306,11 @@ void time_start(cl_context context, const char * kernel_name, cl_command_queue c
>    gettimeofday(&start, NULL);
>  }
>  
> -void time_end(cl_context context, const char * kernel_name, cl_command_queue cq)
> +void time_end(cl_context context, const char * kernel_name, const char * build_opt, cl_command_queue cq)
>  {
>    clFinish(cq);
>    gettimeofday(&end, NULL);
>    float t = (end.tv_sec - start.tv_sec)*1000 + (end.tv_usec - start.tv_usec)/1000.0f;
> -  insert(context, kernel_name, t);
> +  insert(context, kernel_name, build_opt, t);
>    pthread_mutex_unlock(&mutex);
>  }
> diff --git a/src/performance.h b/src/performance.h
> index c747743..1e75054 100644
> --- a/src/performance.h
> +++ b/src/performance.h
> @@ -5,7 +5,7 @@
>  
>  extern int b_output_kernel_perf;
>  void time_start(cl_context context, const char * kernel_name, cl_command_queue cq);
> -void time_end(cl_context context, const char * kernel_name, cl_command_queue cq);
> +void time_end(cl_context context, const char * kernel_name, const char * build_opt, cl_command_queue cq);
>  void initialize_env_var();
>  
>  
> -- 
> 1.8.3.2
> 
> 
> _______________________________________________
> Beignet mailing list
> Beignet at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/beignet


More information about the Beignet mailing list