[Mesa-dev] [PATCH 1/6] translate_generic: use memcpy if possible

Keith Whitwell keithw at vmware.com
Fri Aug 13 02:42:03 PDT 2010


Luca,

In this change you've got an int value (copy_size) that has a special
meaning when negative -- can you add comments explaining what a
negative size means?  Is there a way to use a more explicit flag
value to indicate this condition?

Keith

On Thu, 2010-08-12 at 10:08 -0700, Luca Barbieri wrote:
> When used in GPU drivers, translate can be used to simultaneously
> perform a gather operation, and convert away from unsupported formats.
> 
> In this use case, input and output formats will often be identical: clearly
> it would make sense to use a memcpy in this case.
> 
> Instead, translate insists on converting to and from 32-bit floating-point
> numbers.
> 
> This is not only extremely expensive, but it also loses precision for
> 32/64-bit integers and 64-bit floating point numbers.
> 
> This patch changes translate_generic to just use memcpy if the formats are
> identical, non-blocked, and have an integral number of bytes per pixel (note
> that all sensible vertex formats are like this).
> ---
>  .../auxiliary/translate/translate_generic.c        |   93 +++++++++++++------
>  1 files changed, 63 insertions(+), 30 deletions(-)
> 
> diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c
> index 42cfd76..57a42b7 100644
> --- a/src/gallium/auxiliary/translate/translate_generic.c
> +++ b/src/gallium/auxiliary/translate/translate_generic.c
> @@ -63,6 +63,7 @@ struct translate_generic {
>        const uint8_t *input_ptr;
>        unsigned input_stride;
>        unsigned max_index;
> +      int copy_size;
>  
>     } attrib[PIPE_MAX_ATTRIBS];
>  
> @@ -380,9 +381,10 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
>  	 float data[4];
>  	 char *dst = vert + tg->attrib[attr].output_offset;
>  
> -         if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
> +	 if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
>              const uint8_t *src;
>              unsigned index;
> +            int copy_size;
>  
>              if (tg->attrib[attr].instance_divisor) {
>                 index = instance_id / tg->attrib[attr].instance_divisor;
> @@ -396,27 +398,34 @@ static void PIPE_CDECL generic_run_elts( struct translate *translate,
>              src = tg->attrib[attr].input_ptr +
>                    tg->attrib[attr].input_stride * index;
>  
> -            tg->attrib[attr].fetch( data, src, 0, 0 );
> -
> -            if (0)
> -               debug_printf("Fetch elt attr %d  from %p  stride %d  div %u  max %u  index %d:  "
> -                            " %f, %f, %f, %f \n",
> -                            attr,
> -                            tg->attrib[attr].input_ptr,
> -                            tg->attrib[attr].input_stride,
> -                            tg->attrib[attr].instance_divisor,
> -                            tg->attrib[attr].max_index,
> -                            index,
> -                            data[0], data[1],data[2], data[3]);
> +            copy_size = tg->attrib[attr].copy_size;
> +            if(likely(copy_size >= 0))
> +               memcpy(dst, src, tg->attrib[attr].copy_size);
> +            else
> +            {
> +               tg->attrib[attr].fetch( data, src, 0, 0 );
> +
> +               if (0)
> +                  debug_printf("Fetch elt attr %d  from %p  stride %d  div %u  max %u  index %d:  "
> +                               " %f, %f, %f, %f \n",
> +                               attr,
> +                               tg->attrib[attr].input_ptr,
> +                               tg->attrib[attr].input_stride,
> +                               tg->attrib[attr].instance_divisor,
> +                               tg->attrib[attr].max_index,
> +                               index,
> +                               data[0], data[1],data[2], data[3]);
> +               tg->attrib[attr].emit( data, dst );
> +            }
>           } else {
> -            data[0] = (float)instance_id;
> +            if(likely(tg->attrib[attr].copy_size >= 0))
> +               memcpy(data, &instance_id, 4);
> +            else
> +            {
> +               data[0] = (float)instance_id;
> +               tg->attrib[attr].emit( data, dst );
> +            }
>           }
> -
> -         if (0)
> -            debug_printf("vert %d/%d attr %d: %f %f %f %f\n",
> -                         i, elt, attr, data[0], data[1], data[2], data[3]);
> -
> -	 tg->attrib[attr].emit( data, dst );
>        }
>        vert += tg->translate.key.output_stride;
>     }
> @@ -448,6 +457,7 @@ static void PIPE_CDECL generic_run( struct translate *translate,
>           if (tg->attrib[attr].type == TRANSLATE_ELEMENT_NORMAL) {
>              const uint8_t *src;
>              unsigned index;
> +            int copy_size;
>  
>              if (tg->attrib[attr].instance_divisor) {
>                 index = instance_id / tg->attrib[attr].instance_divisor;
> @@ -462,25 +472,33 @@ static void PIPE_CDECL generic_run( struct translate *translate,
>              src = tg->attrib[attr].input_ptr +
>                    tg->attrib[attr].input_stride * index;
>  
> -            tg->attrib[attr].fetch( data, src, 0, 0 );
> +            copy_size = tg->attrib[attr].copy_size;
> +            if(likely(copy_size >= 0))
> +               memcpy(dst, src, tg->attrib[attr].copy_size);
> +            else
> +            {
> +               tg->attrib[attr].fetch( data, src, 0, 0 );
>  
> -            if (0)
> -               debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
> +               if (0)
> +                  debug_printf("Fetch linear attr %d  from %p  stride %d  index %d: "
>                              " %f, %f, %f, %f \n",
>                              attr,
>                              tg->attrib[attr].input_ptr,
>                              tg->attrib[attr].input_stride,
>                              index,
>                              data[0], data[1],data[2], data[3]);
> +
> +               tg->attrib[attr].emit( data, dst );
> +            }
>           } else {
> -            data[0] = (float)instance_id;
> +            if(likely(tg->attrib[attr].copy_size >= 0))
> +               memcpy(data, &instance_id, 4);
> +            else
> +            {
> +               data[0] = (float)instance_id;
> +               tg->attrib[attr].emit( data, dst );
> +            }
>           }
> -
> -         if (0)
> -            debug_printf("vert %d attr %d: %f %f %f %f\n",
> -                         i, attr, data[0], data[1], data[2], data[3]);
> -
> -	 tg->attrib[attr].emit( data, dst );
>        }
>        
>        vert += tg->translate.key.output_stride;
> @@ -547,6 +565,21 @@ struct translate *translate_generic_create( const struct translate_key *key )
>        tg->attrib[i].emit = get_emit_func(key->element[i].output_format);
>        tg->attrib[i].output_offset = key->element[i].output_offset;
>  
> +      tg->attrib[i].copy_size = -1;
> +      if (tg->attrib[i].type == TRANSLATE_ELEMENT_INSTANCE_ID)
> +      {
> +            if(key->element[i].output_format == PIPE_FORMAT_R32_USCALED
> +                  || key->element[i].output_format == PIPE_FORMAT_R32_SSCALED)
> +               tg->attrib[i].copy_size = 4;
> +      }
> +      else
> +      {
> +         if(key->element[i].input_format == key->element[i].output_format
> +               && format_desc->block.width == 1
> +               && format_desc->block.height == 1
> +               && !(format_desc->block.bits & 7))
> +            tg->attrib[i].copy_size = format_desc->block.bits >> 3;
> +      }
>     }
>  
>     tg->nr_attrib = key->nr_elements;




More information about the mesa-dev mailing list