[Mesa-dev] [PATCH v3 09/28] mesa: Add RGBA to Luminance conversion helpers

Wed Dec 17 04:52:00 PST 2014

On Wed, 2014-12-17 at 03:51 -0800, Jason Ekstrand wrote:
> 
> 
> 
> 
> 
> On Tue, Dec 16, 2014 at 11:37 PM, Iago Toral <itoral at igalia.com>
> wrote:
>         On Tue, 2014-12-16 at 10:54 -0800, Jason Ekstrand wrote:
>         >
>         >
>         > On Tue, Dec 16, 2014 at 4:01 AM, Iago Toral
>         <itoral at igalia.com> wrote:
>         >         On Tue, 2014-12-16 at 08:29 +0100, Iago Toral wrote:
>         >         > On Mon, 2014-12-15 at 10:19 -0800, Jason Ekstrand
>         wrote:
>         >         > >
>         >         > >
>         >         > > On Mon, Dec 15, 2014 at 3:12 AM, Iago Toral
>         >         <itoral at igalia.com> wrote:
>         >         > >         On Fri, 2014-12-12 at 11:36 -0800, Jason
>         Ekstrand
>         >         wrote:
>         >         > >         >
>         >         > >         >
>         >         > >         > On Tue, Dec 9, 2014 at 4:07 AM, Iago
>         Toral
>         >         Quiroga
>         >         > >         <itoral at igalia.com>
>         >         > >         > wrote:
>         >         > >         >         For glReadPixels with a
>         Luminance
>         >         destination format
>         >         > >         we
>         >         > >         >         compute luminance
>         >         > >         >         values from RGBA as L=R+G+B.
>         This,
>         >         however, requires
>         >         > >         ad-hoc
>         >         > >         >         implementation,
>         >         > >         >         since pack/unpack functions or
>         >         > >         _mesa_swizzle_and_convert won't
>         >         > >         >         do this
>         >         > >         >         (and thus, neither will
>         >         _mesa_format_convert). This
>         >         > >         patch adds
>         >         > >         >         helpers
>         >         > >         >         to do this computation so they
>         can be
>         >         used to
>         >         > >         support
>         >         > >         >         conversion to luminance
>         >         > >         >         formats.
>         >         > >         >
>         >         > >         >         The current implementation of
>         >         glReadPixels does this
>         >         > >         >         computation as part
>         >         > >         >         of the span functions in
>         pack.c (see
>         >         > >         >         _mesa_pack_rgba_span_float),
>         that do
>         >         > >         >         this together with other
>         things like
>         >         type
>         >         > >         conversion, etc. We
>         >         > >         >         do not want
>         >         > >         >         to use these functions but use
>         >         _mesa_format_convert
>         >         > >         instead
>         >         > >         >         (later patches
>         >         > >         >         will remove the color span
>         functions),
>         >         so we need to
>         >         > >         extract
>         >         > >         >         this functionality
>         >         > >         >         as helpers.
>         >         > >         >         ---
>         >         > >         >          src/mesa/main/pack.c | 63
>         >         > >         >
>         >
>         ++++++++++++++++++++++++++++++++++++++++++++++++++++
>         >         > >         >          src/mesa/main/pack.h |  9
>         ++++++++
>         >         > >         >          2 files changed, 72
>         insertions(+)
>         >         > >         >
>         >         > >         >         diff --git
>         a/src/mesa/main/pack.c
>         >         > >         b/src/mesa/main/pack.c
>         >         > >         >         index de6ab27..fa4046c 100644
>         >         > >         >         --- a/src/mesa/main/pack.c
>         >         > >         >         +++ b/src/mesa/main/pack.c
>         >         > >         >         @@ -4334,4 +4334,67 @@
>         >         _mesa_rebase_rgba_uint(GLuint
>         >         > >         n, GLuint
>         >         > >         >         rgba[][4], GLenum baseFormat)
>         >         > >         >             }
>         >         > >         >          }
>         >         > >         >
>         >         > >         >         +void
>         >         > >         >
>         >          +_mesa_pack_luminance_from_rgba_float(GLuint n,
>         >         > >         GLfloat
>         >         > >         >         rgba[][4],
>         >         > >         >         +
>         >          GLvoid
>         >         > >         *dstAddr, GLenum
>         >         > >         >         dst_format,
>         >         > >         >         +
>         >          GLbitfield
>         >         > >         transferOps)
>         >         > >         >         +{
>         >         > >         >         +   int i;
>         >         > >         >         +   GLfloat *dst = (GLfloat *)
>         dstAddr;
>         >         > >         >         +
>         >         > >         >         +   switch (dst_format) {
>         >         > >         >         +   case GL_LUMINANCE:
>         >         > >         >         +      if (transferOps &
>         >         IMAGE_CLAMP_BIT) {
>         >         > >         >         +         for (i = 0; i < n; i
>         ++) {
>         >         > >         >         +            GLfloat sum =
>         >         rgba[i][RCOMP] +
>         >         > >         rgba[i][GCOMP] +
>         >         > >         >         rgba[i][BCOMP];
>         >         > >         >         +            dst[i] =
>         CLAMP(sum, 0.0F,
>         >         1.0F);
>         >         > >         >         +         }
>         >         > >         >         +      } else {
>         >         > >         >         +         for (i = 0; i < n; i
>         ++) {
>         >         > >         >         +            dst[i] =
>         rgba[i][RCOMP] +
>         >         > >         rgba[i][GCOMP] +
>         >         > >         >         rgba[i][BCOMP];
>         >         > >         >         +         }
>         >         > >         >         +      }
>         >         > >         >         +      return;
>         >         > >         >         +   case GL_LUMINANCE_ALPHA:
>         >         > >         >         +      if (transferOps &
>         >         IMAGE_CLAMP_BIT) {
>         >         > >         >         +         for (i = 0; i < n; i
>         ++) {
>         >         > >         >         +            GLfloat sum =
>         >         rgba[i][RCOMP] +
>         >         > >         rgba[i][GCOMP] +
>         >         > >         >         rgba[i][BCOMP];
>         >         > >         >         +            dst[2*i] =
>         CLAMP(sum, 0.0F,
>         >         1.0F);
>         >         > >         >         +            dst[2*i+1] =
>         >         rgba[i][ACOMP];
>         >         > >         >         +         }
>         >         > >         >         +      } else {
>         >         > >         >         +         for (i = 0; i < n; i
>         ++) {
>         >         > >         >         +            dst[2*i] =
>         rgba[i][RCOMP] +
>         >         > >         rgba[i][GCOMP] +
>         >         > >         >         rgba[i][BCOMP];
>         >         > >         >         +            dst[2*i+1] =
>         >         rgba[i][ACOMP];
>         >         > >         >         +         }
>         >         > >         >         +      }
>         >         > >         >         +      return;
>         >         > >         >         +   default:
>         >         > >         >         +      assert(!"Unsupported
>         format");
>         >         > >         >         +   }
>         >         > >         >         +}
>         >         > >         >         +
>         >         > >         >         +void
>         >         > >         >
>         >          +_mesa_pack_luminance_from_rgba_integer(GLuint n,
>         >         > >         GLuint
>         >         > >         >         rgba[][4],
>         >         > >         >         +
>         >          GLvoid
>         >         > >         *dstAddr,
>         >         > >         >         GLenum dst_format)
>         >         > >         >         +{
>         >         > >         >         +   int i;
>         >         > >         >         +   GLuint *dst = (GLuint *)
>         dstAddr;
>         >         > >         >         +
>         >         > >         >         +   switch (dst_format) {
>         >         > >         >         +   case GL_LUMINANCE:
>         >         > >         >         +      for (i = 0; i < n; i++)
>         {
>         >         > >         >         +         dst[i] =
>         rgba[i][RCOMP] +
>         >         rgba[i][GCOMP] +
>         >         > >         >         rgba[i][BCOMP];
>         >         > >         >
>         >         > >         >
>         >         > >         > I know this is getting old, but don't
>         we need to
>         >         do some
>         >         > >         clamping
>         >         > >         > here?  If we do, then we have to
>         handle signed
>         >         vs. unsigned
>         >         > >         > seperately.  Yeah, that's annoying.
>         >         > >
>         >         > >
>         >         > >         You are right, reviewing the original
>         code it
>         >         looks like it
>         >         > >         ends up
>         >         > >         clamping the result of the triple
>         addition to the
>         >         type of the
>         >         > >         dst when
>         >         > >         the type of the dst has a different
>         size.
>         >         > >
>         >         > >         In our case, we call this function from
>         the
>         >         implementation of
>         >         > >         glreadpixels when an integer dst
>         luminance format
>         >         is involved.
>         >         > >         In that
>         >         > >         case we convert to RGBA_INT RGBA_UINT
>         first
>         >         depending on the
>         >         > >         type of the
>         >         > >         luminance format and then we call this
>         function,
>         >         so I think we
>         >         > >         only need
>         >         > >         to handle clamping when we pack from
>         RGBA_INT to
>         >         short and
>         >         > >         when we pack
>         >         > >         from RGBA_UINT to unsigned short
>         (meaning that we
>         >         do not need
>         >         > >         to handle
>         >         > >         scenarios where we convert from int to
>         ushort for
>         >         example).
>         >         > >         I'll fix
>         >         > >         this.
>         >         > >
>         >         > >
>         >         > > Yeah, RGBA_[U]INT in is the only case that
>         matters there.
>         >         What were
>         >         > > you thinking here?  Personally, I'd probably
>         just make
>         >         this function
>         >         > > clamp and then not special case it.  That said,
>         I'm up for
>         >         other
>         >         > > solutions too.
>         >         >
>         >         > My plan was:
>         >         >
>         >         > 1) Add a "GLenum type" parameter so we can detect
>         if we are
>         >         in the int
>         >         > case (GL_BYTE, GL_SHORT, etc) or the uint case
>         >         (GL_UNSIGNED_BYTE,
>         >         > GL_UNSIGNED_SHORT, etc).
>         >         >
>         >         > 2) If we are clamping uints we use
>         >         _mesa_unsigned_to_unsigned to clamp.
>         >         > If we are clamping ints, we use
>         _mesa_signed_to_signed. This
>         >         is because
>         >         > glReadPixels converts from the render buffer
>         format to RGBA
>         >         UINT when
>         >         > the dst type is uint and to RGBA INT otherwise
>         before
>         >         calling this
>         >         > function.
>         >         >
>         >         > It would look something like this (pseudocode):
>         >         >
>         >         > void
>         >         > _mesa_pack_luminance_from_rgba_integer(GLuint n,
>         GLuint
>         >         rgba[][4],
>         >         >    GLvoid *dstAddr, GLenum dst_format, GLenum
>         dst_type)
>         >         > {
>         >         >    // Compute L = R + G + B as we do now, without
>         clamping
>         >         >
>         >         >    // Handle clamping if necessary (only if the
>         dst_type has
>         >         >    // a different size
>         >         >    int bits = bits = _mesa_sizeof_type(dst_type) *
>         8;
>         >         >    if (bits < 32) {
>         >         >       int components =
>         >         _mesa_components_in_format(dst_format);
>         >         >       int is_signed = !
>         _mesa_is_type_unsigned(dst_type);
>         >         >       if (is_signed)
>         >         >          for (i = 0; i < n; i++) {
>         >         >             dst[i*components] =
>         >         >
>         _mesa_signed_to_signed(dst[i*components],
>         >         bits);
>         >         >       } else {
>         >         >          for (i = 0; i < n; i++) {
>         >         >             dst[i*components] =
>         >         >
>         _mesa_unsigned_to_unsigned(dst[i*components],
>         >         bits);
>         >         >       }
>         >         >    }
>         >         > }
>         >         >
>         >         > We could add the unsigned_to_signed and
>         signed_to_unsigned
>         >         paths too if
>         >         > you prefer it that way, but they won't be used as
>         far as I
>         >         can see. In
>         >         > that case we would also need to add a src_type
>         parameter.
>         >
>         >
>         >         Well, in the end I could not do it like this. There
>         is a
>         >         subtle problem
>         >         with the RB format -> RGBA conversion that we do
>         before we
>         >         pack the
>         >         luminance values. The problem is that we do not want
>         this
>         >         conversion to
>         >         ever do int->uint clamping because we want to
>         compute L=R+G+B
>         >         with
>         >         unclamped values first, then clamp the result.
>         >
>         >         For example, if we have a signed integer render
>         buffer and a
>         >         pixel in
>         >         that buffer has this value:
>         >         r=-5, g=6, b=1
>         >
>         >         when we convert this to RGBA we want to have
>         >         r=-5, g=6, b=1, a = 1
>         >
>         >         instead of
>         >         r=0, g=6, b=1, a = 1
>         >
>         >         which is what we would get if we convert to RGBA
>         UINT. Then we
>         >         would
>         >         compute L = R+G+B = 2, and this is the value we
>         should clamp
>         >         to the type
>         >         of the dst if we need to.
>         >
>         >         So in the end I have to consider signed<->unsigned
>         conversions
>         >         when we
>         >         compute L anyway.
>         >
>         >         Here is a pastebin with the actual implementation of
>         the
>         >         luminance
>         >         packing helper:
>         >         http://pastebin.com/gB0aLtzx
>         >
>         >         I tested this with a few examples and seems to
>         produce correct
>         >         results.
>         >         Notice that this is currently broken in master,
>         because pack.c
>         >         defines
>         >         stuff like this:
>         >
>         >         #define SRC_CONVERT(x) CLAMP((int)x, -32768, 32767)
>         >         #define FN_NAME pack_short_from_uint_rgba
>         >
>         >         #define SRC_CONVERT(x) CLAMP((int)x, -128, 127)
>         >         #define FN_NAME pack_byte_from_uint_rgba
>         >
>         >         which does not do the right thing when 'x' is
>         expanded to be
>         >         an
>         >         expression that adds R, G and B components.
>         >
>         >
>         > Yes, I think that works but is a bit more complicated than
>         needed.
>         > Why can't we just do a 64-bit add of the components and then
>         clamp to
>         > a 32-bit value.  If the final value is 8 or 16 bits, it will
>         get
>         > clamped again, but that shouldn't be a problem.  We will
>         have to
>         > differentiate between signed and unsigned, but that's ok.
>         It would be
>         > much simpler.  Would that work or am I missing something?
>         
>         
>         I think it should work, although I wonder if this is better.
>         This is how
>         I see this being implemented:
>         
>         1) Do the 64-bit addition. This requires to allocate a temp
>         buffer for
>         the 64-bit luminance values.
>         2) Clamp the 64-bit buffer to 32-bit values. Check if the dst
>         format is
>         signed or not to decide if we need to do a signed or unsigned
>         32-bit
>         clamp. We don't have 64-bit to 32-bit integer clamp functions
>         at the
>         moment, so those  should be added.
>         3) If the dst type is 32-bit, then we copy that to dst.
>         3) If the dst type is <32 bits, copy to dst and clamp
>         (considering
>         whether we need a signed or unsigned clamp again) => that
>         means we need
>         to consider 4 paths here (unsigned byte, signed byte, unsigned
>         short,
>         signed short)
>         
>         In the end we have the same paths and the only gain is that we
>         can do
>         without a macro to compute the additions but in exchange we
>         need to
>         allocate a temporary buffer in all the scenarios and clamp
>         twice for
>         types < 32-bit... so I wonder if this is really worth it in
>         the end.
>         Maybe there is a better way to do it that I am missing?
> 
> 
> Oh, I wasn't thinking a 64bit temporary at all.  Just cast to the
> appropreate 64-bit integer type, add, clamp and go back to 32 in one

but we don't have any 32-bit storage to go back to... my implementation
packs directly to the dst buffer because it considers the type of the
dst and never needs to store intermediary values.

>  smooth motion in the original clampping function you had.  All of the
> malloc'd storage stays 32-bit, it just tweaks the math a bit.

I think we can't do this directly as you suggest, but then maybe I am
missing something...

This is how I think it would look like if we try to avoid a 32-bit
malloc for the entire size of the pixel data we process, we would have
to resolve clamping for every pixel immediately after computing the
luminance value, so we would have something like this (pseudocode):

for (<all pixel data>) {
   uint32_t temp32;
   uint64_t temp64 = (uint64_t) R + (uint64_t) G + (uint64_t) B);
   if (rgba_is_signed)
      temp32 = clamp64to32_signed(temp64);
   else
      temp32 = clamp64to32_unsigned(temp64);
   switch(dst_type) {
      case GL_BYTE:
         // clamp to signed 8-bit and store in dst
      case GL_UNSIGNED_BYTE:
         // clamp to unsigned 8-bit and store in dst
      case GL_SHORT:
         // clamp to signed 16-bit and store in dst
      case GL_UNSIGNED_SHORT:
         // clamp to unsigned 16-bit and store in dst
      case GL_INT:
         if (rgba_is_signed)
            // copy to dst directly (no clamp needed)
         else
            // clamp to signed dst and store in dst
      case GL_UNSIGNED_INT:
         if (!rgba_is_signed)
            // copy to dst directly (no clamp needed)
         else
            // clamp to signed dst
   }
}

but that would add a lot of logic that would ran for every pixel in the
loop, which seems undesirable... if we want to take that logic out of
the loop then we need to malloc, store the 32-bit values in the
temporary, then do a second pass to clamp and copy to the dst buffer
after the loop, and if we don't want that either then we need to make
this a macro that can take type information and the right clamp
functions as arguments so we don't need a switch inside the loop... but
in that case there is no gain over my original implementation.

Maybe I am missing something obvious?

>         
>         >         > >
>         >         > >         >
>         >         > >         >         +      }
>         >         > >         >         +      return;
>         >         > >         >         +   case GL_LUMINANCE_ALPHA:
>         >         > >         >         +      for (i = 0; i < n; i++)
>         {
>         >         > >         >         +         dst[2*i] =
>         rgba[i][RCOMP] +
>         >         rgba[i][GCOMP]
>         >         > >         +
>         >         > >         >         rgba[i][BCOMP];
>         >         > >         >         +         dst[2*i+1] =
>         rgba[i][ACOMP];
>         >         > >         >         +      }
>         >         > >         >         +      return;
>         >         > >         >         +   default:
>         >         > >         >         +      assert(!"Unsupported
>         format");
>         >         > >         >         +   }
>         >         > >         >         +}
>         >         > >         >
>         >         > >         >         diff --git
>         a/src/mesa/main/pack.h
>         >         > >         b/src/mesa/main/pack.h
>         >         > >         >         index 2173b65..2783f23 100644
>         >         > >         >         --- a/src/mesa/main/pack.h
>         >         > >         >         +++ b/src/mesa/main/pack.h
>         >         > >         >         @@ -155,4 +155,13 @@
>         >         _mesa_rebase_rgba_float(GLuint
>         >         > >         n, GLfloat
>         >         > >         >         rgba[][4], GLenum baseFormat);
>         >         > >         >          extern void
>         >         > >         >          _mesa_rebase_rgba_uint(GLuint
>         n, GLuint
>         >         rgba[][4],
>         >         > >         GLenum
>         >         > >         >         baseFormat);
>         >         > >         >
>         >         > >         >         +extern void
>         >         > >         >
>         >          +_mesa_pack_luminance_from_rgba_float(GLuint n,
>         >         > >         GLfloat
>         >         > >         >         rgba[][4],
>         >         > >         >         +
>         >          GLvoid
>         >         > >         *dstAddr, GLenum
>         >         > >         >         dst_format,
>         >         > >         >         +
>         >          GLbitfield
>         >         > >         transferOps);
>         >         > >         >         +
>         >         > >         >         +extern void
>         >         > >         >
>         >          +_mesa_pack_luminance_from_rgba_integer(GLuint n,
>         >         > >         GLuint
>         >         > >         >         rgba[][4],
>         >         > >         >         +
>         >          GLvoid
>         >         > >         *dstAddr,
>         >         > >         >         GLenum dst_format);
>         >         > >         >         +
>         >         > >         >          #endif
>         >         > >         >         --
>         >         > >         >         1.9.1
>         >         > >         >
>         >         > >         >
>         >          _______________________________________________
>         >         > >         >         mesa-dev mailing list
>         >         > >         >         mesa-dev at lists.freedesktop.org
>         >         > >         >
>         >         > >
>         >
>          http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>         >         > >
>         >         > >
>         >         > >
>         >         >
>         >         >
>         >         > _______________________________________________
>         >         > mesa-dev mailing list
>         >         > mesa-dev at lists.freedesktop.org
>         >         >
>         http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>         >
>         >
>         >
>         
>         
>