[Spice-commits] 5 commits - common/canvas_base.c common/quic.c common/quic_family_tmpl.c common/quic_rgb_tmpl.c common/quic_tmpl.c spice.proto

Thu Sep 12 04:49:44 PDT 2013

common/canvas_base.c      |   43 +++++++++++++++++++++++++++++++++++++++++++
 common/quic.c             |   25 ++++++++++++++++++++++++-
 common/quic_family_tmpl.c |   26 +++++++++++---------------
 common/quic_rgb_tmpl.c    |    6 +++---
 common/quic_tmpl.c        |   16 ++++++++--------
 spice.proto               |    4 ++++
 6 files changed, 93 insertions(+), 27 deletions(-)

New commits:
commit 4857653686d3b4c8f96aebe5a96d3573ecc5d147
Author: Marc-André Lureau <marcandre.lureau at gmail.com>
Date:   Sun Sep 8 21:04:49 2013 +0200

    quic: precompute golomb codes
    
    We can avoid repetitive computation by using two precomputed arrays, of
    8k each.
    
    before:
         1.79%  lt-spicy-stats  libspice-client-glib-2.0.so.8.4.0  [.]
         golomb_code_len_8bpc
    
    after:
         0.79%  lt-spicy-stats  libspice-client-glib-2.0.so.8.4.0  [.]
         golomb_code_len_8bpc

diff --git a/common/quic.c b/common/quic.c
index 3e7c802..c9c3624 100644
--- a/common/quic.c
+++ b/common/quic.c
@@ -75,6 +75,9 @@ typedef struct QuicFamily {
     unsigned int notGRsuffixlen[MAXNUMCODES];    /* indexed by code number, contains suffix
                                                     length of the not-GR codeword */
 
+    unsigned int golomb_code_len[256][MAXNUMCODES];
+    unsigned int golomb_code[256][MAXNUMCODES];
+
     /* array for translating distribution U to L for depths up to 8 bpp,
     initialized by decorelateinit() */
     BYTE xlatU2L[256];
@@ -360,9 +363,22 @@ static void corelate_init(QuicFamily *family, int bpc)
     }
 }
 
+static void golomb_coding_slow(QuicFamily *family, const BYTE n, const unsigned int l,
+                               unsigned int * const codeword,
+                               unsigned int * const codewordlen)
+{
+    if (n < family->nGRcodewords[l]) {
+        (*codeword) = bitat[l] | (n & bppmask[l]);
+        (*codewordlen) = (n >> l) + l + 1;
+    } else {
+        (*codeword) = n - family->nGRcodewords[l];
+        (*codewordlen) = family->notGRcwlen[l];
+    }
+}
+
 static void family_init(QuicFamily *family, int bpc, int limit)
 {
-    int l;
+    int l, b;
 
     for (l = 0; l < bpc; l++) { /* fill arrays indexed by code number */
         int altprefixlen, altcodewords;
@@ -378,6 +394,13 @@ static void family_init(QuicFamily *family, int bpc, int limit)
         family->notGRcwlen[l] = altprefixlen + ceil_log_2(altcodewords);
         family->notGRprefixmask[l] = bppmask[32 - altprefixlen]; /* needed for decoding only */
         family->notGRsuffixlen[l] = ceil_log_2(altcodewords); /* needed for decoding only */
+
+        for (b = 0; b < 256; b++) {
+            unsigned int code, len;
+            golomb_coding_slow(family, b, l, &code, &len);
+            family->golomb_code[b][l] = code;
+            family->golomb_code_len[b][l] = len;
+        }
     }
 
     decorelate_init(family, bpc);
diff --git a/common/quic_family_tmpl.c b/common/quic_family_tmpl.c
index 287ff6d..12ef62f 100644
--- a/common/quic_family_tmpl.c
+++ b/common/quic_family_tmpl.c
@@ -34,26 +34,21 @@
 #define BPC 5
 #endif
 
+static inline unsigned int FNAME(golomb_code)(const BYTE n, const unsigned int l)
+{
+    return VNAME(family).golomb_code[n][l];
+}
 
-static unsigned int FNAME(golomb_code_len)(const BYTE n, const unsigned int l)
+static inline unsigned int FNAME(golomb_code_len)(const BYTE n, const unsigned int l)
 {
-    if (n < VNAME(family).nGRcodewords[l]) {
-        return (n >> l) + 1 + l;
-    } else {
-        return VNAME(family).notGRcwlen[l];
-    }
+    return VNAME(family).golomb_code_len[n][l];
 }
 
 static void FNAME(golomb_coding)(const BYTE n, const unsigned int l, unsigned int * const codeword,
                                  unsigned int * const codewordlen)
 {
-    if (n < VNAME(family).nGRcodewords[l]) {
-        (*codeword) = bitat[l] | (n & bppmask[l]);
-        (*codewordlen) = (n >> l) + l + 1;
-    } else {
-        (*codeword) = n - VNAME(family).nGRcodewords[l];
-        (*codewordlen) = VNAME(family).notGRcwlen[l];
-    }
+    *codeword = FNAME(golomb_code)(n, l);
+    *codewordlen = FNAME(golomb_code_len)(n, l);
 }
 
 static unsigned int FNAME(golomb_decoding)(const unsigned int l, const unsigned int bits,
@@ -76,6 +71,7 @@ static unsigned int FNAME(golomb_decoding)(const unsigned int l, const unsigned
 static void FNAME(update_model)(CommonState *state, s_bucket * const bucket,
                                 const BYTE curval)
 {
+    spice_static_assert(BPC >= 1);
     const unsigned int bpp = BPC;
     COUNTER * const pcounters = bucket->pcounters;
     unsigned int i;
commit 8db88d28543b2f147a9e5b4bf6b0e5b5eba1c1c9
Author: Marc-André Lureau <marcandre.lureau at gmail.com>
Date:   Sun Sep 8 21:03:25 2013 +0200

    quic: compile with constant bpp
    
    This simplifies function calls a little, and allows the compiler to
    potentially specialize and optimize each version a bit better.

diff --git a/common/quic_family_tmpl.c b/common/quic_family_tmpl.c
index cca2c05..287ff6d 100644
--- a/common/quic_family_tmpl.c
+++ b/common/quic_family_tmpl.c
@@ -74,13 +74,13 @@ static unsigned int FNAME(golomb_decoding)(const unsigned int l, const unsigned
 
 /* update the bucket using just encoded curval */
 static void FNAME(update_model)(CommonState *state, s_bucket * const bucket,
-                                const BYTE curval, unsigned int bpp)
+                                const BYTE curval)
 {
+    const unsigned int bpp = BPC;
     COUNTER * const pcounters = bucket->pcounters;
     unsigned int i;
     unsigned int bestcode;
     unsigned int bestcodelen;
-    //unsigned int bpp = encoder->bpp;
 
     /* update counters, find minimum */
 
diff --git a/common/quic_rgb_tmpl.c b/common/quic_rgb_tmpl.c
index 37c908c..19cc348 100644
--- a/common/quic_rgb_tmpl.c
+++ b/common/quic_rgb_tmpl.c
@@ -178,11 +178,11 @@
 
 #define UPDATE_MODEL(index)                                                                 \
     update_model(&encoder->rgb_state, find_bucket(channel_r, correlate_row_r[index - 1]),   \
-                correlate_row_r[index], bpc);                                               \
+                correlate_row_r[index]);                                               \
     update_model(&encoder->rgb_state, find_bucket(channel_g, correlate_row_g[index - 1]),   \
-                correlate_row_g[index], bpc);                                               \
+                correlate_row_g[index]);                                               \
     update_model(&encoder->rgb_state, find_bucket(channel_b, correlate_row_b[index - 1]),   \
-                correlate_row_b[index], bpc);
+                correlate_row_b[index]);
 
 
 #ifdef RLE_PRED_1
diff --git a/common/quic_tmpl.c b/common/quic_tmpl.c
index b625daf..75f2ff0 100644
--- a/common/quic_tmpl.c
+++ b/common/quic_tmpl.c
@@ -173,7 +173,7 @@ static void FNAME(compress_row0_seg)(Encoder *encoder, Channel *channel, int i,
         } else {
             channel->state.waitcnt = (tabrand(&channel->state.tabrand_seed) & waitmask);
             update_model(&channel->state, find_bucket(channel, decorelate_drow[-1]),
-                         decorelate_drow[i], bpc);
+                         decorelate_drow[i]);
         }
         stopidx = ++i + channel->state.waitcnt;
     } else {
@@ -191,7 +191,7 @@ static void FNAME(compress_row0_seg)(Encoder *encoder, Channel *channel, int i,
         }
 
         update_model(&channel->state, find_bucket(channel, decorelate_drow[stopidx - 1]),
-                     decorelate_drow[stopidx], bpc);
+                     decorelate_drow[stopidx]);
         stopidx = i + (tabrand(&channel->state.tabrand_seed) & waitmask);
     }
 
@@ -272,7 +272,7 @@ static void FNAME(compress_row_seg)(Encoder *encoder, Channel *channel, int i,
         } else {
             channel->state.waitcnt = (tabrand(&channel->state.tabrand_seed) & waitmask);
             update_model(&channel->state, find_bucket(channel, decorelate_drow[-1]),
-                         decorelate_drow[0], bpc);
+                         decorelate_drow[0]);
         }
         stopidx = ++i + channel->state.waitcnt;
     } else {
@@ -295,7 +295,7 @@ static void FNAME(compress_row_seg)(Encoder *encoder, Channel *channel, int i,
             }
 
             update_model(&channel->state, find_bucket(channel, decorelate_drow[stopidx - 1]),
-                         decorelate_drow[stopidx], bpc);
+                         decorelate_drow[stopidx]);
             stopidx = i + (tabrand(&channel->state.tabrand_seed) & waitmask);
         }
 
@@ -406,7 +406,7 @@ static void FNAME(uncompress_row0_seg)(Encoder *encoder, Channel *channel, int i
         } else {
             channel->state.waitcnt = (tabrand(&channel->state.tabrand_seed) & waitmask);
             update_model(&channel->state, find_bucket(channel, correlate_row[-1]),
-                         correlate_row[0], bpc);
+                         correlate_row[0]);
         }
         stopidx = ++i + channel->state.waitcnt;
     } else {
@@ -426,7 +426,7 @@ static void FNAME(uncompress_row0_seg)(Encoder *encoder, Channel *channel, int i
             decode_eatbits(encoder, codewordlen);
         }
 
-        update_model(&channel->state, pbucket, correlate_row[stopidx], bpc);
+        update_model(&channel->state, pbucket, correlate_row[stopidx]);
 
         stopidx = i + (tabrand(&channel->state.tabrand_seed) & waitmask);
     }
@@ -511,7 +511,7 @@ static void FNAME(uncompress_row_seg)(Encoder *encoder, Channel *channel,
         } else {
             channel->state.waitcnt = (tabrand(&channel->state.tabrand_seed) & waitmask);
             update_model(&channel->state, find_bucket(channel, correlate_row[-1]),
-                         correlate_row[0], bpc);
+                         correlate_row[0]);
         }
         stopidx = ++i + channel->state.waitcnt;
     } else {
@@ -535,7 +535,7 @@ static void FNAME(uncompress_row_seg)(Encoder *encoder, Channel *channel,
                 decode_eatbits(encoder, codewordlen);
             }
 
-            update_model(&channel->state, pbucket, correlate_row[stopidx], bpc);
+            update_model(&channel->state, pbucket, correlate_row[stopidx]);
 
             stopidx = i + (tabrand(&channel->state.tabrand_seed) & waitmask);
         }
commit a7b93bd43d113af16220f86cbf3cd451ab859d7e
Author: Marc-André Lureau <marcandre.lureau at gmail.com>
Date:   Sun Sep 8 02:53:33 2013 +0200

    canvas: use precomputed revers_bits
    
    This function shows up in some profiling results; it seems we can
    trivially replace it with a precomputed array of 256 bytes.
    
    before:
         5.66%           691  lt-spicy-stats
    libspice-client-glib-2.0.so.8.4.0  [.] revers_bits
    
    after:
         0.53%            64  lt-spicy-stats
    libspice-client-glib-2.0.so.8.4.0  [.] revers_bits

diff --git a/common/canvas_base.c b/common/canvas_base.c
index 38a8497..2753fae 100644
--- a/common/canvas_base.c
+++ b/common/canvas_base.c
@@ -1329,6 +1329,8 @@ static pixman_image_t* canvas_get_image_from_self(SpiceCanvas *canvas,
     return surface;
 }
 
+
+#ifdef REVERS_BITS_SLOW
 static inline uint8_t revers_bits(uint8_t byte)
 {
     uint8_t ret = 0;
@@ -1341,6 +1343,47 @@ static inline uint8_t revers_bits(uint8_t byte)
     }
     return ret;
 }
+#else
+static inline uint8_t revers_bits(uint8_t byte)
+{
+    static const uint8_t revers[] = {
+        0x0, 0x80, 0x40, 0xc0, 0x20, 0xa0, 0x60, 0xe0,
+        0x10, 0x90, 0x50, 0xd0, 0x30, 0xb0, 0x70, 0xf0,
+        0x8, 0x88, 0x48, 0xc8, 0x28, 0xa8, 0x68, 0xe8,
+        0x18, 0x98, 0x58, 0xd8, 0x38, 0xb8, 0x78, 0xf8,
+        0x4, 0x84, 0x44, 0xc4, 0x24, 0xa4, 0x64, 0xe4,
+        0x14, 0x94, 0x54, 0xd4, 0x34, 0xb4, 0x74, 0xf4,
+        0xc, 0x8c, 0x4c, 0xcc, 0x2c, 0xac, 0x6c, 0xec,
+        0x1c, 0x9c, 0x5c, 0xdc, 0x3c, 0xbc, 0x7c, 0xfc,
+        0x2, 0x82, 0x42, 0xc2, 0x22, 0xa2, 0x62, 0xe2,
+        0x12, 0x92, 0x52, 0xd2, 0x32, 0xb2, 0x72, 0xf2,
+        0xa, 0x8a, 0x4a, 0xca, 0x2a, 0xaa, 0x6a, 0xea,
+        0x1a, 0x9a, 0x5a, 0xda, 0x3a, 0xba, 0x7a, 0xfa,
+        0x6, 0x86, 0x46, 0xc6, 0x26, 0xa6, 0x66, 0xe6,
+        0x16, 0x96, 0x56, 0xd6, 0x36, 0xb6, 0x76, 0xf6,
+        0xe, 0x8e, 0x4e, 0xce, 0x2e, 0xae, 0x6e, 0xee,
+        0x1e, 0x9e, 0x5e, 0xde, 0x3e, 0xbe, 0x7e, 0xfe,
+        0x1, 0x81, 0x41, 0xc1, 0x21, 0xa1, 0x61, 0xe1,
+        0x11, 0x91, 0x51, 0xd1, 0x31, 0xb1, 0x71, 0xf1,
+        0x9, 0x89, 0x49, 0xc9, 0x29, 0xa9, 0x69, 0xe9,
+        0x19, 0x99, 0x59, 0xd9, 0x39, 0xb9, 0x79, 0xf9,
+        0x5, 0x85, 0x45, 0xc5, 0x25, 0xa5, 0x65, 0xe5,
+        0x15, 0x95, 0x55, 0xd5, 0x35, 0xb5, 0x75, 0xf5,
+        0xd, 0x8d, 0x4d, 0xcd, 0x2d, 0xad, 0x6d, 0xed,
+        0x1d, 0x9d, 0x5d, 0xdd, 0x3d, 0xbd, 0x7d, 0xfd,
+        0x3, 0x83, 0x43, 0xc3, 0x23, 0xa3, 0x63, 0xe3,
+        0x13, 0x93, 0x53, 0xd3, 0x33, 0xb3, 0x73, 0xf3,
+        0xb, 0x8b, 0x4b, 0xcb, 0x2b, 0xab, 0x6b, 0xeb,
+        0x1b, 0x9b, 0x5b, 0xdb, 0x3b, 0xbb, 0x7b, 0xfb,
+        0x7, 0x87, 0x47, 0xc7, 0x27, 0xa7, 0x67, 0xe7,
+        0x17, 0x97, 0x57, 0xd7, 0x37, 0xb7, 0x77, 0xf7,
+        0xf, 0x8f, 0x4f, 0xcf, 0x2f, 0xaf, 0x6f, 0xef,
+        0x1f, 0x9f, 0x5f, 0xdf, 0x3f, 0xbf, 0x7f, 0xff
+    };
+
+    return revers[byte];
+}
+#endif
 
 static pixman_image_t *canvas_get_bitmap_mask(CanvasBase *canvas, SpiceBitmap* bitmap, int invers)
 {
commit 6440a1e533c49f51e3e69f36c0218a6ec19d31ca
Author: Marc-André Lureau <marcandre.lureau at redhat.com>
Date:   Tue Sep 10 23:10:29 2013 +0200

    proto: add fake last message in base channel
    
    Make it explicit that 100 is the last value of the base channel
    messages. This allows clients to use the generated enum value too.

diff --git a/spice.proto b/spice.proto
index 728178b..04e7ea4 100644
--- a/spice.proto
+++ b/spice.proto
@@ -166,6 +166,8 @@ channel BaseChannel {
 
     Data list; /* the msg body is SpiceSubMessageList */
 
+    Empty base_last = 100;
+
  client:
     message {
 	uint32 generation;
commit 4d8d2b612473ddd645572b23f7bf2658de0ec4f5
Author: Marc-André Lureau <marcandre.lureau at redhat.com>
Date:   Tue Sep 10 12:30:57 2013 +0200

    proto: comment future surface flags usage

diff --git a/spice.proto b/spice.proto
index 5eede6b..728178b 100644
--- a/spice.proto
+++ b/spice.proto
@@ -425,6 +425,8 @@ flags8 string_flags {
 };
 
 flags32 surface_flags {
+    /* Adding flags requires some caps check, since old clients only
+       treat the value as an enum and not as a flag (flag == PRIMARY) */
     PRIMARY
 };