[Mesa-dev] [PATCH V2] util/disk_cache: compress individual cache entries

Timothy Arceri tarceri at itsqueeze.com
Thu Mar 2 01:36:21 UTC 2017


This reduces the cache size for Deus Ex from ~160M to ~30M for
radeonsi.

I'm also seeing the following improvements in minimum fps in the
Shadow of Mordor benchmark:

no-cache:                    ~10fps
with-cache-no-compression:   ~15fps
with-cache-and-compression:  ~20fps

Note that the with-cache results are from the second run, after closing
and reopening the game, to avoid hits from the in-memory cache.

Since we only really care about decompression speed, I went with
Z_BEST_COMPRESSION, as suggested on IRC by Steinar H. Gunderson,
who has benchmarked decompression speeds.

V2: fix pointer increments for reading/writing cache entry
    file data.
---
 configure.ac          |   4 ++
 src/util/Makefile.am  |   2 +
 src/util/disk_cache.c | 173 +++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 156 insertions(+), 23 deletions(-)

diff --git a/configure.ac b/configure.ac
index 890a379..9fde95f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -92,20 +92,21 @@ LIBVA_REQUIRED=0.38.0
 VDPAU_REQUIRED=1.1
 WAYLAND_REQUIRED=1.11
 XCB_REQUIRED=1.9.3
 XCBDRI2_REQUIRED=1.8
 XCBGLX_REQUIRED=1.8.1
 XDAMAGE_REQUIRED=1.1
 XSHMFENCE_REQUIRED=1.1
 XVMC_REQUIRED=1.0.6
 PYTHON_MAKO_REQUIRED=0.8.0
 LIBSENSORS_REQUIRED=4.0.0
+ZLIB_REQUIRED=1.2.8
 
 dnl LLVM versions
 LLVM_REQUIRED_GALLIUM=3.3.0
 LLVM_REQUIRED_OPENCL=3.6.0
 LLVM_REQUIRED_R600=3.6.0
 LLVM_REQUIRED_RADEONSI=3.6.0
 LLVM_REQUIRED_RADV=3.9.0
 LLVM_REQUIRED_SWR=3.6.0
 
 dnl Check for progs
@@ -777,20 +778,23 @@ darwin*)
     AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=],
                    [AC_CHECK_LIB([rt], [clock_gettime], [CLOCK_LIB=-lrt],
                                  [AC_MSG_ERROR([Could not find clock_gettime])])])
     AC_SUBST([CLOCK_LIB])
     ;;
 esac
 
 dnl See if posix_memalign is available
 AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"])
 
+dnl Check for zlib
+PKG_CHECK_MODULES([ZLIB], [zlib >= $ZLIB_REQUIRED])
+
 dnl Check for pthreads
 AX_PTHREAD
 if test "x$ax_pthread_ok" = xno; then
     AC_MSG_ERROR([Building mesa on this platform requires pthreads])
 fi
 dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS
 dnl to -pthread, which causes problems if we need -lpthread to appear in
 dnl pkgconfig files.  Since Android doesn't have a pthread lib, this check
 dnl is not valid for that platform.
 if test "x$android" = xno; then
diff --git a/src/util/Makefile.am b/src/util/Makefile.am
index ae50a3b..e46d893 100644
--- a/src/util/Makefile.am
+++ b/src/util/Makefile.am
@@ -36,20 +36,22 @@ libmesautil_la_CPPFLAGS = \
 	-I$(top_srcdir)/src/mesa \
 	-I$(top_srcdir)/src/gallium/include \
 	-I$(top_srcdir)/src/gallium/auxiliary \
 	$(VISIBILITY_CFLAGS) \
 	$(MSVC2013_COMPAT_CFLAGS)
 
 libmesautil_la_SOURCES = \
 	$(MESA_UTIL_FILES) \
 	$(MESA_UTIL_GENERATED_FILES)
 
+libmesautil_la_LIBADD = $(ZLIB_LIBS)
+
 roundeven_test_LDADD = -lm
 
 check_PROGRAMS = u_atomic_test roundeven_test
 TESTS = $(check_PROGRAMS)
 
 BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
 CLEANFILES = $(BUILT_SOURCES)
 EXTRA_DIST = \
 	format_srgb.py \
 	SConscript \
diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index f8e9948..fafd329 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -30,20 +30,21 @@
 #include <stdio.h>
 #include <sys/file.h>
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
 #include <unistd.h>
 #include <fcntl.h>
 #include <pwd.h>
 #include <errno.h>
 #include <dirent.h>
+#include "zlib.h"
 
 #include "util/crc32.h"
 #include "util/u_atomic.h"
 #include "util/mesa-sha1.h"
 #include "util/ralloc.h"
 #include "main/errors.h"
 
 #include "disk_cache.h"
 
 /* Number of bits to mask off from a cache key to get an index. */
@@ -638,30 +639,106 @@ disk_cache_remove(struct disk_cache *cache, cache_key key)
       return;
    }
 
    unlink(filename);
    free(filename);
 
    if (sb.st_size)
       p_atomic_add(cache->size, - sb.st_size);
 }
 
+/* From the zlib docs:
+ *    "If the memory is available, buffers sizes on the order of 128K or 256K
+ *    bytes should be used."
+ */
+#define BUFSIZE (256 * 1024)
+
+/**
+ * Compresses a cache entry with zlib and writes the compressed stream to the
+ * file descriptor dest. All of in_data is handed to zlib in one go.
+ *
+ * Returns the number of compressed bytes written, or 0 on any failure (bad
+ * size, out of memory, zlib error or write error). filename is unused.
+ */
+static size_t
+deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
+                          char *filename)
+{
+   /* z_stream counts input in uInt; refuse sizes that would be truncated. */
+   if ((uInt) in_data_size != in_data_size)
+      return 0;
+
+   /* 256K is a lot for a thread's stack, so take the buffer from the heap. */
+   unsigned char *out = malloc(BUFSIZE);
+   if (out == NULL)
+      return 0;
+
+   /* allocate deflate state */
+   z_stream strm;
+   strm.zalloc = Z_NULL;
+   strm.zfree = Z_NULL;
+   strm.opaque = Z_NULL;
+   strm.next_in = (uint8_t *) in_data;
+   strm.avail_in = in_data_size;
+
+   if (deflateInit(&strm, Z_BEST_COMPRESSION) != Z_OK) {
+      free(out);
+      return 0;
+   }
+
+   /* Keep calling deflate(Z_FINISH) with a fresh output buffer until zlib
+    * reports the end of the stream, draining the buffer to disk each time.
+    */
+   size_t compressed_size = 0;
+   int ret;
+   do {
+      strm.avail_out = BUFSIZE;
+      strm.next_out = out;
+      ret = deflate(&strm, Z_FINISH);
+      assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+
+      size_t have = BUFSIZE - strm.avail_out;
+      for (size_t len = 0; len < have; ) {
+         ssize_t written = write(dest, out + len, have - len);
+         if (written == -1) {
+            compressed_size = 0;
+            goto done;
+         }
+         len += written;
+      }
+      compressed_size += have;
+   } while (ret == Z_OK);
+
+   /* Anything other than Z_STREAM_END means the stream is incomplete. */
+   if (ret != Z_STREAM_END)
+      compressed_size = 0;
+
+ done:
+   (void) deflateEnd(&strm);
+   free(out);
+   return compressed_size;
+}
+
+struct cache_entry_file_data {   /* header at the start of a cache file */
+   uint32_t crc32;               /* CRC32 of the uncompressed data */
+   uint32_t uncompressed_size;   /* size in bytes of the uncompressed data */
+};
+
 void
 disk_cache_put(struct disk_cache *cache,
           cache_key key,
           const void *data,
           size_t size)
 {
    int fd = -1, fd_final = -1, err, ret;
    size_t len;
    char *filename = NULL, *filename_tmp = NULL;
-   const char *p = data;
 
    filename = get_cache_file(cache, key);
    if (filename == NULL)
       goto done;
 
    /* Write to a temporary file to allow for an atomic rename to the
     * final destination filename, (to prevent any readers from seeing
     * a partially written file).
     */
    if (asprintf(&filename_tmp, "%s.tmp", filename) == -1)
@@ -706,120 +783,170 @@ disk_cache_put(struct disk_cache *cache,
     *
     * Before we do that, if the cache is too large, evict something
     * else first.
     */
    if (*cache->size + size > cache->max_size)
       evict_random_item(cache);
 
    /* Create CRC of the data and store at the start of the file. We will
     * read this when restoring the cache and use it to check for corruption.
     */
-   uint32_t crc32 = util_hash_crc32(data, size);
-   size_t crc_size = sizeof(crc32);
-   for (len = 0; len < crc_size; len += ret) {
-      ret = write(fd, ((uint8_t *) &crc32) + len, crc_size - len);
+   struct cache_entry_file_data cf_data;
+   cf_data.crc32 = util_hash_crc32(data, size);
+   cf_data.uncompressed_size = size;
+
+   size_t cf_data_size = sizeof(cf_data);
+   for (len = 0; len < cf_data_size; len += ret) {
+      ret = write(fd, ((uint8_t *) &cf_data) + len, cf_data_size - len);
       if (ret == -1) {
          unlink(filename_tmp);
          goto done;
       }
    }
 
    /* Now, finally, write out the contents to the temporary file, then
     * rename them atomically to the destination filename, and also
     * perform an atomic increment of the total cache size.
     */
-   for (len = 0; len < size; len += ret) {
-      ret = write(fd, p + len, size - len);
-      if (ret == -1) {
-         unlink(filename_tmp);
-         goto done;
-      }
+   size_t file_size = deflate_and_write_to_disk(data, size, fd, filename_tmp);
+   if (file_size == 0) {
+      unlink(filename_tmp);
+      goto done;
    }
-
    rename(filename_tmp, filename);
 
-   size += crc_size;
-   p_atomic_add(cache->size, size);
+   file_size += cf_data_size;
+   p_atomic_add(cache->size, file_size);
 
  done:
    if (fd_final != -1)
       close(fd_final);
    /* This close finally releases the flock, (now that the final dile
     * has been renamed into place and the size has been added).
     */
    if (fd != -1)
       close(fd);
    if (filename_tmp)
       free(filename_tmp);
    if (filename)
       free(filename);
 }
 
+/**
+ * Decompresses a zlib-compressed cache entry, returns true if successful.
+ *
+ * All of in_data is inflated into out_data with a single inflate() call, so
+ * out_data_size has to be the exact size of the uncompressed data.
+ */
+static bool
+inflate_cache_data(uint8_t *in_data, size_t in_data_size,
+                   uint8_t *out_data, size_t out_data_size)
+{
+   /* inflate() answers a NULL output buffer with Z_STREAM_ERROR. */
+   if (out_data == NULL)
+      return false;
+
+   /* allocate inflate state */
+   z_stream strm;
+   strm.zalloc = Z_NULL;
+   strm.zfree = Z_NULL;
+   strm.opaque = Z_NULL;
+   strm.next_in = in_data;
+   strm.avail_in = in_data_size;
+   strm.next_out = out_data;
+   strm.avail_out = out_data_size;
+
+   if (inflateInit(&strm) != Z_OK)
+      return false;
+
+   int ret = inflate(&strm, Z_NO_FLUSH);
+   assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+
+   /* One call must reach Z_STREAM_END with the output buffer exactly full;
+    * anything else means the entry is truncated or corrupt.
+    */
+   bool ok = ret == Z_STREAM_END && strm.avail_out == 0;
+
+   (void) inflateEnd(&strm);
+   return ok;
+}
+
 void *
 disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size)
 {
    int fd = -1, ret, len;
    struct stat sb;
    char *filename = NULL;
    uint8_t *data = NULL;
+   uint8_t *uncompressed_data = NULL;
 
    if (size)
       *size = 0;
 
    filename = get_cache_file(cache, key);
    if (filename == NULL)
       goto fail;
 
    fd = open(filename, O_RDONLY | O_CLOEXEC);
    if (fd == -1)
       goto fail;
 
    if (fstat(fd, &sb) == -1)
       goto fail;
 
    data = malloc(sb.st_size);
    if (data == NULL)
       goto fail;
 
    /* Load the CRC that was created when the file was written. */
-   uint32_t crc32;
-   size_t crc_size = sizeof(crc32);
-   assert(sb.st_size > crc_size);
-   for (len = 0; len < crc_size; len += ret) {
-      ret = read(fd, ((uint8_t *) &crc32) + len, crc_size - len);
+   struct cache_entry_file_data cf_data;
+   size_t cf_data_size = sizeof(cf_data);
+   assert(sb.st_size > cf_data_size);
+   for (len = 0; len < cf_data_size; len += ret) {
+      ret = read(fd, ((uint8_t *) &cf_data) + len, cf_data_size - len);
       if (ret == -1)
          goto fail;
    }
 
    /* Load the actual cache data. */
-   size_t cache_data_size = sb.st_size - crc_size;
+   size_t cache_data_size = sb.st_size - cf_data_size;
    for (len = 0; len < cache_data_size; len += ret) {
       ret = read(fd, data + len, cache_data_size - len);
       if (ret == -1)
          goto fail;
    }
 
+   /* Uncompress the cache data */
+   uncompressed_data = malloc(cf_data.uncompressed_size);
+   if (!inflate_cache_data(data, cache_data_size, uncompressed_data,
+                           cf_data.uncompressed_size))
+      goto fail;
+
    /* Check the data for corruption */
-   if (crc32 != util_hash_crc32(data, cache_data_size))
+   if (cf_data.crc32 != util_hash_crc32(uncompressed_data,
+                                        cf_data.uncompressed_size))
       goto fail;
 
+   free(data);
    free(filename);
    close(fd);
 
    if (size)
-      *size = cache_data_size;
+      *size = cf_data.uncompressed_size;
 
-   return data;
+   return uncompressed_data;
 
  fail:
    if (data)
       free(data);
+   if (uncompressed_data)
+      free(uncompressed_data);
    if (filename)
       free(filename);
    if (fd != -1)
       close(fd);
 
    return NULL;
 }
 
 void
 disk_cache_put_key(struct disk_cache *cache, cache_key key)
-- 
2.9.3



More information about the mesa-dev mailing list