[Mesa-dev] [PATCH 2/4] util/disk_cache: compress individual cache entries

Wed Mar 1 10:38:10 UTC 2017

Hi,

On 01.03.2017 07:25, Timothy Arceri wrote:
> This reduces the cache size for Deus Ex from ~160M to ~30M for
> radeonsi.
>
> I'm also seeing the following improvements in minimum fps in the
> Shadow of Mordor benchmark:
>
> no-cache:                    ~10fps
> with-cache-no-compression:   ~15fps
> with-cache-and-compression:  ~20fps
>
> Note the with cache results are from the second run after closing
> and opening the game to avoid the in-memory cache.
>
> Since we only really care about decompression I went with
> Z_BEST_COMPRESSION as suggested on irc by Steinar H. Gunderson
> who has benchmarked decompression speeds.

Did he tried liblzo instead of zlib?

It should be faster than zlib while still having fairly OK compression 
ratio.

	- Eero

> ---
>  configure.ac          |   4 ++
>  src/util/Makefile.am  |   2 +
>  src/util/disk_cache.c | 173 +++++++++++++++++++++++++++++++++++++++++++-------
>  3 files changed, 156 insertions(+), 23 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 890a379..9fde95f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -92,20 +92,21 @@ LIBVA_REQUIRED=0.38.0
>  VDPAU_REQUIRED=1.1
>  WAYLAND_REQUIRED=1.11
>  XCB_REQUIRED=1.9.3
>  XCBDRI2_REQUIRED=1.8
>  XCBGLX_REQUIRED=1.8.1
>  XDAMAGE_REQUIRED=1.1
>  XSHMFENCE_REQUIRED=1.1
>  XVMC_REQUIRED=1.0.6
>  PYTHON_MAKO_REQUIRED=0.8.0
>  LIBSENSORS_REQUIRED=4.0.0
> +ZLIB_REQUIRED=1.2.8
>
>  dnl LLVM versions
>  LLVM_REQUIRED_GALLIUM=3.3.0
>  LLVM_REQUIRED_OPENCL=3.6.0
>  LLVM_REQUIRED_R600=3.6.0
>  LLVM_REQUIRED_RADEONSI=3.6.0
>  LLVM_REQUIRED_RADV=3.9.0
>  LLVM_REQUIRED_SWR=3.6.0
>
>  dnl Check for progs
> @@ -777,20 +778,23 @@ darwin*)
>      AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=],
>                     [AC_CHECK_LIB([rt], [clock_gettime], [CLOCK_LIB=-lrt],
>                                   [AC_MSG_ERROR([Could not find clock_gettime])])])
>      AC_SUBST([CLOCK_LIB])
>      ;;
>  esac
>
>  dnl See if posix_memalign is available
>  AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"])
>
> +dnl Check for zlib
> +PKG_CHECK_MODULES([ZLIB], [zlib >= $ZLIB_REQUIRED])
> +
>  dnl Check for pthreads
>  AX_PTHREAD
>  if test "x$ax_pthread_ok" = xno; then
>      AC_MSG_ERROR([Building mesa on this platform requires pthreads])
>  fi
>  dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS
>  dnl to -pthread, which causes problems if we need -lpthread to appear in
>  dnl pkgconfig files.  Since Android doesn't have a pthread lib, this check
>  dnl is not valid for that platform.
>  if test "x$android" = xno; then
> diff --git a/src/util/Makefile.am b/src/util/Makefile.am
> index ae50a3b..e46d893 100644
> --- a/src/util/Makefile.am
> +++ b/src/util/Makefile.am
> @@ -36,20 +36,22 @@ libmesautil_la_CPPFLAGS = \
>  	-I$(top_srcdir)/src/mesa \
>  	-I$(top_srcdir)/src/gallium/include \
>  	-I$(top_srcdir)/src/gallium/auxiliary \
>  	$(VISIBILITY_CFLAGS) \
>  	$(MSVC2013_COMPAT_CFLAGS)
>
>  libmesautil_la_SOURCES = \
>  	$(MESA_UTIL_FILES) \
>  	$(MESA_UTIL_GENERATED_FILES)
>
> +libmesautil_la_LIBADD = -lz
> +
>  roundeven_test_LDADD = -lm
>
>  check_PROGRAMS = u_atomic_test roundeven_test
>  TESTS = $(check_PROGRAMS)
>
>  BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
>  CLEANFILES = $(BUILT_SOURCES)
>  EXTRA_DIST = \
>  	format_srgb.py \
>  	SConscript \
> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
> index 2a0edca..03aae02 100644
> --- a/src/util/disk_cache.c
> +++ b/src/util/disk_cache.c
> @@ -30,20 +30,21 @@
>  #include <stdio.h>
>  #include <sys/file.h>
>  #include <sys/types.h>
>  #include <sys/stat.h>
>  #include <sys/mman.h>
>  #include <unistd.h>
>  #include <fcntl.h>
>  #include <pwd.h>
>  #include <errno.h>
>  #include <dirent.h>
> +#include "zlib.h"
>
>  #include "util/crc32.h"
>  #include "util/u_atomic.h"
>  #include "util/mesa-sha1.h"
>  #include "util/ralloc.h"
>  #include "main/errors.h"
>
>  #include "disk_cache.h"
>
>  /* Number of bits to mask off from a cache key to get an index. */
> @@ -638,30 +639,106 @@ disk_cache_remove(struct disk_cache *cache, cache_key key)
>        return;
>     }
>
>     unlink(filename);
>     free(filename);
>
>     if (sb.st_size)
>        p_atomic_add(cache->size, - sb.st_size);
>  }
>
> +/* From the zlib docs:
> + *    "If the memory is available, buffers sizes on the order of 128K or 256K
> + *    bytes should be used."
> + */
> +#define BUFSIZE 256 * 1024
> +
> +/**
> + * Compresses cache entry in memeory and writes it to disk. Returns the size
> + * of the data written to disk.
> + */
> +static size_t
> +deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
> +                          char *filename)
> +{
> +   unsigned char out[BUFSIZE];
> +
> +   /* allocate deflate state */
> +   z_stream strm;
> +   strm.zalloc = Z_NULL;
> +   strm.zfree = Z_NULL;
> +   strm.opaque = Z_NULL;
> +   strm.next_in = (uint8_t *) in_data;
> +   strm.avail_in = in_data_size;
> +
> +   int ret = deflateInit(&strm, Z_BEST_COMPRESSION);
> +   if (ret != Z_OK)
> +       return 0;
> +
> +   /* compress until end of in_data */
> +   size_t compressed_size = 0;
> +   int flush;
> +   do {
> +      int remaining = in_data_size - BUFSIZE;
> +      flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH;
> +      in_data_size -= BUFSIZE;
> +
> +      /* Run deflate() on input until the output buffer is not full (which
> +       * means there is no more data to deflate).
> +       */
> +      do {
> +         strm.avail_out = BUFSIZE;
> +         strm.next_out = out;
> +
> +         ret = deflate(&strm, flush);    /* no bad return value */
> +         assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
> +
> +         size_t have = BUFSIZE - strm.avail_out;
> +         compressed_size += compressed_size + have;
> +
> +         size_t written = 0;
> +         for (size_t len = 0; len < have; len += written) {
> +            written = write(dest, out + len, have - len);
> +            if (written == -1) {
> +               (void)deflateEnd(&strm);
> +               return 0;
> +            }
> +         }
> +      } while (strm.avail_out == 0);
> +
> +      /* all input should be used */
> +      assert(strm.avail_in == 0);
> +
> +   } while (flush != Z_FINISH);
> +
> +   /* stream should be complete */
> +   assert(ret == Z_STREAM_END);
> +
> +   /* clean up and return */
> +   (void)deflateEnd(&strm);
> +   return compressed_size;
> +}
> +
> +struct cache_entry_file_data {
> +   uint32_t crc32;
> +   uint32_t uncompressed_size;
> +};
> +
>  void
>  disk_cache_put(struct disk_cache *cache,
>            cache_key key,
>            const void *data,
>            size_t size)
>  {
>     int fd = -1, fd_final = -1, err, ret;
>     size_t len;
>     char *filename = NULL, *filename_tmp = NULL;
> -   const char *p = data;
>
>     filename = get_cache_file(cache, key);
>     if (filename == NULL)
>        goto done;
>
>     /* Write to a temporary file to allow for an atomic rename to the
>      * final destination filename, (to prevent any readers from seeing
>      * a partially written file).
>      */
>     if (asprintf(&filename_tmp, "%s.tmp", filename) == -1)
> @@ -706,120 +783,170 @@ disk_cache_put(struct disk_cache *cache,
>      *
>      * Before we do that, if the cache is too large, evict something
>      * else first.
>      */
>     if (*cache->size + size > cache->max_size)
>        evict_random_item(cache);
>
>     /* Create CRC of the data and store at the start of the file. We will
>      * read this when restoring the cache and use it to check for corruption.
>      */
> -   uint32_t crc32 = util_hash_crc32(data, size);
> -   size_t crc_size = sizeof(crc32);
> -   for (len = 0; len < crc_size; len += ret) {
> -      ret = write(fd, &crc32, crc_size - len);
> +   struct cache_entry_file_data cf_data;
> +   cf_data.crc32 = util_hash_crc32(data, size);
> +   cf_data.uncompressed_size = size;
> +
> +   size_t cf_data_size = sizeof(cf_data);
> +   for (len = 0; len < cf_data_size; len += ret) {
> +      ret = write(fd, &cf_data, cf_data_size - len);
>        if (ret == -1) {
>           unlink(filename_tmp);
>           goto done;
>        }
>     }
>
>     /* Now, finally, write out the contents to the temporary file, then
>      * rename them atomically to the destination filename, and also
>      * perform an atomic increment of the total cache size.
>      */
> -   for (len = 0; len < size; len += ret) {
> -      ret = write(fd, p + len, size - len);
> -      if (ret == -1) {
> -         unlink(filename_tmp);
> -         goto done;
> -      }
> +   size_t file_size = deflate_and_write_to_disk(data, size, fd, filename_tmp);
> +   if (file_size == 0) {
> +      unlink(filename_tmp);
> +      goto done;
>     }
> -
>     rename(filename_tmp, filename);
>
> -   size += crc_size;
> -   p_atomic_add(cache->size, size);
> +   file_size += cf_data_size;
> +   p_atomic_add(cache->size, file_size);
>
>   done:
>     if (fd_final != -1)
>        close(fd_final);
>     /* This close finally releases the flock, (now that the final dile
>      * has been renamed into place and the size has been added).
>      */
>     if (fd != -1)
>        close(fd);
>     if (filename_tmp)
>        free(filename_tmp);
>     if (filename)
>        free(filename);
>  }
>
> +/**
> + * Decompresses cache entry, returns true if successful.
> + */
> +static bool
> +inflate_cache_data(uint8_t *in_data, size_t in_data_size,
> +                   uint8_t *out_data, size_t out_data_size)
> +{
> +   z_stream strm;
> +
> +   /* allocate inflate state */
> +   strm.zalloc = Z_NULL;
> +   strm.zfree = Z_NULL;
> +   strm.opaque = Z_NULL;
> +   strm.next_in = in_data;
> +   strm.avail_in = in_data_size;
> +   strm.next_out = out_data;
> +   strm.avail_out = out_data_size;
> +
> +   int ret = inflateInit(&strm);
> +   if (ret != Z_OK)
> +      return false;
> +
> +   ret = inflate(&strm, Z_NO_FLUSH);
> +   assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
> +
> +   /* Unless there was an error we should have decompressed everything in one
> +    * go as we know the uncompressed file size.
> +    */
> +   if (ret != Z_STREAM_END) {
> +      (void)inflateEnd(&strm);
> +      return false;
> +   }
> +   assert(strm.avail_out == 0);
> +
> +   /* clean up and return */
> +   (void)inflateEnd(&strm);
> +   return true;
> +}
> +
>  void *
>  disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size)
>  {
>     int fd = -1, ret, len;
>     struct stat sb;
>     char *filename = NULL;
>     uint8_t *data = NULL;
> +   uint8_t *uncompressed_data = NULL;
>
>     if (size)
>        *size = 0;
>
>     filename = get_cache_file(cache, key);
>     if (filename == NULL)
>        goto fail;
>
>     fd = open(filename, O_RDONLY | O_CLOEXEC);
>     if (fd == -1)
>        goto fail;
>
>     if (fstat(fd, &sb) == -1)
>        goto fail;
>
>     data = malloc(sb.st_size);
>     if (data == NULL)
>        goto fail;
>
>     /* Load the CRC that was created when the file was written. */
> -   uint32_t crc32;
> -   size_t crc_size = sizeof(crc32);
> -   assert(sb.st_size > crc_size);
> -   for (len = 0; len < crc_size; len += ret) {
> -      ret = read(fd, &crc32 + len, crc_size - len);
> +   struct cache_entry_file_data cf_data;
> +   size_t cf_data_size = sizeof(cf_data);
> +   assert(sb.st_size > cf_data_size);
> +   for (len = 0; len < cf_data_size; len += ret) {
> +      ret = read(fd, &cf_data + len, cf_data_size - len);
>        if (ret == -1)
>           goto fail;
>     }
>
>     /* Load the actual cache data. */
> -   size_t cache_data_size = sb.st_size - crc_size;
> +   size_t cache_data_size = sb.st_size - cf_data_size;
>     for (len = 0; len < cache_data_size; len += ret) {
>        ret = read(fd, data + len, cache_data_size - len);
>        if (ret == -1)
>           goto fail;
>     }
>
> +   /* Uncompress the cache data */
> +   uncompressed_data = malloc(cf_data.uncompressed_size);
> +   if (!inflate_cache_data(data, cache_data_size, uncompressed_data,
> +                           cf_data.uncompressed_size))
> +      goto fail;
> +
>     /* Check the data for corruption */
> -   if (crc32 != util_hash_crc32(data, cache_data_size))
> +   if (cf_data.crc32 != util_hash_crc32(uncompressed_data,
> +                                        cf_data.uncompressed_size))
>        goto fail;
>
> +   free(data);
>     free(filename);
>     close(fd);
>
>     if (size)
> -      *size = cache_data_size;
> +      *size = cf_data.uncompressed_size;
>
> -   return data;
> +   return uncompressed_data;
>
>   fail:
>     if (data)
>        free(data);
> +   if (uncompressed_data)
> +      free(uncompressed_data);
>     if (filename)
>        free(filename);
>     if (fd != -1)
>        close(fd);
>
>     return NULL;
>  }
>
>  void
>  disk_cache_put_key(struct disk_cache *cache, cache_key key)
>