[Mesa-dev] [PATCH 2/4] util/disk_cache: compress individual cache entries
Eero Tamminen
eero.t.tamminen at intel.com
Wed Mar 1 10:38:10 UTC 2017
Hi,
On 01.03.2017 07:25, Timothy Arceri wrote:
> This reduces the cache size for Deus Ex from ~160M to ~30M for
> radeonsi.
>
> I'm also seeing the following improvements in minimum fps in the
> Shadow of Mordor benchmark:
>
> no-cache: ~10fps
> with-cache-no-compression: ~15fps
> with-cache-and-compression: ~20fps
>
> Note the with-cache results are from the second run after closing
> and opening the game, to avoid the in-memory cache.
>
> Since we only really care about decompression, I went with
> Z_BEST_COMPRESSION, as suggested on IRC by Steinar H. Gunderson,
> who has benchmarked decompression speeds.
Did he try liblzo instead of zlib?
It should be faster than zlib while still having a fairly OK compression
ratio.
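
For illustration, roughly something like this (untested sketch using
minilzo's lzo1x_1_compress(); the helper name and the buffer handling
are just an example, not a concrete proposal for the patch):

   #include "minilzo.h"

   /* Sketch only: compress one cache entry with LZO1X-1.  The caller
    * must size "out" for the LZO worst case, which the docs give as
    * in_data_size + in_data_size / 16 + 64 + 3.  The static wrkmem
    * keeps the example short but is not thread-safe.
    */
   static size_t
   lzo_compress_entry(const void *in_data, size_t in_data_size,
                      unsigned char *out, size_t out_size)
   {
      static unsigned char wrkmem[LZO1X_1_MEM_COMPRESS];
      lzo_uint out_len = out_size;

      if (lzo_init() != LZO_E_OK)
         return 0;

      if (lzo1x_1_compress((const unsigned char *) in_data, in_data_size,
                           out, &out_len, wrkmem) != LZO_E_OK)
         return 0;

      return out_len;
   }
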
- Eero
> ---
> configure.ac | 4 ++
> src/util/Makefile.am | 2 +
> src/util/disk_cache.c | 173 +++++++++++++++++++++++++++++++++++++++++++-------
> 3 files changed, 156 insertions(+), 23 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 890a379..9fde95f 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -92,20 +92,21 @@ LIBVA_REQUIRED=0.38.0
> VDPAU_REQUIRED=1.1
> WAYLAND_REQUIRED=1.11
> XCB_REQUIRED=1.9.3
> XCBDRI2_REQUIRED=1.8
> XCBGLX_REQUIRED=1.8.1
> XDAMAGE_REQUIRED=1.1
> XSHMFENCE_REQUIRED=1.1
> XVMC_REQUIRED=1.0.6
> PYTHON_MAKO_REQUIRED=0.8.0
> LIBSENSORS_REQUIRED=4.0.0
> +ZLIB_REQUIRED=1.2.8
>
> dnl LLVM versions
> LLVM_REQUIRED_GALLIUM=3.3.0
> LLVM_REQUIRED_OPENCL=3.6.0
> LLVM_REQUIRED_R600=3.6.0
> LLVM_REQUIRED_RADEONSI=3.6.0
> LLVM_REQUIRED_RADV=3.9.0
> LLVM_REQUIRED_SWR=3.6.0
>
> dnl Check for progs
> @@ -777,20 +778,23 @@ darwin*)
> AC_CHECK_FUNCS([clock_gettime], [CLOCK_LIB=],
> [AC_CHECK_LIB([rt], [clock_gettime], [CLOCK_LIB=-lrt],
> [AC_MSG_ERROR([Could not find clock_gettime])])])
> AC_SUBST([CLOCK_LIB])
> ;;
> esac
>
> dnl See if posix_memalign is available
> AC_CHECK_FUNC([posix_memalign], [DEFINES="$DEFINES -DHAVE_POSIX_MEMALIGN"])
>
> +dnl Check for zlib
> +PKG_CHECK_MODULES([ZLIB], [zlib >= $ZLIB_REQUIRED])
> +
> dnl Check for pthreads
> AX_PTHREAD
> if test "x$ax_pthread_ok" = xno; then
> AC_MSG_ERROR([Building mesa on this platform requires pthreads])
> fi
> dnl AX_PTHREADS leaves PTHREAD_LIBS empty for gcc and sets PTHREAD_CFLAGS
> dnl to -pthread, which causes problems if we need -lpthread to appear in
> dnl pkgconfig files. Since Android doesn't have a pthread lib, this check
> dnl is not valid for that platform.
> if test "x$android" = xno; then
> diff --git a/src/util/Makefile.am b/src/util/Makefile.am
> index ae50a3b..e46d893 100644
> --- a/src/util/Makefile.am
> +++ b/src/util/Makefile.am
> @@ -36,20 +36,22 @@ libmesautil_la_CPPFLAGS = \
> -I$(top_srcdir)/src/mesa \
> -I$(top_srcdir)/src/gallium/include \
> -I$(top_srcdir)/src/gallium/auxiliary \
> $(VISIBILITY_CFLAGS) \
> $(MSVC2013_COMPAT_CFLAGS)
>
> libmesautil_la_SOURCES = \
> $(MESA_UTIL_FILES) \
> $(MESA_UTIL_GENERATED_FILES)
>
> +libmesautil_la_LIBADD = -lz
> +
> roundeven_test_LDADD = -lm
>
> check_PROGRAMS = u_atomic_test roundeven_test
> TESTS = $(check_PROGRAMS)
>
> BUILT_SOURCES = $(MESA_UTIL_GENERATED_FILES)
> CLEANFILES = $(BUILT_SOURCES)
> EXTRA_DIST = \
> format_srgb.py \
> SConscript \
> diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
> index 2a0edca..03aae02 100644
> --- a/src/util/disk_cache.c
> +++ b/src/util/disk_cache.c
> @@ -30,20 +30,21 @@
> #include <stdio.h>
> #include <sys/file.h>
> #include <sys/types.h>
> #include <sys/stat.h>
> #include <sys/mman.h>
> #include <unistd.h>
> #include <fcntl.h>
> #include <pwd.h>
> #include <errno.h>
> #include <dirent.h>
> +#include "zlib.h"
>
> #include "util/crc32.h"
> #include "util/u_atomic.h"
> #include "util/mesa-sha1.h"
> #include "util/ralloc.h"
> #include "main/errors.h"
>
> #include "disk_cache.h"
>
> /* Number of bits to mask off from a cache key to get an index. */
> @@ -638,30 +639,106 @@ disk_cache_remove(struct disk_cache *cache, cache_key key)
> return;
> }
>
> unlink(filename);
> free(filename);
>
> if (sb.st_size)
> p_atomic_add(cache->size, - sb.st_size);
> }
>
> +/* From the zlib docs:
> + * "If the memory is available, buffers sizes on the order of 128K or 256K
> + * bytes should be used."
> + */
> +#define BUFSIZE (256 * 1024)
> +
> +/**
> + * Compresses cache entry in memory and writes it to disk. Returns the size
> + * of the data written to disk.
> + */
> +static size_t
> +deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
> + char *filename)
> +{
> + unsigned char out[BUFSIZE];
> +
> + /* allocate deflate state */
> + z_stream strm;
> + strm.zalloc = Z_NULL;
> + strm.zfree = Z_NULL;
> + strm.opaque = Z_NULL;
> + strm.next_in = (uint8_t *) in_data;
> + strm.avail_in = in_data_size;
> +
> + int ret = deflateInit(&strm, Z_BEST_COMPRESSION);
> + if (ret != Z_OK)
> + return 0;
> +
> + /* compress until end of in_data */
> + size_t compressed_size = 0;
> + int flush;
> + do {
> + /* Avoid unsigned underflow once less than BUFSIZE of input remains. */
> + flush = in_data_size > BUFSIZE ? Z_NO_FLUSH : Z_FINISH;
> + in_data_size = in_data_size > BUFSIZE ? in_data_size - BUFSIZE : 0;
> +
> + /* Run deflate() on input until the output buffer is not full (which
> + * means there is no more data to deflate).
> + */
> + do {
> + strm.avail_out = BUFSIZE;
> + strm.next_out = out;
> +
> + ret = deflate(&strm, flush); /* no bad return value */
> + assert(ret != Z_STREAM_ERROR); /* state not clobbered */
> +
> + size_t have = BUFSIZE - strm.avail_out;
> + compressed_size += have;
> +
> + ssize_t written = 0;
> + for (size_t len = 0; len < have; len += written) {
> + written = write(dest, out + len, have - len);
> + if (written == -1) {
> + (void)deflateEnd(&strm);
> + return 0;
> + }
> + }
> + } while (strm.avail_out == 0);
> +
> + /* all input should be used */
> + assert(strm.avail_in == 0);
> +
> + } while (flush != Z_FINISH);
> +
> + /* stream should be complete */
> + assert(ret == Z_STREAM_END);
> +
> + /* clean up and return */
> + (void)deflateEnd(&strm);
> + return compressed_size;
> +}
> +
> +struct cache_entry_file_data {
> + uint32_t crc32;
> + uint32_t uncompressed_size;
> +};
> +
> void
> disk_cache_put(struct disk_cache *cache,
> cache_key key,
> const void *data,
> size_t size)
> {
> int fd = -1, fd_final = -1, err, ret;
> size_t len;
> char *filename = NULL, *filename_tmp = NULL;
> - const char *p = data;
>
> filename = get_cache_file(cache, key);
> if (filename == NULL)
> goto done;
>
> /* Write to a temporary file to allow for an atomic rename to the
> * final destination filename, (to prevent any readers from seeing
> * a partially written file).
> */
> if (asprintf(&filename_tmp, "%s.tmp", filename) == -1)
> @@ -706,120 +783,170 @@ disk_cache_put(struct disk_cache *cache,
> *
> * Before we do that, if the cache is too large, evict something
> * else first.
> */
> if (*cache->size + size > cache->max_size)
> evict_random_item(cache);
>
> /* Create CRC of the data and store at the start of the file. We will
> * read this when restoring the cache and use it to check for corruption.
> */
> - uint32_t crc32 = util_hash_crc32(data, size);
> - size_t crc_size = sizeof(crc32);
> - for (len = 0; len < crc_size; len += ret) {
> - ret = write(fd, &crc32, crc_size - len);
> + struct cache_entry_file_data cf_data;
> + cf_data.crc32 = util_hash_crc32(data, size);
> + cf_data.uncompressed_size = size;
> +
> + size_t cf_data_size = sizeof(cf_data);
> + for (len = 0; len < cf_data_size; len += ret) {
> + ret = write(fd, (uint8_t *) &cf_data + len, cf_data_size - len);
> if (ret == -1) {
> unlink(filename_tmp);
> goto done;
> }
> }
>
> /* Now, finally, write out the contents to the temporary file, then
> * rename them atomically to the destination filename, and also
> * perform an atomic increment of the total cache size.
> */
> - for (len = 0; len < size; len += ret) {
> - ret = write(fd, p + len, size - len);
> - if (ret == -1) {
> - unlink(filename_tmp);
> - goto done;
> - }
> + size_t file_size = deflate_and_write_to_disk(data, size, fd, filename_tmp);
> + if (file_size == 0) {
> + unlink(filename_tmp);
> + goto done;
> }
> -
> rename(filename_tmp, filename);
>
> - size += crc_size;
> - p_atomic_add(cache->size, size);
> + file_size += cf_data_size;
> + p_atomic_add(cache->size, file_size);
>
> done:
> if (fd_final != -1)
> close(fd_final);
> /* This close finally releases the flock, (now that the final file
> * has been renamed into place and the size has been added).
> */
> if (fd != -1)
> close(fd);
> if (filename_tmp)
> free(filename_tmp);
> if (filename)
> free(filename);
> }
>
> +/**
> + * Decompresses cache entry, returns true if successful.
> + */
> +static bool
> +inflate_cache_data(uint8_t *in_data, size_t in_data_size,
> + uint8_t *out_data, size_t out_data_size)
> +{
> + z_stream strm;
> +
> + /* allocate inflate state */
> + strm.zalloc = Z_NULL;
> + strm.zfree = Z_NULL;
> + strm.opaque = Z_NULL;
> + strm.next_in = in_data;
> + strm.avail_in = in_data_size;
> + strm.next_out = out_data;
> + strm.avail_out = out_data_size;
> +
> + int ret = inflateInit(&strm);
> + if (ret != Z_OK)
> + return false;
> +
> + ret = inflate(&strm, Z_NO_FLUSH);
> + assert(ret != Z_STREAM_ERROR); /* state not clobbered */
> +
> + /* Unless there was an error we should have decompressed everything in one
> + * go as we know the uncompressed file size.
> + */
> + if (ret != Z_STREAM_END) {
> + (void)inflateEnd(&strm);
> + return false;
> + }
> + assert(strm.avail_out == 0);
> +
> + /* clean up and return */
> + (void)inflateEnd(&strm);
> + return true;
> +}
> +
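
Side note: since the uncompressed size is stored in cache_entry_file_data,
the reader can indeed inflate everything in a single call. A tiny
standalone round-trip sketch of that idea (untested, using zlib's one-shot
compress2()/uncompress() helpers instead of the stream API above, just to
keep it short):

   #include <assert.h>
   #include <stdlib.h>
   #include <string.h>
   #include <zlib.h>

   int
   main(void)
   {
      const char payload[] = "stand-in for a shader binary, repeat repeat";
      uLong src_len = sizeof(payload);

      /* compressBound() gives the worst-case compressed size. */
      uLongf dst_len = compressBound(src_len);
      Bytef *dst = malloc(dst_len);

      int ret = compress2(dst, &dst_len, (const Bytef *) payload, src_len,
                          Z_BEST_COMPRESSION);
      assert(ret == Z_OK);

      /* The reader "knows" src_len, just like the cache stores it in
       * cache_entry_file_data, so one call with a right-sized buffer
       * is enough.
       */
      uLongf out_len = src_len;
      Bytef *out = malloc(out_len);
      ret = uncompress(out, &out_len, dst, dst_len);
      assert(ret == Z_OK && out_len == src_len);
      assert(memcmp(out, payload, src_len) == 0);

      free(dst);
      free(out);
      return 0;
   }
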
> void *
> disk_cache_get(struct disk_cache *cache, cache_key key, size_t *size)
> {
> int fd = -1, ret, len;
> struct stat sb;
> char *filename = NULL;
> uint8_t *data = NULL;
> + uint8_t *uncompressed_data = NULL;
>
> if (size)
> *size = 0;
>
> filename = get_cache_file(cache, key);
> if (filename == NULL)
> goto fail;
>
> fd = open(filename, O_RDONLY | O_CLOEXEC);
> if (fd == -1)
> goto fail;
>
> if (fstat(fd, &sb) == -1)
> goto fail;
>
> data = malloc(sb.st_size);
> if (data == NULL)
> goto fail;
>
> /* Load the CRC that was created when the file was written. */
> - uint32_t crc32;
> - size_t crc_size = sizeof(crc32);
> - assert(sb.st_size > crc_size);
> - for (len = 0; len < crc_size; len += ret) {
> - ret = read(fd, &crc32 + len, crc_size - len);
> + struct cache_entry_file_data cf_data;
> + size_t cf_data_size = sizeof(cf_data);
> + assert(sb.st_size > cf_data_size);
> + for (len = 0; len < cf_data_size; len += ret) {
> + ret = read(fd, (uint8_t *) &cf_data + len, cf_data_size - len);
> if (ret == -1)
> goto fail;
> }
>
> /* Load the actual cache data. */
> - size_t cache_data_size = sb.st_size - crc_size;
> + size_t cache_data_size = sb.st_size - cf_data_size;
> for (len = 0; len < cache_data_size; len += ret) {
> ret = read(fd, data + len, cache_data_size - len);
> if (ret == -1)
> goto fail;
> }
>
> + /* Uncompress the cache data */
> + uncompressed_data = malloc(cf_data.uncompressed_size);
> + if (uncompressed_data == NULL)
> + goto fail;
> + if (!inflate_cache_data(data, cache_data_size, uncompressed_data,
> + cf_data.uncompressed_size))
> + goto fail;
> +
> /* Check the data for corruption */
> - if (crc32 != util_hash_crc32(data, cache_data_size))
> + if (cf_data.crc32 != util_hash_crc32(uncompressed_data,
> + cf_data.uncompressed_size))
> goto fail;
>
> + free(data);
> free(filename);
> close(fd);
>
> if (size)
> - *size = cache_data_size;
> + *size = cf_data.uncompressed_size;
>
> - return data;
> + return uncompressed_data;
>
> fail:
> if (data)
> free(data);
> + if (uncompressed_data)
> + free(uncompressed_data);
> if (filename)
> free(filename);
> if (fd != -1)
> close(fd);
>
> return NULL;
> }
>
> void
> disk_cache_put_key(struct disk_cache *cache, cache_key key)
>