Mesa (master): disk_cache: create new helper for writing cache items to disk

Fri Sep 11 06:17:26 UTC 2020

Module: Mesa
Branch: master
Commit: 593ef166aed12114d98afae81fcf75c363dc4582
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=593ef166aed12114d98afae81fcf75c363dc4582

Author: Timothy Arceri <tarceri at itsqueeze.com>
Date:   Wed Aug  5 14:21:24 2020 +1000

disk_cache: create new helper for writing cache items to disk

This pulls out the cache item writing code from cache_put() into
a new helper. In this patch we also move various functions called
by this code into the new disk_cache_os.c file.

Reviewed-by: Bas Nieuwenhuizen <bas at basnieuwenhuizen.nl>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6197>

---

 src/util/disk_cache.c    | 330 +----------------------------------------------
 src/util/disk_cache_os.c | 296 +++++++++++++++++++++++++++++++++++++++++-
 src/util/disk_cache_os.h |  26 ++++
 3 files changed, 324 insertions(+), 328 deletions(-)

diff --git a/src/util/disk_cache.c b/src/util/disk_cache.c
index a6f6f0590ed..577c7b99a77 100644
--- a/src/util/disk_cache.c
+++ b/src/util/disk_cache.c
@@ -67,58 +67,6 @@
  */
 #define CACHE_VERSION 1
 
-/* 3 is the recomended level, with 22 as the absolute maximum */
-#define ZSTD_COMPRESSION_LEVEL 3
-
-struct disk_cache_put_job {
-   struct util_queue_fence fence;
-
-   struct disk_cache *cache;
-
-   cache_key key;
-
-   /* Copy of cache data to be compressed and written. */
-   void *data;
-
-   /* Size of data to be compressed and written. */
-   size_t size;
-
-   struct cache_item_metadata cache_item_metadata;
-};
-
-/* Create a directory named 'path' if it does not already exist.
- *
- * Returns: 0 if path already exists as a directory or if created.
- *         -1 in all other cases.
- */
-static int
-mkdir_if_needed(const char *path)
-{
-   struct stat sb;
-
-   /* If the path exists already, then our work is done if it's a
-    * directory, but it's an error if it is not.
-    */
-   if (stat(path, &sb) == 0) {
-      if (S_ISDIR(sb.st_mode)) {
-         return 0;
-      } else {
-         fprintf(stderr, "Cannot use %s for shader cache (not a directory)"
-                         "---disabling.\n", path);
-         return -1;
-      }
-   }
-
-   int ret = mkdir(path, 0755);
-   if (ret == 0 || (ret == -1 && errno == EEXIST))
-     return 0;
-
-   fprintf(stderr, "Failed to create %s for shader cache (%s)---disabling.\n",
-           path, strerror(errno));
-
-   return -1;
-}
-
 #define DRV_KEY_CPY(_dst, _src, _src_size) \
 do {                                       \
    memcpy(_dst, _src, _src_size);          \
@@ -298,25 +246,6 @@ get_cache_file(struct disk_cache *cache, const cache_key key)
    return filename;
 }
 
-/* Create the directory that will be needed for the cache file for \key.
- *
- * Obviously, the implementation here must closely match
- * _get_cache_file above.
-*/
-static void
-make_cache_file_directory(struct disk_cache *cache, const cache_key key)
-{
-   char *dir;
-   char buf[41];
-
-   _mesa_sha1_format(buf, key);
-   if (asprintf(&dir, "%s/%c%c", cache->path, buf[0], buf[1]) == -1)
-      return;
-
-   mkdir_if_needed(dir);
-   free(dir);
-}
-
 void
 disk_cache_remove(struct disk_cache *cache, const cache_key key)
 {
@@ -354,119 +283,6 @@ read_all(int fd, void *buf, size_t count)
    return done;
 }
 
-static ssize_t
-write_all(int fd, const void *buf, size_t count)
-{
-   const char *out = buf;
-   ssize_t written;
-   size_t done;
-
-   for (done = 0; done < count; done += written) {
-      written = write(fd, out + done, count - done);
-      if (written == -1)
-         return -1;
-   }
-   return done;
-}
-
-/* From the zlib docs:
- *    "If the memory is available, buffers sizes on the order of 128K or 256K
- *    bytes should be used."
- */
-#define BUFSIZE 256 * 1024
-
-/**
- * Compresses cache entry in memory and writes it to disk. Returns the size
- * of the data written to disk.
- */
-static size_t
-deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
-                          const char *filename)
-{
-#ifdef HAVE_ZSTD
-   /* from the zstd docs (https://facebook.github.io/zstd/zstd_manual.html):
-    * compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
-    */
-   size_t out_size = ZSTD_compressBound(in_data_size);
-   void * out = malloc(out_size);
-
-   size_t ret = ZSTD_compress(out, out_size, in_data, in_data_size,
-                              ZSTD_COMPRESSION_LEVEL);
-   if (ZSTD_isError(ret)) {
-      free(out);
-      return 0;
-   }
-   ssize_t written = write_all(dest, out, ret);
-   if (written == -1) {
-      free(out);
-      return 0;
-   }
-   free(out);
-   return ret;
-#else
-   unsigned char *out;
-
-   /* allocate deflate state */
-   z_stream strm;
-   strm.zalloc = Z_NULL;
-   strm.zfree = Z_NULL;
-   strm.opaque = Z_NULL;
-   strm.next_in = (uint8_t *) in_data;
-   strm.avail_in = in_data_size;
-
-   int ret = deflateInit(&strm, Z_BEST_COMPRESSION);
-   if (ret != Z_OK)
-       return 0;
-
-   /* compress until end of in_data */
-   size_t compressed_size = 0;
-   int flush;
-
-   out = malloc(BUFSIZE * sizeof(unsigned char));
-   if (out == NULL)
-      return 0;
-
-   do {
-      int remaining = in_data_size - BUFSIZE;
-      flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH;
-      in_data_size -= BUFSIZE;
-
-      /* Run deflate() on input until the output buffer is not full (which
-       * means there is no more data to deflate).
-       */
-      do {
-         strm.avail_out = BUFSIZE;
-         strm.next_out = out;
-
-         ret = deflate(&strm, flush);    /* no bad return value */
-         assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
-
-         size_t have = BUFSIZE - strm.avail_out;
-         compressed_size += have;
-
-         ssize_t written = write_all(dest, out, have);
-         if (written == -1) {
-            (void)deflateEnd(&strm);
-            free(out);
-            return 0;
-         }
-      } while (strm.avail_out == 0);
-
-      /* all input should be used */
-      assert(strm.avail_in == 0);
-
-   } while (flush != Z_FINISH);
-
-   /* stream should be complete */
-   assert(ret == Z_STREAM_END);
-
-   /* clean up and return */
-   (void)deflateEnd(&strm);
-   free(out);
-   return compressed_size;
-# endif
-}
-
 static struct disk_cache_put_job *
 create_put_job(struct disk_cache *cache, const cache_key key,
                const void *data, size_t size,
@@ -523,19 +339,13 @@ destroy_put_job(void *job, int thread_index)
    }
 }
 
-struct cache_entry_file_data {
-   uint32_t crc32;
-   uint32_t uncompressed_size;
-};
-
 static void
 cache_put(void *job, int thread_index)
 {
    assert(job);
 
-   int fd = -1, fd_final = -1, err, ret;
    unsigned i = 0;
-   char *filename = NULL, *filename_tmp = NULL;
+   char *filename = NULL;
    struct disk_cache_put_job *dc_job = (struct disk_cache_put_job *) job;
 
    filename = get_cache_file(dc_job->cache, dc_job->key);
@@ -549,102 +359,6 @@ cache_put(void *job, int thread_index)
       i++;
    }
 
-   /* Write to a temporary file to allow for an atomic rename to the
-    * final destination filename, (to prevent any readers from seeing
-    * a partially written file).
-    */
-   if (asprintf(&filename_tmp, "%s.tmp", filename) == -1)
-      goto done;
-
-   fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
-
-   /* Make the two-character subdirectory within the cache as needed. */
-   if (fd == -1) {
-      if (errno != ENOENT)
-         goto done;
-
-      make_cache_file_directory(dc_job->cache, dc_job->key);
-
-      fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
-      if (fd == -1)
-         goto done;
-   }
-
-   /* With the temporary file open, we take an exclusive flock on
-    * it. If the flock fails, then another process still has the file
-    * open with the flock held. So just let that file be responsible
-    * for writing the file.
-    */
-#ifdef HAVE_FLOCK
-   err = flock(fd, LOCK_EX | LOCK_NB);
-#else
-   struct flock lock = {
-      .l_start = 0,
-      .l_len = 0, /* entire file */
-      .l_type = F_WRLCK,
-      .l_whence = SEEK_SET
-   };
-   err = fcntl(fd, F_SETLK, &lock);
-#endif
-   if (err == -1)
-      goto done;
-
-   /* Now that we have the lock on the open temporary file, we can
-    * check to see if the destination file already exists. If so,
-    * another process won the race between when we saw that the file
-    * didn't exist and now. In this case, we don't do anything more,
-    * (to ensure the size accounting of the cache doesn't get off).
-    */
-   fd_final = open(filename, O_RDONLY | O_CLOEXEC);
-   if (fd_final != -1) {
-      unlink(filename_tmp);
-      goto done;
-   }
-
-   /* OK, we're now on the hook to write out a file that we know is
-    * not in the cache, and is also not being written out to the cache
-    * by some other process.
-    */
-
-   /* Write the driver_keys_blob, this can be used find information about the
-    * mesa version that produced the entry or deal with hash collisions,
-    * should that ever become a real problem.
-    */
-   ret = write_all(fd, dc_job->cache->driver_keys_blob,
-                   dc_job->cache->driver_keys_blob_size);
-   if (ret == -1) {
-      unlink(filename_tmp);
-      goto done;
-   }
-
-   /* Write the cache item metadata. This data can be used to deal with
-    * hash collisions, as well as providing useful information to 3rd party
-    * tools reading the cache files.
-    */
-   ret = write_all(fd, &dc_job->cache_item_metadata.type,
-                   sizeof(uint32_t));
-   if (ret == -1) {
-      unlink(filename_tmp);
-      goto done;
-   }
-
-   if (dc_job->cache_item_metadata.type == CACHE_ITEM_TYPE_GLSL) {
-      ret = write_all(fd, &dc_job->cache_item_metadata.num_keys,
-                      sizeof(uint32_t));
-      if (ret == -1) {
-         unlink(filename_tmp);
-         goto done;
-      }
-
-      ret = write_all(fd, dc_job->cache_item_metadata.keys[0],
-                      dc_job->cache_item_metadata.num_keys *
-                      sizeof(cache_key));
-      if (ret == -1) {
-         unlink(filename_tmp);
-         goto done;
-      }
-   }
-
    /* Create CRC of the data. We will read this when restoring the cache and
     * use it to check for corruption.
     */
@@ -652,47 +366,9 @@ cache_put(void *job, int thread_index)
    cf_data.crc32 = util_hash_crc32(dc_job->data, dc_job->size);
    cf_data.uncompressed_size = dc_job->size;
 
-   size_t cf_data_size = sizeof(cf_data);
-   ret = write_all(fd, &cf_data, cf_data_size);
-   if (ret == -1) {
-      unlink(filename_tmp);
-      goto done;
-   }
+   disk_cache_write_item_to_disk(dc_job, &cf_data, filename);
 
-   /* Now, finally, write out the contents to the temporary file, then
-    * rename them atomically to the destination filename, and also
-    * perform an atomic increment of the total cache size.
-    */
-   size_t file_size = deflate_and_write_to_disk(dc_job->data, dc_job->size,
-                                                fd, filename_tmp);
-   if (file_size == 0) {
-      unlink(filename_tmp);
-      goto done;
-   }
-   ret = rename(filename_tmp, filename);
-   if (ret == -1) {
-      unlink(filename_tmp);
-      goto done;
-   }
-
-   struct stat sb;
-   if (stat(filename, &sb) == -1) {
-      /* Something went wrong remove the file */
-      unlink(filename);
-      goto done;
-   }
-
-   p_atomic_add(dc_job->cache->size, sb.st_blocks * 512);
-
- done:
-   if (fd_final != -1)
-      close(fd_final);
-   /* This close finally releases the flock, (now that the final file
-    * has been renamed into place and the size has been added).
-    */
-   if (fd != -1)
-      close(fd);
-   free(filename_tmp);
+done:
    free(filename);
 }
 
diff --git a/src/util/disk_cache_os.c b/src/util/disk_cache_os.c
index 3a146125fbb..a6c7504f08e 100644
--- a/src/util/disk_cache_os.c
+++ b/src/util/disk_cache_os.c
@@ -23,6 +23,120 @@
 
 #ifdef ENABLE_SHADER_CACHE
 
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "zlib.h"
+
+#ifdef HAVE_ZSTD
+#include "zstd.h"
+#endif
+
+/* 3 is the recomended level, with 22 as the absolute maximum */
+#define ZSTD_COMPRESSION_LEVEL 3
+
+/* From the zlib docs:
+ *    "If the memory is available, buffers sizes on the order of 128K or 256K
+ *    bytes should be used."
+ */
+#define BUFSIZE 256 * 1024
+
+static ssize_t
+write_all(int fd, const void *buf, size_t count);
+
+/**
+ * Compresses cache entry in memory and writes it to disk. Returns the size
+ * of the data written to disk.
+ */
+static size_t
+deflate_and_write_to_disk(const void *in_data, size_t in_data_size, int dest,
+                          const char *filename)
+{
+#ifdef HAVE_ZSTD
+   /* from the zstd docs (https://facebook.github.io/zstd/zstd_manual.html):
+    * compression runs faster if `dstCapacity` >= `ZSTD_compressBound(srcSize)`.
+    */
+   size_t out_size = ZSTD_compressBound(in_data_size);
+   void * out = malloc(out_size);
+
+   size_t ret = ZSTD_compress(out, out_size, in_data, in_data_size,
+                              ZSTD_COMPRESSION_LEVEL);
+   if (ZSTD_isError(ret)) {
+      free(out);
+      return 0;
+   }
+   ssize_t written = write_all(dest, out, ret);
+   if (written == -1) {
+      free(out);
+      return 0;
+   }
+   free(out);
+   return ret;
+#else
+   unsigned char *out;
+
+   /* allocate deflate state */
+   z_stream strm;
+   strm.zalloc = Z_NULL;
+   strm.zfree = Z_NULL;
+   strm.opaque = Z_NULL;
+   strm.next_in = (uint8_t *) in_data;
+   strm.avail_in = in_data_size;
+
+   int ret = deflateInit(&strm, Z_BEST_COMPRESSION);
+   if (ret != Z_OK)
+       return 0;
+
+   /* compress until end of in_data */
+   size_t compressed_size = 0;
+   int flush;
+
+   out = malloc(BUFSIZE * sizeof(unsigned char));
+   if (out == NULL)
+      return 0;
+
+   do {
+      int remaining = in_data_size - BUFSIZE;
+      flush = remaining > 0 ? Z_NO_FLUSH : Z_FINISH;
+      in_data_size -= BUFSIZE;
+
+      /* Run deflate() on input until the output buffer is not full (which
+       * means there is no more data to deflate).
+       */
+      do {
+         strm.avail_out = BUFSIZE;
+         strm.next_out = out;
+
+         ret = deflate(&strm, flush);    /* no bad return value */
+         assert(ret != Z_STREAM_ERROR);  /* state not clobbered */
+
+         size_t have = BUFSIZE - strm.avail_out;
+         compressed_size += have;
+
+         ssize_t written = write_all(dest, out, have);
+         if (written == -1) {
+            (void)deflateEnd(&strm);
+            free(out);
+            return 0;
+         }
+      } while (strm.avail_out == 0);
+
+      /* all input should be used */
+      assert(strm.avail_in == 0);
+
+   } while (flush != Z_FINISH);
+
+   /* stream should be complete */
+   assert(ret == Z_STREAM_END);
+
+   /* clean up and return */
+   (void)deflateEnd(&strm);
+   free(out);
+   return compressed_size;
+# endif
+}
+
 #if DETECT_OS_WINDOWS
 /* TODO: implement disk cache support on windows */
 
@@ -33,7 +147,6 @@
 #include <inttypes.h>
 #include <pwd.h>
 #include <stdio.h>
-#include <stdlib.h>
 #include <string.h>
 #include <sys/file.h>
 #include <sys/mman.h>
@@ -246,6 +359,40 @@ is_two_character_sub_directory(const char *path, const struct stat *sb,
    return true;
 }
 
+/* Create the directory that will be needed for the cache file for \key.
+ *
+ * Obviously, the implementation here must closely match
+ * _get_cache_file above.
+*/
+static void
+make_cache_file_directory(struct disk_cache *cache, const cache_key key)
+{
+   char *dir;
+   char buf[41];
+
+   _mesa_sha1_format(buf, key);
+   if (asprintf(&dir, "%s/%c%c", cache->path, buf[0], buf[1]) == -1)
+      return;
+
+   mkdir_if_needed(dir);
+   free(dir);
+}
+
+static ssize_t
+write_all(int fd, const void *buf, size_t count)
+{
+   const char *out = buf;
+   ssize_t written;
+   size_t done;
+
+   for (done = 0; done < count; done += written) {
+      written = write(fd, out + done, count - done);
+      if (written == -1)
+         return -1;
+   }
+   return done;
+}
+
 /* Evict least recently used cache item */
 void
 disk_cache_evict_lru_item(struct disk_cache *cache)
@@ -291,6 +438,153 @@ disk_cache_evict_lru_item(struct disk_cache *cache)
       p_atomic_add(cache->size, - (uint64_t)size);
 }
 
+void
+disk_cache_write_item_to_disk(struct disk_cache_put_job *dc_job,
+                              struct cache_entry_file_data *cf_data,
+                              char *filename)
+{
+   int fd = -1, fd_final = -1;
+
+   /* Write to a temporary file to allow for an atomic rename to the
+    * final destination filename, (to prevent any readers from seeing
+    * a partially written file).
+    */
+   char *filename_tmp = NULL;
+   if (asprintf(&filename_tmp, "%s.tmp", filename) == -1)
+      goto done;
+
+   fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
+
+   /* Make the two-character subdirectory within the cache as needed. */
+   if (fd == -1) {
+      if (errno != ENOENT)
+         goto done;
+
+      make_cache_file_directory(dc_job->cache, dc_job->key);
+
+      fd = open(filename_tmp, O_WRONLY | O_CLOEXEC | O_CREAT, 0644);
+      if (fd == -1)
+         goto done;
+   }
+
+   /* With the temporary file open, we take an exclusive flock on
+    * it. If the flock fails, then another process still has the file
+    * open with the flock held. So just let that file be responsible
+    * for writing the file.
+    */
+#ifdef HAVE_FLOCK
+   int err = flock(fd, LOCK_EX | LOCK_NB);
+#else
+   struct flock lock = {
+      .l_start = 0,
+      .l_len = 0, /* entire file */
+      .l_type = F_WRLCK,
+      .l_whence = SEEK_SET
+   };
+   int err = fcntl(fd, F_SETLK, &lock);
+#endif
+   if (err == -1)
+      goto done;
+
+   /* Now that we have the lock on the open temporary file, we can
+    * check to see if the destination file already exists. If so,
+    * another process won the race between when we saw that the file
+    * didn't exist and now. In this case, we don't do anything more,
+    * (to ensure the size accounting of the cache doesn't get off).
+    */
+   fd_final = open(filename, O_RDONLY | O_CLOEXEC);
+   if (fd_final != -1) {
+      unlink(filename_tmp);
+      goto done;
+   }
+
+   /* OK, we're now on the hook to write out a file that we know is
+    * not in the cache, and is also not being written out to the cache
+    * by some other process.
+    */
+
+   /* Write the driver_keys_blob, this can be used find information about the
+    * mesa version that produced the entry or deal with hash collisions,
+    * should that ever become a real problem.
+    */
+   int ret = write_all(fd, dc_job->cache->driver_keys_blob,
+                       dc_job->cache->driver_keys_blob_size);
+   if (ret == -1) {
+      unlink(filename_tmp);
+      goto done;
+   }
+
+   /* Write the cache item metadata. This data can be used to deal with
+    * hash collisions, as well as providing useful information to 3rd party
+    * tools reading the cache files.
+    */
+   ret = write_all(fd, &dc_job->cache_item_metadata.type,
+                   sizeof(uint32_t));
+   if (ret == -1) {
+      unlink(filename_tmp);
+      goto done;
+   }
+
+   if (dc_job->cache_item_metadata.type == CACHE_ITEM_TYPE_GLSL) {
+      ret = write_all(fd, &dc_job->cache_item_metadata.num_keys,
+                      sizeof(uint32_t));
+      if (ret == -1) {
+         unlink(filename_tmp);
+         goto done;
+      }
+
+      ret = write_all(fd, dc_job->cache_item_metadata.keys[0],
+                      dc_job->cache_item_metadata.num_keys *
+                      sizeof(cache_key));
+      if (ret == -1) {
+         unlink(filename_tmp);
+         goto done;
+      }
+   }
+
+   size_t cf_data_size = sizeof(*cf_data);
+   ret = write_all(fd, cf_data, cf_data_size);
+   if (ret == -1) {
+      unlink(filename_tmp);
+      goto done;
+   }
+
+   /* Now, finally, write out the contents to the temporary file, then
+    * rename them atomically to the destination filename, and also
+    * perform an atomic increment of the total cache size.
+    */
+   size_t file_size = deflate_and_write_to_disk(dc_job->data, dc_job->size,
+                                                fd, filename_tmp);
+   if (file_size == 0) {
+      unlink(filename_tmp);
+      goto done;
+   }
+   ret = rename(filename_tmp, filename);
+   if (ret == -1) {
+      unlink(filename_tmp);
+      goto done;
+   }
+
+   struct stat sb;
+   if (stat(filename, &sb) == -1) {
+      /* Something went wrong remove the file */
+      unlink(filename);
+      goto done;
+   }
+
+   p_atomic_add(dc_job->cache->size, sb.st_blocks * 512);
+
+ done:
+   if (fd_final != -1)
+      close(fd_final);
+   /* This close finally releases the flock, (now that the final file
+    * has been renamed into place and the size has been added).
+    */
+   if (fd != -1)
+      close(fd);
+   free(filename_tmp);
+}
+
 /* Determine path for cache based on the first defined name as follows:
  *
  *   $MESA_GLSL_CACHE_DIR
diff --git a/src/util/disk_cache_os.h b/src/util/disk_cache_os.h
index f6da30047ef..da5b6a306f8 100644
--- a/src/util/disk_cache_os.h
+++ b/src/util/disk_cache_os.h
@@ -73,12 +73,38 @@ struct disk_cache {
    disk_cache_get_cb blob_get_cb;
 };
 
+struct disk_cache_put_job {
+   struct util_queue_fence fence;
+
+   struct disk_cache *cache;
+
+   cache_key key;
+
+   /* Copy of cache data to be compressed and written. */
+   void *data;
+
+   /* Size of data to be compressed and written. */
+   size_t size;
+
+   struct cache_item_metadata cache_item_metadata;
+};
+
+struct cache_entry_file_data {
+   uint32_t crc32;
+   uint32_t uncompressed_size;
+};
+
 char *
 disk_cache_generate_cache_dir(void *mem_ctx);
 
 void
 disk_cache_evict_lru_item(struct disk_cache *cache);
 
+void
+disk_cache_write_item_to_disk(struct disk_cache_put_job *dc_job,
+                              struct cache_entry_file_data *cf_data,
+                              char *filename);
+
 bool
 disk_cache_enabled(void);