[systemd-devel] [PATCH 2/3] coredump: use lz4frame api to compress coredumps

Zbigniew Jędrzejewski-Szmek zbyszek at in.waw.pl
Sun Dec 7 10:32:39 PST 2014


This converts the stream compression to use the new lz4frame api,
compatible with lz4cat. Previous code used custom headers, so the
compressed file was not compatible with lz4 command line tools.
I considered this the last blocker to using lz4 by default.

Speed seems to be reasonable, although a bit (a few percent) slower
than the lz4 binary, even though compression is the same. I don't
consider this important. It could be caused by the overhead of library
calls, but is probably caused by slightly different buffer sizes or
such. The code in this patch uses mmap, since since this allows the
buffer to be reused while not making the code more complicated at all.
In my testing, this version is noticably faster (~20%) than a naive
single-buffered version. mmap can cause the program to be killed with
SIGBUS, if the underlying file is truncated or a disk error occurs. We
only use this from within coredump and coredumpctl, so I don't
consider this an issue.

Old decompression code is retained and is used if the new code fails
indicating a format error. I have various coredumps in the old format
and would be unhappy if they suddenly stopped working. We can remove
the legacy code in a few versions.

The way that blobs are compressed in the journal is not affected.

Note that lz4 125 has not been released yet, so this patch should
not be applied currently.
---
 src/journal/compress.c      | 201 ++++++++++++++++++++++++++++++++------------
 src/journal/test-compress.c |   9 +-
 2 files changed, 150 insertions(+), 60 deletions(-)

diff --git a/src/journal/compress.c b/src/journal/compress.c
index 9440fcd60e..b92fdb1f4c 100644
--- a/src/journal/compress.c
+++ b/src/journal/compress.c
@@ -23,6 +23,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/mman.h>
 
 #ifdef HAVE_XZ
 #  include <lzma.h>
@@ -30,6 +31,7 @@
 
 #ifdef HAVE_LZ4
 #  include <lz4.h>
+#  include <lz4frame.h>
 #endif
 
 #include "compress.h"
@@ -38,6 +40,11 @@
 #include "sparse-endian.h"
 #include "journal-def.h"
 
+#ifdef HAVE_LZ4
+DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_compressionContext_t, LZ4F_freeCompressionContext);
+DEFINE_TRIVIAL_CLEANUP_FUNC(LZ4F_decompressionContext_t, LZ4F_freeDecompressionContext);
+#endif
+
 #define ALIGN_8(l) ALIGN_TO(l, sizeof(size_t))
 
 static const char* const object_compressed_table[_OBJECT_COMPRESSED_MAX] = {
@@ -418,81 +425,99 @@ int compress_stream_xz(int fdf, int fdt, off_t max_bytes) {
 #endif
 }
 
-#define LZ4_BUFSIZE (512*1024)
+#define LZ4_BUFSIZE (512*1024u)
 
 int compress_stream_lz4(int fdf, int fdt, off_t max_bytes) {
 
 #ifdef HAVE_LZ4
+        LZ4F_errorCode_t c;
+        _cleanup_(LZ4F_freeCompressionContextp) LZ4F_compressionContext_t ctx = NULL;
+        _cleanup_free_ char *buf = NULL;
+        char *src = NULL;
+        size_t size, n, total_in = 0, total_out = 0, offset = 0, frame_size;
+        struct stat st;
+        int r;
+        static const LZ4F_compressOptions_t options = {
+                .stableSrc = 1,
+        };
+        static const LZ4F_preferences_t preferences = {
+                .frameInfo.blockSizeID = 5,
+        };
 
-        _cleanup_free_ char *buf1 = NULL, *buf2 = NULL, *out = NULL;
-        char *buf;
-        LZ4_stream_t lz4_data = {};
-        le32_t header;
-        size_t total_in = 0, total_out = sizeof(header);
-        ssize_t n;
+        c = LZ4F_createCompressionContext(&ctx, LZ4F_VERSION);
+        if (LZ4F_isError(c))
+                return -ENOMEM;
 
-        assert(fdf >= 0);
-        assert(fdt >= 0);
+        if (fstat(fdf, &st) < 0)
+                return log_error_errno(errno, "fstat() failed: %m");
 
-        buf1 = malloc(LZ4_BUFSIZE);
-        buf2 = malloc(LZ4_BUFSIZE);
-        out = malloc(LZ4_COMPRESSBOUND(LZ4_BUFSIZE));
-        if (!buf1 || !buf2 || !out)
-                return log_oom();
+        frame_size = LZ4F_compressBound(LZ4_BUFSIZE, &preferences);
+        size =  frame_size + 64*1024; /* add some space for header and trailer */
+        buf = malloc(size);
+        if (!buf)
+                return -ENOMEM;
 
-        buf = buf1;
-        for (;;) {
-                size_t m;
-                int r;
+        n = offset = LZ4F_compressBegin(ctx, buf, size, &preferences);
+        if (LZ4F_isError(n))
+                return -EINVAL;
 
-                m = LZ4_BUFSIZE;
-                if (max_bytes != -1 && m > (size_t) max_bytes - total_in)
-                        m = max_bytes - total_in;
+        src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fdf, 0);
+        if (!src)
+                return -errno;
 
-                n = read(fdf, buf, m);
-                if (n < 0)
-                        return -errno;
-                if (n == 0)
-                        break;
+        log_debug("Buffer size is %zu bytes, header size %zu bytes.", size, n);
 
-                total_in += n;
+        while (total_in < (size_t) st.st_size) {
+                ssize_t k;
 
-                r = LZ4_compress_continue(&lz4_data, buf, out, n);
-                if (r == 0) {
-                        log_error("LZ4 compression failed.");
-                        return -EBADMSG;
+                k = MIN(LZ4_BUFSIZE, st.st_size - total_in);
+                n = LZ4F_compressUpdate(ctx, buf + offset, size - offset,
+                                        src + total_in, k, &options);
+                if (LZ4F_isError(n)) {
+                        r = -ENOTRECOVERABLE;
+                        goto cleanup;
                 }
 
-                header = htole32(r);
-                errno = 0;
+                total_in += k;
+                offset += n;
+                total_out += n;
 
-                n = write(fdt, &header, sizeof(header));
-                if (n < 0)
-                        return -errno;
-                if (n != sizeof(header))
-                        return errno ? -errno : -EIO;
+                if (max_bytes != -1 && total_out > (size_t) max_bytes) {
+                        log_debug("Compressed stream longer than %zd bytes", max_bytes);
+                        return -EFBIG;
+                }
 
-                n = loop_write(fdt, out, r, false);
-                if (n < 0)
-                        return n;
+                if (size - offset < frame_size + 4) {
+                        log_debug("Writing %zu bytes", offset);
 
-                total_out += sizeof(header) + r;
+                        k = loop_write(fdt, buf, offset, false);
+                        if (k < 0) {
+                                r = k;
+                                goto cleanup;
+                        }
+                        offset = 0;
+                }
+        }
 
-                buf = buf == buf1 ? buf2 : buf1;
+        n = LZ4F_compressEnd(ctx, buf + offset, size - offset, &options);
+        if (LZ4F_isError(n)) {
+                r = -ENOTRECOVERABLE;
+                goto cleanup;
         }
 
-        header = htole32(0);
-        n = write(fdt, &header, sizeof(header));
-        if (n < 0)
-                return -errno;
-        if (n != sizeof(header))
-                return errno ? -errno : -EIO;
+        offset += n;
+        total_out += n;
+        log_debug("Writing %zu bytes", offset);
+        r = loop_write(fdt, buf, offset, false);
+        if (r < 0)
+                goto cleanup;
 
         log_debug("LZ4 compression finished (%zu -> %zu bytes, %.1f%%)",
                   total_in, total_out,
                   (double) total_out / total_in * 100);
-
-        return 0;
+ cleanup:
+        munmap(src, st.st_size);
+        return r;
 #else
         return -EPROTONOSUPPORT;
 #endif
@@ -573,9 +598,9 @@ int decompress_stream_xz(int fdf, int fdt, off_t max_bytes) {
 #endif
 }
 
-int decompress_stream_lz4(int fdf, int fdt, off_t max_bytes) {
-
 #ifdef HAVE_LZ4
+static int decompress_stream_lz4_v1(int fdf, int fdt, off_t max_bytes) {
+
         _cleanup_free_ char *buf = NULL, *out = NULL;
         size_t buf_size = 0;
         LZ4_streamDecode_t lz4_data = {};
@@ -628,7 +653,7 @@ int decompress_stream_lz4(int fdf, int fdt, off_t max_bytes) {
 
                 r = LZ4_decompress_safe_continue(&lz4_data, buf, out, m, 4*LZ4_BUFSIZE);
                 if (r <= 0)
-                        log_error("LZ4 decompression failed.");
+                        log_error("LZ4 decompression failed (legacy format).");
 
                 total_out += r;
 
@@ -642,11 +667,77 @@ int decompress_stream_lz4(int fdf, int fdt, off_t max_bytes) {
                         return n;
         }
 
-        log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)",
+        log_debug("LZ4 decompression finished (legacy format, %zu -> %zu bytes, %.1f%%)",
                   total_in, total_out,
                   (double) total_out / total_in * 100);
 
         return 0;
+}
+
+static int decompress_stream_lz4_v2(int in, int out, off_t max_bytes) {
+        size_t c;
+        _cleanup_(LZ4F_freeDecompressionContextp) LZ4F_decompressionContext_t ctx = NULL;
+        _cleanup_free_ char *buf = NULL;
+        char *src;
+        struct stat st;
+        int r = 0;
+        size_t total_in = 0, total_out = 0;
+
+        c = LZ4F_createDecompressionContext(&ctx, LZ4F_VERSION);
+        if (LZ4F_isError(c))
+                return -ENOMEM;
+
+        if (fstat(in, &st) < 0)
+                return log_error_errno(errno, "fstat() failed: %m");
+
+        buf = malloc(LZ4_BUFSIZE);
+        if (!buf)
+                return -ENOMEM;
+
+        src = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, in, 0);
+        if (!src)
+                return -errno;
+
+        while (total_in < (size_t) st.st_size) {
+                size_t produced = LZ4_BUFSIZE;
+                size_t used = st.st_size - total_in;
+
+                c = LZ4F_decompress(ctx, buf, &produced, src + total_in, &used, NULL);
+                if (LZ4F_isError(c)) {
+                        r = -EBADMSG;
+                        goto cleanup;
+                }
+
+                total_in += used;
+                total_out += produced;
+
+                if (max_bytes != -1 && total_out > (size_t) max_bytes) {
+                        r = log_debug_errno(EFBIG, "Decompressed stream longer than %zd bytes", max_bytes);
+                        goto cleanup;
+                }
+
+                r = loop_write(out, buf, produced, false);
+                if (r < 0)
+                        goto cleanup;
+        }
+
+        log_debug("LZ4 decompression finished (%zu -> %zu bytes, %.1f%%)",
+                  total_in, total_out,
+                  (double) total_out / total_in * 100);
+ cleanup:
+        munmap(src, st.st_size);
+        return r;
+}
+#endif
+
+int decompress_stream_lz4(int fdf, int fdt, off_t max_bytes) {
+#ifdef HAVE_LZ4
+        int r;
+
+        r = decompress_stream_lz4_v2(fdf, fdt, max_bytes);
+        if (r == -EBADMSG)
+                r = decompress_stream_lz4_v1(fdf, fdt, max_bytes);
+        return r;
 #else
         log_error("Cannot decompress file. Compiled without LZ4 support.");
         return -EPROTONOSUPPORT;
diff --git a/src/journal/test-compress.c b/src/journal/test-compress.c
index 97577e827c..26b019d345 100644
--- a/src/journal/test-compress.c
+++ b/src/journal/test-compress.c
@@ -143,8 +143,8 @@ static void test_compress_stream(int compression,
                                  const char *srcfile) {
 
         _cleanup_close_ int src = -1, dst = -1, dst2 = -1;
-        char pattern[] = "/tmp/systemd-test.xz.XXXXXX",
-             pattern2[] = "/tmp/systemd-test.xz.XXXXXX";
+        char pattern[] = "/tmp/systemd-test.compressed.XXXXXX",
+             pattern2[] = "/tmp/systemd-test.compressed.XXXXXX";
         int r;
         _cleanup_free_ char *cmd = NULL, *cmd2;
         struct stat st = {};
@@ -184,7 +184,7 @@ static void test_compress_stream(int compression,
 
         assert_se(lseek(dst, 1, SEEK_SET) == 1);
         r = decompress(dst, dst2, st.st_size);
-        assert_se(r == -EBADMSG);
+        assert_se(r == -EBADMSG || r == 0);
 
         assert_se(lseek(dst, 0, SEEK_SET) == 0);
         assert_se(lseek(dst2, 0, SEEK_SET) == 0);
@@ -235,8 +235,7 @@ int main(int argc, char *argv[]) {
                                    compress_blob_lz4, decompress_startswith_lz4,
                                    data, sizeof(data), true);
 
-        /* Produced stream is not compatible with lz4 binary, skip lz4cat check. */
-        test_compress_stream(OBJECT_COMPRESSED_LZ4, NULL,
+        test_compress_stream(OBJECT_COMPRESSED_LZ4, "lz4cat",
                              compress_stream_lz4, decompress_stream_lz4, argv[0]);
 #else
         log_info("/* LZ4 test skipped */");
-- 
1.9.3



More information about the systemd-devel mailing list