[systemd-commits] 10 commits - .gitignore Makefile.am src/journal src/shared

Lennart Poettering lennart at kemper.freedesktop.org
Thu Aug 16 08:11:37 PDT 2012


 .gitignore                         |    1 
 Makefile.am                        |   41 +
 src/journal/journal-authenticate.c |  440 +++++++++++++++++
 src/journal/journal-authenticate.h |   40 +
 src/journal/journal-def.h          |    8 
 src/journal/journal-file.c         |  819 ++------------------------------
 src/journal/journal-file.h         |   35 -
 src/journal/journal-internal.h     |    2 
 src/journal/journal-vacuum.c       |  230 +++++++++
 src/journal/journal-vacuum.h       |   26 +
 src/journal/journal-verify.c       |  937 +++++++++++++++++++++++++++++++++++++
 src/journal/journal-verify.h       |   26 +
 src/journal/journalctl.c           |  136 +++--
 src/journal/journald.c             |   17 
 src/journal/journald.h             |    2 
 src/journal/mmap-cache.c           |  696 +++++++++++++++++++++++++++
 src/journal/mmap-cache.h           |   35 +
 src/journal/sd-journal.c           |   16 
 src/journal/test-journal-stream.c  |    6 
 src/journal/test-journal-verify.c  |   78 +++
 src/journal/test-journal.c         |    6 
 src/shared/conf-parser.c           |    6 
 22 files changed, 2755 insertions(+), 848 deletions(-)

New commits:
commit 86adf873be22a38dbc9c6e86124c30b6caecd185
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 17:09:53 2012 +0200

    journal: verify structural consistency

diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h
index a77f69e..e61e81c 100644
--- a/src/journal/journal-def.h
+++ b/src/journal/journal-def.h
@@ -47,12 +47,12 @@ typedef struct FSPRGHeader FSPRGHeader;
 /* Object types */
 enum {
         OBJECT_UNUSED,
-        OBJECT_DATA,
+        OBJECT_DATA,               /* !!! */
         OBJECT_FIELD,
         OBJECT_ENTRY,
         OBJECT_DATA_HASH_TABLE,
         OBJECT_FIELD_HASH_TABLE,
-        OBJECT_ENTRY_ARRAY,
+        OBJECT_ENTRY_ARRAY,        /* !!! */
         OBJECT_TAG,
         _OBJECT_TYPE_MAX
 };
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
index 1a9a730..7c99d44 100644
--- a/src/journal/journal-verify.c
+++ b/src/journal/journal-verify.c
@@ -34,10 +34,11 @@
 
 /* FIXME:
  *
- * - follow all chains
- * - check for unreferenced objects
  * - verify FSPRG
  * - Allow building without libgcrypt
+ * - check with sparse
+ * - 64bit conversions
+ * - verification should use MAP_PRIVATE
  *
  * */
 
@@ -110,12 +111,18 @@ static int journal_file_object_verify(JournalFile *f, Object *o) {
                 if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
                         return -EBADMSG;
 
+                if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) / sizeof(HashItem) <= 0)
+                        return -EBADMSG;
+
                 break;
 
         case OBJECT_ENTRY_ARRAY:
                 if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
                         return -EBADMSG;
 
+                if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) / sizeof(le64_t) <= 0)
+                        return -EBADMSG;
+
                 break;
 
         case OBJECT_TAG:
@@ -206,7 +213,7 @@ static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
 
                 c = (a + b) / 2;
 
-                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
+                r = mmap_cache_get(m, fd, PROT_READ|PROT_WRITE, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
                 if (r < 0)
                         return r;
 
@@ -222,6 +229,368 @@ static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
         return 0;
 }
 
+static int entry_points_to_data(
+                JournalFile *f,
+                int entry_fd,
+                uint64_t n_entries,
+                uint64_t entry_p,
+                uint64_t data_p) {
+        /* Returns 0 if the entry at entry_p is a known entry, references the data object at data_p and is listed in the main entry array; negative errno otherwise. */
+        int r;
+        uint64_t i, n, a;
+        Object *o;
+        bool found = false;
+
+        assert(f);
+        assert(entry_fd >= 0);
+
+        if (!contains_uint64(f->mmap, entry_fd, n_entries, entry_p)) {
+                log_error("Data object references invalid entry at %llu", (unsigned long long) data_p);
+                return -EBADMSG;
+        }
+
+        r = journal_file_move_to_object(f, OBJECT_ENTRY, entry_p, &o);
+        if (r < 0)
+                return r;
+
+        n = journal_file_entry_n_items(o);
+        for (i = 0; i < n; i++)
+                if (le64toh(o->entry.items[i].object_offset) == data_p) {
+                        found = true;
+                        break;
+                }
+
+        if (!found) {
+                log_error("Data object not referenced by linked entry at %llu", (unsigned long long) data_p);
+                return -EBADMSG;
+        }
+
+        /* Check if this entry is also in main entry array. Since the
+         * main entry array has already been verified we can rely on
+         * its consistency; not finding the entry there is an error. */
+
+        n = le64toh(f->header->n_entries);
+        a = le64toh(f->header->entry_array_offset);
+        i = 0;
+
+        while (i < n) {
+                uint64_t m, j;
+
+                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+                if (r < 0)
+                        return r;
+
+                m = journal_file_entry_array_n_items(o);
+                for (j = 0; i < n && j < m; i++, j++)
+                        if (le64toh(o->entry_array.items[j]) == entry_p)
+                                return 0;
+
+                a = le64toh(o->entry_array.next_entry_array_offset);
+        }
+
+        log_error("Entry object doesn't exist in main entry array at %llu", (unsigned long long) entry_p);
+        return -EBADMSG;
+}
+
+static int verify_data(
+                JournalFile *f,
+                Object *o, uint64_t p,
+                int entry_fd, uint64_t n_entries,
+                int entry_array_fd, uint64_t n_entry_arrays) {
+        /* Verifies every entry linked from the data object o at offset p; returns 0 or a negative errno. */
+        uint64_t i, n, a, last, q;
+        int r;
+
+        assert(f);
+        assert(o);
+        assert(entry_fd >= 0);
+        assert(entry_array_fd >= 0);
+
+        n = le64toh(o->data.n_entries);
+        a = le64toh(o->data.entry_array_offset);
+
+        /* We already checked this earlier */
+        assert(n > 0);
+
+        last = q = le64toh(o->data.entry_offset);
+        r = entry_points_to_data(f, entry_fd, n_entries, q, p);
+        if (r < 0)
+                return r;
+        i = 1; /* entry 0 is the one in entry_offset, just checked; the array chain holds entries 1..n-1 */
+        while (i < n) {
+                uint64_t next, m, j;
+
+                if (a == 0) {
+                        log_error("Array chain too short at %llu.", (unsigned long long) p);
+                        return -EBADMSG;
+                }
+
+                if (!contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, a)) {
+                        log_error("Invalid array at %llu.", (unsigned long long) p);
+                        return -EBADMSG;
+                }
+
+                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+                if (r < 0)
+                        return r;
+
+                next = le64toh(o->entry_array.next_entry_array_offset);
+                if (next != 0 && next <= a) {
+                        log_error("Array chain has cycle at %llu.", (unsigned long long) p);
+                        return -EBADMSG;
+                }
+
+                m = journal_file_entry_array_n_items(o);
+                for (j = 0; i < n && j < m; i++, j++) {
+
+                        q = le64toh(o->entry_array.items[j]);
+                        if (q <= last) {
+                                log_error("Data object's entry array not sorted at %llu.", (unsigned long long) p);
+                                return -EBADMSG;
+                        }
+                        last = q;
+
+                        r = entry_points_to_data(f, entry_fd, n_entries, q, p);
+                        if (r < 0)
+                                return r;
+
+                        /* Pointer might have moved, reposition */
+                        r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+                        if (r < 0)
+                                return r;
+                }
+
+                a = next;
+        }
+
+        return 0;
+}
+
+static int verify_hash_table(
+                JournalFile *f,
+                int data_fd, uint64_t n_data,
+                int entry_fd, uint64_t n_entries,
+                int entry_array_fd, uint64_t n_entry_arrays,
+                usec_t *last_usec) {
+        /* Walks every bucket of the data hash table, verifying each chained data object via verify_data(); returns 0 or a negative errno. */
+        uint64_t i, n;
+        int r;
+
+        assert(f);
+        assert(data_fd >= 0);
+        assert(entry_fd >= 0);
+        assert(entry_array_fd >= 0);
+        assert(last_usec);
+
+        n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+        for (i = 0; i < n; i++) {
+                uint64_t last = 0, p;
+
+                draw_progress(0xC000 + (0x3FFF * i / n), last_usec);
+
+                p = le64toh(f->data_hash_table[i].head_hash_offset);
+                while (p != 0) {
+                        Object *o;
+                        uint64_t next;
+
+                        if (!contains_uint64(f->mmap, data_fd, n_data, p)) {
+                                log_error("Invalid data object at hash entry %llu of %llu.",
+                                          (unsigned long long) i, (unsigned long long) n);
+                                return -EBADMSG;
+                        }
+
+                        r = journal_file_move_to_object(f, OBJECT_DATA, p, &o);
+                        if (r < 0)
+                                return r;
+                        /* Require strictly increasing offsets along the chain, which rules out cycles */
+                        next = le64toh(o->data.next_hash_offset);
+                        if (next != 0 && next <= p) {
+                                log_error("Hash chain has a cycle in hash entry %llu of %llu.",
+                                          (unsigned long long) i, (unsigned long long) n);
+                                return -EBADMSG;
+                        }
+
+                        if (le64toh(o->data.hash) % n != i) {
+                                log_error("Hash value mismatch in hash entry %llu of %llu.",
+                                          (unsigned long long) i, (unsigned long long) n);
+                                return -EBADMSG;
+                        }
+
+                        r = verify_data(f, o, p, entry_fd, n_entries, entry_array_fd, n_entry_arrays);
+                        if (r < 0)
+                                return r;
+
+                        last = p;
+                        p = next;
+                }
+                /* The last object visited in the chain (0 for an empty bucket) must equal the bucket's tail pointer */
+                if (last != le64toh(f->data_hash_table[i].tail_hash_offset)) {
+                        log_error("Tail hash pointer mismatch in hash table.");
+                        return -EBADMSG;
+                }
+        }
+
+        return 0;
+}
+
+static int data_object_in_hash_table(JournalFile *f, uint64_t hash, uint64_t p) {
+        /* Returns 1 if data object p is chained in the hash bucket of 'hash', 0 if not, negative errno on failure. */
+        uint64_t n, h, q, next;
+        int r;
+        assert(f);
+
+        n = le64toh(f->header->data_hash_table_size) / sizeof(HashItem);
+        if (n == 0) /* An empty table cannot hold p; also avoids dividing by zero below */
+                return -EBADMSG;
+        h = hash % n;
+        for (q = le64toh(f->data_hash_table[h].head_hash_offset); q != 0; q = next) {
+                Object *o;
+                if (p == q)
+                        return 1;
+                r = journal_file_move_to_object(f, OBJECT_DATA, q, &o);
+                if (r < 0)
+                        return r;
+                /* This runs before the hash chains are verified: insist on increasing offsets so a cycle cannot hang us */
+                next = le64toh(o->data.next_hash_offset);
+                if (next != 0 && next <= q)
+                        return -EBADMSG;
+        }
+        return 0;
+}
+
+static int verify_entry(
+                JournalFile *f,
+                Object *o, uint64_t p,
+                int data_fd, uint64_t n_data) {
+        /* Checks that each item of the entry o at offset p names a known data object with a matching hash that is linked in the data hash table; returns 0 or a negative errno. */
+        uint64_t i, n;
+        int r;
+
+        assert(f);
+        assert(o);
+        assert(data_fd >= 0);
+
+        n = journal_file_entry_n_items(o);
+        for (i = 0; i < n; i++) {
+                uint64_t q, h;
+                Object *u;
+
+                q = le64toh(o->entry.items[i].object_offset);
+                h = le64toh(o->entry.items[i].hash);
+
+                if (!contains_uint64(f->mmap, data_fd, n_data, q)) {
+                        log_error("Invalid data object at entry %llu.",
+                                  (unsigned long long) p);
+                        return -EBADMSG;
+                }
+
+                r = journal_file_move_to_object(f, OBJECT_DATA, q, &u);
+                if (r < 0)
+                        return r;
+
+                if (le64toh(u->data.hash) != h) {
+                        log_error("Hash mismatch for data object at entry %llu.",
+                                  (unsigned long long) p);
+                        return -EBADMSG;
+                }
+
+                r = data_object_in_hash_table(f, h, q);
+                if (r < 0)
+                        return r;
+                if (r == 0) {
+                        log_error("Data object missing from hash at entry %llu.",
+                                  (unsigned long long) p);
+                        return -EBADMSG;
+                }
+        }
+
+        return 0;
+}
+
+static int verify_entry_array(
+                JournalFile *f,
+                int data_fd, uint64_t n_data,
+                int entry_fd, uint64_t n_entries,
+                int entry_array_fd, uint64_t n_entry_arrays,
+                usec_t *last_usec) {
+        /* Walks the main entry array chain from the header, checking that it lists the header's n_entries known entries in sorted order and verifying each with verify_entry(); returns 0 or a negative errno. */
+        uint64_t i = 0, a, n, last = 0;
+        int r;
+
+        assert(f);
+        assert(data_fd >= 0);
+        assert(entry_fd >= 0);
+        assert(entry_array_fd >= 0);
+        assert(last_usec);
+
+        n = le64toh(f->header->n_entries);
+        a = le64toh(f->header->entry_array_offset);
+        while (i < n) {
+                uint64_t next, m, j;
+                Object *o;
+
+                draw_progress(0x8000 + (0x3FFF * i / n), last_usec);
+
+                if (a == 0) {
+                        log_error("Array chain too short at %llu of %llu.",
+                                  (unsigned long long) i, (unsigned long long) n);
+                        return -EBADMSG;
+                }
+
+                if (!contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, a)) {
+                        log_error("Invalid array at %llu of %llu.",
+                                  (unsigned long long) i, (unsigned long long) n);
+                        return -EBADMSG;
+                }
+
+                r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+                if (r < 0)
+                        return r;
+                /* Require strictly increasing offsets along the chain, which rules out cycles */
+                next = le64toh(o->entry_array.next_entry_array_offset);
+                if (next != 0 && next <= a) {
+                        log_error("Array chain has cycle at %llu of %llu.",
+                                  (unsigned long long) i, (unsigned long long) n);
+                        return -EBADMSG;
+                }
+                /* Stop after n entries in total, even if this array has more slots */
+                m = journal_file_entry_array_n_items(o);
+                for (j = 0; i < n && j < m; i++, j++) {
+                        uint64_t p;
+
+                        p = le64toh(o->entry_array.items[j]);
+                        if (p <= last) {
+                                log_error("Entry array not sorted at %llu of %llu.",
+                                          (unsigned long long) i, (unsigned long long) n);
+                                return -EBADMSG;
+                        }
+                        last = p;
+
+                        if (!contains_uint64(f->mmap, entry_fd, n_entries, p)) {
+                                log_error("Invalid array entry at %llu of %llu.",
+                                          (unsigned long long) i, (unsigned long long) n);
+                                return -EBADMSG;
+                        }
+
+                        r = journal_file_move_to_object(f, OBJECT_ENTRY, p, &o);
+                        if (r < 0)
+                                return r;
+
+                        r = verify_entry(f, o, p, data_fd, n_data);
+                        if (r < 0)
+                                return r;
+
+                        /* Pointer might have moved, reposition */
+                        r = journal_file_move_to_object(f, OBJECT_ENTRY_ARRAY, a, &o);
+                        if (r < 0)
+                                return r;
+                }
+
+                a = next;
+        }
+
+        return 0;
+}
+
 int journal_file_verify(JournalFile *f, const char *key) {
         int r;
         Object *o;
@@ -270,7 +639,7 @@ int journal_file_verify(JournalFile *f, const char *key) {
 
         p = le64toh(f->header->header_size);
         while (p != 0) {
-                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
+                draw_progress(0x7FFF * p / le64toh(f->header->tail_object_offset), &last_usec);
 
                 r = journal_file_move_to_object(f, -1, p, &o);
                 if (r < 0) {
@@ -504,53 +873,29 @@ int journal_file_verify(JournalFile *f, const char *key) {
                 goto fail;
         }
 
-        /* Second iteration: we go through all objects again, this
-         * time verify all pointers. */
-
-        p = le64toh(f->header->header_size);
-        while (p != 0) {
-                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (o->object.type == OBJECT_ENTRY_ARRAY) {
-                        uint64_t i = 0, n;
-
-                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
-                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
-                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        n = journal_file_entry_array_n_items(o);
-                        for (i = 0; i < n; i++) {
-                                if (le64toh(o->entry_array.items[i]) != 0 &&
-                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
-
-                                        log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
-                                        r = -EBADMSG;
-                                        goto fail;
-                                }
-                        }
-
-                }
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
+        /* Second iteration: we follow all objects referenced from the
+         * two entry points: the object hash table and the entry
+         * array. We also check that everything referenced (directly
+         * or indirectly) in the data hash table also exists in the
+         * entry array, and vice versa. Note that we do not care for
+         * unreferenced objects. We only care that everything that is
+         * referenced is consistent. */
+
+        r = verify_entry_array(f,
+                               data_fd, n_data,
+                               entry_fd, n_entries,
+                               entry_array_fd, n_entry_arrays,
+                               &last_usec);
+        if (r < 0)
+                goto fail;
 
-                if (p == le64toh(f->header->tail_object_offset))
-                        p = 0;
-                else
-                        p = p + ALIGN64(le64toh(o->object.size));
-        }
+        r = verify_hash_table(f,
+                              data_fd, n_data,
+                              entry_fd, n_entries,
+                              entry_array_fd, n_entry_arrays,
+                              &last_usec);
+        if (r < 0)
+                goto fail;
 
         flush_progress();
 

commit f9fffc31cdc4be7a0e4437837ae06a0c111fe020
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 03:45:10 2012 +0200

    journal: add color to verification progress bar

diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
index 8ef91ce..1a9a730 100644
--- a/src/journal/journal-verify.c
+++ b/src/journal/journal-verify.c
@@ -146,11 +146,13 @@ static void draw_progress(uint64_t p, usec_t *last_usec) {
         j = (n * (unsigned) p) / 65535ULL;
         k = n - j;
 
-        fputs("\r\x1B[?25l", stdout);
+        fputs("\r\x1B[?25l" ANSI_HIGHLIGHT_GREEN_ON, stdout);
 
         for (i = 0; i < j; i++)
                 fputs("\xe2\x96\x88", stdout);
 
+        fputs(ANSI_HIGHLIGHT_OFF, stdout);
+
         for (i = 0; i < k; i++)
                 fputs("\xe2\x96\x91", stdout);
 

commit fd5dc3204d350142a9105d3e9c83bf29d3a900ee
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 03:43:07 2012 +0200

    journal: verify compressed objects

diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
index 9318f3d..8ef91ce 100644
--- a/src/journal/journal-verify.c
+++ b/src/journal/journal-verify.c
@@ -30,13 +30,14 @@
 #include "journal-authenticate.h"
 #include "journal-verify.h"
 #include "lookup3.h"
+#include "compress.h"
 
 /* FIXME:
  *
- * - verify hashes of compressed objects
  * - follow all chains
  * - check for unreferenced objects
  * - verify FSPRG
+ * - Allow building without libgcrypt
  *
  * */
 
@@ -54,7 +55,9 @@ static int journal_file_object_verify(JournalFile *f, Object *o) {
 
         switch (o->object.type) {
 
-        case OBJECT_DATA:
+        case OBJECT_DATA: {
+                uint64_t h1, h2;
+
                 if (le64toh(o->data.entry_offset) <= 0 ||
                     le64toh(o->data.n_entries) <= 0)
                         return -EBADMSG;
@@ -62,17 +65,27 @@ static int journal_file_object_verify(JournalFile *f, Object *o) {
                 if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
                         return -EBADMSG;
 
-                if (!(o->object.flags & OBJECT_COMPRESSED)) {
-                        uint64_t h1, h2;
+                h1 = le64toh(o->data.hash);
 
-                        h1 = le64toh(o->data.hash);
-                        h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+                if (o->object.flags & OBJECT_COMPRESSED) {
+                        void *b = NULL;
+                        uint64_t alloc = 0, b_size;
 
-                        if (h1 != h2)
+                        if (!uncompress_blob(o->data.payload,
+                                             le64toh(o->object.size) - offsetof(Object, data.payload),
+                                             &b, &alloc, &b_size))
                                 return -EBADMSG;
-                }
+
+                        h2 = hash64(b, b_size);
+                        free(b);
+                } else
+                        h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+
+                if (h1 != h2)
+                        return -EBADMSG;
 
                 break;
+        }
 
         case OBJECT_FIELD:
                 if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)

commit 4da416aa20b956571d74720bc91222881443e24b
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 02:14:34 2012 +0200

    journalctl: add --verify-seed= switch to specify seed value

diff --git a/src/journal/journal-authenticate.c b/src/journal/journal-authenticate.c
index 827e4e4..5a0314b 100644
--- a/src/journal/journal-authenticate.c
+++ b/src/journal/journal-authenticate.c
@@ -432,3 +432,9 @@ int journal_file_append_first_tag(JournalFile *f) {
 
         return 0;
 }
+
+bool journal_file_fsprg_enabled(JournalFile *f) {
+        assert(f);
+        /* True if the header's compatible_flags carry HEADER_COMPATIBLE_AUTHENTICATED; !! normalizes the masked value to 0/1 */
+        return !!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED);
+}
diff --git a/src/journal/journal-authenticate.h b/src/journal/journal-authenticate.h
index c991b22..566d7a8 100644
--- a/src/journal/journal-authenticate.h
+++ b/src/journal/journal-authenticate.h
@@ -21,6 +21,9 @@
   along with systemd; If not, see <http://www.gnu.org/licenses/>.
 ***/
 
+#include <stdbool.h>
+#include <inttypes.h>
+
 #include "journal-file.h"
 
 int journal_file_append_tag(JournalFile *f);
@@ -33,3 +36,5 @@ int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
 int journal_file_load_fsprg(JournalFile *f);
 
 int journal_file_setup_hmac(JournalFile *f);
+
+bool journal_file_fsprg_enabled(JournalFile *f);
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
index a70de06..3d274c8 100644
--- a/src/journal/journalctl.c
+++ b/src/journal/journalctl.c
@@ -41,9 +41,10 @@
 #include "logs-show.h"
 #include "strv.h"
 #include "journal-internal.h"
-#include "fsprg.h"
 #include "journal-def.h"
 #include "journal-verify.h"
+#include "journal-authenticate.h"
+#include "fsprg.h"
 
 #define DEFAULT_FSPRG_INTERVAL_USEC (15*USEC_PER_MINUTE)
 
@@ -58,6 +59,7 @@ static bool arg_local = false;
 static bool arg_this_boot = false;
 static const char *arg_directory = NULL;
 static int arg_priorities = 0xFF;
+static const char *arg_verify_seed = NULL;
 
 static enum {
         ACTION_SHOW,
@@ -71,25 +73,26 @@ static int help(void) {
 
         printf("%s [OPTIONS...] [MATCH]\n\n"
                "Send control commands to or query the journal.\n\n"
-               "  -h --help           Show this help\n"
-               "     --version        Show package version\n"
-               "     --no-pager       Do not pipe output into a pager\n"
-               "  -a --all            Show all fields, including long and unprintable\n"
-               "  -f --follow         Follow journal\n"
-               "  -n --lines=INTEGER  Journal entries to show\n"
-               "     --no-tail        Show all lines, even in follow mode\n"
-               "  -o --output=STRING  Change journal output mode (short, short-monotonic,\n"
-               "                      verbose, export, json, cat)\n"
-               "  -q --quiet          Don't show privilege warning\n"
-               "  -l --local          Only local entries\n"
-               "  -b --this-boot      Show data only from current boot\n"
-               "  -D --directory=PATH Show journal files from directory\n"
-               "  -p --priority=RANGE Show only messages within the specified priority range\n\n"
+               "  -h --help              Show this help\n"
+               "     --version           Show package version\n"
+               "     --no-pager          Do not pipe output into a pager\n"
+               "  -a --all               Show all fields, including long and unprintable\n"
+               "  -f --follow            Follow journal\n"
+               "  -n --lines=INTEGER     Journal entries to show\n"
+               "     --no-tail           Show all lines, even in follow mode\n"
+               "  -o --output=STRING     Change journal output mode (short, short-monotonic,\n"
+               "                         verbose, export, json, cat)\n"
+               "  -q --quiet             Don't show privilege warning\n"
+               "  -l --local             Only local entries\n"
+               "  -b --this-boot         Show data only from current boot\n"
+               "  -D --directory=PATH    Show journal files from directory\n"
+               "  -p --priority=RANGE    Show only messages within the specified priority range\n\n"
                "Commands:\n"
-               "     --new-id128      Generate a new 128 Bit ID\n"
-               "     --header         Show journal header information\n"
-               "     --setup-keys     Generate new FSPRG key pair\n"
-               "     --verify         Verify journal file consistency\n",
+               "     --new-id128         Generate a new 128 Bit ID\n"
+               "     --header            Show journal header information\n"
+               "     --verify            Verify journal file consistency\n"
+               "     --verify-seed=SEED  Specify FSPRG seed for verification\n"
+               "     --setup-keys        Generate new FSPRG key and seed\n",
                program_invocation_short_name);
 
         return 0;
@@ -104,28 +107,30 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NEW_ID128,
                 ARG_HEADER,
                 ARG_SETUP_KEYS,
-                ARG_VERIFY
+                ARG_VERIFY,
+                ARG_VERIFY_SEED
         };
 
         static const struct option options[] = {
-                { "help",      no_argument,       NULL, 'h'           },
-                { "version" ,  no_argument,       NULL, ARG_VERSION   },
-                { "no-pager",  no_argument,       NULL, ARG_NO_PAGER  },
-                { "follow",    no_argument,       NULL, 'f'           },
-                { "output",    required_argument, NULL, 'o'           },
-                { "all",       no_argument,       NULL, 'a'           },
-                { "lines",     required_argument, NULL, 'n'           },
-                { "no-tail",   no_argument,       NULL, ARG_NO_TAIL   },
-                { "new-id128", no_argument,       NULL, ARG_NEW_ID128 },
-                { "quiet",     no_argument,       NULL, 'q'           },
-                { "local",     no_argument,       NULL, 'l'           },
-                { "this-boot", no_argument,       NULL, 'b'           },
-                { "directory", required_argument, NULL, 'D'           },
-                { "header",    no_argument,       NULL, ARG_HEADER    },
-                { "priority",  no_argument,       NULL, 'p'           },
-                { "setup-keys",no_argument,       NULL, ARG_SETUP_KEYS},
-                { "verify",    no_argument,       NULL, ARG_VERIFY    },
-                { NULL,        0,                 NULL, 0             }
+                { "help",        no_argument,       NULL, 'h'             },
+                { "version" ,    no_argument,       NULL, ARG_VERSION     },
+                { "no-pager",    no_argument,       NULL, ARG_NO_PAGER    },
+                { "follow",      no_argument,       NULL, 'f'             },
+                { "output",      required_argument, NULL, 'o'             },
+                { "all",         no_argument,       NULL, 'a'             },
+                { "lines",       required_argument, NULL, 'n'             },
+                { "no-tail",     no_argument,       NULL, ARG_NO_TAIL     },
+                { "new-id128",   no_argument,       NULL, ARG_NEW_ID128   },
+                { "quiet",       no_argument,       NULL, 'q'             },
+                { "local",       no_argument,       NULL, 'l'             },
+                { "this-boot",   no_argument,       NULL, 'b'             },
+                { "directory",   required_argument, NULL, 'D'             },
+                { "header",      no_argument,       NULL, ARG_HEADER      },
+                { "priority",    no_argument,       NULL, 'p'             },
+                { "setup-keys",  no_argument,       NULL, ARG_SETUP_KEYS  },
+                { "verify",      no_argument,       NULL, ARG_VERIFY      },
+                { "verify-seed", required_argument, NULL, ARG_VERIFY_SEED },
+                { NULL,          0,                 NULL, 0               }
         };
 
         int c, r;
@@ -212,6 +217,11 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_action = ACTION_VERIFY;
                         break;
 
+                case ARG_VERIFY_SEED:
+                        arg_action = ACTION_VERIFY;
+                        arg_verify_seed = optarg;
+                        break;
+
                 case 'p': {
                         const char *dots;
 
@@ -541,8 +551,8 @@ static int setup_keys(void) {
                 fprintf(stderr,
                         "\n"
                         "The new key pair has been generated. The evolving key has been written to the\n"
-                        "following file. It will be used to protect local journal files. This file does\n"
-                        "not need to be kept secret. It should not be used on multiple hosts.\n"
+                        "following file. It will be used to protect local journal files. This file\n"
+                        "should be kept secret. It should not be used on multiple hosts.\n"
                         "\n"
                         "\t%s\n"
                         "\n"
@@ -591,7 +601,10 @@ static int verify(sd_journal *j) {
         HASHMAP_FOREACH(f, j->files, i) {
                 int k;
 
-                k = journal_file_verify(f, NULL);
+                if (!arg_verify_seed && journal_file_fsprg_enabled(f))
+                        log_warning("Journal file %s has authentication enabled but verification seed has not been passed using --verify-seed=.", f->path);
+
+                k = journal_file_verify(f, arg_verify_seed);
                 if (k < 0) {
                         log_warning("FAIL: %s (%s)", f->path, strerror(-k));
                         r = -r;

commit f59a5f6b873d8bf994e2d85671f2554b9fdd62db
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 01:59:25 2012 +0200

    journal: verify hashes only during actual verification, not all the time

diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index ff439f2..efa0910 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -319,23 +319,6 @@ static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, ui
         return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
 }
 
-static bool verify_hash(Object *o) {
-        uint64_t h1, h2;
-
-        assert(o);
-
-        if (o->object.type == OBJECT_DATA && !(o->object.flags & OBJECT_COMPRESSED)) {
-                h1 = le64toh(o->data.hash);
-                h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
-        } else if (o->object.type == OBJECT_FIELD) {
-                h1 = le64toh(o->field.hash);
-                h2 = hash64(o->field.payload, le64toh(o->object.size) - offsetof(Object, field.payload));
-        } else
-                return true;
-
-        return h1 == h2;
-}
-
 static uint64_t minimum_header_size(Object *o) {
 
         static uint64_t table[] = {
@@ -394,9 +377,6 @@ int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Objec
                 o = (Object*) t;
         }
 
-        if (!verify_hash(o))
-                return -EBADMSG;
-
         *ret = o;
         return 0;
 }
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
index f3182e8..9318f3d 100644
--- a/src/journal/journal-verify.c
+++ b/src/journal/journal-verify.c
@@ -29,6 +29,16 @@
 #include "journal-file.h"
 #include "journal-authenticate.h"
 #include "journal-verify.h"
+#include "lookup3.h"
+
+/* FIXME:
+ *
+ * - verify hashes of compressed objects
+ * - follow all chains
+ * - check for unreferenced objects
+ * - verify FSPRG
+ *
+ * */
 
 static int journal_file_object_verify(JournalFile *f, Object *o) {
         assert(f);
@@ -38,7 +48,12 @@ static int journal_file_object_verify(JournalFile *f, Object *o) {
          * possible field values. It does not follow any references to
          * other objects. */
 
+        if ((o->object.flags & OBJECT_COMPRESSED) &&
+            o->object.type != OBJECT_DATA)
+                return -EBADMSG;
+
         switch (o->object.type) {
+
         case OBJECT_DATA:
                 if (le64toh(o->data.entry_offset) <= 0 ||
                     le64toh(o->data.n_entries) <= 0)
@@ -46,6 +61,17 @@ static int journal_file_object_verify(JournalFile *f, Object *o) {
 
                 if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
                         return -EBADMSG;
+
+                if (!(o->object.flags & OBJECT_COMPRESSED)) {
+                        uint64_t h1, h2;
+
+                        h1 = le64toh(o->data.hash);
+                        h2 = hash64(o->data.payload, le64toh(o->object.size) - offsetof(Object, data.payload));
+
+                        if (h1 != h2)
+                                return -EBADMSG;
+                }
+
                 break;
 
         case OBJECT_FIELD:
@@ -251,12 +277,6 @@ int journal_file_verify(JournalFile *f, const char *key) {
                         goto fail;
                 }
 
-                r = journal_file_hmac_put_object(f, -1, p);
-                if (r < 0) {
-                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
                 if (o->object.flags & OBJECT_COMPRESSED &&
                     !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
                         log_error("Compressed object without compression at %llu", (unsigned long long) p);
@@ -264,10 +284,9 @@ int journal_file_verify(JournalFile *f, const char *key) {
                         goto fail;
                 }
 
-                if (o->object.flags & OBJECT_COMPRESSED &&
-                    o->object.type != OBJECT_DATA) {
-                        log_error("Compressed non-data object at %llu", (unsigned long long) p);
-                        r = -EBADMSG;
+                r = journal_file_hmac_put_object(f, -1, p);
+                if (r < 0) {
+                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
                         goto fail;
                 }
 

commit 0284adc6a60ce0af1107cb0b50041a65d731f39e
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 01:51:54 2012 +0200

    journal: split up journal-file.c

diff --git a/.gitignore b/.gitignore
index 8928071..4c8bba8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+/test-journal-verify
 /test-journal-match
 /test-journal-stream
 /test-unit-name
diff --git a/Makefile.am b/Makefile.am
index f220b59..895dcfa 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2397,6 +2397,12 @@ libsystemd_journal_la_SOURCES = \
 	src/systemd/sd-journal.h \
 	src/journal/journal-file.c \
 	src/journal/journal-file.h \
+	src/journal/journal-vacuum.c \
+	src/journal/journal-vacuum.h \
+	src/journal/journal-verify.c \
+	src/journal/journal-verify.h \
+	src/journal/journal-authenticate.c \
+	src/journal/journal-authenticate.h \
 	src/journal/lookup3.c \
 	src/journal/lookup3.h \
 	src/journal/journal-send.c \
diff --git a/src/journal/journal-authenticate.c b/src/journal/journal-authenticate.c
new file mode 100644
index 0000000..827e4e4
--- /dev/null
+++ b/src/journal/journal-authenticate.c
@@ -0,0 +1,434 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <fcntl.h>
+#include <sys/mman.h>
+
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-authenticate.h"
+#include "fsprg.h"
+
+static void *fsprg_state(JournalFile *f) {
+        uint64_t a, b;
+        assert(f);
+
+        if (!f->authenticate)
+                return NULL;
+
+        a = le64toh(f->fsprg_header->header_size);
+        b = le64toh(f->fsprg_header->state_size);
+
+        if (a + b > f->fsprg_size)
+                return NULL;
+
+        return (uint8_t*) f->fsprg_header + a;
+}
+
+static uint64_t journal_file_tag_seqnum(JournalFile *f) {
+        uint64_t r;
+
+        assert(f);
+
+        r = le64toh(f->header->n_tags) + 1;
+        f->header->n_tags = htole64(r);
+
+        return r;
+}
+
+int journal_file_append_tag(JournalFile *f) {
+        Object *o;
+        uint64_t p;
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        if (!f->hmac_running)
+                return 0;
+
+        log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
+
+        assert(f->hmac);
+
+        r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
+        if (r < 0)
+                return r;
+
+        o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
+
+        /* Add the tag object itself, so that we can protect its
+         * header. This will exclude the actual hash value in it */
+        r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
+        if (r < 0)
+                return r;
+
+        /* Get the HMAC tag and store it in the object */
+        memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
+        f->hmac_running = false;
+
+        return 0;
+}
+
+static int journal_file_hmac_start(JournalFile *f) {
+        uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        if (f->hmac_running)
+                return 0;
+
+        /* Prepare HMAC for next cycle */
+        gcry_md_reset(f->hmac);
+        FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
+        gcry_md_setkey(f->hmac, key, sizeof(key));
+
+        f->hmac_running = true;
+
+        return 0;
+}
+
+static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
+        uint64_t t;
+
+        assert(f);
+        assert(epoch);
+        assert(f->authenticate);
+
+        if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
+            le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
+                return -ENOTSUP;
+
+        if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
+                return -ESTALE;
+
+        t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
+        t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
+
+        *epoch = t;
+        return 0;
+}
+
+static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
+        uint64_t goal, epoch;
+        int r;
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_get_epoch(f, realtime, &goal);
+        if (r < 0)
+                return r;
+
+        epoch = FSPRG_GetEpoch(fsprg_state(f));
+        if (epoch > goal)
+                return -ESTALE;
+
+        return epoch != goal;
+}
+
+static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
+        uint64_t goal, epoch;
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_get_epoch(f, realtime, &goal);
+        if (r < 0)
+                return r;
+
+        epoch = FSPRG_GetEpoch(fsprg_state(f));
+        if (epoch < goal)
+                log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
+
+        for (;;) {
+                if (epoch > goal)
+                        return -ESTALE;
+                if (epoch == goal)
+                        return 0;
+
+                FSPRG_Evolve(fsprg_state(f));
+                epoch = FSPRG_GetEpoch(fsprg_state(f));
+        }
+}
+
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_need_evolve(f, realtime);
+        if (r <= 0)
+                return 0;
+
+        r = journal_file_append_tag(f);
+        if (r < 0)
+                return r;
+
+        r = journal_file_evolve(f, realtime);
+        if (r < 0)
+                return r;
+
+        r = journal_file_hmac_start(f);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
+
+int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
+        int r;
+        Object *o;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_hmac_start(f);
+        if (r < 0)
+                return r;
+
+        r = journal_file_move_to_object(f, type, p, &o);
+        if (r < 0)
+                return r;
+
+        gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
+
+        switch (o->object.type) {
+
+        case OBJECT_DATA:
+                /* All but: hash and payload are mutable */
+                gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
+                gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
+                break;
+
+        case OBJECT_ENTRY:
+                /* All */
+                gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
+                break;
+
+        case OBJECT_FIELD_HASH_TABLE:
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_ENTRY_ARRAY:
+                /* Nothing: everything is mutable */
+                break;
+
+        case OBJECT_TAG:
+                /* All but the tag itself */
+                gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+int journal_file_hmac_put_header(JournalFile *f) {
+        int r;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = journal_file_hmac_start(f);
+        if (r < 0)
+                return r;
+
+        /* All but state+reserved, boot_id, arena_size,
+         * tail_object_offset, n_objects, n_entries, tail_seqnum,
+         * head_entry_realtime, tail_entry_realtime,
+         * tail_entry_monotonic, n_data, n_fields, header_tag */
+
+        gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
+        gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
+        gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
+        gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
+        gcry_md_write(f->hmac, &f->header->head_entry_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
+
+        return 0;
+}
+
+int journal_file_load_fsprg(JournalFile *f) {
+        int r, fd = -1;
+        char *p = NULL;
+        struct stat st;
+        FSPRGHeader *m = NULL;
+        sd_id128_t machine;
+
+        assert(f);
+
+        if (!f->authenticate)
+                return 0;
+
+        r = sd_id128_get_machine(&machine);
+        if (r < 0)
+                return r;
+
+        if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
+                     SD_ID128_FORMAT_VAL(machine)) < 0)
+                return -ENOMEM;
+
+        fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
+        if (fd < 0) {
+                log_error("Failed to open %s: %m", p);
+                r = -errno;
+                goto finish;
+        }
+
+        if (fstat(fd, &st) < 0) {
+                r = -errno;
+                goto finish;
+        }
+
+        if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
+                r = -ENODATA;
+                goto finish;
+        }
+
+        m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
+        if (m == MAP_FAILED) {
+                m = NULL;
+                r = -errno;
+                goto finish;
+        }
+
+        if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        if (m->incompatible_flags != 0) {
+                r = -EPROTONOSUPPORT;
+                goto finish;
+        }
+
+        if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
+        if ((uint64_t) st.st_size < f->fsprg_size) {
+                r = -ENODATA;
+                goto finish;
+        }
+
+        if (!sd_id128_equal(machine, m->machine_id)) {
+                r = -EHOSTDOWN;
+                goto finish;
+        }
+
+        if (le64toh(m->fsprg_start_usec) <= 0 ||
+            le64toh(m->fsprg_interval_usec) <= 0) {
+                r = -EBADMSG;
+                goto finish;
+        }
+
+        f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+        if (f->fsprg_header == MAP_FAILED) {
+                f->fsprg_header = NULL;
+                r = -errno;
+                goto finish;
+        }
+
+        r = 0;
+
+finish:
+        if (m)
+                munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
+
+        if (fd >= 0)
+                close_nointr_nofail(fd);
+
+        free(p);
+        return r;
+}
+
+int journal_file_setup_hmac(JournalFile *f) {
+        gcry_error_t e;
+
+        if (!f->authenticate)
+                return 0;
+
+        e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
+        if (e != 0)
+                return -ENOTSUP;
+
+        return 0;
+}
+
+int journal_file_append_first_tag(JournalFile *f) {
+        int r;
+        uint64_t p;
+
+        if (!f->authenticate)
+                return 0;
+
+        log_debug("Calculating first tag...");
+
+        r = journal_file_hmac_put_header(f);
+        if (r < 0)
+                return r;
+
+        p = le64toh(f->header->field_hash_table_offset);
+        if (p < offsetof(Object, hash_table.items))
+                return -EINVAL;
+        p -= offsetof(Object, hash_table.items);
+
+        r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
+        if (r < 0)
+                return r;
+
+        p = le64toh(f->header->data_hash_table_offset);
+        if (p < offsetof(Object, hash_table.items))
+                return -EINVAL;
+        p -= offsetof(Object, hash_table.items);
+
+        r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
+        if (r < 0)
+                return r;
+
+        r = journal_file_append_tag(f);
+        if (r < 0)
+                return r;
+
+        return 0;
+}
diff --git a/src/journal/journal-authenticate.h b/src/journal/journal-authenticate.h
new file mode 100644
index 0000000..c991b22
--- /dev/null
+++ b/src/journal/journal-authenticate.h
@@ -0,0 +1,35 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "journal-file.h"
+
+int journal_file_append_tag(JournalFile *f);
+int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
+int journal_file_append_first_tag(JournalFile *f);
+
+int journal_file_hmac_put_header(JournalFile *f);
+int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
+
+int journal_file_load_fsprg(JournalFile *f);
+
+int journal_file_setup_hmac(JournalFile *f);
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 7beedb4..ff439f2 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -29,6 +29,7 @@
 
 #include "journal-def.h"
 #include "journal-file.h"
+#include "journal-authenticate.h"
 #include "lookup3.h"
 #include "compress.h"
 #include "fsprg.h"
@@ -60,14 +61,6 @@
 /* n_data was the first entry we added after the initial file format design */
 #define HEADER_SIZE_MIN ALIGN64(offsetof(Header, n_data))
 
-#define ALIGN64(x) (((x) + 7ULL) & ~7ULL)
-
-#define JOURNAL_HEADER_CONTAINS(h, field) \
-        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field))
-
-static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
-static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
-
 void journal_file_close(JournalFile *f) {
         assert(f);
 
@@ -434,7 +427,7 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
         return r;
 }
 
-static int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
+int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset) {
         int r;
         uint64_t p;
         Object *tail, *o;
@@ -796,7 +789,7 @@ uint64_t journal_file_entry_n_items(Object *o) {
         return (le64toh(o->object.size) - offsetof(Object, entry.items)) / sizeof(EntryItem);
 }
 
-static uint64_t journal_file_entry_array_n_items(Object *o) {
+uint64_t journal_file_entry_array_n_items(Object *o) {
         assert(o);
         assert(o->object.type == OBJECT_ENTRY_ARRAY);
 
@@ -1823,939 +1816,6 @@ int journal_file_move_to_entry_by_realtime_for_data(
                                              ret, offset, NULL);
 }
 
-static void *fsprg_state(JournalFile *f) {
-        uint64_t a, b;
-        assert(f);
-
-        if (!f->authenticate)
-                return NULL;
-
-        a = le64toh(f->fsprg_header->header_size);
-        b = le64toh(f->fsprg_header->state_size);
-
-        if (a + b > f->fsprg_size)
-                return NULL;
-
-        return (uint8_t*) f->fsprg_header + a;
-}
-
-static uint64_t journal_file_tag_seqnum(JournalFile *f) {
-        uint64_t r;
-
-        assert(f);
-
-        r = le64toh(f->header->n_tags) + 1;
-        f->header->n_tags = htole64(r);
-
-        return r;
-}
-
-int journal_file_append_tag(JournalFile *f) {
-        Object *o;
-        uint64_t p;
-        int r;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        if (!f->hmac_running)
-                return 0;
-
-        log_debug("Writing tag for epoch %llu\n", (unsigned long long) FSPRG_GetEpoch(fsprg_state(f)));
-
-        assert(f->hmac);
-
-        r = journal_file_append_object(f, OBJECT_TAG, sizeof(struct TagObject), &o, &p);
-        if (r < 0)
-                return r;
-
-        o->tag.seqnum = htole64(journal_file_tag_seqnum(f));
-
-        /* Add the tag object itself, so that we can protect its
-         * header. This will exclude the actual hash value in it */
-        r = journal_file_hmac_put_object(f, OBJECT_TAG, p);
-        if (r < 0)
-                return r;
-
-        /* Get the HMAC tag and store it in the object */
-        memcpy(o->tag.tag, gcry_md_read(f->hmac, 0), TAG_LENGTH);
-        f->hmac_running = false;
-
-        return 0;
-}
-
-static int journal_file_hmac_start(JournalFile *f) {
-        uint8_t key[256 / 8]; /* Let's pass 256 bit from FSPRG to HMAC */
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        if (f->hmac_running)
-                return 0;
-
-        /* Prepare HMAC for next cycle */
-        gcry_md_reset(f->hmac);
-        FSPRG_GetKey(fsprg_state(f), key, sizeof(key), 0);
-        gcry_md_setkey(f->hmac, key, sizeof(key));
-
-        f->hmac_running = true;
-
-        return 0;
-}
-
-static int journal_file_get_epoch(JournalFile *f, uint64_t realtime, uint64_t *epoch) {
-        uint64_t t;
-
-        assert(f);
-        assert(epoch);
-        assert(f->authenticate);
-
-        if (le64toh(f->fsprg_header->fsprg_start_usec) == 0 ||
-            le64toh(f->fsprg_header->fsprg_interval_usec) == 0)
-                return -ENOTSUP;
-
-        if (realtime < le64toh(f->fsprg_header->fsprg_start_usec))
-                return -ESTALE;
-
-        t = realtime - le64toh(f->fsprg_header->fsprg_start_usec);
-        t = t / le64toh(f->fsprg_header->fsprg_interval_usec);
-
-        *epoch = t;
-        return 0;
-}
-
-static int journal_file_need_evolve(JournalFile *f, uint64_t realtime) {
-        uint64_t goal, epoch;
-        int r;
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = journal_file_get_epoch(f, realtime, &goal);
-        if (r < 0)
-                return r;
-
-        epoch = FSPRG_GetEpoch(fsprg_state(f));
-        if (epoch > goal)
-                return -ESTALE;
-
-        return epoch != goal;
-}
-
-static int journal_file_evolve(JournalFile *f, uint64_t realtime) {
-        uint64_t goal, epoch;
-        int r;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = journal_file_get_epoch(f, realtime, &goal);
-        if (r < 0)
-                return r;
-
-        epoch = FSPRG_GetEpoch(fsprg_state(f));
-        if (epoch < goal)
-                log_debug("Evolving FSPRG key from epoch %llu to %llu.", (unsigned long long) epoch, (unsigned long long) goal);
-
-        for (;;) {
-                if (epoch > goal)
-                        return -ESTALE;
-                if (epoch == goal)
-                        return 0;
-
-                FSPRG_Evolve(fsprg_state(f));
-                epoch = FSPRG_GetEpoch(fsprg_state(f));
-        }
-}
-
-static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime) {
-        int r;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = journal_file_need_evolve(f, realtime);
-        if (r <= 0)
-                return 0;
-
-        r = journal_file_append_tag(f);
-        if (r < 0)
-                return r;
-
-        r = journal_file_evolve(f, realtime);
-        if (r < 0)
-                return r;
-
-        r = journal_file_hmac_start(f);
-        if (r < 0)
-                return r;
-
-        return 0;
-}
-
-static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p) {
-        int r;
-        Object *o;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = journal_file_hmac_start(f);
-        if (r < 0)
-                return r;
-
-        r = journal_file_move_to_object(f, type, p, &o);
-        if (r < 0)
-                return r;
-
-        gcry_md_write(f->hmac, o, offsetof(ObjectHeader, payload));
-
-        switch (o->object.type) {
-
-        case OBJECT_DATA:
-                /* All but: hash and payload are mutable */
-                gcry_md_write(f->hmac, &o->data.hash, sizeof(o->data.hash));
-                gcry_md_write(f->hmac, o->data.payload, le64toh(o->object.size) - offsetof(DataObject, payload));
-                break;
-
-        case OBJECT_ENTRY:
-                /* All */
-                gcry_md_write(f->hmac, &o->entry.seqnum, le64toh(o->object.size) - offsetof(EntryObject, seqnum));
-                break;
-
-        case OBJECT_FIELD_HASH_TABLE:
-        case OBJECT_DATA_HASH_TABLE:
-        case OBJECT_ENTRY_ARRAY:
-                /* Nothing: everything is mutable */
-                break;
-
-        case OBJECT_TAG:
-                /* All but the tag itself */
-                gcry_md_write(f->hmac, &o->tag.seqnum, sizeof(o->tag.seqnum));
-                break;
-        default:
-                return -EINVAL;
-        }
-
-        return 0;
-}
-
-static int journal_file_hmac_put_header(JournalFile *f) {
-        int r;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = journal_file_hmac_start(f);
-        if (r < 0)
-                return r;
-
-        /* All but state+reserved, boot_id, arena_size,
-         * tail_object_offset, n_objects, n_entries, tail_seqnum,
-         * head_entry_realtime, tail_entry_realtime,
-         * tail_entry_monotonic, n_data, n_fields, header_tag */
-
-        gcry_md_write(f->hmac, f->header->signature, offsetof(Header, state) - offsetof(Header, signature));
-        gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
-        gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
-        gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
-        gcry_md_write(f->hmac, &f->header->head_entry_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
-
-        return 0;
-}
-
-static int journal_file_load_fsprg(JournalFile *f) {
-        int r, fd = -1;
-        char *p = NULL;
-        struct stat st;
-        FSPRGHeader *m = NULL;
-        sd_id128_t machine;
-
-        assert(f);
-
-        if (!f->authenticate)
-                return 0;
-
-        r = sd_id128_get_machine(&machine);
-        if (r < 0)
-                return r;
-
-        if (asprintf(&p, "/var/log/journal/" SD_ID128_FORMAT_STR "/fsprg",
-                     SD_ID128_FORMAT_VAL(machine)) < 0)
-                return -ENOMEM;
-
-        fd = open(p, O_RDWR|O_CLOEXEC|O_NOCTTY, 0600);
-        if (fd < 0) {
-                log_error("Failed to open %s: %m", p);
-                r = -errno;
-                goto finish;
-        }
-
-        if (fstat(fd, &st) < 0) {
-                r = -errno;
-                goto finish;
-        }
-
-        if (st.st_size < (off_t) sizeof(FSPRGHeader)) {
-                r = -ENODATA;
-                goto finish;
-        }
-
-        m = mmap(NULL, PAGE_ALIGN(sizeof(FSPRGHeader)), PROT_READ, MAP_SHARED, fd, 0);
-        if (m == MAP_FAILED) {
-                m = NULL;
-                r = -errno;
-                goto finish;
-        }
-
-        if (memcmp(m->signature, FSPRG_HEADER_SIGNATURE, 8) != 0) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        if (m->incompatible_flags != 0) {
-                r = -EPROTONOSUPPORT;
-                goto finish;
-        }
-
-        if (le64toh(m->header_size) < sizeof(FSPRGHeader)) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        if (le64toh(m->state_size) != FSPRG_stateinbytes(m->secpar)) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        f->fsprg_size = le64toh(m->header_size) + le64toh(m->state_size);
-        if ((uint64_t) st.st_size < f->fsprg_size) {
-                r = -ENODATA;
-                goto finish;
-        }
-
-        if (!sd_id128_equal(machine, m->machine_id)) {
-                r = -EHOSTDOWN;
-                goto finish;
-        }
-
-        if (le64toh(m->fsprg_start_usec) <= 0 ||
-            le64toh(m->fsprg_interval_usec) <= 0) {
-                r = -EBADMSG;
-                goto finish;
-        }
-
-        f->fsprg_header = mmap(NULL, PAGE_ALIGN(f->fsprg_size), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-        if (f->fsprg_header == MAP_FAILED) {
-                f->fsprg_header = NULL;
-                r = -errno;
-                goto finish;
-        }
-
-        r = 0;
-
-finish:
-        if (m)
-                munmap(m, PAGE_ALIGN(sizeof(FSPRGHeader)));
-
-        if (fd >= 0)
-                close_nointr_nofail(fd);
-
-        free(p);
-        return r;
-}
-
-static int journal_file_setup_hmac(JournalFile *f) {
-        gcry_error_t e;
-
-        if (!f->authenticate)
-                return 0;
-
-        e = gcry_md_open(&f->hmac, GCRY_MD_SHA256, GCRY_MD_FLAG_HMAC);
-        if (e != 0)
-                return -ENOTSUP;
-
-        return 0;
-}
-
-static int journal_file_append_first_tag(JournalFile *f) {
-        int r;
-        uint64_t p;
-
-        if (!f->authenticate)
-                return 0;
-
-        log_debug("Calculating first tag...");
-
-        r = journal_file_hmac_put_header(f);
-        if (r < 0)
-                return r;
-
-        p = le64toh(f->header->field_hash_table_offset);
-        if (p < offsetof(Object, hash_table.items))
-                return -EINVAL;
-        p -= offsetof(Object, hash_table.items);
-
-        r = journal_file_hmac_put_object(f, OBJECT_FIELD_HASH_TABLE, p);
-        if (r < 0)
-                return r;
-
-        p = le64toh(f->header->data_hash_table_offset);
-        if (p < offsetof(Object, hash_table.items))
-                return -EINVAL;
-        p -= offsetof(Object, hash_table.items);
-
-        r = journal_file_hmac_put_object(f, OBJECT_DATA_HASH_TABLE, p);
-        if (r < 0)
-                return r;
-
-        r = journal_file_append_tag(f);
-        if (r < 0)
-                return r;
-
-        return 0;
-}
-
-static int journal_file_object_verify(JournalFile *f, Object *o) {
-        assert(f);
-        assert(o);
-
-        /* This does various superficial tests about the length an
-         * possible field values. It does not follow any references to
-         * other objects. */
-
-        switch (o->object.type) {
-        case OBJECT_DATA:
-                if (le64toh(o->data.entry_offset) <= 0 ||
-                    le64toh(o->data.n_entries) <= 0)
-                        return -EBADMSG;
-
-                if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
-                        return -EBADMSG;
-                break;
-
-        case OBJECT_FIELD:
-                if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
-                        return -EBADMSG;
-                break;
-
-        case OBJECT_ENTRY:
-                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
-                        return -EBADMSG;
-
-                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
-                        return -EBADMSG;
-
-                if (le64toh(o->entry.seqnum) <= 0 ||
-                    le64toh(o->entry.realtime) <= 0)
-                        return -EBADMSG;
-
-                break;
-
-        case OBJECT_DATA_HASH_TABLE:
-        case OBJECT_FIELD_HASH_TABLE:
-                if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
-                        return -EBADMSG;
-
-                break;
-
-        case OBJECT_ENTRY_ARRAY:
-                if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
-                        return -EBADMSG;
-
-                break;
-
-        case OBJECT_TAG:
-                if (le64toh(o->object.size) != sizeof(TagObject))
-                        return -EBADMSG;
-                break;
-        }
-
-        return 0;
-}
-
-static void draw_progress(uint64_t p, usec_t *last_usec) {
-        unsigned n, i, j, k;
-        usec_t z, x;
-
-        if (!isatty(STDOUT_FILENO))
-                return;
-
-        z = now(CLOCK_MONOTONIC);
-        x = *last_usec;
-
-        if (x != 0 && x + 40 * USEC_PER_MSEC > z)
-                return;
-
-        *last_usec = z;
-
-        n = (3 * columns()) / 4;
-        j = (n * (unsigned) p) / 65535ULL;
-        k = n - j;
-
-        fputs("\r\x1B[?25l", stdout);
-
-        for (i = 0; i < j; i++)
-                fputs("\xe2\x96\x88", stdout);
-
-        for (i = 0; i < k; i++)
-                fputs("\xe2\x96\x91", stdout);
-
-        printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
-
-        fputs("\r\x1B[?25h", stdout);
-        fflush(stdout);
-}
-
-static void flush_progress(void) {
-        unsigned n, i;
-
-        if (!isatty(STDOUT_FILENO))
-                return;
-
-        n = (3 * columns()) / 4;
-
-        putchar('\r');
-
-        for (i = 0; i < n + 5; i++)
-                putchar(' ');
-
-        putchar('\r');
-        fflush(stdout);
-}
-
-static int write_uint64(int fd, uint64_t p) {
-        ssize_t k;
-
-        k = write(fd, &p, sizeof(p));
-        if (k < 0)
-                return -errno;
-        if (k != sizeof(p))
-                return -EIO;
-
-        return 0;
-}
-
-static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
-        uint64_t a, b;
-        int r;
-
-        assert(m);
-        assert(fd >= 0);
-
-        /* Bisection ... */
-
-        a = 0; b = n;
-        while (a < b) {
-                uint64_t c, *z;
-
-                c = (a + b) / 2;
-
-                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
-                if (r < 0)
-                        return r;
-
-                if (*z == p)
-                        return 1;
-
-                if (p < *z)
-                        b = c;
-                else
-                        a = c;
-        }
-
-        return 0;
-}
-
-int journal_file_verify(JournalFile *f, const char *key) {
-        int r;
-        Object *o;
-        uint64_t p = 0;
-        uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
-        sd_id128_t entry_boot_id;
-        bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
-        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
-        usec_t last_usec = 0;
-        int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
-        char data_path[] = "/var/tmp/journal-data-XXXXXX",
-                entry_path[] = "/var/tmp/journal-entry-XXXXXX",
-                entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
-
-        assert(f);
-
-        data_fd = mkostemp(data_path, O_CLOEXEC);
-        if (data_fd < 0) {
-                log_error("Failed to create data file: %m");
-                goto fail;
-        }
-        unlink(data_path);
-
-        entry_fd = mkostemp(entry_path, O_CLOEXEC);
-        if (entry_fd < 0) {
-                log_error("Failed to create entry file: %m");
-                goto fail;
-        }
-        unlink(entry_path);
-
-        entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
-        if (entry_array_fd < 0) {
-                log_error("Failed to create entry array file: %m");
-                goto fail;
-        }
-        unlink(entry_array_path);
-
-        /* First iteration: we go through all objects, verify the
-         * superficial structure, headers, hashes. */
-
-        r = journal_file_hmac_put_header(f);
-        if (r < 0) {
-                log_error("Failed to calculate HMAC of header.");
-                goto fail;
-        }
-
-        p = le64toh(f->header->header_size);
-        while (p != 0) {
-                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (le64toh(f->header->tail_object_offset) < p) {
-                        log_error("Invalid tail object pointer.");
-                        r = -EBADMSG;
-                        goto fail;
-                }
-
-                n_objects ++;
-
-                r = journal_file_object_verify(f, o);
-                if (r < 0) {
-                        log_error("Invalid object contents at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                r = journal_file_hmac_put_object(f, -1, p);
-                if (r < 0) {
-                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (o->object.flags & OBJECT_COMPRESSED &&
-                    !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
-                        log_error("Compressed object without compression at %llu", (unsigned long long) p);
-                        r = -EBADMSG;
-                        goto fail;
-                }
-
-                if (o->object.flags & OBJECT_COMPRESSED &&
-                    o->object.type != OBJECT_DATA) {
-                        log_error("Compressed non-data object at %llu", (unsigned long long) p);
-                        r = -EBADMSG;
-                        goto fail;
-                }
-
-                if (o->object.type == OBJECT_TAG) {
-
-                        if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
-                                log_error("Tag object without authentication at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        if (le64toh(o->tag.seqnum) != tag_seqnum) {
-                                log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                } else if (o->object.type == OBJECT_ENTRY) {
-
-                        r = write_uint64(entry_fd, p);
-                        if (r < 0)
-                                goto fail;
-
-                        if (!entry_seqnum_set &&
-                            le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
-                                log_error("Head entry sequence number incorrect");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        if (entry_seqnum_set &&
-                            entry_seqnum >= le64toh(o->entry.seqnum)) {
-                                log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        entry_seqnum = le64toh(o->entry.seqnum);
-                        entry_seqnum_set = true;
-
-                        if (entry_monotonic_set &&
-                            sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
-                            entry_monotonic > le64toh(o->entry.monotonic)) {
-                                log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        entry_monotonic = le64toh(o->entry.monotonic);
-                        entry_boot_id = o->entry.boot_id;
-                        entry_monotonic_set = true;
-
-                        if (!entry_realtime_set &&
-                            le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
-                                log_error("Head entry realtime timestamp incorrect");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        entry_realtime = le64toh(o->entry.realtime);
-                        entry_realtime_set = true;
-
-                        n_entries ++;
-                } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
-
-                        r = write_uint64(entry_array_fd, p);
-                        if (r < 0)
-                                goto fail;
-
-                        if (p == le64toh(f->header->entry_array_offset)) {
-                                if (found_main_entry_array) {
-                                        log_error("More than one main entry array at %llu", (unsigned long long) p);
-                                        r = -EBADMSG;
-                                        goto fail;
-                                }
-
-                                found_main_entry_array = true;
-                        }
-
-                        n_entry_arrays++;
-
-                } else if (o->object.type == OBJECT_DATA) {
-
-                        r = write_uint64(data_fd, p);
-                        if (r < 0)
-                                goto fail;
-
-                        n_data++;
-
-                } else if (o->object.type == OBJECT_FIELD)
-                        n_fields++;
-                else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
-                        n_data_hash_tables++;
-
-                        if (n_data_hash_tables > 1) {
-                                log_error("More than one data hash table at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
-                            le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
-                                log_error("Header fields for data hash table invalid.");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-                } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
-                        n_field_hash_tables++;
-
-                        if (n_field_hash_tables > 1) {
-                                log_error("More than one field hash table at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
-                            le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
-                                log_error("Header fields for field hash table invalid.");
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-                } else if (o->object.type >= _OBJECT_TYPE_MAX)
-                        n_weird ++;
-
-                if (p == le64toh(f->header->tail_object_offset))
-                        p = 0;
-                else
-                        p = p + ALIGN64(le64toh(o->object.size));
-        }
-
-        if (n_objects != le64toh(f->header->n_objects)) {
-                log_error("Object number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (n_entries != le64toh(f->header->n_entries)) {
-                log_error("Entry number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
-            n_data != le64toh(f->header->n_data)) {
-                log_error("Data number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
-            n_fields != le64toh(f->header->n_fields)) {
-                log_error("Field number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
-            tag_seqnum != le64toh(f->header->n_tags)) {
-                log_error("Tag number mismatch");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (n_data_hash_tables != 1) {
-                log_error("Missing data hash table");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (n_field_hash_tables != 1) {
-                log_error("Missing field hash table");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (!found_main_entry_array) {
-                log_error("Missing entry array");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (entry_seqnum_set &&
-            entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
-                log_error("Invalid tail seqnum");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (entry_monotonic_set &&
-            (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
-             entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
-                log_error("Invalid tail monotonic timestamp");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
-                log_error("Invalid tail realtime timestamp");
-                r = -EBADMSG;
-                goto fail;
-        }
-
-        /* Second iteration: we go through all objects again, this
-         * time verify all pointers. */
-
-        p = le64toh(f->header->header_size);
-        while (p != 0) {
-                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (o->object.type == OBJECT_ENTRY_ARRAY) {
-                        uint64_t i = 0, n;
-
-                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
-                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
-                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
-                                r = -EBADMSG;
-                                goto fail;
-                        }
-
-                        n = journal_file_entry_array_n_items(o);
-                        for (i = 0; i < n; i++) {
-                                if (le64toh(o->entry_array.items[i]) != 0 &&
-                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
-
-                                        log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
-                                        r = -EBADMSG;
-                                        goto fail;
-                                }
-                        }
-
-                }
-
-                r = journal_file_move_to_object(f, -1, p, &o);
-                if (r < 0) {
-                        log_error("Invalid object at %llu", (unsigned long long) p);
-                        goto fail;
-                }
-
-                if (p == le64toh(f->header->tail_object_offset))
-                        p = 0;
-                else
-                        p = p + ALIGN64(le64toh(o->object.size));
-        }
-
-        flush_progress();
-
-        mmap_cache_close_fd(f->mmap, data_fd);
-        mmap_cache_close_fd(f->mmap, entry_fd);
-        mmap_cache_close_fd(f->mmap, entry_array_fd);
-
-        close_nointr_nofail(data_fd);
-        close_nointr_nofail(entry_fd);
-        close_nointr_nofail(entry_array_fd);
-
-        return 0;
-
-fail:
-        flush_progress();
-
-        log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
-                  f->path,
-                  (unsigned long long) p,
-                  (unsigned long long) f->last_stat.st_size,
-                  (unsigned long long) (100 * p / f->last_stat.st_size));
-
-        if (data_fd >= 0) {
-                mmap_cache_close_fd(f->mmap, data_fd);
-                close_nointr_nofail(data_fd);
-        }
-
-        if (entry_fd >= 0) {
-                mmap_cache_close_fd(f->mmap, entry_fd);
-                close_nointr_nofail(entry_fd);
-        }
-
-        if (entry_array_fd >= 0) {
-                mmap_cache_close_fd(f->mmap, entry_array_fd);
-                close_nointr_nofail(entry_array_fd);
-        }
-
-        return r;
-}
-
 void journal_file_dump(JournalFile *f) {
         Object *o;
         int r;
@@ -3139,203 +2199,6 @@ int journal_file_open_reliably(
         return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
 }
 
-struct vacuum_info {
-        off_t usage;
-        char *filename;
-
-        uint64_t realtime;
-        sd_id128_t seqnum_id;
-        uint64_t seqnum;
-
-        bool have_seqnum;
-};
-
-static int vacuum_compare(const void *_a, const void *_b) {
-        const struct vacuum_info *a, *b;
-
-        a = _a;
-        b = _b;
-
-        if (a->have_seqnum && b->have_seqnum &&
-            sd_id128_equal(a->seqnum_id, b->seqnum_id)) {
-                if (a->seqnum < b->seqnum)
-                        return -1;
-                else if (a->seqnum > b->seqnum)
-                        return 1;
-                else
-                        return 0;
-        }
-
-        if (a->realtime < b->realtime)
-                return -1;
-        else if (a->realtime > b->realtime)
-                return 1;
-        else if (a->have_seqnum && b->have_seqnum)
-                return memcmp(&a->seqnum_id, &b->seqnum_id, 16);
-        else
-                return strcmp(a->filename, b->filename);
-}
-
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
-        DIR *d;
-        int r = 0;
-        struct vacuum_info *list = NULL;
-        unsigned n_list = 0, n_allocated = 0, i;
-        uint64_t sum = 0;
-
-        assert(directory);
-
-        if (max_use <= 0)
-                return 0;
-
-        d = opendir(directory);
-        if (!d)
-                return -errno;
-
-        for (;;) {
-                int k;
-                struct dirent buf, *de;
-                size_t q;
-                struct stat st;
-                char *p;
-                unsigned long long seqnum = 0, realtime;
-                sd_id128_t seqnum_id;
-                bool have_seqnum;
-
-                k = readdir_r(d, &buf, &de);
-                if (k != 0) {
-                        r = -k;
-                        goto finish;
-                }
-
-                if (!de)
-                        break;
-
-                if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0)
-                        continue;
-
-                if (!S_ISREG(st.st_mode))
-                        continue;
-
-                q = strlen(de->d_name);
-
-                if (endswith(de->d_name, ".journal")) {
-
-                        /* Vacuum archived files */
-
-                        if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
-                                continue;
-
-                        if (de->d_name[q-8-16-1] != '-' ||
-                            de->d_name[q-8-16-1-16-1] != '-' ||
-                            de->d_name[q-8-16-1-16-1-32-1] != '@')
-                                continue;
-
-                        p = strdup(de->d_name);
-                        if (!p) {
-                                r = -ENOMEM;
-                                goto finish;
-                        }
-
-                        de->d_name[q-8-16-1-16-1] = 0;
-                        if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
-                                free(p);
-                                continue;
-                        }
-
-                        if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
-                                free(p);
-                                continue;
-                        }
-
-                        have_seqnum = true;
-
-                } else if (endswith(de->d_name, ".journal~")) {
-                        unsigned long long tmp;
-
-                        /* Vacuum corrupted files */
-
-                        if (q < 1 + 16 + 1 + 16 + 8 + 1)
-                                continue;
-
-                        if (de->d_name[q-1-8-16-1] != '-' ||
-                            de->d_name[q-1-8-16-1-16-1] != '@')
-                                continue;
-
-                        p = strdup(de->d_name);
-                        if (!p) {
-                                r = -ENOMEM;
-                                goto finish;
-                        }
-
-                        if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
-                                free(p);
-                                continue;
-                        }
-
-                        have_seqnum = false;
-                } else
-                        continue;
-
-                if (n_list >= n_allocated) {
-                        struct vacuum_info *j;
-
-                        n_allocated = MAX(n_allocated * 2U, 8U);
-                        j = realloc(list, n_allocated * sizeof(struct vacuum_info));
-                        if (!j) {
-                                free(p);
-                                r = -ENOMEM;
-                                goto finish;
-                        }
-
-                        list = j;
-                }
-
-                list[n_list].filename = p;
-                list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
-                list[n_list].seqnum = seqnum;
-                list[n_list].realtime = realtime;
-                list[n_list].seqnum_id = seqnum_id;
-                list[n_list].have_seqnum = have_seqnum;
-
-                sum += list[n_list].usage;
-
-                n_list ++;
-        }
-
-        if (n_list > 0)
-                qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
-
-        for(i = 0; i < n_list; i++) {
-                struct statvfs ss;
-
-                if (fstatvfs(dirfd(d), &ss) < 0) {
-                        r = -errno;
-                        goto finish;
-                }
-
-                if (sum <= max_use &&
-                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
-                        break;
-
-                if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
-                        log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
-                        sum -= list[i].usage;
-                } else if (errno != ENOENT)
-                        log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
-        }
-
-finish:
-        for (i = 0; i < n_list; i++)
-                free(list[i].filename);
-
-        free(list);
-
-        if (d)
-                closedir(d);
-
-        return r;
-}
 
 int journal_file_copy_entry(JournalFile *from, JournalFile *to, Object *o, uint64_t p, uint64_t *seqnum, Object **ret, uint64_t *offset) {
         uint64_t i, n;
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index 0305c97..aba3d9a 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -107,10 +107,17 @@ int journal_file_open_reliably(
                 JournalFile *template,
                 JournalFile **ret);
 
+#define ALIGN64(x) (((x) + 7ULL) & ~7ULL) /* round x up to the next multiple of 8 */
+
+#define JOURNAL_HEADER_CONTAINS(h, field) \
+        (le64toh((h)->header_size) >= offsetof(Header, field) + sizeof((h)->field)) /* true if h's recorded header_size reaches past the end of 'field' */
+
 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret);
 
 uint64_t journal_file_entry_n_items(Object *o);
+uint64_t journal_file_entry_array_n_items(Object *o);
 
+int journal_file_append_object(JournalFile *f, int type, uint64_t size, Object **ret, uint64_t *offset);
 int journal_file_append_entry(JournalFile *f, const dual_timestamp *ts, const struct iovec iovec[], unsigned n_iovec, uint64_t *seqno, Object **ret, uint64_t *offset);
 
 int journal_file_find_data_object(JournalFile *f, const void *data, uint64_t size, Object **ret, uint64_t *offset);
@@ -138,8 +145,6 @@ void journal_file_print_header(JournalFile *f);
 
 int journal_file_rotate(JournalFile **f, bool compress, bool authenticate);
 
-int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free);
-
 void journal_file_post_change(JournalFile *f);
 
 void journal_default_metrics(JournalMetrics *m, int fd);
@@ -148,7 +153,3 @@ int journal_file_get_cutoff_realtime_usec(JournalFile *f, usec_t *from, usec_t *
 int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec_t *from, usec_t *to);
 
 bool journal_file_rotate_suggested(JournalFile *f);
-
-int journal_file_append_tag(JournalFile *f);
-
-int journal_file_verify(JournalFile *f, const char *key);
diff --git a/src/journal/journal-vacuum.c b/src/journal/journal-vacuum.c
new file mode 100644
index 0000000..ff2cd33
--- /dev/null
+++ b/src/journal/journal-vacuum.c
@@ -0,0 +1,230 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2011 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <sys/types.h>
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/statvfs.h>
+#include <unistd.h>
+
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-vacuum.h"
+#include "sd-id128.h"
+#include "util.h"
+
+struct vacuum_info { /* one deletion candidate collected by journal_directory_vacuum() */
+        off_t usage; /* 512 * st_blocks of the file, in bytes */
+        char *filename; /* strdup()ed copy of the directory entry name */
+
+        uint64_t realtime; /* realtime timestamp parsed from the file name */
+        sd_id128_t seqnum_id; /* parsed from the file name; only filled in when have_seqnum is true */
+        uint64_t seqnum; /* sequence number parsed from the file name; 0 when have_seqnum is false */
+
+        bool have_seqnum; /* true for "*.journal" names, false for "*.journal~" names */
+};
+
+static int vacuum_compare(const void *_a, const void *_b) {
+        const struct vacuum_info *x = _a, *y = _b;
+        bool both_seqnum;
+
+        /* qsort() comparator for struct vacuum_info, sorting in ascending order of the keys below. */
+        both_seqnum = x->have_seqnum && y->have_seqnum;
+
+        /* Both sides carry a sequence number from the same seqnum ID: the sequence number alone decides. */
+        if (both_seqnum && sd_id128_equal(x->seqnum_id, y->seqnum_id)) {
+                if (x->seqnum != y->seqnum)
+                        return x->seqnum < y->seqnum ? -1 : 1;
+
+                return 0;
+        }
+
+        /* Otherwise the realtime timestamp decides ... */
+        if (x->realtime != y->realtime)
+                return x->realtime < y->realtime ? -1 : 1;
+
+        /* ... and ties are broken on the raw seqnum ID bytes, or on the file name if a side has no seqnum. */
+        if (both_seqnum)
+                return memcmp(&x->seqnum_id, &y->seqnum_id, 16);
+
+        return strcmp(x->filename, y->filename);
+}
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free) {
+        DIR *d;
+        int r = 0;
+        struct vacuum_info *list = NULL;
+        unsigned n_list = 0, n_allocated = 0, i;
+        uint64_t sum = 0; /* summed usage of all collected candidates, in bytes */
+
+        assert(directory);
+        /* Deletes archived/corrupted journal files of 'directory' in vacuum_compare() order until usage is <= max_use and >= min_free bytes are available. Returns 0 or a negative errno-style code. */
+        if (max_use <= 0) /* max_use == 0: nothing is ever deleted, regardless of min_free */
+                return 0;
+
+        d = opendir(directory);
+        if (!d)
+                return -errno;
+
+        for (;;) {
+                int k;
+                struct dirent buf, *de;
+                size_t q;
+                struct stat st;
+                char *p;
+                unsigned long long seqnum = 0, realtime;
+                sd_id128_t seqnum_id;
+                bool have_seqnum;
+
+                k = readdir_r(d, &buf, &de); /* k is a positive error code on failure; de is NULL at the end of the directory */
+                if (k != 0) {
+                        r = -k;
+                        goto finish;
+                }
+
+                if (!de)
+                        break;
+
+                if (fstatat(dirfd(d), de->d_name, &st, AT_SYMLINK_NOFOLLOW) < 0) /* entries we cannot stat are skipped silently */
+                        continue;
+
+                if (!S_ISREG(st.st_mode))
+                        continue;
+
+                q = strlen(de->d_name);
+
+                if (endswith(de->d_name, ".journal")) {
+
+                        /* Vacuum archived files, named <prefix>@<32 char seqnum ID>-<16 hex seqnum>-<16 hex realtime>.journal */
+
+                        if (q < 1 + 32 + 1 + 16 + 1 + 16 + 8)
+                                continue;
+
+                        if (de->d_name[q-8-16-1] != '-' ||
+                            de->d_name[q-8-16-1-16-1] != '-' ||
+                            de->d_name[q-8-16-1-16-1-32-1] != '@')
+                                continue;
+
+                        p = strdup(de->d_name); /* keep the unmodified name; d_name itself is cut apart below */
+                        if (!p) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        de->d_name[q-8-16-1-16-1] = 0; /* terminate right after the seqnum ID so that it can be parsed */
+                        if (sd_id128_from_string(de->d_name + q-8-16-1-16-1-32, &seqnum_id) < 0) {
+                                free(p);
+                                continue;
+                        }
+
+                        if (sscanf(de->d_name + q-8-16-1-16, "%16llx-%16llx.journal", &seqnum, &realtime) != 2) {
+                                free(p);
+                                continue;
+                        }
+
+                        have_seqnum = true;
+
+                } else if (endswith(de->d_name, ".journal~")) {
+                        unsigned long long tmp;
+
+                        /* Vacuum corrupted files, named <prefix>@<16 hex realtime>-<16 hex value that is parsed but ignored>.journal~ */
+
+                        if (q < 1 + 16 + 1 + 16 + 8 + 1)
+                                continue;
+
+                        if (de->d_name[q-1-8-16-1] != '-' ||
+                            de->d_name[q-1-8-16-1-16-1] != '@')
+                                continue;
+
+                        p = strdup(de->d_name);
+                        if (!p) {
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        if (sscanf(de->d_name + q-1-8-16-1-16, "%16llx-%16llx.journal~", &realtime, &tmp) != 2) {
+                                free(p);
+                                continue;
+                        }
+
+                        have_seqnum = false; /* seqnum stays 0 and seqnum_id is left unset for these files */
+                } else
+                        continue;
+
+                if (n_list >= n_allocated) {
+                        struct vacuum_info *j;
+
+                        n_allocated = MAX(n_allocated * 2U, 8U); /* double the array, starting with 8 slots */
+                        j = realloc(list, n_allocated * sizeof(struct vacuum_info));
+                        if (!j) {
+                                free(p);
+                                r = -ENOMEM;
+                                goto finish;
+                        }
+
+                        list = j;
+                }
+
+                list[n_list].filename = p; /* ownership of p moves to the list; released at finish */
+                list[n_list].usage = 512UL * (uint64_t) st.st_blocks;
+                list[n_list].seqnum = seqnum;
+                list[n_list].realtime = realtime;
+                list[n_list].seqnum_id = seqnum_id;
+                list[n_list].have_seqnum = have_seqnum;
+
+                sum += list[n_list].usage;
+
+                n_list ++;
+        }
+
+        if (n_list > 0)
+                qsort(list, n_list, sizeof(struct vacuum_info), vacuum_compare);
+
+        for(i = 0; i < n_list; i++) {
+                struct statvfs ss;
+
+                if (fstatvfs(dirfd(d), &ss) < 0) { /* free space is queried again on every iteration */
+                        r = -errno;
+                        goto finish;
+                }
+
+                if (sum <= max_use &&
+                    (uint64_t) ss.f_bavail * (uint64_t) ss.f_bsize >= min_free)
+                        break; /* both limits are satisfied, stop deleting */
+
+                if (unlinkat(dirfd(d), list[i].filename, 0) >= 0) {
+                        log_info("Deleted archived journal %s/%s.", directory, list[i].filename);
+                        sum -= list[i].usage;
+                } else if (errno != ENOENT) /* failures are only logged; sum is not reduced for files that could not be removed */
+                        log_warning("Failed to delete %s/%s: %m", directory, list[i].filename);
+        }
+
+finish:
+        for (i = 0; i < n_list; i++) /* n_list only counts fully filled-in slots, also on the error paths */
+                free(list[i].filename);
+
+        free(list);
+
+        if (d)
+                closedir(d);
+
+        return r;
+}
diff --git a/src/journal/journal-vacuum.h b/src/journal/journal-vacuum.h
new file mode 100644
index 0000000..9841d72
--- /dev/null
+++ b/src/journal/journal-vacuum.h
@@ -0,0 +1,26 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2011 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+int journal_directory_vacuum(const char *directory, uint64_t max_use, uint64_t min_free);
diff --git a/src/journal/journal-verify.c b/src/journal/journal-verify.c
new file mode 100644
index 0000000..f3182e8
--- /dev/null
+++ b/src/journal/journal-verify.c
@@ -0,0 +1,558 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#include "util.h"
+#include "macro.h"
+#include "journal-def.h"
+#include "journal-file.h"
+#include "journal-authenticate.h"
+#include "journal-verify.h"
+
+static int journal_file_object_verify(JournalFile *f, Object *o) {
+        assert(f);
+        assert(o);
+
+        /* This does various superficial tests about the length and possible field values of o and returns
+         * -EBADMSG if one fails, 0 otherwise. It does not follow any references to other objects. The size
+         * checks are written so that the unsigned arithmetic cannot wrap around for undersized objects. */
+
+        switch (o->object.type) {
+        case OBJECT_DATA:
+                if (le64toh(o->data.entry_offset) <= 0 ||
+                    le64toh(o->data.n_entries) <= 0)
+                        return -EBADMSG;
+
+                if (le64toh(o->object.size) <= offsetof(DataObject, payload)) /* payload must not be empty */
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_FIELD:
+                if (le64toh(o->object.size) <= offsetof(FieldObject, payload)) /* payload must not be empty */
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_ENTRY:
+                if (le64toh(o->object.size) < offsetof(EntryObject, items) + sizeof(EntryItem)) /* at least one item */
+                        return -EBADMSG;
+
+                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+                        return -EBADMSG;
+
+                if (le64toh(o->entry.seqnum) <= 0 ||
+                    le64toh(o->entry.realtime) <= 0)
+                        return -EBADMSG;
+
+                break;
+
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_FIELD_HASH_TABLE:
+                if (le64toh(o->object.size) < offsetof(HashTableObject, items) ||
+                    (le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_ENTRY_ARRAY:
+                if (le64toh(o->object.size) < offsetof(EntryArrayObject, items) ||
+                    (le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_TAG:
+                if (le64toh(o->object.size) != sizeof(TagObject))
+                        return -EBADMSG;
+                break;
+        }
+
+        return 0;
+}
+
+static void draw_progress(uint64_t p, usec_t *last_usec) {
+        unsigned width, filled, blank;
+        usec_t t, previous;
+
+        /* Paints a progress bar on stdout, where p == 65535 means 100%. No-op unless stdout is a TTY; repaints at most every 40ms. */
+        if (!isatty(STDOUT_FILENO))
+                return;
+
+        t = now(CLOCK_MONOTONIC);
+        previous = *last_usec;
+        if (previous != 0 && previous + 40 * USEC_PER_MSEC > t)
+                return;
+
+        *last_usec = t;
+
+        width = (3 * columns()) / 4; /* the bar takes three quarters of the terminal width */
+        filled = (width * (unsigned) p) / 65535ULL;
+        blank = width - filled;
+
+        fputs("\r\x1B[?25l", stdout); /* back to column 0, hide the cursor */
+
+        for (; filled > 0; filled--)
+                fputs("\xe2\x96\x88", stdout); /* UTF-8 for U+2588 FULL BLOCK */
+
+        for (; blank > 0; blank--)
+                fputs("\xe2\x96\x91", stdout); /* UTF-8 for U+2591 LIGHT SHADE */
+
+        printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
+
+        fputs("\r\x1B[?25h", stdout); /* back to column 0, show the cursor again */
+        fflush(stdout);
+}
+
+static void flush_progress(void) {
+        unsigned remaining;
+
+        /* Wipes the line painted by draw_progress(): the bar width plus 5 more columns are overwritten with blanks. */
+        if (!isatty(STDOUT_FILENO))
+                return;
+
+        remaining = (3 * columns()) / 4 + 5;
+
+        putchar('\r');
+        while (remaining-- > 0)
+                putchar(' ');
+
+        putchar('\r');
+        fflush(stdout);
+}
+
+static int write_uint64(int fd, uint64_t p) {
+        ssize_t written;
+
+        /* Writes the raw bytes of p to fd. Returns 0 on success,
+         * -errno if write() fails and -EIO on a short write. */
+        written = write(fd, &p, sizeof(p));
+        if (written < 0)
+                return -errno;
+
+        return (size_t) written == sizeof(p) ? 0 : -EIO;
+}
+
+static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
+        uint64_t a, b;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+
+        /* Bisection over the n uint64_t values stored in fd, which must be in ascending order. Returns 1 if p is among them, 0 if not, < 0 if mapping fails. */
+
+        a = 0; b = n;
+        while (a < b) {
+                uint64_t c, *z;
+
+                c = (a + b) / 2;
+
+                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
+                if (r < 0)
+                        return r;
+
+                if (*z == p)
+                        return 1;
+
+                if (p < *z)
+                        b = c;
+                else
+                        a = c + 1; /* slot c is ruled out; keeping it in range would spin forever once b == a + 1 */
+        }
+
+        return 0;
+}
+
+int journal_file_verify(JournalFile *f, const char *key) {
+        int r;
+        Object *o;
+        uint64_t p = 0;
+        uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
+        sd_id128_t entry_boot_id;
+        bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
+        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
+        usec_t last_usec = 0;
+        int data_fd = -1, entry_fd = -1, entry_array_fd = -1; /* scratch files, each collecting the offsets of all objects of one type */
+        char data_path[] = "/var/tmp/journal-data-XXXXXX",
+                entry_path[] = "/var/tmp/journal-entry-XXXXXX",
+                entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
+
+        assert(f);
+
+        /* Checks the structural consistency of f in two passes over all
+         * objects. Returns 0 if the file looks fine, and a negative
+         * errno-style error otherwise. 'key' is not used here. */
+
+        data_fd = mkostemp(data_path, O_CLOEXEC);
+        if (data_fd < 0) {
+                r = -errno; /* the fail path returns r, so it must be set before jumping there */
+                log_error("Failed to create data file: %m");
+                goto fail;
+        }
+        unlink(data_path);
+
+        entry_fd = mkostemp(entry_path, O_CLOEXEC);
+        if (entry_fd < 0) {
+                r = -errno;
+                log_error("Failed to create entry file: %m");
+                goto fail;
+        }
+        unlink(entry_path);
+
+        entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
+        if (entry_array_fd < 0) {
+                r = -errno;
+                log_error("Failed to create entry array file: %m");
+                goto fail;
+        }
+        unlink(entry_array_path);
+
+        /* First iteration: we go through all objects, verify the
+         * superficial structure, headers, hashes. */
+
+        r = journal_file_hmac_put_header(f);
+        if (r < 0) {
+                log_error("Failed to calculate HMAC of header.");
+                goto fail;
+        }
+
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec); /* first pass fills the lower half of the bar */
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (le64toh(f->header->tail_object_offset) < p) {
+                        log_error("Invalid tail object pointer.");
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                n_objects ++;
+
+                r = journal_file_object_verify(f, o);
+                if (r < 0) {
+                        log_error("Invalid object contents at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                r = journal_file_hmac_put_object(f, -1, p);
+                if (r < 0) {
+                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.flags & OBJECT_COMPRESSED &&
+                    !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
+                        log_error("Compressed object without compression at %llu", (unsigned long long) p);
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                if (o->object.flags & OBJECT_COMPRESSED &&
+                    o->object.type != OBJECT_DATA) {
+                        log_error("Compressed non-data object at %llu", (unsigned long long) p);
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_TAG) {
+                        if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
+                                log_error("Tag object without authentication at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(o->tag.seqnum) != tag_seqnum) {
+                                log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        tag_seqnum++; /* counts the tags seen; compared against the header's n_tags after the loop */
+                } else if (o->object.type == OBJECT_ENTRY) {
+                        r = write_uint64(entry_fd, p);
+                        if (r < 0)
+                                goto fail;
+
+                        if (!entry_seqnum_set &&
+                            le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
+                                log_error("Head entry sequence number incorrect");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (entry_seqnum_set &&
+                            entry_seqnum >= le64toh(o->entry.seqnum)) {
+                                log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_seqnum = le64toh(o->entry.seqnum);
+                        entry_seqnum_set = true;
+
+                        if (entry_monotonic_set &&
+                            sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
+                            entry_monotonic > le64toh(o->entry.monotonic)) {
+                                log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_monotonic = le64toh(o->entry.monotonic);
+                        entry_boot_id = o->entry.boot_id;
+                        entry_monotonic_set = true;
+
+                        if (!entry_realtime_set &&
+                            le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
+                                log_error("Head entry realtime timestamp incorrect");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_realtime = le64toh(o->entry.realtime);
+                        entry_realtime_set = true;
+
+                        n_entries ++;
+                } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
+                        r = write_uint64(entry_array_fd, p);
+                        if (r < 0)
+                                goto fail;
+
+                        if (p == le64toh(f->header->entry_array_offset)) {
+                                if (found_main_entry_array) {
+                                        log_error("More than one main entry array at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+
+                                found_main_entry_array = true;
+                        }
+
+                        n_entry_arrays++;
+                } else if (o->object.type == OBJECT_DATA) {
+                        r = write_uint64(data_fd, p);
+                        if (r < 0)
+                                goto fail;
+
+                        n_data++;
+                } else if (o->object.type == OBJECT_FIELD)
+                        n_fields++;
+                else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
+                        n_data_hash_tables++;
+
+                        if (n_data_hash_tables > 1) {
+                                log_error("More than one data hash table at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+                            le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+                                log_error("Header fields for data hash table invalid.");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
+                        n_field_hash_tables++;
+
+                        if (n_field_hash_tables > 1) {
+                                log_error("More than one field hash table at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+                            le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+                                log_error("Header fields for field hash table invalid.");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                } else if (o->object.type >= _OBJECT_TYPE_MAX)
+                        n_weird ++;
+
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0; /* the tail object was just handled: leave the loop */
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
+
+        if (n_objects != le64toh(f->header->n_objects)) {
+                log_error("Object number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_entries != le64toh(f->header->n_entries)) {
+                log_error("Entry number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+            n_data != le64toh(f->header->n_data)) {
+                log_error("Data number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+            n_fields != le64toh(f->header->n_fields)) {
+                log_error("Field number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+            tag_seqnum != le64toh(f->header->n_tags)) {
+                log_error("Tag number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_data_hash_tables != 1) {
+                log_error("Missing data hash table");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_field_hash_tables != 1) {
+                log_error("Missing field hash table");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (!found_main_entry_array) {
+                log_error("Missing entry array");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_seqnum_set &&
+            entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
+                log_error("Invalid tail seqnum");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_monotonic_set &&
+            (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
+             entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
+                log_error("Invalid tail monotonic timestamp");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
+                log_error("Invalid tail realtime timestamp");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        /* Second iteration: we go through all objects again, this
+         * time verify all pointers. */
+
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec); /* second pass fills the upper half of the bar */
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_ENTRY_ARRAY) {
+                        uint64_t i = 0, n;
+
+                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
+                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
+                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        n = journal_file_entry_array_n_items(o);
+                        for (i = 0; i < n; i++) {
+                                if (le64toh(o->entry_array.items[i]) != 0 &&
+                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
+                                        log_error("Entry array points to invalid entry at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+                        }
+                }
+
+                r = journal_file_move_to_object(f, -1, p, &o); /* looked up again before its size is read; presumably the lookups above may invalidate o - TODO confirm */
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
+
+        flush_progress();
+
+        mmap_cache_close_fd(f->mmap, data_fd);
+        mmap_cache_close_fd(f->mmap, entry_fd);
+        mmap_cache_close_fd(f->mmap, entry_array_fd);
+
+        close_nointr_nofail(data_fd);
+        close_nointr_nofail(entry_fd);
+        close_nointr_nofail(entry_array_fd);
+
+        return 0;
+
+fail:
+        flush_progress();
+
+        log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
+                  f->path,
+                  (unsigned long long) p,
+                  (unsigned long long) f->last_stat.st_size,
+                  (unsigned long long) (100 * p / f->last_stat.st_size));
+
+        if (data_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, data_fd);
+                close_nointr_nofail(data_fd);
+        }
+
+        if (entry_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_fd);
+                close_nointr_nofail(entry_fd);
+        }
+
+        if (entry_array_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_array_fd);
+                close_nointr_nofail(entry_array_fd);
+        }
+
+        return r;
+}
diff --git a/src/journal/journal-verify.h b/src/journal/journal-verify.h
new file mode 100644
index 0000000..3ebdd5e
--- /dev/null
+++ b/src/journal/journal-verify.h
@@ -0,0 +1,26 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2011 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include "journal-file.h"
+
+int journal_file_verify(JournalFile *f, const char *key);
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
index 8e09ff1..a70de06 100644
--- a/src/journal/journalctl.c
+++ b/src/journal/journalctl.c
@@ -43,6 +43,7 @@
 #include "journal-internal.h"
 #include "fsprg.h"
 #include "journal-def.h"
+#include "journal-verify.h"
 
 #define DEFAULT_FSPRG_INTERVAL_USEC (15*USEC_PER_MINUTE)
 
diff --git a/src/journal/journald.c b/src/journal/journald.c
index 384ed90..d431953 100644
--- a/src/journal/journald.c
+++ b/src/journal/journald.c
@@ -47,6 +47,7 @@
 #include "list.h"
 #include "journal-rate-limit.h"
 #include "journal-internal.h"
+#include "journal-vacuum.h"
 #include "conf-parser.h"
 #include "journald.h"
 #include "virt.h"
diff --git a/src/journal/test-journal-verify.c b/src/journal/test-journal-verify.c
new file mode 100644
index 0000000..bada498
--- /dev/null
+++ b/src/journal/test-journal-verify.c
@@ -0,0 +1,78 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <stdio.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "util.h"
+#include "log.h"
+#include "journal-file.h"
+#include "journal-verify.h"
+
+#define N_ENTRIES 6000
+#define RANDOM_RANGE 77
+
+int main(int argc, char *argv[]) {
+        char t[] = "/tmp/journal-XXXXXX";
+        unsigned n;
+        JournalFile *f;
+        /* Writes N_ENTRIES small entries to a new journal file, then checks that journal_file_verify() accepts it */
+        log_set_max_level(LOG_DEBUG);
+
+        /* Work inside a private scratch directory; it is handed to rm_rf_dangerous() at the end */
+        assert_se(mkdtemp(t));
+        assert_se(chdir(t) >= 0);
+
+        log_info("Generating...");
+        assert_se(journal_file_open("test.journal", O_RDWR|O_CREAT, 0666, true, true, NULL, NULL, NULL, &f) == 0);
+
+        for (n = 0; n < N_ENTRIES; n++) {
+                struct iovec iovec;
+                struct dual_timestamp ts;
+                char *test;
+
+                dual_timestamp_get(&ts);
+
+                /* asprintf() returns -1 on failure, which is non-zero, so
+                 * the result has to be compared explicitly */
+                assert_se(asprintf(&test, "RANDOM=%lu", (unsigned long) (random() % RANDOM_RANGE)) >= 0);
+                iovec.iov_base = (void*) test;
+                iovec.iov_len = strlen(test);
+                assert_se(journal_file_append_entry(f, &ts, &iovec, 1, NULL, NULL, NULL) == 0);
+
+                free(test);
+        }
+
+        journal_file_close(f);
+
+        /* Reopen the freshly written file read-only and run the verifier on it */
+        log_info("Verifying...");
+        assert_se(journal_file_open("test.journal", O_RDONLY, 0666, false, false, NULL, NULL, NULL, &f) == 0);
+        assert_se(journal_file_verify(f, NULL) >= 0);
+        journal_file_close(f);
+
+        log_info("Exiting...");
+
+        assert_se(rm_rf_dangerous(t, false, true, false) >= 0);
+
+        return 0;
+}
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
index 2fd19a7..05bb2ea 100644
--- a/src/journal/test-journal.c
+++ b/src/journal/test-journal.c
@@ -24,8 +24,10 @@
 
 #include <systemd/sd-journal.h>
 
-#include "journal-file.h"
 #include "log.h"
+#include "journal-file.h"
+#include "journal-authenticate.h"
+#include "journal-vacuum.h"
 
 int main(int argc, char *argv[]) {
         dual_timestamp ts;

commit f65425cbc4385b4f5b20ded641cbb07ad6a0938f
Author: Lennart Poettering <lennart at poettering.net>
Date:   Thu Aug 16 01:20:32 2012 +0200

    journal: add superficial structure verifier

diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 709e15e..7beedb4 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -299,6 +299,8 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
         if (r != 0)
                 return -r;
 
+        mmap_cache_close_fd_range(f->mmap, f->fd, old_size);
+
         if (fstat(f->fd, &f->last_stat) < 0)
                 return -errno;
 
@@ -2335,18 +2337,86 @@ static void flush_progress(void) {
         fflush(stdout);
 }
 
+static int write_uint64(int fd, uint64_t p) {
+        ssize_t written;
+
+        /* Stores the raw in-memory bytes of p with a single write(): yields
+         * 0 on success, -errno if write() failed, and -EIO if fewer than
+         * sizeof(p) bytes were written. */
+        written = write(fd, &p, sizeof(p));
+        if (written < 0)
+                return -errno;
+        return written == (ssize_t) sizeof(p) ? 0 : -EIO;
+}
+
+static int contains_uint64(MMapCache *m, int fd, uint64_t n, uint64_t p) {
+        uint64_t a, b;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+
+        /* Bisection over the n uint64_t values stored in fd, which must be in ascending
+         * order: returns 1 if p is among them, 0 if not, < 0 if mmap_cache_get() fails. */
+        a = 0; b = n;
+        while (a < b) {
+                uint64_t c, *z;
+
+                c = a + (b - a) / 2;
+
+                r = mmap_cache_get(m, fd, PROT_READ, 0, c * sizeof(uint64_t), sizeof(uint64_t), (void **) &z);
+                if (r < 0)
+                        return r;
+
+                if (*z == p)
+                        return 1;
+
+                if (p < *z)
+                        b = c;
+                else
+                        a = c + 1; /* slot c is ruled out; plain "a = c" spins forever once b == a + 1 */
+        }
+
+        return 0;
+}
+
 int journal_file_verify(JournalFile *f, const char *key) {
         int r;
         Object *o;
-        uint64_t p = 0, q = 0, e;
+        uint64_t p = 0;
         uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
         sd_id128_t entry_boot_id;
         bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
-        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0;
+        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0, n_entry_arrays = 0;
         usec_t last_usec = 0;
+        int data_fd = -1, entry_fd = -1, entry_array_fd = -1;
+        char data_path[] = "/var/tmp/journal-data-XXXXXX",
+                entry_path[] = "/var/tmp/journal-entry-XXXXXX",
+                entry_array_path[] = "/var/tmp/journal-entry-array-XXXXXX";
 
         assert(f);
 
+        data_fd = mkostemp(data_path, O_CLOEXEC);
+        if (data_fd < 0) {
+                log_error("Failed to create data file: %m");
+                goto fail;
+        }
+        unlink(data_path);
+
+        entry_fd = mkostemp(entry_path, O_CLOEXEC);
+        if (entry_fd < 0) {
+                log_error("Failed to create entry file: %m");
+                goto fail;
+        }
+        unlink(entry_path);
+
+        entry_array_fd = mkostemp(entry_array_path, O_CLOEXEC);
+        if (entry_array_fd < 0) {
+                log_error("Failed to create entry array file: %m");
+                goto fail;
+        }
+        unlink(entry_array_path);
+
         /* First iteration: we go through all objects, verify the
          * superficial structure, headers, hashes. */
 
@@ -2358,7 +2428,7 @@ int journal_file_verify(JournalFile *f, const char *key) {
 
         p = le64toh(f->header->header_size);
         while (p != 0) {
-                draw_progress((65535ULL * p / le64toh(f->header->tail_object_offset)), &last_usec);
+                draw_progress((0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
 
                 r = journal_file_move_to_object(f, -1, p, &o);
                 if (r < 0) {
@@ -2416,6 +2486,10 @@ int journal_file_verify(JournalFile *f, const char *key) {
 
                 } else if (o->object.type == OBJECT_ENTRY) {
 
+                        r = write_uint64(entry_fd, p);
+                        if (r < 0)
+                                goto fail;
+
                         if (!entry_seqnum_set &&
                             le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
                                 log_error("Head entry sequence number incorrect");
@@ -2458,6 +2532,10 @@ int journal_file_verify(JournalFile *f, const char *key) {
                         n_entries ++;
                 } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
 
+                        r = write_uint64(entry_array_fd, p);
+                        if (r < 0)
+                                goto fail;
+
                         if (p == le64toh(f->header->entry_array_offset)) {
                                 if (found_main_entry_array) {
                                         log_error("More than one main entry array at %llu", (unsigned long long) p);
@@ -2468,9 +2546,17 @@ int journal_file_verify(JournalFile *f, const char *key) {
                                 found_main_entry_array = true;
                         }
 
-                } else if (o->object.type == OBJECT_DATA)
+                        n_entry_arrays++;
+
+                } else if (o->object.type == OBJECT_DATA) {
+
+                        r = write_uint64(data_fd, p);
+                        if (r < 0)
+                                goto fail;
+
                         n_data++;
-                else if (o->object.type == OBJECT_FIELD)
+
+                } else if (o->object.type == OBJECT_FIELD)
                         n_fields++;
                 else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
                         n_data_hash_tables++;
@@ -2502,14 +2588,8 @@ int journal_file_verify(JournalFile *f, const char *key) {
                                 r = -EBADMSG;
                                 goto fail;
                         }
-                }
-
-                if (o->object.type >= _OBJECT_TYPE_MAX)
+                } else if (o->object.type >= _OBJECT_TYPE_MAX)
                         n_weird ++;
-                else {
-                        /* Write address to file... */
-
-                }
 
                 if (p == le64toh(f->header->tail_object_offset))
                         p = 0;
@@ -2592,36 +2672,86 @@ int journal_file_verify(JournalFile *f, const char *key) {
         /* Second iteration: we go through all objects again, this
          * time verify all pointers. */
 
-        /* q = le64toh(f->header->header_size); */
-        /* while (q != 0) { */
-        /*         r = journal_file_move_to_object(f, -1, q, &o); */
-        /*         if (r < 0) { */
-        /*                 log_error("Invalid object at %llu", (unsigned long long) q); */
-        /*                 goto fail; */
-        /*         } */
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress(0x8000 + (0x7FFF * p) / le64toh(f->header->tail_object_offset), &last_usec);
+
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_ENTRY_ARRAY) {
+                        uint64_t i = 0, n;
+
+                        if (le64toh(o->entry_array.next_entry_array_offset) != 0 &&
+                            !contains_uint64(f->mmap, entry_array_fd, n_entry_arrays, le64toh(o->entry_array.next_entry_array_offset))) {
+                                log_error("Entry array chains up to invalid next array at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        n = journal_file_entry_array_n_items(o);
+                        for (i = 0; i < n; i++) {
+                                if (le64toh(o->entry_array.items[i]) != 0 &&
+                                    !contains_uint64(f->mmap, entry_fd, n_entries, le64toh(o->entry_array.items[i]))) {
+
+                                        log_error("Entry array points to invalid next array at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+                        }
+
+                }
 
-        /*         if (q == le64toh(f->header->tail_object_offset)) */
-        /*                 q = 0; */
-        /*         else */
-        /*                 q = q + ALIGN64(le64toh(o->object.size)); */
-        /* } */
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
 
         flush_progress();
 
+        mmap_cache_close_fd(f->mmap, data_fd);
+        mmap_cache_close_fd(f->mmap, entry_fd);
+        mmap_cache_close_fd(f->mmap, entry_array_fd);
+
+        close_nointr_nofail(data_fd);
+        close_nointr_nofail(entry_fd);
+        close_nointr_nofail(entry_array_fd);
+
         return 0;
 
 fail:
-        e = p <= 0 ? q :
-        q <= 0 ? p :
-        MIN(p, q);
-
         flush_progress();
 
         log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
                   f->path,
-                  (unsigned long long) e,
+                  (unsigned long long) p,
                   (unsigned long long) f->last_stat.st_size,
-                  (unsigned long long) (100 * e / f->last_stat.st_size));
+                  (unsigned long long) (100 * p / f->last_stat.st_size));
+
+        if (data_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, data_fd);
+                close_nointr_nofail(data_fd);
+        }
+
+        if (entry_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_fd);
+                close_nointr_nofail(entry_fd);
+        }
+
+        if (entry_array_fd >= 0) {
+                mmap_cache_close_fd(f->mmap, entry_array_fd);
+                close_nointr_nofail(entry_array_fd);
+        }
 
         return r;
 }
diff --git a/src/journal/journald.c b/src/journal/journald.c
index 145663b..384ed90 100644
--- a/src/journal/journald.c
+++ b/src/journal/journald.c
@@ -2944,6 +2944,7 @@ int main(int argc, char *argv[]) {
 
         log_set_target(LOG_TARGET_SAFE);
         log_set_facility(LOG_SYSLOG);
+        log_set_max_level(LOG_DEBUG);
         log_parse_environment();
         log_open();
 
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
index 77710ff..9782139 100644
--- a/src/journal/mmap-cache.c
+++ b/src/journal/mmap-cache.c
@@ -68,6 +68,8 @@ struct MMapCache {
         FileDescriptor *by_fd;
 };
 
+static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index);
+
 static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
         Window *v;
 
@@ -89,6 +91,13 @@ static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
         assert(w < m->n_windows);
 
         v = m->windows + w;
+        assert(v->n_ref == 0);
+
+        if (m->lru_last != (unsigned) -1) {
+                assert(m->windows[m->lru_last].lru_next == (unsigned) -1);
+                m->windows[m->lru_last].lru_next = w;
+        }
+
         v->lru_prev = m->lru_last;
         v->lru_next = (unsigned) -1;
 
@@ -105,15 +114,21 @@ static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
 
         v = m->windows + w;
 
-        if (v->lru_prev == (unsigned) -1)
+        if (v->lru_prev == (unsigned) -1) {
+                assert(m->lru_first == w);
                 m->lru_first = v->lru_next;
-        else
+        } else {
+                assert(m->windows[v->lru_prev].lru_next == w);
                 m->windows[v->lru_prev].lru_next = v->lru_next;
+        }
 
-        if (v->lru_next == (unsigned) -1)
+        if (v->lru_next == (unsigned) -1) {
+                assert(m->lru_last == w);
                 m->lru_last = v->lru_prev;
-        else
+        } else {
+                assert(m->windows[v->lru_next].lru_prev == w);
                 m->windows[v->lru_next].lru_prev = v->lru_prev;
+        }
 }
 
 static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
@@ -123,6 +138,13 @@ static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
         assert(fd_index < m->n_fds);
 
         v = m->windows + w;
+        assert(m->by_fd[fd_index].fd == v->fd);
+
+        if (m->by_fd[fd_index].windows != (unsigned) -1) {
+                assert(m->windows[m->by_fd[fd_index].windows].by_fd_prev == (unsigned) -1);
+                m->windows[m->by_fd[fd_index].windows].by_fd_prev = w;
+        }
+
         v->by_fd_next = m->by_fd[fd_index].windows;
         v->by_fd_prev = (unsigned) -1;
 
@@ -136,13 +158,22 @@ static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
         assert(fd_index < m->n_fds);
 
         v = m->windows + w;
-        if (v->by_fd_prev == (unsigned) -1)
+        assert(m->by_fd[fd_index].fd == v->fd);
+        assert(v->by_fd_next == (unsigned) -1 || m->windows[v->by_fd_next].fd == v->fd);
+        assert(v->by_fd_prev == (unsigned) -1 || m->windows[v->by_fd_prev].fd == v->fd);
+
+        if (v->by_fd_prev == (unsigned) -1) {
+                assert(m->by_fd[fd_index].windows == w);
                 m->by_fd[fd_index].windows = v->by_fd_next;
-        else
+        } else {
+                assert(m->windows[v->by_fd_prev].by_fd_next == w);
                 m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
+        }
 
-        if (v->by_fd_next != (unsigned) -1)
+        if (v->by_fd_next != (unsigned) -1) {
+                assert(m->windows[v->by_fd_next].by_fd_prev == w);
                 m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
+        }
 }
 
 static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
@@ -182,6 +213,7 @@ static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
 
         v = m->windows + w;
         v->n_ref ++;
+
         if (v->n_ref == 1)
                 mmap_cache_window_remove_lru(m, w);
 }
@@ -264,6 +296,9 @@ MMapCache* mmap_cache_unref(MMapCache *m) {
 }
 
 static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
+        Window *v;
+        unsigned fd_index;
+
         assert(m);
         assert(w);
 
@@ -276,7 +311,16 @@ static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
                 return -E2BIG;
 
         *w = m->lru_first;
+        v = m->windows + *w;
+        assert(v->n_ref == 0);
+
         mmap_cache_window_unmap(m, *w);
+
+        if (v->fd >= 0) {
+                assert_se(mmap_cache_peek_fd_index(m, v->fd, &fd_index) > 0);
+                mmap_cache_fd_remove(m, fd_index, *w);
+        }
+
         mmap_cache_window_remove_lru(m, *w);
 
         return 0;
@@ -370,8 +414,7 @@ static int mmap_cache_put(
         v->size = wsize;
 
         v->n_ref = 0;
-        v->lru_prev = v->lru_next = (unsigned) -1;
-
+        mmap_cache_window_add_lru(m, w);
         mmap_cache_fd_add(m, fd_index, w);
         mmap_cache_context_set(m, context, w);
 
@@ -390,28 +433,48 @@ static int fd_cmp(const void *_a, const void *_b) {
         return 0;
 }
 
+static int mmap_cache_peek_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
+        FileDescriptor *found;
+        unsigned i;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(fd_index);
+
+        /* Sanity check: the head window of each per-fd chain must carry that fd */
+        for (i = 0; i < m->n_fds; i++)
+                assert(m->by_fd[i].windows == (unsigned) -1 ||
+                       m->windows[m->by_fd[i].windows].fd == m->by_fd[i].fd);
+
+        /* Lookup only, nothing is added: returns 1 and sets *fd_index if fd is known, else 0 */
+        found = bsearch(&fd, m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
+        if (found)
+                *fd_index = (unsigned) (found - m->by_fd);
+        return !!found;
+}
+
 static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
         FileDescriptor *j;
+        int r;
 
         assert(m);
         assert(fd >= 0);
         assert(fd_index);
 
-        j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-        if (!j) {
-                if (m->n_fds >= m->fds_max)
-                        return -E2BIG;
+        r = mmap_cache_peek_fd_index(m, fd, fd_index);
+        if (r != 0)
+                return r;
 
-                j = m->by_fd + m->n_fds ++;
-                j->fd = fd;
-                j->windows = (unsigned) -1;
+        if (m->n_fds >= m->fds_max)
+                return -E2BIG;
 
-                qsort(m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-                j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-        }
+        j = m->by_fd + m->n_fds ++;
+        j->fd = fd;
+        j->windows = (unsigned) -1;
 
-        *fd_index = (unsigned) (j - m->by_fd);
-        return 0;
+        qsort(m->by_fd, m->n_fds, sizeof(FileDescriptor), fd_cmp);
+
+        return mmap_cache_peek_fd_index(m, fd, fd_index);
 }
 
 static bool mmap_cache_test_window(
@@ -466,6 +529,7 @@ static int mmap_cache_current(
 
 static int mmap_cache_find(
                 MMapCache *m,
+                int fd,
                 unsigned fd_index,
                 unsigned context,
                 uint64_t offset,
@@ -476,6 +540,7 @@ static int mmap_cache_find(
         unsigned w;
 
         assert(m);
+        assert(fd >= 0);
         assert(fd_index < m->n_fds);
         assert(context < m->contexts_max);
         assert(size > 0);
@@ -483,10 +548,13 @@ static int mmap_cache_find(
 
         w = m->by_fd[fd_index].windows;
         while (w != (unsigned) -1) {
+                v = m->windows + w;
+                assert(v->fd == fd);
+
                 if (mmap_cache_test_window(m, w, offset, size))
                         break;
 
-                w = m->windows[w].by_fd_next;
+                w = v->by_fd_next;
         }
 
         if (w == (unsigned) -1)
@@ -494,7 +562,6 @@ static int mmap_cache_find(
 
         mmap_cache_context_set(m, context, w);
 
-        v = m->windows + w;
         *ret = (uint8_t*) v->ptr + (offset - v->offset);
         return 1;
 }
@@ -523,13 +590,17 @@ int mmap_cache_get(
         if (r != 0)
                 return r;
 
+        /* Hmm, drop the reference to the current one, since it wasn't
+         * good enough */
+        mmap_cache_context_unset(m, context);
+
         /* OK, let's find the chain for this FD */
         r = mmap_cache_get_fd_index(m, fd, &fd_index);
         if (r < 0)
                 return r;
 
         /* And let's look through the available mmaps */
-        r = mmap_cache_find(m, fd_index, context, offset, size, ret);
+        r = mmap_cache_find(m, fd, fd_index, context, offset, size, ret);
         if (r != 0)
                 return r;
 
@@ -538,16 +609,15 @@ int mmap_cache_get(
 }
 
 void mmap_cache_close_fd(MMapCache *m, int fd) {
-        FileDescriptor *j;
         unsigned fd_index, c, w;
+        int r;
 
         assert(m);
         assert(fd > 0);
 
-        j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
-        if (!j)
+        r = mmap_cache_peek_fd_index(m, fd, &fd_index);
+        if (r <= 0)
                 return;
-        fd_index = (unsigned) (j - m->by_fd);
 
         for (c = 0; c < m->contexts_max; c++) {
                 w = m->by_context[c];
@@ -560,9 +630,14 @@ void mmap_cache_close_fd(MMapCache *m, int fd) {
 
         w = m->by_fd[fd_index].windows;
         while (w != (unsigned) -1) {
+                Window *v;
+
+                v = m->windows + w;
+                assert(v->fd == fd);
 
-                mmap_cache_fd_remove(m, fd_index, w);
                 mmap_cache_window_unmap(m, w);
+                mmap_cache_fd_remove(m, fd_index, w);
+                v->fd = -1;
 
                 w = m->by_fd[fd_index].windows;
         }
@@ -571,6 +646,51 @@ void mmap_cache_close_fd(MMapCache *m, int fd) {
         m->n_fds --;
 }
 
+void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t p) {
+        unsigned fd_index, c, w, next;
+
+        assert(m);
+        assert(fd >= 0); /* 0 is a valid descriptor, as in mmap_cache_peek_fd_index() */
+
+        /* This drops all windows that include space right of the
+         * specified offset. This is useful to ensure that after the
+         * file size is extended we drop our mappings of the end and
+         * create it anew, since otherwise it is undefined whether
+         * mapping will continue to work as intended. */
+
+        /* Nothing to do if the cache holds no entry for this fd */
+        if (mmap_cache_peek_fd_index(m, fd, &fd_index) <= 0)
+                return;
+
+        /* Detach every context currently pointing at a window of this fd,
+         * regardless of the window's position relative to p */
+        for (c = 0; c < m->contexts_max; c++) {
+                w = m->by_context[c];
+
+                if (w != (unsigned) -1 && m->windows[w].fd == fd)
+                        mmap_cache_context_unset(m, c);
+        }
+
+        for (w = m->by_fd[fd_index].windows; w != (unsigned) -1; w = next) {
+                Window *v;
+
+                v = m->windows + w;
+                assert(v->fd == fd);
+
+                /* Remember the successor first, so that unlinking w
+                 * does not force a rescan from the chain head */
+                next = v->by_fd_next;
+                assert(next == (unsigned) -1 || m->windows[next].fd == fd);
+
+                if (v->offset + v->size <= p)
+                        continue; /* ends at or before p, keep it */
+
+                mmap_cache_window_unmap(m, w);
+                mmap_cache_fd_remove(m, fd_index, w);
+                v->fd = -1; /* off the fd chain now; mmap_cache_allocate_window() skips fd removal for fd < 0 */
+        }
+}
+
 void mmap_cache_close_context(MMapCache *m, unsigned context) {
         mmap_cache_context_unset(m, context);
 }
diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h
index 0a88fc5..984b759 100644
--- a/src/journal/mmap-cache.h
+++ b/src/journal/mmap-cache.h
@@ -31,4 +31,5 @@ MMapCache* mmap_cache_unref(MMapCache *m);
 
 int mmap_cache_get(MMapCache *m, int fd, int prot, unsigned context, uint64_t offset, uint64_t size, void **ret);
 void mmap_cache_close_fd(MMapCache *m, int fd);
+void mmap_cache_close_fd_range(MMapCache *m, int fd, uint64_t range);
 void mmap_cache_close_context(MMapCache *m, unsigned context);

commit beec00856158b703f2125a3d936080346a8a8de1
Author: Lennart Poettering <lennart at poettering.net>
Date:   Wed Aug 15 01:54:09 2012 +0200

    journal: implement basic journal file verification logic

diff --git a/Makefile.am b/Makefile.am
index c90867c..f220b59 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2384,6 +2384,14 @@ test_journal_stream_LDADD = \
 	libsystemd-journal-internal.la \
 	libsystemd-id128-internal.la
 
+test_journal_verify_SOURCES = \
+	src/journal/test-journal-verify.c
+
+test_journal_verify_LDADD = \
+	libsystemd-shared.la \
+	libsystemd-journal-internal.la \
+	libsystemd-id128-internal.la
+
 libsystemd_journal_la_SOURCES = \
 	src/journal/sd-journal.c \
 	src/systemd/sd-journal.h \
@@ -2482,12 +2490,14 @@ noinst_PROGRAMS += \
 	test-journal \
 	test-journal-send \
 	test-journal-match \
-	test-journal-stream
+	test-journal-stream \
+	test-journal-verify
 
 TESTS += \
 	test-journal \
 	test-journal-match \
-	test-journal-stream
+	test-journal-stream \
+	test-journal-verify
 
 pkginclude_HEADERS += \
 	src/systemd/sd-journal.h \
diff --git a/src/journal/journal-def.h b/src/journal/journal-def.h
index 82210bf..a77f69e 100644
--- a/src/journal/journal-def.h
+++ b/src/journal/journal-def.h
@@ -174,8 +174,8 @@ _packed_ struct Header {
         le64_t tail_object_offset;
         le64_t n_objects;
         le64_t n_entries;
-        le64_t tail_seqnum;
-        le64_t head_seqnum;
+        le64_t tail_entry_seqnum;
+        le64_t head_entry_seqnum;
         le64_t entry_array_offset;
         le64_t head_entry_realtime;
         le64_t tail_entry_realtime;
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 1f5e04d..709e15e 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -136,7 +136,7 @@ static int journal_file_init_header(JournalFile *f, JournalFile *template) {
 
         if (template) {
                 h.seqnum_id = template->header->seqnum_id;
-                h.tail_seqnum = template->header->tail_seqnum;
+                h.tail_entry_seqnum = template->header->tail_entry_seqnum;
         } else
                 h.seqnum_id = h.file_id;
 
@@ -210,6 +210,10 @@ static int journal_file_verify_header(JournalFile *f) {
         if (le64toh(f->header->header_size) < HEADER_SIZE_MIN)
                 return -EBADMSG;
 
+        if ((le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED) &&
+                !JOURNAL_HEADER_CONTAINS(f->header, n_tags))
+                return -EBADMSG;
+
         if ((uint64_t) f->last_stat.st_size < (le64toh(f->header->header_size) + le64toh(f->header->arena_size)))
                 return -ENODATA;
 
@@ -407,7 +411,7 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
 
         assert(f);
 
-        r = le64toh(f->header->tail_seqnum) + 1;
+        r = le64toh(f->header->tail_entry_seqnum) + 1;
 
         if (seqnum) {
                 /* If an external seqnum counter was passed, we update
@@ -420,10 +424,10 @@ static uint64_t journal_file_entry_seqnum(JournalFile *f, uint64_t *seqnum) {
                 *seqnum = r;
         }
 
-        f->header->tail_seqnum = htole64(r);
+        f->header->tail_entry_seqnum = htole64(r);
 
-        if (f->header->head_seqnum == 0)
-                f->header->head_seqnum = htole64(r);
+        if (f->header->head_entry_seqnum == 0)
+                f->header->head_entry_seqnum = htole64(r);
 
         return r;
 }
@@ -2066,7 +2070,7 @@ static int journal_file_hmac_put_header(JournalFile *f) {
         gcry_md_write(f->hmac, &f->header->file_id, offsetof(Header, boot_id) - offsetof(Header, file_id));
         gcry_md_write(f->hmac, &f->header->seqnum_id, offsetof(Header, arena_size) - offsetof(Header, seqnum_id));
         gcry_md_write(f->hmac, &f->header->data_hash_table_offset, offsetof(Header, tail_object_offset) - offsetof(Header, data_hash_table_offset));
-        gcry_md_write(f->hmac, &f->header->head_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_seqnum));
+        gcry_md_write(f->hmac, &f->header->head_entry_seqnum, offsetof(Header, head_entry_realtime) - offsetof(Header, head_entry_seqnum));
 
         return 0;
 }
@@ -2223,6 +2227,405 @@ static int journal_file_append_first_tag(JournalFile *f) {
         return 0;
 }
 
+static int journal_file_object_verify(JournalFile *f, Object *o) {
+        assert(f);
+        assert(o);
+
+        /* This does various superficial tests about the length and
+         * possible field values. It does not follow any references to
+         * other objects. Returns 0 if plausible, -EBADMSG otherwise. */
+
+        switch (o->object.type) {
+        case OBJECT_DATA:
+                if (le64toh(o->data.entry_offset) <= 0 ||
+                    le64toh(o->data.n_entries) <= 0)
+                        return -EBADMSG;
+                /* Operands are unsigned, so "<= 0" only triggers when the payload is empty */
+                if (le64toh(o->object.size) - offsetof(DataObject, payload) <= 0)
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_FIELD:
+                if (le64toh(o->object.size) - offsetof(FieldObject, payload) <= 0)
+                        return -EBADMSG;
+                break;
+
+        case OBJECT_ENTRY:
+                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) % sizeof(EntryItem) != 0)
+                        return -EBADMSG;
+                /* The item area must hold a whole number of items (above) and at least one (below) */
+                if ((le64toh(o->object.size) - offsetof(EntryObject, items)) / sizeof(EntryItem) <= 0)
+                        return -EBADMSG;
+
+                if (le64toh(o->entry.seqnum) <= 0 ||
+                    le64toh(o->entry.realtime) <= 0)
+                        return -EBADMSG;
+
+                break;
+
+        case OBJECT_DATA_HASH_TABLE:
+        case OBJECT_FIELD_HASH_TABLE:
+                if ((le64toh(o->object.size) - offsetof(HashTableObject, items)) % sizeof(HashItem) != 0)
+                        return -EBADMSG;
+
+                break;
+
+        case OBJECT_ENTRY_ARRAY:
+                if ((le64toh(o->object.size) - offsetof(EntryArrayObject, items)) % sizeof(le64_t) != 0)
+                        return -EBADMSG;
+
+                break;
+
+        case OBJECT_TAG:
+                if (le64toh(o->object.size) != sizeof(TagObject))
+                        return -EBADMSG;
+                break;
+        }
+        /* There is no default case: any other type value is accepted without checks */
+        return 0;
+}
+
+static void draw_progress(uint64_t p, usec_t *last_usec) {
+        unsigned n, i, j, k;
+        usec_t z, x;
+        /* Draws a progress bar on stdout if it is a TTY; p is the progress on a 0..65535 scale. */
+        if (!isatty(STDOUT_FILENO))
+                return;
+        /* Rate limit: *last_usec holds the time of the previous redraw (0 = never drawn). */
+        z = now(CLOCK_MONOTONIC);
+        x = *last_usec;
+
+        if (x != 0 && x + 40 * USEC_PER_MSEC > z)
+                return;
+
+        *last_usec = z;
+        p = p > 65535 ? 65535 : p; /* clamp: a larger p would make j exceed n and wrap k */
+        n = (3 * columns()) / 4;
+        j = (n * (unsigned) p) / 65535ULL;
+        k = n - j;
+        /* ESC[?25l hides the cursor while the bar is redrawn from column 0 */
+        fputs("\r\x1B[?25l", stdout);
+
+        for (i = 0; i < j; i++)
+                fputs("\xe2\x96\x88", stdout); /* U+2588 full block: completed part */
+
+        for (i = 0; i < k; i++)
+                fputs("\xe2\x96\x91", stdout); /* U+2591 light shade: remaining part */
+
+        printf(" %3lu%%", 100LU * (unsigned long) p / 65535LU);
+        /* ESC[?25h shows the cursor again; the \r leaves it at the start of the line */
+        fputs("\r\x1B[?25h", stdout);
+        fflush(stdout);
+}
+
+static void flush_progress(void) {
+        unsigned n, i;
+        /* Erases the progress bar from the current terminal line by overwriting it with blanks. */
+        if (!isatty(STDOUT_FILENO))
+                return;
+
+        n = (3 * columns()) / 4;
+        /* Go back to column 0, blank out the bar (n cells) plus 5 more columns for the " NNN%" suffix */
+        putchar('\r');
+
+        for (i = 0; i < n + 5; i++)
+                putchar(' ');
+        /* Return to column 0 again so that following output starts on a clean line */
+        putchar('\r');
+        fflush(stdout);
+}
+
+int journal_file_verify(JournalFile *f, const char *key) {
+        int r;
+        Object *o;
+        uint64_t p = 0, q = 0, e;
+        uint64_t tag_seqnum = 0, entry_seqnum = 0, entry_monotonic = 0, entry_realtime = 0;
+        sd_id128_t entry_boot_id;
+        bool entry_seqnum_set = false, entry_monotonic_set = false, entry_realtime_set = false, found_main_entry_array = false;
+        uint64_t n_weird = 0, n_objects = 0, n_entries = 0, n_data = 0, n_fields = 0, n_data_hash_tables = 0, n_field_hash_tables = 0;
+        usec_t last_usec = 0;
+        /* Checks the structural consistency of f against its header. 'key' is not used here. */
+        assert(f);
+        /* Returns 0 on success, or a negative errno-style code after logging the failure. */
+        /* First iteration: we go through all objects, verify the
+         * superficial structure, headers, hashes. */
+
+        r = journal_file_hmac_put_header(f);
+        if (r < 0) {
+                log_error("Failed to calculate HMAC of header.");
+                goto fail;
+        }
+        /* p is the file offset of the object being checked; it is set to 0 to end the walk */
+        p = le64toh(f->header->header_size);
+        while (p != 0) {
+                draw_progress(le64toh(f->header->tail_object_offset) > 0 ? 65535ULL * p / le64toh(f->header->tail_object_offset) : 0, &last_usec);
+                /* The guard above only avoids dividing by a zero tail offset; the check below rejects it */
+                r = journal_file_move_to_object(f, -1, p, &o);
+                if (r < 0) {
+                        log_error("Invalid object at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (le64toh(f->header->tail_object_offset) < p) {
+                        log_error("Invalid tail object pointer.");
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                n_objects ++;
+
+                r = journal_file_object_verify(f, o);
+                if (r < 0) {
+                        log_error("Invalid object contents at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                r = journal_file_hmac_put_object(f, -1, p);
+                if (r < 0) {
+                        log_error("Failed to calculate HMAC at %llu", (unsigned long long) p);
+                        goto fail;
+                }
+
+                if (o->object.flags & OBJECT_COMPRESSED &&
+                    !(le32toh(f->header->incompatible_flags) & HEADER_INCOMPATIBLE_COMPRESSED)) {
+                        log_error("Compressed object without compression at %llu", (unsigned long long) p);
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                if (o->object.flags & OBJECT_COMPRESSED &&
+                    o->object.type != OBJECT_DATA) {
+                        log_error("Compressed non-data object at %llu", (unsigned long long) p);
+                        r = -EBADMSG;
+                        goto fail;
+                }
+
+                if (o->object.type == OBJECT_TAG) {
+
+                        if (!(le32toh(f->header->compatible_flags) & HEADER_COMPATIBLE_AUTHENTICATED)) {
+                                log_error("Tag object without authentication at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                        /* NOTE(review): tag_seqnum is never advanced in this function, so every tag is compared against 0 - confirm the intended numbering */
+                        if (le64toh(o->tag.seqnum) != tag_seqnum) {
+                                log_error("Tag sequence number out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                } else if (o->object.type == OBJECT_ENTRY) {
+
+                        if (!entry_seqnum_set &&
+                            le64toh(o->entry.seqnum) != le64toh(f->header->head_entry_seqnum)) {
+                                log_error("Head entry sequence number incorrect");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (entry_seqnum_set &&
+                            entry_seqnum >= le64toh(o->entry.seqnum)) {
+                                log_error("Entry sequence number out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_seqnum = le64toh(o->entry.seqnum);
+                        entry_seqnum_set = true;
+
+                        if (entry_monotonic_set &&
+                            sd_id128_equal(entry_boot_id, o->entry.boot_id) &&
+                            entry_monotonic > le64toh(o->entry.monotonic)) {
+                                log_error("Entry timestamp out of synchronization at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_monotonic = le64toh(o->entry.monotonic);
+                        entry_boot_id = o->entry.boot_id;
+                        entry_monotonic_set = true;
+
+                        if (!entry_realtime_set &&
+                            le64toh(o->entry.realtime) != le64toh(f->header->head_entry_realtime)) {
+                                log_error("Head entry realtime timestamp incorrect");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        entry_realtime = le64toh(o->entry.realtime);
+                        entry_realtime_set = true;
+
+                        n_entries ++;
+                } else if (o->object.type == OBJECT_ENTRY_ARRAY) {
+
+                        if (p == le64toh(f->header->entry_array_offset)) {
+                                if (found_main_entry_array) {
+                                        log_error("More than one main entry array at %llu", (unsigned long long) p);
+                                        r = -EBADMSG;
+                                        goto fail;
+                                }
+
+                                found_main_entry_array = true;
+                        }
+
+                } else if (o->object.type == OBJECT_DATA)
+                        n_data++;
+                else if (o->object.type == OBJECT_FIELD)
+                        n_fields++;
+                else if (o->object.type == OBJECT_DATA_HASH_TABLE) {
+                        n_data_hash_tables++;
+
+                        if (n_data_hash_tables > 1) {
+                                log_error("More than one data hash table at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(f->header->data_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+                            le64toh(f->header->data_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+                                log_error("Header fields for data hash table invalid.");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                } else if (o->object.type == OBJECT_FIELD_HASH_TABLE) {
+                        n_field_hash_tables++;
+
+                        if (n_field_hash_tables > 1) {
+                                log_error("More than one field hash table at %llu", (unsigned long long) p);
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+
+                        if (le64toh(f->header->field_hash_table_offset) != p + offsetof(HashTableObject, items) ||
+                            le64toh(f->header->field_hash_table_size) != le64toh(o->object.size) - offsetof(HashTableObject, items)) {
+                                log_error("Header fields for field hash table invalid.");
+                                r = -EBADMSG;
+                                goto fail;
+                        }
+                }
+
+                if (o->object.type >= _OBJECT_TYPE_MAX)
+                        n_weird ++;
+                else {
+                        /* Write address to file... */
+
+                }
+                /* The tail object ends the walk; otherwise step to the next 64-bit aligned object */
+                if (p == le64toh(f->header->tail_object_offset))
+                        p = 0;
+                else
+                        p = p + ALIGN64(le64toh(o->object.size));
+        }
+        /* From here on p is 0: compare the totals gathered during the walk with the header */
+        if (n_objects != le64toh(f->header->n_objects)) {
+                log_error("Object number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_entries != le64toh(f->header->n_entries)) {
+                log_error("Entry number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_data) &&
+            n_data != le64toh(f->header->n_data)) {
+                log_error("Data number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_fields) &&
+            n_fields != le64toh(f->header->n_fields)) {
+                log_error("Field number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (JOURNAL_HEADER_CONTAINS(f->header, n_tags) &&
+            tag_seqnum != le64toh(f->header->n_tags)) {
+                log_error("Tag number mismatch");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_data_hash_tables != 1) {
+                log_error("Missing data hash table");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (n_field_hash_tables != 1) {
+                log_error("Missing field hash table");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (!found_main_entry_array) {
+                log_error("Missing entry array");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_seqnum_set &&
+            entry_seqnum != le64toh(f->header->tail_entry_seqnum)) {
+                log_error("Invalid tail seqnum");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_monotonic_set &&
+            (!sd_id128_equal(entry_boot_id, f->header->boot_id) ||
+             entry_monotonic != le64toh(f->header->tail_entry_monotonic))) {
+                log_error("Invalid tail monotonic timestamp");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        if (entry_realtime_set && entry_realtime != le64toh(f->header->tail_entry_realtime)) {
+                log_error("Invalid tail realtime timestamp");
+                r = -EBADMSG;
+                goto fail;
+        }
+
+        /* Second iteration: we go through all objects again, this
+         * time verify all pointers. */
+
+        /* q = le64toh(f->header->header_size); */
+        /* while (q != 0) { */
+        /*         r = journal_file_move_to_object(f, -1, q, &o); */
+        /*         if (r < 0) { */
+        /*                 log_error("Invalid object at %llu", (unsigned long long) q); */
+        /*                 goto fail; */
+        /*         } */
+
+        /*         if (q == le64toh(f->header->tail_object_offset)) */
+        /*                 q = 0; */
+        /*         else */
+        /*                 q = q + ALIGN64(le64toh(o->object.size)); */
+        /* } */
+
+        flush_progress();
+
+        return 0;
+
+fail:
+        e = p <= 0 ? q :
+        q <= 0 ? p :
+        MIN(p, q);
+        /* e is the smaller non-zero offset of the two passes (q stays 0 while the second pass is disabled) */
+        flush_progress();
+        /* The percentage is guarded so that a zero file size cannot cause a division by zero */
+        log_error("File corruption detected at %s:%llu (of %llu, %llu%%).",
+                  f->path,
+                  (unsigned long long) e,
+                  (unsigned long long) f->last_stat.st_size,
+                  (unsigned long long) (f->last_stat.st_size > 0 ? 100 * e / f->last_stat.st_size : 0));
+
+        return r;
+}
+
 void journal_file_dump(JournalFile *f) {
         Object *o;
         int r;
@@ -2331,8 +2734,8 @@ void journal_file_print_header(JournalFile *f) {
                (unsigned long long) le64toh(f->header->n_objects),
                (unsigned long long) le64toh(f->header->n_entries),
                yes_no(journal_file_rotate_suggested(f)),
-               (unsigned long long) le64toh(f->header->head_seqnum),
-               (unsigned long long) le64toh(f->header->tail_seqnum),
+               (unsigned long long) le64toh(f->header->head_entry_seqnum),
+               (unsigned long long) le64toh(f->header->tail_entry_seqnum),
                format_timestamp(x, sizeof(x), le64toh(f->header->head_entry_realtime)),
                format_timestamp(y, sizeof(y), le64toh(f->header->tail_entry_realtime)));
 
@@ -2536,7 +2939,7 @@ int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
         sd_id128_to_string(old_file->header->seqnum_id, p + l - 8 + 1);
         snprintf(p + l - 8 + 1 + 32, 1 + 16 + 1 + 16 + 8 + 1,
                  "-%016llx-%016llx.journal",
-                 (unsigned long long) le64toh((*f)->header->tail_seqnum),
+                 (unsigned long long) le64toh((*f)->header->tail_entry_seqnum),
                  (unsigned long long) le64toh((*f)->header->tail_entry_realtime));
 
         r = rename(old_file->path, p);
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index 11a1c7d..0305c97 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -150,3 +150,5 @@ int journal_file_get_cutoff_monotonic_usec(JournalFile *f, sd_id128_t boot, usec
 bool journal_file_rotate_suggested(JournalFile *f);
 
 int journal_file_append_tag(JournalFile *f);
+
+int journal_file_verify(JournalFile *f, const char *key);
diff --git a/src/journal/journalctl.c b/src/journal/journalctl.c
index b4874a7..8e09ff1 100644
--- a/src/journal/journalctl.c
+++ b/src/journal/journalctl.c
@@ -62,7 +62,8 @@ static enum {
         ACTION_SHOW,
         ACTION_NEW_ID128,
         ACTION_PRINT_HEADER,
-        ACTION_SETUP_KEYS
+        ACTION_SETUP_KEYS,
+        ACTION_VERIFY
 } arg_action = ACTION_SHOW;
 
 static int help(void) {
@@ -86,7 +87,8 @@ static int help(void) {
                "Commands:\n"
                "     --new-id128      Generate a new 128 Bit ID\n"
                "     --header         Show journal header information\n"
-               "     --setup-keys     Generate new FSPRG key pair\n",
+               "     --setup-keys     Generate new FSPRG key pair\n"
+               "     --verify         Verify journal file consistency\n",
                program_invocation_short_name);
 
         return 0;
@@ -100,7 +102,8 @@ static int parse_argv(int argc, char *argv[]) {
                 ARG_NO_TAIL,
                 ARG_NEW_ID128,
                 ARG_HEADER,
-                ARG_SETUP_KEYS
+                ARG_SETUP_KEYS,
+                ARG_VERIFY
         };
 
         static const struct option options[] = {
@@ -120,6 +123,7 @@ static int parse_argv(int argc, char *argv[]) {
                 { "header",    no_argument,       NULL, ARG_HEADER    },
                 { "priority",  no_argument,       NULL, 'p'           },
                 { "setup-keys",no_argument,       NULL, ARG_SETUP_KEYS},
+                { "verify",    no_argument,       NULL, ARG_VERIFY    },
                 { NULL,        0,                 NULL, 0             }
         };
 
@@ -203,6 +207,10 @@ static int parse_argv(int argc, char *argv[]) {
                         arg_action = ACTION_SETUP_KEYS;
                         break;
 
+                case ARG_VERIFY:
+                        arg_action = ACTION_VERIFY;
+                        break;
+
                 case 'p': {
                         const char *dots;
 
@@ -572,6 +580,27 @@ finish:
 #endif
 }
 
+static int verify(sd_journal *j) {
+        int r = 0;
+        Iterator i;
+        JournalFile *f;
+        /* Verifies every file in j->files; returns 0 if all pass, else the error of the last file that failed. */
+        assert(j);
+
+        HASHMAP_FOREACH(f, j->files, i) {
+                int k;
+
+                k = journal_file_verify(f, NULL);
+                if (k < 0) {
+                        log_warning("FAIL: %s (%s)", f->path, strerror(-k));
+                        r = k; /* remember the failure, but keep checking the remaining files */
+                } else
+                        log_info("PASS: %s", f->path);
+        }
+
+        return r;
+}
+
 int main(int argc, char *argv[]) {
         int r;
         sd_journal *j = NULL;
@@ -598,11 +627,6 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
-#ifdef HAVE_ACL
-        if (!arg_quiet && geteuid() != 0 && in_group("adm") <= 0)
-                log_warning("Showing user generated messages only. Users in the group 'adm' can see all messages. Pass -q to turn this message off.");
-#endif
-
         if (arg_directory)
                 r = sd_journal_open_directory(&j, arg_directory, 0);
         else
@@ -613,12 +637,22 @@ int main(int argc, char *argv[]) {
                 goto finish;
         }
 
+        if (arg_action == ACTION_VERIFY) {
+                r = verify(j);
+                goto finish;
+        }
+
         if (arg_action == ACTION_PRINT_HEADER) {
                 journal_print_header(j);
                 r = 0;
                 goto finish;
         }
 
+#ifdef HAVE_ACL
+        if (!arg_quiet && geteuid() != 0 && in_group("adm") <= 0)
+                log_warning("Showing user generated messages only. Users in the group 'adm' can see all messages. Pass -q to turn this message off.");
+#endif
+
         r = add_this_boot(j);
         if (r < 0)
                 goto finish;
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
index 68dbe70..77710ff 100644
--- a/src/journal/mmap-cache.c
+++ b/src/journal/mmap-cache.c
@@ -232,7 +232,6 @@ MMapCache* mmap_cache_new(unsigned contexts_max, unsigned fds_max) {
                 mmap_cache_free(m);
                 return NULL;
         }
-
         memset(m->by_context, -1, m->contexts_max * sizeof(unsigned));
 
         m->by_fd = new(FileDescriptor, m->fds_max);
@@ -334,7 +333,7 @@ static int mmap_cache_put(
         if (wsize < WINDOW_SIZE) {
                 uint64_t delta;
 
-                delta = (WINDOW_SIZE - wsize) / 2;
+                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
 
                 if (delta > offset)
                         woffset = 0;

commit 07cacf5f3b80fa0dfa5dd12531881118aa3b09ca
Author: Lennart Poettering <lennart at poettering.net>
Date:   Tue Aug 14 22:04:11 2012 +0200

    conf-parser: make parsing exit status lists non-fatal

diff --git a/src/shared/conf-parser.c b/src/shared/conf-parser.c
index f3e258a..68ab804 100644
--- a/src/shared/conf-parser.c
+++ b/src/shared/conf-parser.c
@@ -981,8 +981,8 @@ int config_parse_set_status(
                                         return r;
                                 }
                         } else {
-                                log_error("[%s:%u] Failed to parse value: %s", filename, line, w);
-                                return r;
+                                log_error("[%s:%u] Failed to parse value, ignoring: %s", filename, line, w);
+                                return 0;
                         }
                 } else {
                         free(temp);
@@ -1001,7 +1001,7 @@ int config_parse_set_status(
                                 }
                         }
                 }
-
         }
+
         return 0;
 }

commit 16e9f408fa9a9626059bdd6c89dc175e06b9e976
Author: Lennart Poettering <lennart at poettering.net>
Date:   Tue Aug 14 22:02:24 2012 +0200

    journal: implement generic sharable mmap caching logic
    
    instead of having one simple per-file cache implement a more
    comprehensive one that works for multiple files and can actually
    maintain multiple maps per file and per object type.

diff --git a/Makefile.am b/Makefile.am
index 837bc6c..c90867c 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -2308,17 +2308,9 @@ EXTRA_DIST += \
 systemd_journald_SOURCES = \
 	src/journal/journald.c \
 	src/journal/journald.h \
-	src/journal/sd-journal.c \
-	src/journal/journal-file.c \
-	src/journal/journal-file.h \
-	src/journal/lookup3.c \
-	src/journal/lookup3.h \
 	src/journal/journal-rate-limit.c \
 	src/journal/journal-rate-limit.h \
-	src/journal/sparse-endian.h \
-	src/journal/journal-def.h \
-	src/journal/journal-internal.h \
-	src/journal/compress.h
+	src/journal/journal-internal.h
 
 nodist_systemd_journald_SOURCES = \
 	src/journal/journald-gperf.c
@@ -2394,9 +2386,18 @@ test_journal_stream_LDADD = \
 
 libsystemd_journal_la_SOURCES = \
 	src/journal/sd-journal.c \
+	src/systemd/sd-journal.h \
 	src/journal/journal-file.c \
+	src/journal/journal-file.h \
 	src/journal/lookup3.c \
-	src/journal/journal-send.c
+	src/journal/lookup3.h \
+	src/journal/journal-send.c \
+	src/journal/journal-send.h \
+	src/journal/sparse-endian.h \
+	src/journal/journal-def.h \
+	src/journal/compress.h \
+	src/journal/mmap-cache.c \
+	src/journal/mmap-cache.h
 
 libsystemd_journal_la_CFLAGS = \
 	$(AM_CFLAGS) \
diff --git a/src/journal/journal-file.c b/src/journal/journal-file.c
index 9235e5f..1f5e04d 100644
--- a/src/journal/journal-file.c
+++ b/src/journal/journal-file.c
@@ -36,8 +36,6 @@
 #define DEFAULT_DATA_HASH_TABLE_SIZE (2047ULL*sizeof(HashItem))
 #define DEFAULT_FIELD_HASH_TABLE_SIZE (333ULL*sizeof(HashItem))
 
-#define DEFAULT_WINDOW_SIZE (8ULL*1024ULL*1024ULL)
-
 #define COMPRESSION_SIZE_THRESHOLD (512ULL)
 
 /* This is the minimum journal file size */
@@ -71,8 +69,6 @@ static int journal_file_maybe_append_tag(JournalFile *f, uint64_t realtime);
 static int journal_file_hmac_put_object(JournalFile *f, int type, uint64_t p);
 
 void journal_file_close(JournalFile *f) {
-        int t;
-
         assert(f);
 
         /* Write the final tag */
@@ -80,9 +76,8 @@ void journal_file_close(JournalFile *f) {
                 journal_file_append_tag(f);
 
         /* Sync everything to disk, before we mark the file offline */
-        for (t = 0; t < _WINDOW_MAX; t++)
-                if (f->windows[t].ptr)
-                        munmap(f->windows[t].ptr, f->windows[t].size);
+        if (f->mmap && f->fd >= 0)
+                mmap_cache_close_fd(f->mmap, f->fd);
 
         if (f->writable && f->fd >= 0)
                 fdatasync(f->fd);
@@ -100,6 +95,9 @@ void journal_file_close(JournalFile *f) {
 
         free(f->path);
 
+        if (f->mmap)
+                mmap_cache_unref(f->mmap);
+
 #ifdef HAVE_XZ
         free(f->compress_buffer);
 #endif
@@ -305,59 +303,11 @@ static int journal_file_allocate(JournalFile *f, uint64_t offset, uint64_t size)
         return 0;
 }
 
-static int journal_file_map(
-                JournalFile *f,
-                uint64_t offset,
-                uint64_t size,
-                void **_window,
-                uint64_t *_woffset,
-                uint64_t *_wsize,
-                void **ret) {
-
-        uint64_t woffset, wsize;
-        void *window;
-
+static int journal_file_move_to(JournalFile *f, int context, uint64_t offset, uint64_t size, void **ret) {
         assert(f);
-        assert(size > 0);
         assert(ret);
 
-        woffset = offset & ~((uint64_t) page_size() - 1ULL);
-        wsize = size + (offset - woffset);
-        wsize = PAGE_ALIGN(wsize);
-
         /* Avoid SIGBUS on invalid accesses */
-        if (woffset + wsize > (uint64_t) PAGE_ALIGN(f->last_stat.st_size))
-                return -EADDRNOTAVAIL;
-
-        window = mmap(NULL, wsize, f->prot, MAP_SHARED, f->fd, woffset);
-        if (window == MAP_FAILED)
-                return -errno;
-
-        if (_window)
-                *_window = window;
-
-        if (_woffset)
-                *_woffset = woffset;
-
-        if (_wsize)
-                *_wsize = wsize;
-
-        *ret = (uint8_t*) window + (offset - woffset);
-
-        return 0;
-}
-
-static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_t size, void **ret) {
-        void *p = NULL;
-        uint64_t delta;
-        int r;
-        Window *w;
-
-        assert(f);
-        assert(ret);
-        assert(wt >= 0);
-        assert(wt < _WINDOW_MAX);
-
         if (offset + size > (uint64_t) f->last_stat.st_size) {
                 /* Hmm, out of range? Let's refresh the fstat() data
                  * first, before we trust that check. */
@@ -367,57 +317,7 @@ static int journal_file_move_to(JournalFile *f, int wt, uint64_t offset, uint64_
                         return -EADDRNOTAVAIL;
         }
 
-        w = f->windows + wt;
-
-        if (_likely_(w->ptr &&
-                     w->offset <= offset &&
-                     w->offset + w->size >= offset + size)) {
-
-                *ret = (uint8_t*) w->ptr + (offset - w->offset);
-                return 0;
-        }
-
-        if (w->ptr) {
-                if (munmap(w->ptr, w->size) < 0)
-                        return -errno;
-
-                w->ptr = NULL;
-                w->size = w->offset = 0;
-        }
-
-        if (size < DEFAULT_WINDOW_SIZE) {
-                /* If the default window size is larger then what was
-                 * asked for extend the mapping a bit in the hope to
-                 * minimize needed remappings later on. We add half
-                 * the window space before and half behind the
-                 * requested mapping */
-
-                delta = (DEFAULT_WINDOW_SIZE - size) / 2;
-
-                if (delta > offset)
-                        delta = offset;
-
-                offset -= delta;
-                size = DEFAULT_WINDOW_SIZE;
-        } else
-                delta = 0;
-
-        if (offset + size > (uint64_t) f->last_stat.st_size)
-                size = (uint64_t) f->last_stat.st_size - offset;
-
-        if (size <= 0)
-                return -EADDRNOTAVAIL;
-
-        r = journal_file_map(f,
-                             offset, size,
-                             &w->ptr, &w->offset, &w->size,
-                             &p);
-
-        if (r < 0)
-                return r;
-
-        *ret = (uint8_t*) p + delta;
-        return 0;
+        return mmap_cache_get(f->mmap, f->fd, f->prot, context, offset, size, ret);
 }
 
 static bool verify_hash(Object *o) {
@@ -437,17 +337,38 @@ static bool verify_hash(Object *o) {
         return h1 == h2;
 }
 
+static uint64_t minimum_header_size(Object *o) {
+        /* Returns the minimum size an object of o's type must have; unlisted types only need an ObjectHeader. */
+        static const uint64_t table[] = {
+                [OBJECT_DATA] = sizeof(DataObject),
+                [OBJECT_FIELD] = sizeof(FieldObject),
+                [OBJECT_ENTRY] = sizeof(EntryObject),
+                [OBJECT_DATA_HASH_TABLE] = sizeof(HashTableObject),
+                [OBJECT_FIELD_HASH_TABLE] = sizeof(HashTableObject),
+                [OBJECT_ENTRY_ARRAY] = sizeof(EntryArrayObject),
+                [OBJECT_TAG] = sizeof(TagObject),
+        };
+        /* Types beyond the table, or whose slot was left zero-initialized, fall back to the generic header */
+        if (o->object.type >= ELEMENTSOF(table) || table[o->object.type] <= 0)
+                return sizeof(ObjectHeader);
+
+        return table[o->object.type];
+}
+
 int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Object **ret) {
         int r;
         void *t;
         Object *o;
         uint64_t s;
+        unsigned context;
 
         assert(f);
         assert(ret);
-        assert(type < _OBJECT_TYPE_MAX);
 
-        r = journal_file_move_to(f, type >= 0 ? type : WINDOW_UNKNOWN, offset, sizeof(ObjectHeader), &t);
+        /* One context for each type, plus one catch-all for the rest */
+        context = type > 0 && type < _OBJECT_TYPE_MAX ? type : 0;
+
+        r = journal_file_move_to(f, context, offset, sizeof(ObjectHeader), &t);
         if (r < 0)
                 return r;
 
@@ -457,6 +378,12 @@ int journal_file_move_to_object(JournalFile *f, int type, uint64_t offset, Objec
         if (s < sizeof(ObjectHeader))
                 return -EBADMSG;
 
+        if (o->object.type <= OBJECT_UNUSED)
+                return -EBADMSG;
+
+        if (s < minimum_header_size(o))
+                return -EBADMSG;
+
         if (type >= 0 && o->object.type != type)
                 return -EBADMSG;
 
@@ -508,6 +435,7 @@ static int journal_file_append_object(JournalFile *f, int type, uint64_t size, O
         void *t;
 
         assert(f);
+        assert(type > 0 && type < _OBJECT_TYPE_MAX);
         assert(size >= sizeof(ObjectHeader));
         assert(offset);
         assert(ret);
@@ -613,7 +541,7 @@ static int journal_file_map_data_hash_table(JournalFile *f) {
         s = le64toh(f->header->data_hash_table_size);
 
         r = journal_file_move_to(f,
-                                 WINDOW_DATA_HASH_TABLE,
+                                 OBJECT_DATA_HASH_TABLE,
                                  p, s,
                                  &t);
         if (r < 0)
@@ -634,7 +562,7 @@ static int journal_file_map_field_hash_table(JournalFile *f) {
         s = le64toh(f->header->field_hash_table_size);
 
         r = journal_file_move_to(f,
-                                 WINDOW_FIELD_HASH_TABLE,
+                                 OBJECT_FIELD_HASH_TABLE,
                                  p, s,
                                  &t);
         if (r < 0)
@@ -2428,6 +2356,7 @@ int journal_file_open(
                 bool compress,
                 bool authenticate,
                 JournalMetrics *metrics,
+                MMapCache *mmap_cache,
                 JournalFile *template,
                 JournalFile **ret) {
 
@@ -2457,6 +2386,19 @@ int journal_file_open(
         f->compress = compress;
         f->authenticate = authenticate;
 
+        if (mmap_cache)
+                f->mmap = mmap_cache_ref(mmap_cache);
+        else {
+                /* One context for each type, plus the zeroth catchall
+                 * context. One fd for the file plus one for each type
+                 * (which we need during verification) */
+                f->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, 1 + _OBJECT_TYPE_MAX);
+                if (!f->mmap) {
+                        r = -ENOMEM;
+                        goto fail;
+                }
+        }
+
         f->path = strdup(fname);
         if (!f->path) {
                 r = -ENOMEM;
@@ -2605,7 +2547,7 @@ int journal_file_rotate(JournalFile **f, bool compress, bool authenticate) {
 
         old_file->header->state = STATE_ARCHIVED;
 
-        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file, &new_file);
+        r = journal_file_open(old_file->path, old_file->flags, old_file->mode, compress, authenticate, NULL, old_file->mmap, old_file, &new_file);
         journal_file_close(old_file);
 
         *f = new_file;
@@ -2619,6 +2561,7 @@ int journal_file_open_reliably(
                 bool compress,
                 bool authenticate,
                 JournalMetrics *metrics,
+                MMapCache *mmap,
                 JournalFile *template,
                 JournalFile **ret) {
 
@@ -2626,7 +2569,7 @@ int journal_file_open_reliably(
         size_t l;
         char *p;
 
-        r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
+        r = journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
         if (r != -EBADMSG && /* corrupted */
             r != -ENODATA && /* truncated */
             r != -EHOSTDOWN && /* other machine */
@@ -2660,7 +2603,7 @@ int journal_file_open_reliably(
 
         log_warning("File %s corrupted or uncleanly shut down, renaming and replacing.", fname);
 
-        return journal_file_open(fname, flags, mode, compress, authenticate, metrics, template, ret);
+        return journal_file_open(fname, flags, mode, compress, authenticate, metrics, mmap, template, ret);
 }
 
 struct vacuum_info {
diff --git a/src/journal/journal-file.h b/src/journal/journal-file.h
index a16c8ff..11a1c7d 100644
--- a/src/journal/journal-file.h
+++ b/src/journal/journal-file.h
@@ -32,24 +32,7 @@
 #include "sparse-endian.h"
 #include "journal-def.h"
 #include "util.h"
-
-typedef struct Window {
-        void *ptr;
-        uint64_t offset;
-        uint64_t size;
-} Window;
-
-enum {
-        WINDOW_UNKNOWN = OBJECT_UNUSED,
-        WINDOW_DATA = OBJECT_DATA,
-        WINDOW_ENTRY = OBJECT_ENTRY,
-        WINDOW_DATA_HASH_TABLE = OBJECT_DATA_HASH_TABLE,
-        WINDOW_FIELD_HASH_TABLE = OBJECT_FIELD_HASH_TABLE,
-        WINDOW_ENTRY_ARRAY = OBJECT_ENTRY_ARRAY,
-        WINDOW_TAG = OBJECT_TAG,
-        WINDOW_HEADER,
-        _WINDOW_MAX
-};
+#include "mmap-cache.h"
 
 typedef struct JournalMetrics {
         uint64_t max_use;
@@ -76,11 +59,10 @@ typedef struct JournalFile {
         HashItem *data_hash_table;
         HashItem *field_hash_table;
 
-        Window windows[_WINDOW_MAX];
-
         uint64_t current_offset;
 
         JournalMetrics metrics;
+        MMapCache *mmap;
 
 #ifdef HAVE_XZ
         void *compress_buffer;
@@ -108,6 +90,7 @@ int journal_file_open(
                 bool compress,
                 bool authenticate,
                 JournalMetrics *metrics,
+                MMapCache *mmap,
                 JournalFile *template,
                 JournalFile **ret);
 
@@ -120,6 +103,7 @@ int journal_file_open_reliably(
                 bool compress,
                 bool authenticate,
                 JournalMetrics *metrics,
+                MMapCache *mmap,
                 JournalFile *template,
                 JournalFile **ret);
 
diff --git a/src/journal/journal-internal.h b/src/journal/journal-internal.h
index d17fdb2..86519be 100644
--- a/src/journal/journal-internal.h
+++ b/src/journal/journal-internal.h
@@ -92,6 +92,7 @@ struct sd_journal {
         char *path;
 
         Hashmap *files;
+        MMapCache *mmap;
 
         Location current_location;
 
@@ -110,4 +111,3 @@ struct sd_journal {
 
 char *journal_make_match_string(sd_journal *j);
 void journal_print_header(sd_journal *j);
-
diff --git a/src/journal/journald.c b/src/journal/journald.c
index 8c41d9b..145663b 100644
--- a/src/journal/journald.c
+++ b/src/journal/journald.c
@@ -315,7 +315,7 @@ static JournalFile* find_journal(Server *s, uid_t uid) {
                 journal_file_close(f);
         }
 
-        r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, false, &s->system_metrics, s->system_journal, &f);
+        r = journal_file_open_reliably(p, O_RDWR|O_CREAT, 0640, s->compress, false, &s->system_metrics, s->mmap, s->system_journal, &f);
         free(p);
 
         if (r < 0)
@@ -2006,7 +2006,7 @@ static int system_journal_open(Server *s) {
                 if (!fn)
                         return -ENOMEM;
 
-                r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, true, &s->system_metrics, NULL, &s->system_journal);
+                r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, true, &s->system_metrics, s->mmap, NULL, &s->system_journal);
                 free(fn);
 
                 if (r >= 0)
@@ -2033,7 +2033,7 @@ static int system_journal_open(Server *s) {
                          * if it already exists, so that we can flush
                          * it into the system journal */
 
-                        r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, NULL, &s->runtime_journal);
+                        r = journal_file_open(fn, O_RDWR, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
                         free(fn);
 
                         if (r < 0) {
@@ -2049,7 +2049,7 @@ static int system_journal_open(Server *s) {
                          * it if necessary. */
 
                         (void) mkdir_parents(fn, 0755);
-                        r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, NULL, &s->runtime_journal);
+                        r = journal_file_open_reliably(fn, O_RDWR|O_CREAT, 0640, s->compress, false, &s->runtime_metrics, s->mmap, NULL, &s->runtime_journal);
                         free(fn);
 
                         if (r < 0) {
@@ -2793,6 +2793,10 @@ static int server_init(Server *s) {
         if (!s->user_journals)
                 return log_oom();
 
+        s->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, USER_JOURNALS_MAX + 2);
+        if (!s->mmap)
+                return log_oom();
+
         s->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
         if (s->epoll_fd < 0) {
                 log_error("Failed to create epoll object: %m");
@@ -2919,6 +2923,9 @@ static void server_done(Server *s) {
 
         free(s->buffer);
         free(s->tty_path);
+
+        if (s->mmap)
+                mmap_cache_unref(s->mmap);
 }
 
 int main(int argc, char *argv[]) {
diff --git a/src/journal/journald.h b/src/journal/journald.h
index d08a194..0202893 100644
--- a/src/journal/journald.h
+++ b/src/journal/journald.h
@@ -93,6 +93,8 @@ typedef struct Server {
 
         Storage storage;
 
+        MMapCache *mmap;
+
         bool dev_kmsg_readable;
 
         uint64_t *kernel_seqnum;
diff --git a/src/journal/mmap-cache.c b/src/journal/mmap-cache.c
new file mode 100644
index 0000000..68dbe70
--- /dev/null
+++ b/src/journal/mmap-cache.c
@@ -0,0 +1,577 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <assert.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "util.h"
+
+#include "mmap-cache.h"
+
+#define WINDOW_SIZE (8ULL*1024ULL*1024ULL)
+#define WINDOWS_MAX 32
+
+/* One mmap()ed region of a file, together with its membership in the
+ * cache's lists. All list links are indexes into MMapCache.windows[],
+ * with (unsigned) -1 meaning "none". */
+typedef struct Window {
+        int fd;                 /* file descriptor the region was mapped from */
+        void *ptr;              /* start of the mapping, NULL once unmapped */
+        uint64_t offset;        /* file offset the mapping starts at */
+        uint64_t size;          /* length of the mapping in bytes */
+
+        unsigned n_ref;         /* number of contexts currently pointing here */
+        unsigned lru_prev;      /* links in the list of unreferenced windows */
+        unsigned lru_next;
+
+        unsigned by_fd_prev;    /* links in the per-file-descriptor window list */
+        unsigned by_fd_next;
+} Window;
+
+/* Per file descriptor bookkeeping: the head of the list of windows
+ * that were mapped from it. */
+typedef struct FileDescriptor {
+        int fd;                 /* must stay the first member: lookups pass a plain int as bsearch() key */
+        unsigned windows;       /* index of the first window, or (unsigned) -1 if there is none */
+} FileDescriptor;
+
+/* The cache object. Windows are looked up in two ways: by context
+ * (by_context[]) and by file descriptor (by_fd[]). */
+struct MMapCache {
+        unsigned n_ref;         /* reference counter, see mmap_cache_ref()/mmap_cache_unref() */
+
+        unsigned contexts_max;  /* number of entries in by_context[] */
+        unsigned windows_max;   /* number of entries allocated for windows[] */
+        unsigned fds_max;       /* number of entries allocated for by_fd[] */
+
+        unsigned n_windows;     /* entries of windows[] handed out so far */
+        unsigned n_fds;         /* entries of by_fd[] currently in use */
+
+        unsigned lru_first, lru_last; /* both ends of the LRU list, (unsigned) -1 if empty */
+
+        Window *windows;
+        unsigned *by_context;   /* per context: index of its current window, or (unsigned) -1 */
+        FileDescriptor *by_fd;  /* kept sorted by fd, so that bsearch() can be used on it */
+};
+
+/* Drops the memory mapping of window w, if it has one. The window
+ * slot itself and all of its list links are left untouched. */
+static void mmap_cache_window_unmap(MMapCache *m, unsigned w) {
+        Window *win;
+
+        assert(m);
+        assert(w < m->n_windows);
+
+        win = &m->windows[w];
+
+        if (win->ptr) {
+                munmap(win->ptr, win->size);
+                win->ptr = NULL;
+        }
+}
+
+/* Appends window w to the tail of the LRU list, i.e. the list of
+ * windows no context refers to. The window must not be on the list
+ * already. */
+static void mmap_cache_window_add_lru(MMapCache *m, unsigned w) {
+        Window *v;
+
+        assert(m);
+        assert(w < m->n_windows);
+
+        v = m->windows + w;
+
+        /* Hook the new entry up behind the current tail. Without this
+         * forward link the list cannot be walked from lru_first, and
+         * unlinking an entry loses everything queued after it. */
+        if (m->lru_last != (unsigned) -1)
+                m->windows[m->lru_last].lru_next = w;
+
+        v->lru_prev = m->lru_last;
+        v->lru_next = (unsigned) -1;
+
+        m->lru_last = w;
+        if (m->lru_first == (unsigned) -1)
+                m->lru_first = w;
+}
+
+/* Unlinks window w from the LRU list. A missing predecessor or
+ * successor (link value (unsigned) -1) is taken to mean that w sits at
+ * that end of the list, in which case lru_first respectively lru_last
+ * of the cache is moved instead. The lru_prev/lru_next fields of w
+ * itself are left as they are. */
+static void mmap_cache_window_remove_lru(MMapCache *m, unsigned w) {
+        Window *v;
+
+        assert(m);
+        assert(w < m->n_windows);
+
+        v = m->windows + w;
+
+        if (v->lru_prev == (unsigned) -1)
+                m->lru_first = v->lru_next;
+        else
+                m->windows[v->lru_prev].lru_next = v->lru_next;
+
+        if (v->lru_next == (unsigned) -1)
+                m->lru_last = v->lru_prev;
+        else
+                m->windows[v->lru_next].lru_prev = v->lru_prev;
+}
+
+/* Puts window w at the head of the window list of by_fd[fd_index]. */
+static void mmap_cache_fd_add(MMapCache *m, unsigned fd_index, unsigned w) {
+        Window *v;
+        unsigned head;
+
+        assert(m);
+        assert(fd_index < m->n_fds);
+        assert(w < m->n_windows);
+
+        head = m->by_fd[fd_index].windows;
+
+        /* The previous head gets a predecessor now. If its back link
+         * stayed at (unsigned) -1, unlinking it later would mistake it
+         * for the head and cut off everything in front of it. */
+        if (head != (unsigned) -1)
+                m->windows[head].by_fd_prev = w;
+
+        v = m->windows + w;
+        v->by_fd_next = head;
+        v->by_fd_prev = (unsigned) -1;
+
+        m->by_fd[fd_index].windows = w;
+}
+
+/* Unlinks window w from the window list of by_fd[fd_index]. If w has
+ * no predecessor it is taken to be the list head, and the head is
+ * advanced to its successor. */
+static void mmap_cache_fd_remove(MMapCache *m, unsigned fd_index, unsigned w) {
+        Window *v;
+
+        assert(m);
+        assert(fd_index < m->n_fds);
+
+        v = m->windows + w;
+        if (v->by_fd_prev == (unsigned) -1)
+                m->by_fd[fd_index].windows = v->by_fd_next;
+        else
+                m->windows[v->by_fd_prev].by_fd_next = v->by_fd_next;
+
+        if (v->by_fd_next != (unsigned) -1)
+                m->windows[v->by_fd_next].by_fd_prev = v->by_fd_prev;
+}
+
+/* Detaches context c from the window it currently points to, if any,
+ * and drops the reference it held on that window. */
+static void mmap_cache_context_unset(MMapCache *m, unsigned c) {
+        Window *v;
+        unsigned w;
+
+        assert(m);
+        assert(c < m->contexts_max);
+
+        if (m->by_context[c] == (unsigned) -1)
+                return;
+
+        w = m->by_context[c];
+        m->by_context[c] = (unsigned) -1;
+
+        v = m->windows + w;
+        assert(v->n_ref > 0);
+        v->n_ref --;
+
+        /* Nobody is using the window anymore, hence queue it as a
+         * candidate for recycling */
+        if (v->n_ref == 0)
+                mmap_cache_window_add_lru(m, w);
+}
+
+/* Makes window w the current window of context c. Whatever c pointed
+ * to before is released first; nothing happens if c already points to
+ * w. */
+static void mmap_cache_context_set(MMapCache *m, unsigned c, unsigned w) {
+        Window *v;
+
+        assert(m);
+        assert(c < m->contexts_max);
+        assert(w < m->n_windows);
+
+        if (m->by_context[c] == w)
+                return;
+
+        mmap_cache_context_unset(m, c);
+
+        m->by_context[c] = w;
+
+        v = m->windows + w;
+        v->n_ref ++;
+        /* First reference: take the window off the LRU list.
+         * NOTE(review): this presumes that every window with n_ref == 0
+         * is linked into that list -- confirm for all callers. */
+        if (v->n_ref == 1)
+                mmap_cache_window_remove_lru(m, w);
+}
+
+/* Destroys the cache object: unmaps every window that was handed out
+ * so far, then releases the three arrays and the object itself. Also
+ * used to clean up a partially constructed object. */
+static void mmap_cache_free(MMapCache *m) {
+
+        assert(m);
+
+        if (m->windows) {
+                unsigned i = 0;
+
+                while (i < m->n_windows)
+                        mmap_cache_window_unmap(m, i++);
+        }
+
+        free(m->windows);
+        free(m->by_context);
+        free(m->by_fd);
+        free(m);
+}
+
+/* Allocates a new cache that can track up to contexts_max contexts
+ * and fds_max file descriptors. The window table gets WINDOWS_MAX
+ * entries, or contexts_max if that is larger. Returns the new object
+ * with a reference count of 1, or NULL if memory is short. */
+MMapCache* mmap_cache_new(unsigned contexts_max, unsigned fds_max) {
+        MMapCache *cache;
+
+        assert(contexts_max > 0);
+        assert(fds_max > 0);
+
+        cache = new0(MMapCache, 1);
+        if (!cache)
+                return NULL;
+
+        cache->n_ref = 1;
+        cache->contexts_max = contexts_max;
+        cache->fds_max = fds_max;
+        cache->windows_max = MAX(cache->contexts_max, WINDOWS_MAX);
+        cache->lru_first = cache->lru_last = (unsigned) -1;
+
+        cache->windows = new(Window, cache->windows_max);
+        if (!cache->windows)
+                goto fail;
+
+        cache->by_context = new(unsigned, cache->contexts_max);
+        if (!cache->by_context)
+                goto fail;
+
+        /* All bits set, i.e. every context starts out as (unsigned) -1 */
+        memset(cache->by_context, -1, cache->contexts_max * sizeof(unsigned));
+
+        cache->by_fd = new(FileDescriptor, cache->fds_max);
+        if (!cache->by_fd)
+                goto fail;
+
+        return cache;
+
+fail:
+        mmap_cache_free(cache);
+        return NULL;
+}
+
+/* Takes an additional reference on the cache and returns it again, so
+ * that the call can be used directly in an assignment. */
+MMapCache* mmap_cache_ref(MMapCache *m) {
+        assert(m);
+        assert(m->n_ref > 0);
+
+        m->n_ref += 1;
+
+        return m;
+}
+
+/* Drops one reference on the cache, destroying it when that was the
+ * last one. Always returns NULL. */
+MMapCache* mmap_cache_unref(MMapCache *m) {
+        assert(m);
+        assert(m->n_ref > 0);
+
+        if (m->n_ref != 1) {
+                m->n_ref--;
+                return NULL;
+        }
+
+        mmap_cache_free(m);
+        return NULL;
+}
+
+/* Finds a window slot for a new mapping and stores its index in *w.
+ * Unused slots are handed out first; once they are exhausted the
+ * window at the head of the LRU list is recycled. Returns 0 on
+ * success and -E2BIG if every window is still referenced by some
+ * context. */
+static int mmap_cache_allocate_window(MMapCache *m, unsigned *w) {
+        Window *v;
+        unsigned i, k, prev;
+
+        assert(m);
+        assert(w);
+
+        if (m->n_windows < m->windows_max) {
+                *w = m->n_windows ++;
+                return 0;
+        }
+
+        if (m->lru_first == (unsigned) -1)
+                return -E2BIG;
+
+        *w = m->lru_first;
+        v = m->windows + *w;
+
+        mmap_cache_window_unmap(m, *w);
+
+        /* The window may still be linked into the list of the file
+         * descriptor it used to map. Take it out: the caller is going
+         * to link it into another list, which would otherwise splice
+         * the old list into the new one. We walk from the list head so
+         * that we know the predecessor for sure, and so that a window
+         * that is not on the list anymore is simply not found. */
+        for (i = 0; i < m->n_fds; i++) {
+                if (m->by_fd[i].fd != v->fd)
+                        continue;
+
+                prev = (unsigned) -1;
+                for (k = m->by_fd[i].windows; k != (unsigned) -1; k = m->windows[k].by_fd_next) {
+                        if (k == *w) {
+                                if (prev == (unsigned) -1)
+                                        m->by_fd[i].windows = v->by_fd_next;
+                                else
+                                        m->windows[prev].by_fd_next = v->by_fd_next;
+
+                                if (v->by_fd_next != (unsigned) -1)
+                                        m->windows[v->by_fd_next].by_fd_prev = prev;
+
+                                break;
+                        }
+
+                        prev = k;
+                }
+
+                break;
+        }
+
+        mmap_cache_window_remove_lru(m, *w);
+
+        return 0;
+}
+
+/* Frees up address space by unmapping the first window on the LRU
+ * list that still has a mapping. Returns 1 if a window was unmapped
+ * and 0 if there was nothing left to unmap. */
+static int mmap_cache_make_room(MMapCache *m) {
+        unsigned i;
+
+        assert(m);
+
+        for (i = m->lru_first; i != (unsigned) -1; i = m->windows[i].lru_next) {
+
+                if (!m->windows[i].ptr)
+                        continue;
+
+                mmap_cache_window_unmap(m, i);
+                return 1;
+        }
+
+        return 0;
+}
+
+/* Creates a new mapping that covers size bytes at offset of fd,
+ * registers it as a window of by_fd[fd_index] and makes it the current
+ * window of context. On success stores the address that corresponds
+ * to offset in *ret and returns 1. On failure returns a negative
+ * errno-style code: whatever mmap() reported, -ENOMEM if no address
+ * space could be freed up, or the error of the window allocation. */
+static int mmap_cache_put(
+                MMapCache *m,
+                int fd,
+                unsigned fd_index,
+                int prot,
+                unsigned context,
+                uint64_t offset,
+                uint64_t size,
+                void **ret) {
+
+        unsigned w;
+        Window *v;
+        void *d;
+        uint64_t woffset, wsize;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(context < m->contexts_max);
+        assert(size > 0);
+        assert(ret);
+
+        /* Widen the requested range to page boundaries */
+        woffset = offset & ~((uint64_t) page_size() - 1ULL);
+        wsize = size + (offset - woffset);
+        wsize = PAGE_ALIGN(wsize);
+
+        if (wsize < WINDOW_SIZE) {
+                uint64_t delta;
+
+                /* Grow small requests to a full window, roughly
+                 * centred on the requested range. The amount we move
+                 * the start back by must be a whole number of pages,
+                 * since mmap() refuses offsets that are not page
+                 * aligned. */
+                delta = PAGE_ALIGN((WINDOW_SIZE - wsize) / 2);
+
+                if (delta > woffset)
+                        woffset = 0;
+                else
+                        woffset -= delta;
+
+                wsize = WINDOW_SIZE;
+        }
+
+        for (;;) {
+                d = mmap(NULL, wsize, prot, MAP_SHARED, fd, woffset);
+                if (d != MAP_FAILED)
+                        break;
+                if (errno != ENOMEM)
+                        return -errno;
+
+                /* Out of address space? Then drop an unused mapping
+                 * and try again. */
+                r = mmap_cache_make_room(m);
+                if (r < 0)
+                        return r;
+                if (r == 0)
+                        return -ENOMEM;
+        }
+
+        r = mmap_cache_allocate_window(m, &w);
+        if (r < 0) {
+                munmap(d, wsize);
+                return r;
+        }
+
+        v = m->windows + w;
+        v->fd = fd;
+        v->ptr = d;
+        v->offset = woffset;
+        v->size = wsize;
+
+        v->lru_prev = v->lru_next = (unsigned) -1;
+
+        mmap_cache_fd_add(m, fd_index, w);
+
+        /* Hand the window to the context right away. It is not on the
+         * LRU list at this point, hence we must not go through
+         * mmap_cache_context_set(): that would try to unlink it from
+         * the list and, as both of its links are unset, reset the list
+         * head and tail, throwing away all queued windows. */
+        mmap_cache_context_unset(m, context);
+        m->by_context[context] = w;
+        v->n_ref = 1;
+
+        *ret = (uint8_t*) d + (offset - woffset);
+        return 1;
+}
+
+/* Comparison function for qsort()/bsearch() on the by_fd[] table:
+ * orders entries by ascending file descriptor number. */
+static int fd_cmp(const void *_a, const void *_b) {
+        const FileDescriptor *x = _a, *y = _b;
+
+        return (x->fd > y->fd) - (x->fd < y->fd);
+}
+
+/* Looks up the by_fd[] entry for fd, registering the descriptor if it
+ * is not known yet. On success stores the index of the entry in
+ * *fd_index and returns 0; returns -E2BIG if all fds_max entries are
+ * taken already. */
+static int mmap_cache_get_fd_index(MMapCache *m, int fd, unsigned *fd_index) {
+        FileDescriptor *j;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(fd_index);
+
+        /* fd_cmp() only looks at the fd member, which comes first in
+         * FileDescriptor, hence the plain int serves as search key */
+        j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
+        if (!j) {
+                if (m->n_fds >= m->fds_max)
+                        return -E2BIG;
+
+                j = m->by_fd + m->n_fds ++;
+                j->fd = fd;
+                j->windows = (unsigned) -1;
+
+                /* Restore the sort order bsearch() relies on. Sorting
+                 * may move entries around, hence look the new one up
+                 * again to learn where it ended up. */
+                qsort(m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
+                j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
+        }
+
+        *fd_index = (unsigned) (j - m->by_fd);
+        return 0;
+}
+
+/* Checks whether window w has a live mapping that fully covers the
+ * size bytes starting at offset. */
+static bool mmap_cache_test_window(
+                MMapCache *m,
+                unsigned w,
+                uint64_t offset,
+                uint64_t size) {
+        Window *v;
+
+        assert(m);
+        assert(w < m->n_windows);
+        assert(size > 0);
+
+        v = m->windows + w;
+
+        /* Windows that were unmapped to free up address space stay on
+         * their lists with the old offset and size. They cannot
+         * satisfy any request, and must not be handed out. */
+        if (!v->ptr)
+                return false;
+
+        if (offset < v->offset || size > v->size)
+                return false;
+
+        /* Same as offset + size <= v->offset + v->size, but written so
+         * that a huge offset read from a damaged file cannot make the
+         * sum wrap around and pass the check. */
+        return offset - v->offset <= v->size - size;
+}
+
+/* Fast path: checks whether the window the context points to right
+ * now belongs to fd and covers the requested range. If so, stores the
+ * address corresponding to offset in *ret and returns 1, otherwise
+ * returns 0. */
+static int mmap_cache_current(
+                MMapCache *m,
+                int fd,
+                unsigned context,
+                uint64_t offset,
+                uint64_t size,
+                void **ret) {
+
+        unsigned w;
+        Window *cur;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(context < m->contexts_max);
+        assert(size > 0);
+        assert(ret);
+
+        w = m->by_context[context];
+        if (w == (unsigned) -1)
+                return 0;
+
+        cur = &m->windows[w];
+
+        if (cur->fd != fd || !mmap_cache_test_window(m, w, offset, size))
+                return 0;
+
+        *ret = (uint8_t*) cur->ptr + (offset - cur->offset);
+        return 1;
+}
+
+/* Searches the windows of by_fd[fd_index] for one that covers the
+ * requested range. If there is one it becomes the current window of
+ * the context, the address corresponding to offset is stored in *ret
+ * and 1 is returned. Returns 0 if no window matches. */
+static int mmap_cache_find(
+                MMapCache *m,
+                unsigned fd_index,
+                unsigned context,
+                uint64_t offset,
+                uint64_t size,
+                void **ret) {
+
+        Window *hit;
+        unsigned w;
+
+        assert(m);
+        assert(fd_index < m->n_fds);
+        assert(context < m->contexts_max);
+        assert(size > 0);
+        assert(ret);
+
+        for (w = m->by_fd[fd_index].windows; w != (unsigned) -1; w = m->windows[w].by_fd_next)
+                if (mmap_cache_test_window(m, w, offset, size))
+                        break;
+
+        if (w == (unsigned) -1)
+                return 0;
+
+        mmap_cache_context_set(m, context, w);
+
+        hit = &m->windows[w];
+        *ret = (uint8_t*) hit->ptr + (offset - hit->offset);
+        return 1;
+}
+
+/* Returns, in *ret, a pointer to size bytes at offset of the file
+ * referred to by fd, mapping that part of the file with protection
+ * prot if no suitable mapping exists yet. The window that backs the
+ * pointer becomes the current window of the given context. Returns 1
+ * on success and a negative errno-style code on failure. */
+int mmap_cache_get(
+                MMapCache *m,
+                int fd,
+                int prot,
+                unsigned context,
+                uint64_t offset,
+                uint64_t size,
+                void **ret) {
+
+        unsigned fd_index;
+        int r;
+
+        assert(m);
+        assert(fd >= 0);
+        assert(context < m->contexts_max);
+        assert(size > 0);
+        assert(ret);
+
+        /* Maybe the current pointer for this context is already the
+         * right one? */
+        r = mmap_cache_current(m, fd, context, offset, size, ret);
+        if (r != 0)
+                return r;
+
+        /* OK, let's find the chain for this FD */
+        r = mmap_cache_get_fd_index(m, fd, &fd_index);
+        if (r < 0)
+                return r;
+
+        /* And let's look through the available mmaps */
+        r = mmap_cache_find(m, fd_index, context, offset, size, ret);
+        if (r != 0)
+                return r;
+
+        /* Not found? Then, let's add it */
+        return mmap_cache_put(m, fd, fd_index, prot, context, offset, size, ret);
+}
+
+/* Makes the cache forget about fd: every context that currently
+ * points to one of its windows is detached, all of its windows are
+ * unmapped, and its entry is removed from by_fd[]. Does nothing if
+ * the descriptor is not known to the cache. */
+void mmap_cache_close_fd(MMapCache *m, int fd) {
+        FileDescriptor *j;
+        unsigned fd_index, c, w;
+
+        assert(m);
+        /* 0 is a valid file descriptor, and mmap_cache_get() accepts
+         * it, hence we have to accept it here as well */
+        assert(fd >= 0);
+
+        j = bsearch(&fd, m->by_fd, m->n_fds, sizeof(m->by_fd[0]), fd_cmp);
+        if (!j)
+                return;
+        fd_index = (unsigned) (j - m->by_fd);
+
+        for (c = 0; c < m->contexts_max; c++) {
+                w = m->by_context[c];
+                if (w == (unsigned) -1)
+                        continue;
+
+                if (m->windows[w].fd == fd)
+                        mmap_cache_context_unset(m, c);
+        }
+
+        /* Always take the first window off the list, until the list
+         * is empty */
+        w = m->by_fd[fd_index].windows;
+        while (w != (unsigned) -1) {
+
+                mmap_cache_fd_remove(m, fd_index, w);
+                mmap_cache_window_unmap(m, w);
+
+                w = m->by_fd[fd_index].windows;
+        }
+
+        /* Close the gap in the table; the remaining entries stay in
+         * sorted order */
+        memmove(m->by_fd + fd_index, m->by_fd + fd_index + 1, (m->n_fds - (fd_index + 1)) * sizeof(FileDescriptor));
+        m->n_fds --;
+}
+
+/* Detaches the context from the window it currently points to, if
+ * any. The window itself stays in the cache. */
+void mmap_cache_close_context(MMapCache *m, unsigned context) {
+        mmap_cache_context_unset(m, context);
+}
diff --git a/src/journal/mmap-cache.h b/src/journal/mmap-cache.h
new file mode 100644
index 0000000..0a88fc5
--- /dev/null
+++ b/src/journal/mmap-cache.h
@@ -0,0 +1,34 @@
+/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
+
+#pragma once
+
+/***
+  This file is part of systemd.
+
+  Copyright 2012 Lennart Poettering
+
+  systemd is free software; you can redistribute it and/or modify it
+  under the terms of the GNU Lesser General Public License as published by
+  the Free Software Foundation; either version 2.1 of the License, or
+  (at your option) any later version.
+
+  systemd is distributed in the hope that it will be useful, but
+  WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+  Lesser General Public License for more details.
+
+  You should have received a copy of the GNU Lesser General Public License
+  along with systemd; If not, see <http://www.gnu.org/licenses/>.
+***/
+
+#include <inttypes.h>
+
+typedef struct MMapCache MMapCache;
+
+/* Creates a cache for up to contexts_max contexts and fds_max file
+ * descriptors; returns NULL if memory is short. The object is
+ * reference counted, and mmap_cache_unref() always returns NULL. */
+MMapCache* mmap_cache_new(unsigned contexts_max, unsigned fds_max);
+MMapCache* mmap_cache_ref(MMapCache *m);
+MMapCache* mmap_cache_unref(MMapCache *m);
+
+/* mmap_cache_get() stores a pointer to size bytes at offset of fd in
+ * *ret and returns 1, or returns a negative errno-style code. The two
+ * close calls make the cache forget about a file descriptor or about
+ * the current window of a context, respectively. */
+int mmap_cache_get(MMapCache *m, int fd, int prot, unsigned context, uint64_t offset, uint64_t size, void **ret);
+void mmap_cache_close_fd(MMapCache *m, int fd);
+void mmap_cache_close_context(MMapCache *m, unsigned context);
diff --git a/src/journal/sd-journal.c b/src/journal/sd-journal.c
index 359a7ca..41526b3 100644
--- a/src/journal/sd-journal.c
+++ b/src/journal/sd-journal.c
@@ -1118,7 +1118,7 @@ static int add_file(sd_journal *j, const char *prefix, const char *filename) {
                 return 0;
         }
 
-        r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, NULL, &f);
+        r = journal_file_open(path, O_RDONLY, 0, false, false, NULL, j->mmap, NULL, &f);
         free(path);
 
         if (r < 0) {
@@ -1439,6 +1439,17 @@ static sd_journal *journal_new(int flags, const char *path) {
                 return NULL;
         }
 
+        /* One context for each type, plus the zeroth catchall
+         * context. One fd for each file plus one for each type, which
+         * is needed when verifying things */
+        j->mmap = mmap_cache_new(_OBJECT_TYPE_MAX, JOURNAL_FILES_MAX + _OBJECT_TYPE_MAX);
+        if (!j->mmap) {
+                /* Release the members and the object itself, and
+                 * report the failure. Without the return we would
+                 * fall through and hand the freed object back to the
+                 * caller. */
+                hashmap_free(j->files);
+                hashmap_free(j->directories_by_path);
+                free(j->path);
+                free(j);
+                return NULL;
+        }
+
         return j;
 }
 
@@ -1527,6 +1538,9 @@ _public_ void sd_journal_close(sd_journal *j) {
 
         sd_journal_flush_matches(j);
 
+        if (j->mmap)
+                mmap_cache_unref(j->mmap);
+
         free(j->path);
         free(j);
 }
diff --git a/src/journal/test-journal-stream.c b/src/journal/test-journal-stream.c
index 0925995..707dcc1 100644
--- a/src/journal/test-journal-stream.c
+++ b/src/journal/test-journal-stream.c
@@ -79,9 +79,9 @@ int main(int argc, char *argv[]) {
         assert_se(mkdtemp(t));
         assert_se(chdir(t) >= 0);
 
-        assert_se(journal_file_open("one.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, &one) == 0);
-        assert_se(journal_file_open("two.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, &two) == 0);
-        assert_se(journal_file_open("three.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, &three) == 0);
+        assert_se(journal_file_open("one.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, NULL, &one) == 0);
+        assert_se(journal_file_open("two.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, NULL, &two) == 0);
+        assert_se(journal_file_open("three.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, NULL, &three) == 0);
 
         for (i = 0; i < N_ENTRIES; i++) {
                 char *p, *q;
diff --git a/src/journal/test-journal.c b/src/journal/test-journal.c
index 8f01b4d..2fd19a7 100644
--- a/src/journal/test-journal.c
+++ b/src/journal/test-journal.c
@@ -41,7 +41,7 @@ int main(int argc, char *argv[]) {
         assert_se(mkdtemp(t));
         assert_se(chdir(t) >= 0);
 
-        assert_se(journal_file_open("test.journal", O_RDWR|O_CREAT, 0666, true, false, NULL, NULL, &f) == 0);
+        assert_se(journal_file_open("test.journal", O_RDWR|O_CREAT, 0666, true, true, NULL, NULL, NULL, &f) == 0);
 
         dual_timestamp_get(&ts);
 



More information about the systemd-commits mailing list