[PATCH 3/6] Unified file for LZ4 and Snappy

Eugene Velesevich evel at ispras.ru
Fri May 31 13:05:15 PDT 2013


This patch implements LZ4 compression in a Snappy-like manner.
The trace file class is unified for the LZ4 and Snappy compression formats
(the new CommonFile class is SnappyFile, renamed and generalized).
Each compression library is wrapped in a class derived from CompressionLibrary,
whose virtual methods implement the library-specific interaction.

Zlib-compressed traces are supported only for reading.
---
 CMakeLists.txt                |    3 +-
 cli/cli_repack.cpp            |    6 +-
 common/trace_file.hpp         |  201 +++++++++++++++++++-
 common/trace_file_common.cpp  |  418 +++++++++++++++++++++++++++++++++++++++++
 common/trace_file_read.cpp    |   19 +-
 common/trace_file_snappy.cpp  |  406 ---------------------------------------
 common/trace_file_write.cpp   |   50 -----
 common/trace_writer.cpp       |    2 +-
 common/trace_writer_local.cpp |    2 +-
 9 files changed, 632 insertions(+), 475 deletions(-)
 create mode 100644 common/trace_file_common.cpp
 delete mode 100644 common/trace_file_snappy.cpp
 delete mode 100644 common/trace_file_write.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0d92ef3..15a3ee5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -319,9 +319,7 @@ add_library (common STATIC
     common/trace_dump.cpp
     common/trace_file.cpp
     common/trace_file_read.cpp
-    common/trace_file_write.cpp
     common/trace_file_zlib.cpp
-    common/trace_file_snappy.cpp
     common/trace_model.cpp
     common/trace_parser.cpp
     common/trace_parser_flags.cpp
@@ -333,6 +331,7 @@ add_library (common STATIC
     common/trace_option.cpp
     common/${os}
     common/trace_backtrace.cpp
+    common/trace_file_common.cpp
 )
 
 set_target_properties (common PROPERTIES
diff --git a/cli/cli_repack.cpp b/cli/cli_repack.cpp
index 6f08896..6275c1d 100644
--- a/cli/cli_repack.cpp
+++ b/cli/cli_repack.cpp
@@ -65,12 +65,12 @@ repack(const char *inFileName, const char *outFileName)
         return 1;
     }
 
-    trace::File *outFile = trace::File::createForWrite(outFileName);
+    trace::File *outFile = trace::File::createCommonFile(trace::File::SNAPPY);
     if (!outFile) {
         delete inFile;
         return 1;
     }
-
+    outFile->open(outFileName, trace::File::Write);
     size_t size = 8192;
     char *buf = new char[size];
     size_t read;
@@ -78,7 +78,7 @@ repack(const char *inFileName, const char *outFileName)
     while ((read = inFile->read(buf, size)) != 0) {
         outFile->write(buf, read);
     }
-
+    outFile->close();
     delete [] buf;
     delete outFile;
     delete inFile;
diff --git a/common/trace_file.hpp b/common/trace_file.hpp
index 0c176aa..78c0ad4 100644
--- a/common/trace_file.hpp
+++ b/common/trace_file.hpp
@@ -31,13 +31,199 @@
 #include <fstream>
 #include <stdint.h>
 
-
-#define SNAPPY_BYTE1 'a'
-#define SNAPPY_BYTE2 't'
+#include <snappy.h>
+#include <lz4.h>
+#include <lz4hc.h>
+#include <zlib.h>
+#include <assert.h>
 
 
 namespace trace {
 
+class CompressionLibrary {
+    // Abstract interface that each compression backend implements for the chunked trace file.
+public:
+    virtual void compress(const char *src, size_t inputLength, char *dst, size_t *outLength) = 0;
+    virtual void uncompress(const char *src, size_t inputLength, char *dst) = 0;
+    virtual size_t maxCompressedLength(size_t inputLength) = 0;
+    virtual size_t uncompressedLength(const char *src, size_t inputLength) = 0;
+    virtual ~CompressionLibrary() {}
+
+    static const size_t m_lengthSize = 4;   // number of bytes in an encoded length field
+    // Decodes the 4-byte little-endian length stored at src.
+    size_t getLength(const unsigned char* src) const {
+        size_t length = (size_t)src[0];
+        length |= ((size_t)src[1] <<  8);
+        length |= ((size_t)src[2] << 16);
+        length |= ((size_t)src[3] << 24);
+        return length;
+    }
+    // Encodes length into dst as 4 little-endian bytes; the value must fit in 32 bits.
+    void setLength(unsigned char* dst, size_t length) {
+        dst[0] = length & 0xff; length >>= 8;
+        dst[1] = length & 0xff; length >>= 8;
+        dst[2] = length & 0xff; length >>= 8;
+        dst[3] = length & 0xff; length >>= 8;
+        assert(length == 0);   // anything left over would have been silently truncated
+    }
+
+    static const unsigned char m_sigByte1 = 'z';   // hidden by the same-named constant in each derived class
+    static const unsigned char m_sigByte2 = 'z';   // hidden by the same-named constant in each derived class
+    virtual unsigned int getSignature() = 0;       // derived classes return (byte1 << 8) + byte2
+
+    const char *name;   // library name string, assigned in the derived constructors
+};
+
+class SnappyLibrary : public CompressionLibrary {
+
+public:
+
+    static const unsigned char m_sigByte1 = 'a';
+    static const unsigned char m_sigByte2 = 't';
+
+    SnappyLibrary()
+    {
+        name = "SNAPPY";
+    }
+
+    virtual void compress(const char *src, size_t inputLength, char *dst, size_t *outLength)
+    {
+        ::snappy::RawCompress(src, inputLength, dst, outLength);
+    }
+
+    virtual void uncompress(const char *src, size_t inputLength, char *dst)
+    {
+        ::snappy::RawUncompress(src, inputLength, dst);
+    }
+
+    virtual size_t maxCompressedLength(size_t inputLength)
+    {
+        return ::snappy::MaxCompressedLength(inputLength);
+    }
+
+    virtual size_t uncompressedLength(const char *src, size_t inputLength)
+    {
+        size_t result;
+        ::snappy::GetUncompressedLength(src, inputLength, &result);
+        return result;
+    }
+
+    static bool isSnappyCompressed(char b1, char b2)
+    {
+        return (b1 == m_sigByte1 && b2 == m_sigByte2);
+    }
+
+    virtual unsigned int getSignature()
+    {
+        return (m_sigByte1 << 8) + m_sigByte2;
+    }
+};
+
+class LZ4Library : public CompressionLibrary
+{
+
+private:
+    static const size_t m_chunkSize = 1 * 1024 * 1024;
+    bool m_highCompression;
+
+public:
+
+    static const unsigned char m_sigByte1 = 'l';
+    static const unsigned char m_sigByte2 = 'z';
+
+    LZ4Library(bool highCompression) : m_highCompression(highCompression)
+    {
+        name = "LZ4";
+    }
+
+    virtual void compress(const char *src, size_t inputLength, char *dst, size_t *outLength)
+    {
+        setLength((unsigned char*)dst, inputLength);
+        if (m_highCompression) {
+            *outLength = LZ4_compressHC(src, dst + m_lengthSize, inputLength);
+        }
+        else {
+            *outLength = LZ4_compress(src, dst + m_lengthSize, inputLength);
+        }
+        *outLength += m_lengthSize;
+    }
+
+    virtual void uncompress(const char *src, size_t inputLength, char *dst)
+    {
+        LZ4_uncompress_unknownOutputSize(src + m_lengthSize, dst, inputLength - m_lengthSize, m_chunkSize);
+    }
+
+    virtual size_t maxCompressedLength(size_t inputLength)
+    {
+        return LZ4_compressBound(inputLength) + m_lengthSize;
+    }
+
+    virtual size_t uncompressedLength(const char *src, size_t inputLength)
+    {
+        return getLength((const unsigned char*)src);
+    }
+
+    static bool isLZ4Compressed(char b1, char b2)
+    {
+        return (b1 == m_sigByte1 && b2 == m_sigByte2);
+    }
+
+    virtual unsigned int getSignature()
+    {
+        return (m_sigByte1 << 8) + m_sigByte2;
+    }
+
+};
+
+class ZLibrary : public CompressionLibrary
+{
+
+private:
+    unsigned long i;
+public:
+
+    static const unsigned char m_sigByte1 = 0x1f;
+    static const unsigned char m_sigByte2 = 0x8b;
+
+    ZLibrary()
+    {
+        name = "Zlib";
+    }
+
+    virtual void compress(const char *src, size_t inputLength, char *dst, size_t *outLength)
+    {
+        assert(false);
+    }
+
+    virtual void uncompress(const char *src, size_t inputLength, char *dst)
+    {
+        assert(false);
+    }
+
+    virtual size_t maxCompressedLength(size_t inputLength)
+    {
+        assert(false);
+        return 0;
+    }
+
+    virtual size_t uncompressedLength(const char *src, size_t inputLength)
+    {
+        assert(false);
+        return 0;
+    }
+
+    static bool isZlibCompressed(unsigned char b1, unsigned char b2)
+    {
+        return (b1 == m_sigByte1 && b2 == m_sigByte2);
+    }
+
+    virtual unsigned int getSignature()
+    {
+        return (m_sigByte1 << 8) + m_sigByte2;
+    }
+
+};
+
 class File {
 public:
     enum Mode {
@@ -52,12 +238,17 @@ public:
         uint64_t chunk;
         uint32_t offsetInChunk;
     };
+    enum Compressor {
+        SNAPPY,
+        LZ4,
+        LZ4HC,
+        ZLIB
+    };
 
 public:
     static File *createZLib(void);
-    static File *createSnappy(void);
     static File *createForRead(const char *filename);
-    static File *createForWrite(const char *filename);
+    static File *createCommonFile(File::Compressor compressor);
 public:
     File(const std::string &filename = std::string(),
          File::Mode mode = File::Read);
diff --git a/common/trace_file_common.cpp b/common/trace_file_common.cpp
new file mode 100644
index 0000000..c602ff4
--- /dev/null
+++ b/common/trace_file_common.cpp
@@ -0,0 +1,418 @@
+/**************************************************************************
+ *
+ * Copyright 2011 Zack Rusin
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ *
+ **************************************************************************/
+
+/*
+ * Unified file for tracing/retracing with multiple compression
+ * library support.
+ *
+ * Currently, LZ4 and Snappy are supported.
+ * Zlib is supported only for reading, via the dedicated class in trace_file_zlib.cpp
+ *
+ * The file is composed of a number of chunks, they are:
+ * chunk {
+ *     uint32 - specifying the length of the compressed data, in little endian
+ *     compressed data
+ * }
+ *
+ * File can contain any number of such chunks.
+ *
+ * Note:
+ * Currently the default size for to-be-compressed data is
+ * 1mb, so the compressed data is normally <= 1mb (incompressible input may exceed it).
+ * The reason it's 1mb is because it seems
+ * to offer a pretty good compression/disk io speed ratio
+ * but that might change.
+
+ */
+
+#include <iostream>
+
+#include <assert.h>
+#include <string.h>
+
+#include "trace_file.hpp"
+
+
+using namespace trace;
+
+
+class CommonFile : public File {
+public:
+    CommonFile(CompressionLibrary * lib = new LZ4Library(false),
+                const std::string &filename = std::string(),
+                File::Mode mode = File::Read);
+    virtual ~CommonFile();
+
+    virtual bool supportsOffsets() const;
+    virtual File::Offset currentOffset();
+    virtual void setCurrentOffset(const File::Offset &offset);
+protected:
+    virtual bool rawOpen(const std::string &filename, File::Mode mode);
+    virtual bool rawWrite(const void *buffer, size_t length);
+    virtual size_t rawRead(void *buffer, size_t length);
+    virtual int rawGetc();
+    virtual void rawClose();
+    virtual void rawFlush();
+    virtual bool rawSkip(size_t length);
+    virtual int rawPercentRead();
+
+private:
+    inline size_t usedCacheSize() const
+    {
+        assert(m_cachePtr >= m_cache);
+        return m_cachePtr - m_cache;
+    }
+    inline size_t freeCacheSize() const
+    {
+        assert(m_cacheSize >= usedCacheSize());
+        if (m_cacheSize > 0) {
+            return m_cacheSize - usedCacheSize();
+        } else {
+            return 0;
+        }
+    }
+    inline bool endOfData() const
+    {
+        return m_stream.eof() && freeCacheSize() == 0;
+    }
+    void flushWriteCache();
+    void flushReadCache(size_t skipLength = 0);
+    void createCache(size_t size);
+    void reallocCompressedCache(size_t size);
+    void writeLength(size_t length);
+    size_t readLength();
+private:
+    std::fstream m_stream;
+    size_t m_cacheMaxSize;
+    size_t m_cacheSize;
+    char *m_cache;
+    char *m_cachePtr;
+
+    size_t m_compressedCacheSize;
+    char *m_compressedCache;
+
+    File::Offset m_currentOffset;
+    std::streampos m_endPos;
+
+    CompressionLibrary * m_library;
+    static const size_t CACHE_SIZE = 1 * 1024 * 1024;
+
+};
+
+CommonFile::CommonFile(CompressionLibrary * lib, const std::string &filename,
+                              File::Mode mode)
+    : File(),
+      m_cacheMaxSize(CACHE_SIZE),
+      m_cacheSize(m_cacheMaxSize),
+      m_cache(new char [m_cacheMaxSize]),
+      m_cachePtr(m_cache),
+      m_compressedCacheSize(CACHE_SIZE),
+      m_compressedCache(new char[m_compressedCacheSize]),
+      m_library(lib)
+{
+}
+
+CommonFile::~CommonFile()
+{
+    close();
+    delete [] m_compressedCache;
+    delete [] m_cache;
+    delete m_library;
+}
+
+bool CommonFile::rawOpen(const std::string &filename, File::Mode mode)
+{
+    std::ios_base::openmode fmode = std::fstream::binary;
+    if (mode == File::Write) {
+        fmode |= (std::fstream::out | std::fstream::trunc);
+        createCache(CACHE_SIZE);
+    } else if (mode == File::Read) {
+        fmode |= std::fstream::in;
+    }
+
+    m_stream.open(filename.c_str(), fmode);
+
+    //read in the initial buffer if we're reading
+    if (m_stream.is_open() && mode == File::Read) {
+        m_stream.seekg(0, std::ios::end);
+        m_endPos = m_stream.tellg();
+        m_stream.seekg(0, std::ios::beg);
+
+        unsigned int sig = m_library->getSignature();
+        unsigned char byte1, byte2;
+        m_stream >> byte1;
+        m_stream >> byte2;
+        assert(byte1 == (unsigned char)((sig >> 8) & 0xFF));
+        assert(byte2 == (unsigned char)(sig & 0xFF));
+
+        flushReadCache();
+    } else if (m_stream.is_open() && mode == File::Write) {
+        unsigned int sig = m_library->getSignature();
+        m_stream << ((unsigned char)((sig >> 8) & 0xFF));
+        m_stream << ((unsigned char)(sig & 0xFF));
+    }
+    return m_stream.is_open();
+}
+
+bool CommonFile::rawWrite(const void *buffer, size_t length)
+{
+    if (freeCacheSize() > length) {
+        memcpy(m_cachePtr, buffer, length);
+        m_cachePtr += length;
+    } else if (freeCacheSize() == length) {
+        memcpy(m_cachePtr, buffer, length);
+        m_cachePtr += length;
+        flushWriteCache();
+    } else {
+        size_t sizeToWrite = length;
+
+        while (sizeToWrite >= freeCacheSize()) {
+            size_t endSize = freeCacheSize();
+            size_t offset = length - sizeToWrite;
+            memcpy(m_cachePtr, (const char*)buffer + offset, endSize);
+            sizeToWrite -= endSize;
+            m_cachePtr += endSize;
+            flushWriteCache();
+        }
+        if (sizeToWrite) {
+            size_t offset = length - sizeToWrite;
+            memcpy(m_cachePtr, (const char*)buffer + offset, sizeToWrite);
+            m_cachePtr += sizeToWrite;
+        }
+    }
+
+    return true;
+}
+
+size_t CommonFile::rawRead(void *buffer, size_t length)
+{
+    if (endOfData()) {
+        return 0;
+    }
+
+    if (freeCacheSize() >= length) {
+        memcpy(buffer, m_cachePtr, length);
+        m_cachePtr += length;
+    } else {
+        size_t sizeToRead = length;
+        size_t offset = 0;
+        while (sizeToRead) {
+            size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
+            offset = length - sizeToRead;
+            memcpy((char*)buffer + offset, m_cachePtr, chunkSize);
+            m_cachePtr += chunkSize;
+            sizeToRead -= chunkSize;
+            if (sizeToRead > 0) {
+                flushReadCache();
+            }
+            if (!m_cacheSize) {
+                return length - sizeToRead;
+            }
+        }
+    }
+
+    return length;
+}
+
+int CommonFile::rawGetc()
+{
+    unsigned char c = 0;
+    if (rawRead(&c, 1) != 1)
+        return -1;
+    return c;
+}
+
+void CommonFile::rawClose()
+{
+    if (!m_stream.is_open()) {
+        return;
+    }
+    if (m_mode == File::Write) {
+        flushWriteCache();
+    }
+    m_stream.close();
+    delete [] m_cache;
+    m_cache = NULL;
+    m_cachePtr = NULL;
+}
+
+void CommonFile::rawFlush()
+{
+    assert(m_mode == File::Write);
+    flushWriteCache();
+    m_stream.flush();
+}
+
+void CommonFile::flushWriteCache()
+{
+    size_t inputLength = usedCacheSize();   // bytes buffered since the last flush
+    // Compress the pending bytes into one length-prefixed chunk, then rewind the cache.
+    if (inputLength) {
+        size_t compressedLength;
+        reallocCompressedCache(m_library->maxCompressedLength(inputLength));   // worst-case output can exceed the initial CACHE_SIZE buffer
+        m_library->compress(m_cache, inputLength, m_compressedCache, &compressedLength);
+        writeLength(compressedLength);
+        m_stream.write(m_compressedCache, compressedLength);
+        m_cachePtr = m_cache;
+    }
+    assert(m_cachePtr == m_cache);
+}
+
+void CommonFile::flushReadCache(size_t skipLength)
+{
+    m_currentOffset.chunk = m_stream.tellg();   // record the stream position where this chunk starts
+    size_t compressedLength;
+    compressedLength = readLength();            // 0 when the length prefix could not be read
+    // Load the next chunk into the read cache; an empty cache marks the end of the data.
+    if (compressedLength) {
+        reallocCompressedCache(compressedLength);
+        m_stream.read((char*)m_compressedCache, compressedLength);
+        size_t cacheSize = m_library->uncompressedLength(m_compressedCache, compressedLength);
+        createCache(cacheSize);
+        if (skipLength < cacheSize) {           // a chunk the caller skips entirely is not decoded
+            m_library->uncompress(m_compressedCache, compressedLength, m_cache);
+        }
+    } else {
+        createCache(0);
+    }
+}
+
+void CommonFile::createCache(size_t size)
+{
+    if (size > m_cacheMaxSize) {
+        do {
+            m_cacheMaxSize <<= 1;
+        } while (size > m_cacheMaxSize);
+
+        delete [] m_cache;
+        m_cache = new char[m_cacheMaxSize];
+    }
+
+    m_cachePtr = m_cache;
+    m_cacheSize = size;
+}
+
+void CommonFile::reallocCompressedCache(size_t size)
+{
+    if (size > m_compressedCacheSize) {
+        do {
+            m_compressedCacheSize <<= 1;
+        } while (size > m_compressedCacheSize);
+
+        delete [] m_compressedCache;
+        m_compressedCache = new char[m_compressedCacheSize];
+    }
+}
+
+void CommonFile::writeLength(size_t length)
+{   // Writes the fixed-size length prefix that precedes a chunk's compressed bytes.
+    unsigned char buf[CompressionLibrary::m_lengthSize];   // compile-time bound, named via the class rather than the m_library pointer
+    m_library->setLength(buf, length);                     // little-endian encoding
+    m_stream.write((const char *)buf, sizeof buf);
+}
+
+size_t CommonFile::readLength()
+{   // Reads one length prefix from the stream; yields 0 when the read fails.
+    unsigned char buf[CompressionLibrary::m_lengthSize];   // compile-time bound, named via the class rather than the m_library pointer
+    size_t length;
+    m_stream.read((char *)buf, sizeof buf);
+    if (m_stream.fail()) {
+        length = 0;
+    } else {
+        length = m_library->getLength(buf);
+    }
+    return length;
+}
+
+bool CommonFile::supportsOffsets() const
+{
+    return true;
+}
+
+File::Offset CommonFile::currentOffset()
+{
+    m_currentOffset.offsetInChunk = m_cachePtr - m_cache;
+    return m_currentOffset;
+}
+
+void CommonFile::setCurrentOffset(const File::Offset &offset)
+{
+    // to remove eof bit
+    m_stream.clear();
+    // seek to the start of a chunk
+    m_stream.seekg(offset.chunk, std::ios::beg);
+    // load the chunk
+    flushReadCache();
+    assert(m_cacheSize >= offset.offsetInChunk);
+    // seek within our cache to the correct location within the chunk
+    m_cachePtr = m_cache + offset.offsetInChunk;
+
+}
+
+bool CommonFile::rawSkip(size_t length)
+{
+    if (endOfData()) {
+        return false;
+    }
+
+    if (freeCacheSize() >= length) {
+        m_cachePtr += length;
+    } else {
+        size_t sizeToRead = length;
+        while (sizeToRead) {
+            size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
+            m_cachePtr += chunkSize;
+            sizeToRead -= chunkSize;
+            if (sizeToRead > 0) {
+                flushReadCache(sizeToRead);
+            }
+            if (!m_cacheSize) {
+                break;
+            }
+        }
+    }
+
+    return true;
+}
+
+int CommonFile::rawPercentRead()
+{
+    return 100 * (double(m_stream.tellg()) / double(m_endPos));
+}
+
+
+File* File::createCommonFile(File::Compressor compressor)
+{
+    switch (compressor) {
+        case SNAPPY:
+            return new CommonFile(new SnappyLibrary());
+        case LZ4:
+            return new CommonFile(new LZ4Library(false));
+        case LZ4HC:
+            return new CommonFile(new LZ4Library(true));
+        default:
+            return new CommonFile(new SnappyLibrary());
+    }
+}
diff --git a/common/trace_file_read.cpp b/common/trace_file_read.cpp
index 46c83ff..d8bbd84 100644
--- a/common/trace_file_read.cpp
+++ b/common/trace_file_read.cpp
@@ -47,13 +47,18 @@ File::createForRead(const char *filename)
     stream.close();
 
     File *file;
-    if (byte1 == SNAPPY_BYTE1 && byte2 == SNAPPY_BYTE2) {
-        file = File::createSnappy();
-    } else if (byte1 == 0x1f && byte2 == 0x8b) {
-        file = File::createZLib();
-    } else  {
-        os::log("error: %s: unkwnown compression\n", filename);
-        return NULL;
+    if (SnappyLibrary::isSnappyCompressed(byte1, byte2)) {
+        file = createCommonFile(SNAPPY);
+    }
+    else if (LZ4Library::isLZ4Compressed(byte1, byte2)) {
+        file = createCommonFile(LZ4);
+    }
+    else if (ZLibrary::isZlibCompressed(byte1, byte2)) {
+        file = createZLib();
+    }
+    else {
+        os::log("error: could not determine %s compression type\n", filename);
+        file = NULL;
     }
     if (!file) {
         return NULL;
diff --git a/common/trace_file_snappy.cpp b/common/trace_file_snappy.cpp
deleted file mode 100644
index f47c89b..0000000
--- a/common/trace_file_snappy.cpp
+++ /dev/null
@@ -1,406 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Zack Rusin
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- **************************************************************************/
-
-
-/*
- * Snappy file format.
- * -------------------
- *
- * Snappy at its core is just a compressoin algorithm so we're
- * creating a new file format which uses snappy compression
- * to hold the trace data.
- *
- * The file is composed of a number of chunks, they are:
- * chunk {
- *     uint32 - specifying the length of the compressed data
- *     compressed data, in little endian
- * }
- * File can contain any number of such chunks.
- * The default size of an uncompressed chunk is specified in
- * SNAPPY_CHUNK_SIZE.
- *
- * Note:
- * Currently the default size for a a to-be-compressed data is
- * 1mb, meaning that the compressed data will be <= 1mb.
- * The reason it's 1mb is because it seems
- * to offer a pretty good compression/disk io speed ratio
- * but that might change.
- *
- */
-
-
-#include <snappy.h>
-
-#include <iostream>
-
-#include <assert.h>
-#include <string.h>
-
-#include "trace_file.hpp"
-
-
-#define SNAPPY_CHUNK_SIZE (1 * 1024 * 1024)
-
-
-
-using namespace trace;
-
-
-class SnappyFile : public File {
-public:
-    SnappyFile(const std::string &filename = std::string(),
-               File::Mode mode = File::Read);
-    virtual ~SnappyFile();
-
-    virtual bool supportsOffsets() const;
-    virtual File::Offset currentOffset();
-    virtual void setCurrentOffset(const File::Offset &offset);
-protected:
-    virtual bool rawOpen(const std::string &filename, File::Mode mode);
-    virtual bool rawWrite(const void *buffer, size_t length);
-    virtual size_t rawRead(void *buffer, size_t length);
-    virtual int rawGetc();
-    virtual void rawClose();
-    virtual void rawFlush();
-    virtual bool rawSkip(size_t length);
-    virtual int rawPercentRead();
-
-private:
-    inline size_t usedCacheSize() const
-    {
-        assert(m_cachePtr >= m_cache);
-        return m_cachePtr - m_cache;
-    }
-    inline size_t freeCacheSize() const
-    {
-        assert(m_cacheSize >= usedCacheSize());
-        if (m_cacheSize > 0) {
-            return m_cacheSize - usedCacheSize();
-        } else {
-            return 0;
-        }
-    }
-    inline bool endOfData() const
-    {
-        return m_stream.eof() && freeCacheSize() == 0;
-    }
-    void flushWriteCache();
-    void flushReadCache(size_t skipLength = 0);
-    void createCache(size_t size);
-    void writeCompressedLength(size_t length);
-    size_t readCompressedLength();
-private:
-    std::fstream m_stream;
-    size_t m_cacheMaxSize;
-    size_t m_cacheSize;
-    char *m_cache;
-    char *m_cachePtr;
-
-    char *m_compressedCache;
-
-    File::Offset m_currentOffset;
-    std::streampos m_endPos;
-};
-
-SnappyFile::SnappyFile(const std::string &filename,
-                              File::Mode mode)
-    : File(),
-      m_cacheMaxSize(SNAPPY_CHUNK_SIZE),
-      m_cacheSize(m_cacheMaxSize),
-      m_cache(new char [m_cacheMaxSize]),
-      m_cachePtr(m_cache)
-{
-    size_t maxCompressedLength =
-        snappy::MaxCompressedLength(SNAPPY_CHUNK_SIZE);
-    m_compressedCache = new char[maxCompressedLength];
-}
-
-SnappyFile::~SnappyFile()
-{
-    close();
-    delete [] m_compressedCache;
-    delete [] m_cache;
-}
-
-bool SnappyFile::rawOpen(const std::string &filename, File::Mode mode)
-{
-    std::ios_base::openmode fmode = std::fstream::binary;
-    if (mode == File::Write) {
-        fmode |= (std::fstream::out | std::fstream::trunc);
-        createCache(SNAPPY_CHUNK_SIZE);
-    } else if (mode == File::Read) {
-        fmode |= std::fstream::in;
-    }
-
-    m_stream.open(filename.c_str(), fmode);
-
-    //read in the initial buffer if we're reading
-    if (m_stream.is_open() && mode == File::Read) {
-        m_stream.seekg(0, std::ios::end);
-        m_endPos = m_stream.tellg();
-        m_stream.seekg(0, std::ios::beg);
-
-        // read the snappy file identifier
-        unsigned char byte1, byte2;
-        m_stream >> byte1;
-        m_stream >> byte2;
-        assert(byte1 == SNAPPY_BYTE1 && byte2 == SNAPPY_BYTE2);
-
-        flushReadCache();
-    } else if (m_stream.is_open() && mode == File::Write) {
-        // write the snappy file identifier
-        m_stream << SNAPPY_BYTE1;
-        m_stream << SNAPPY_BYTE2;
-    }
-    return m_stream.is_open();
-}
-
-bool SnappyFile::rawWrite(const void *buffer, size_t length)
-{
-    if (freeCacheSize() > length) {
-        memcpy(m_cachePtr, buffer, length);
-        m_cachePtr += length;
-    } else if (freeCacheSize() == length) {
-        memcpy(m_cachePtr, buffer, length);
-        m_cachePtr += length;
-        flushWriteCache();
-    } else {
-        size_t sizeToWrite = length;
-
-        while (sizeToWrite >= freeCacheSize()) {
-            size_t endSize = freeCacheSize();
-            size_t offset = length - sizeToWrite;
-            memcpy(m_cachePtr, (const char*)buffer + offset, endSize);
-            sizeToWrite -= endSize;
-            m_cachePtr += endSize;
-            flushWriteCache();
-        }
-        if (sizeToWrite) {
-            size_t offset = length - sizeToWrite;
-            memcpy(m_cachePtr, (const char*)buffer + offset, sizeToWrite);
-            m_cachePtr += sizeToWrite;
-        }
-    }
-
-    return true;
-}
-
-size_t SnappyFile::rawRead(void *buffer, size_t length)
-{
-    if (endOfData()) {
-        return 0;
-    }
-
-    if (freeCacheSize() >= length) {
-        memcpy(buffer, m_cachePtr, length);
-        m_cachePtr += length;
-    } else {
-        size_t sizeToRead = length;
-        size_t offset = 0;
-        while (sizeToRead) {
-            size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
-            offset = length - sizeToRead;
-            memcpy((char*)buffer + offset, m_cachePtr, chunkSize);
-            m_cachePtr += chunkSize;
-            sizeToRead -= chunkSize;
-            if (sizeToRead > 0) {
-                flushReadCache();
-            }
-            if (!m_cacheSize) {
-                return length - sizeToRead;
-            }
-        }
-    }
-
-    return length;
-}
-
-int SnappyFile::rawGetc()
-{
-    unsigned char c = 0;
-    if (rawRead(&c, 1) != 1)
-        return -1;
-    return c;
-}
-
-void SnappyFile::rawClose()
-{
-    if (m_mode == File::Write) {
-        flushWriteCache();
-    }
-    m_stream.close();
-    delete [] m_cache;
-    m_cache = NULL;
-    m_cachePtr = NULL;
-}
-
-void SnappyFile::rawFlush()
-{
-    assert(m_mode == File::Write);
-    flushWriteCache();
-    m_stream.flush();
-}
-
-void SnappyFile::flushWriteCache()
-{
-    size_t inputLength = usedCacheSize();
-
-    if (inputLength) {
-        size_t compressedLength;
-
-        ::snappy::RawCompress(m_cache, inputLength,
-                              m_compressedCache, &compressedLength);
-
-        writeCompressedLength(compressedLength);
-        m_stream.write(m_compressedCache, compressedLength);
-        m_cachePtr = m_cache;
-    }
-    assert(m_cachePtr == m_cache);
-}
-
-void SnappyFile::flushReadCache(size_t skipLength)
-{
-    //assert(m_cachePtr == m_cache + m_cacheSize);
-    m_currentOffset.chunk = m_stream.tellg();
-    size_t compressedLength;
-    compressedLength = readCompressedLength();
-
-    if (compressedLength) {
-        m_stream.read((char*)m_compressedCache, compressedLength);
-        ::snappy::GetUncompressedLength(m_compressedCache, compressedLength,
-                                        &m_cacheSize);
-        createCache(m_cacheSize);
-        if (skipLength < m_cacheSize) {
-            ::snappy::RawUncompress(m_compressedCache, compressedLength,
-                                    m_cache);
-        }
-    } else {
-        createCache(0);
-    }
-}
-
-void SnappyFile::createCache(size_t size)
-{
-    if (size > m_cacheMaxSize) {
-        do {
-            m_cacheMaxSize <<= 1;
-        } while (size > m_cacheMaxSize);
-
-        delete [] m_cache;
-        m_cache = new char[size];
-        m_cacheMaxSize = size;
-    }
-
-    m_cachePtr = m_cache;
-    m_cacheSize = size;
-}
-
-void SnappyFile::writeCompressedLength(size_t length)
-{
-    unsigned char buf[4];
-    buf[0] = length & 0xff; length >>= 8;
-    buf[1] = length & 0xff; length >>= 8;
-    buf[2] = length & 0xff; length >>= 8;
-    buf[3] = length & 0xff; length >>= 8;
-    assert(length == 0);
-    m_stream.write((const char *)buf, sizeof buf);
-}
-
-size_t SnappyFile::readCompressedLength()
-{
-    unsigned char buf[4];
-    size_t length;
-    m_stream.read((char *)buf, sizeof buf);
-    if (m_stream.fail()) {
-        length = 0;
-    } else {
-        length  =  (size_t)buf[0];
-        length |= ((size_t)buf[1] <<  8);
-        length |= ((size_t)buf[2] << 16);
-        length |= ((size_t)buf[3] << 24);
-    }
-    return length;
-}
-
-bool SnappyFile::supportsOffsets() const
-{
-    return true;
-}
-
-File::Offset SnappyFile::currentOffset()
-{
-    m_currentOffset.offsetInChunk = m_cachePtr - m_cache;
-    return m_currentOffset;
-}
-
-void SnappyFile::setCurrentOffset(const File::Offset &offset)
-{
-    // to remove eof bit
-    m_stream.clear();
-    // seek to the start of a chunk
-    m_stream.seekg(offset.chunk, std::ios::beg);
-    // load the chunk
-    flushReadCache();
-    assert(m_cacheSize >= offset.offsetInChunk);
-    // seek within our cache to the correct location within the chunk
-    m_cachePtr = m_cache + offset.offsetInChunk;
-
-}
-
-bool SnappyFile::rawSkip(size_t length)
-{
-    if (endOfData()) {
-        return false;
-    }
-
-    if (freeCacheSize() >= length) {
-        m_cachePtr += length;
-    } else {
-        size_t sizeToRead = length;
-        while (sizeToRead) {
-            size_t chunkSize = std::min(freeCacheSize(), sizeToRead);
-            m_cachePtr += chunkSize;
-            sizeToRead -= chunkSize;
-            if (sizeToRead > 0) {
-                flushReadCache(sizeToRead);
-            }
-            if (!m_cacheSize) {
-                break;
-            }
-        }
-    }
-
-    return true;
-}
-
-int SnappyFile::rawPercentRead()
-{
-    return 100 * (double(m_stream.tellg()) / double(m_endPos));
-}
-
-
-File* File::createSnappy(void) {
-    return new SnappyFile;
-}
diff --git a/common/trace_file_write.cpp b/common/trace_file_write.cpp
deleted file mode 100644
index 4cc8984..0000000
--- a/common/trace_file_write.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/**************************************************************************
- *
- * Copyright 2011 Jose Fonseca
- * All Rights Reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- *
- **************************************************************************/
-
-
-#include "os.hpp"
-#include "trace_file.hpp"
-
-
-using namespace trace;
-
-
-File *
-File::createForWrite(const char *filename)
-{
-    File *file;
-    file = File::createSnappy();
-    if (!file) {
-        return NULL;
-    }
-
-    if (!file->open(filename, File::Write)) {
-        os::log("error: could not open %s for writing\n", filename);
-        delete file;
-        return NULL;
-    }
-
-    return file;
-}
diff --git a/common/trace_writer.cpp b/common/trace_writer.cpp
index 46e6392..dbf00b5 100644
--- a/common/trace_writer.cpp
+++ b/common/trace_writer.cpp
@@ -43,7 +43,7 @@ namespace trace {
 Writer::Writer() :
     call_no(0)
 {
-    m_file = File::createSnappy();
+    m_file = File::createCommonFile(File::LZ4);
     close();
 }
 
diff --git a/common/trace_writer_local.cpp b/common/trace_writer_local.cpp
index c5a5c44..5f86f7d 100644
--- a/common/trace_writer_local.cpp
+++ b/common/trace_writer_local.cpp
@@ -146,7 +146,7 @@ void LocalWriter::checkProcessId(void) {
         // create a new file.  We can't call any method of the current
         // file, as it may cause it to flush and corrupt the parent's
         // trace, so we effectively leak the old file object.
-        m_file = File::createSnappy();
+        m_file = File::createCommonFile(File::LZ4);
         // Don't want to open the same file again
         os::unsetEnvironment("TRACE_FILE");
         open();
-- 
1.7.9.5



More information about the apitrace mailing list