[Libreoffice-commits] online.git: 2 commits - common/Util.hpp net/Socket.cpp net/Socket.hpp net/WebSocketHandler.hpp test/helpers.hpp test/UnitHTTP.cpp wsd/LOOLWSD.cpp

Libreoffice Gerrit user logerrit at kemper.freedesktop.org
Wed May 22 10:08:04 UTC 2019


 common/Util.hpp          |   23 ++++++++
 net/Socket.cpp           |  126 ++++++++++++++++++++++++++++++++++++++++++-
 net/Socket.hpp           |   22 ++++++-
 net/WebSocketHandler.hpp |    8 +-
 test/UnitHTTP.cpp        |  135 +++++++++++++++++++++++++++++++++++++++++++++--
 test/helpers.hpp         |   23 ++++++--
 wsd/LOOLWSD.cpp          |   24 +++++---
 7 files changed, 335 insertions(+), 26 deletions(-)

New commits:
commit 05ca302c2bc5fd3be3598da503b209c5cdde5dbe
Author:     Michael Meeks <michael.meeks at collabora.com>
AuthorDate: Wed May 22 10:54:36 2019 +0100
Commit:     Michael Meeks <michael.meeks at collabora.com>
CommitDate: Wed May 22 11:07:42 2019 +0100

    tests for chunked transfer encoding parser.
    
    Change-Id: Ic55669ab7cc55bb44e8f7a00f30231b44f10535a

diff --git a/test/UnitHTTP.cpp b/test/UnitHTTP.cpp
index d0530fe10..e86593f22 100644
--- a/test/UnitHTTP.cpp
+++ b/test/UnitHTTP.cpp
@@ -13,6 +13,7 @@
 
 #include <helpers.hpp>
 #include <Poco/Util/Application.h>
+#include <Poco/Net/StreamSocket.h>
 #include <Poco/Net/StringPartSource.h>
 #include <Poco/Net/HTMLForm.h>
 #include <Poco/Net/HTTPRequest.h>
@@ -37,9 +38,9 @@ public:
         config.setBool("ssl.enable", true);
     }
 
-    // FIXME: can hook with (UnitWSD::get().handleHttpRequest(request, message, socket)) ...
-    void invokeTest() override
+    void testContinue()
     {
+        std::cerr << "testContinue\n";
         for (int i = 0; i < 3; ++i)
         {
             std::unique_ptr<Poco::Net::HTTPClientSession> session(helpers::createSession(Poco::URI(helpers::getTestServerURI())));
@@ -81,9 +82,135 @@ public:
                 return;
             }
         }
-        // Give those convertors time to save and cleanup.
-        std::this_thread::sleep_for(std::chrono::milliseconds(1000));
+    }
+
+    void writeString(const std::shared_ptr<Poco::Net::StreamSocket> &socket, std::string str)
+    {
+        socket->sendBytes(str.c_str(), str.size());
+    }
+
+    bool expectString(const std::shared_ptr<Poco::Net::StreamSocket> &socket, std::string str)
+    {
+        char buffer[str.size() + 64] = { 0, };
+        int got = socket->receiveBytes(buffer, str.size());
+        if (got != (int)str.size() ||
+            strncmp(buffer, str.c_str(), got))
+        {
+            std::cerr << "testChunks got " << got << " mismatching strings '" << buffer << " vs. expected '" << str << "'\n";
+            exitTest(TestResult::Failed);
+            return false;
+        }
+        else
+            return true;
+    }
+
+    void testChunks()
+    {
+        std::cerr << "testChunks\n";
+
+        std::shared_ptr<Poco::Net::StreamSocket> socket = helpers::createRawSocket();
+
+        writeString(
+            socket,
+            "POST /lool/convert-to/txt HTTP/1.1\r\n"
+            "Host: localhost:9980\r\n"
+            "User-Agent: looltests/1.2.3\r\n"
+            "Accept: */*\r\n"
+            "Expect: 100-continue\r\n"
+            "Transfer-Encoding: chunked\r\n"
+            "Content-Type: multipart/form-data; "
+            "boundary=------------------------5a0cd5c881663db4\r\n\r\n");
+        if (!expectString(
+                socket,
+                "HTTP/1.1 100 Continue\r\n\r\n"))
+            return;
+
+#define START_CHUNK_HEX(len) len "\r\n"
+#define END_CHUNK "\r\n"
+        writeString(
+            socket,
+            START_CHUNK_HEX("8A")
+            "--------------------------5a0cd5c881663db4\r\n"
+            "Content-Disposition: form-data; name=\"data\"; filename=\"test.txt\"\r\n"
+            "Content-Type: text/plain\r\n"
+            "\r\n"
+            END_CHUNK
+
+            START_CHUNK_HEX("12")
+            "This is some text."
+            END_CHUNK
+
+            START_CHUNK_HEX("1")
+            "\n"
+            END_CHUNK
+
+            "  4 room:for expansion!! cf. leading spaces and nasies <>!\"\'?=)\r\n"
+            "And "
+            END_CHUNK
+
+            START_CHUNK_HEX("1")
+            "s"
+            END_CHUNK
+
+            START_CHUNK_HEX("a")
+            "ome more.\n"
+            END_CHUNK
+            );
+        writeString(
+            socket,
+            START_CHUNK_HEX("30")
+            "\r\n"
+            "--------------------------5a0cd5c881663db4--\r\n"
+            END_CHUNK);
+
+        writeString(socket, START_CHUNK_HEX("0"));
+
+        char buffer[4096] = { 0, };
+        int got = socket->receiveBytes(buffer, 4096);
+        std::string start =
+            "HTTP/1.0 200 OK\r\n"
+            "Content-Disposition: attachment; filename=\"test.txt\"\r\n";
+
+        if (strncmp(buffer, start.c_str(), start.size()))
+        {
+            std::cerr << "missing pre-amble " << got << " '" << buffer << " vs. expected '" << start << "'\n";
+            exitTest(TestResult::Failed);
+            return;
+        }
 
+        // TODO: check content-length etc.
+
+        const char *ptr = strstr(buffer, "\r\n\r\n");
+        if (!ptr)
+        {
+            std::cerr << "missing separator " << got << " '" << buffer << "\n";
+            exitTest(TestResult::Failed);
+            return;
+        }
+
+        // Oddly we need another read to get the content.
+        got = socket->receiveBytes(buffer, 4096);
+        if (got >=0 )
+            buffer[got] = '\0';
+        else
+        {
+            std::cerr << "No content returned " << got << "\n";
+            exitTest(TestResult::Failed);
+            return;
+        }
+
+        if (strcmp(buffer, "\357\273\277This is some text.\nAnd some more.\n"))
+        {
+            std::cerr << "unexpected file content " << got << " '" << buffer << "\n";
+            exitTest(TestResult::Failed);
+            return;
+        }
+    }
+
+    void invokeTest() override
+    {
+        testChunks();
+        testContinue();
         std::cerr << "All tests passed.\n";
         exitTest(TestResult::Ok);
     }
diff --git a/test/helpers.hpp b/test/helpers.hpp
index c0f9d112c..708806cd2 100644
--- a/test/helpers.hpp
+++ b/test/helpers.hpp
@@ -21,6 +21,8 @@
 #include <Poco/Net/HTTPResponse.h>
 #include <Poco/Net/HTTPSClientSession.h>
 #include <Poco/Net/NetException.h>
+#include <Poco/Net/StreamSocket.h>
+#include <Poco/Net/SecureStreamSocket.h>
 #include <Poco/Net/Socket.h>
 #include <Poco/Path.h>
 #include <Poco/StringTokenizer.h>
@@ -175,18 +177,33 @@ Poco::Net::HTTPClientSession* createSession(const Poco::URI& uri)
     return new Poco::Net::HTTPClientSession(uri.getHost(), uri.getPort());
 }
 
-inline
-std::string const & getTestServerURI()
+inline int getClientPort()
 {
     static const char* clientPort = std::getenv("LOOL_TEST_CLIENT_PORT");
+    return clientPort? atoi(clientPort) : DEFAULT_CLIENT_PORT_NUMBER;
+}
+
+inline std::shared_ptr<Poco::Net::StreamSocket> createRawSocket()
+{
+    return
+#if ENABLE_SSL
+        std::make_shared<Poco::Net::SecureStreamSocket>
+#else
+        std::make_shared<Poco::Net::StreamSocket>
+#endif
+            (Poco::Net::SocketAddress("127.0.0.1", getClientPort()));
+}
 
+inline
+std::string const & getTestServerURI()
+{
     static std::string serverURI(
 #if ENABLE_SSL
             "https://127.0.0.1:"
 #else
             "http://127.0.0.1:"
 #endif
-            + (clientPort? std::string(clientPort) : std::to_string(DEFAULT_CLIENT_PORT_NUMBER)));
+            + std::to_string(getClientPort()));
 
     return serverURI;
 }
commit 9d723cb230986e2d464d19a6a1518c7bf0f8df1e
Author:     Michael Meeks <michael.meeks at collabora.com>
AuthorDate: Wed May 22 03:21:50 2019 +0100
Commit:     Michael Meeks <michael.meeks at collabora.com>
CommitDate: Wed May 22 11:07:42 2019 +0100

    Initial chunked transfer encoding.
    
    Important for convert-to on larger documents and/or with newer curls.
    
    Change-Id: Id18be6d22741a3af7cee39a069c509e4f662977b

diff --git a/common/Util.hpp b/common/Util.hpp
index 7c967784c..e6e426ed0 100644
--- a/common/Util.hpp
+++ b/common/Util.hpp
@@ -165,6 +165,18 @@ namespace Util
     // Extract all json entries into a map.
     std::map<std::string, std::string> JsonToMap(const std::string& jsonString);
 
+    inline int hexDigitFromChar(char c)
+    {
+        if (c >= '0' && c <= '9')
+            return c - '0';
+        else if (c >= 'a' && c <= 'f')
+            return c - 'a' + 10;
+        else if (c >= 'A' && c <= 'F')
+            return c - 'A' + 10;
+        else
+            return -1;
+    }
+
     /// Dump a lineof data as hex
     inline std::string stringifyHexLine(
                             const std::vector<char> &buffer,
@@ -233,6 +245,17 @@ namespace Util
         os.flush();
     }
 
+    inline std::string dumpHex (const char *legend, const char *prefix,
+                                const std::vector<char>::iterator &startIt,
+                                const std::vector<char>::iterator &endIt,
+                                bool skipDup = true, const unsigned int width = 32)
+    {
+        std::ostringstream oss;
+        std::vector<char> data(startIt, endIt);
+        dumpHex(oss, legend, prefix, data, skipDup, width);
+        return oss.str();
+    }
+
     /// Trim spaces from the left. Just spaces.
     inline std::string& ltrim(std::string& s)
     {
diff --git a/net/Socket.cpp b/net/Socket.cpp
index ba72e16c1..34942d2f1 100644
--- a/net/Socket.cpp
+++ b/net/Socket.cpp
@@ -628,14 +628,21 @@ std::string LocalServerSocket::bind()
     return "";
 }
 
+// For a verbose life, tweak here:
+#if 0
+#  define LOG_CHUNK(X) LOG_TRC(X)
+#else
+#  define LOG_CHUNK(X)
+#endif
+
 bool StreamSocket::parseHeader(const char *clientName,
                                Poco::MemoryInputStream &message,
                                Poco::Net::HTTPRequest &request,
-                               size_t *requestSize)
+                               MessageMap *map)
 {
     LOG_TRC("#" << getFD() << " handling incoming " << _inBuffer.size() << " bytes.");
 
-    assert(!requestSize || *requestSize == 0);
+    assert(!map || (map->_headerSize == 0 && map->_messageSize == 0));
 
     // Find the end of the header, if any.
     static const std::string marker("\r\n\r\n");
@@ -649,8 +656,11 @@ bool StreamSocket::parseHeader(const char *clientName,
 
     // Skip the marker.
     itBody += marker.size();
-    if (requestSize)
-        *requestSize = static_cast<size_t>(itBody - _inBuffer.begin());
+    if (map) // a reasonable guess so far
+    {
+        map->_headerSize = static_cast<size_t>(itBody - _inBuffer.begin());
+        map->_messageSize = map->_headerSize;
+    }
 
     try
     {
@@ -681,6 +691,8 @@ bool StreamSocket::parseHeader(const char *clientName,
             LOG_DBG("Not enough content yet: ContentLength: " << contentLength << ", available: " << available);
             return false;
         }
+        if (map)
+            map->_messageSize += contentLength;
 
         if (request.getExpectContinue() && !_sentHTTPContinue)
         {
@@ -690,6 +702,79 @@ bool StreamSocket::parseHeader(const char *clientName,
                  sizeof("HTTP/1.1 100 Continue\r\n\r\n") - 1);
             _sentHTTPContinue = true;
         }
+
+        if (request.getChunkedTransferEncoding())
+        {
+            // keep the header
+            if (map)
+                map->_spans.push_back(std::pair<size_t, size_t>(0, itBody - _inBuffer.begin()));
+
+            int chunk = 0;
+            while (itBody != _inBuffer.end())
+            {
+                auto chunkStart = itBody;
+
+                // skip whitespace
+                for (; itBody != _inBuffer.end() && isascii(*itBody) && isspace(*itBody); ++itBody)
+                    ; // skip.
+
+                // each chunk is preceeded by its length in hex.
+                size_t chunkLen = 0;
+                for (; itBody != _inBuffer.end(); ++itBody)
+                {
+                    int digit = Util::hexDigitFromChar(*itBody);
+                    if (digit >= 0)
+                        chunkLen = chunkLen * 16 + digit;
+                    else
+                        break;
+                }
+
+                LOG_CHUNK("Chunk of length " << chunkLen);
+
+                for (; itBody != _inBuffer.end() && *itBody != '\n'; ++itBody)
+                    ; // skip to end of line
+
+                if (itBody != _inBuffer.end())
+                    itBody++; /* \n */;
+
+                // skip the chunk.
+                auto chunkOffset = itBody - _inBuffer.begin();
+                auto chunkAvailable = _inBuffer.size() - chunkOffset;
+
+                if (chunkLen == 0) // we're complete.
+                {
+                    map->_messageSize = chunkOffset;
+                    return true;
+                }
+
+                if (chunkLen > chunkAvailable + 2)
+                {
+                    LOG_DBG("Not enough content yet in chunk " << chunk <<
+                            " starting at offset " << (chunkStart - _inBuffer.begin()) <<
+                            " chunk len: " << chunkLen << ", available: " << chunkAvailable);
+                    return false;
+                }
+                itBody += chunkLen;
+
+                map->_spans.push_back(std::pair<size_t,size_t>(chunkOffset, chunkLen));
+
+                if (*itBody != '\r' || *(itBody + 1) != '\n')
+                {
+                    LOG_ERR("Missing \\r\\n at end of chunk " << chunk << " of length " << chunkLen);
+                    LOG_CHUNK("Chunk " << chunk << " is: \n" << Util::dumpHex("", "", chunkStart, itBody + 1, false));
+                    return false; // TODO: throw something sensible in this case
+                }
+                else
+                {
+                    LOG_CHUNK("Chunk " << chunk << " is: \n" << Util::dumpHex("", "", chunkStart, itBody + 1, false));
+                }
+
+                itBody+=2;
+                chunk++;
+            }
+            LOG_TRC("Not enough chunks yet, so far " << chunk << " chunks of total length " << (itBody - _inBuffer.begin()));
+            return false;
+        }
     }
     catch (const Poco::Exception& exc)
     {
@@ -709,6 +794,39 @@ bool StreamSocket::parseHeader(const char *clientName,
     return true;
 }
 
+bool StreamSocket::compactChunks(MessageMap *map)
+{
+    assert (map);
+    if (!map->_spans.size())
+        return false; // single message.
+
+    LOG_CHUNK("Pre-compact " << map->_spans.size() << " chunks: \n" <<
+              Util::dumpHex("", "", _inBuffer.begin(), _inBuffer.end(), false));
+
+    char *first = &_inBuffer[0];
+    char *dest = first;
+    for (auto &span : map->_spans)
+    {
+        std::memmove(dest, &_inBuffer[span.first], span.second);
+        dest += span.second;
+    }
+
+    // Erase the duplicate bits.
+    size_t newEnd = dest - first;
+    size_t gap = map->_messageSize - newEnd;
+    _inBuffer.erase(_inBuffer.begin() + newEnd, _inBuffer.begin() + map->_messageSize);
+
+    LOG_CHUNK("Post-compact with erase of " << newEnd << " to " << map->_messageSize << " giving: \n" <<
+              Util::dumpHex("", "", _inBuffer.begin(), _inBuffer.end(), false));
+
+    // shrink our size to fit
+    map->_messageSize -= gap;
+
+    dumpState(std::cerr);
+
+    return true;
+}
+
 namespace HttpHelper
 {
     void sendUncompressedFileContent(const std::shared_ptr<StreamSocket>& socket,
diff --git a/net/Socket.hpp b/net/Socket.hpp
index 5995570dc..2d96d1981 100644
--- a/net/Socket.hpp
+++ b/net/Socket.hpp
@@ -949,9 +949,21 @@ public:
         return socket;
     }
 
+        /// Messages can be in chunks, only parts of message being valid.
+    struct MessageMap {
+        MessageMap() : _headerSize(0), _messageSize(0) {}
+        /// Size of HTTP headers
+        size_t _headerSize;
+        /// Entire size of data associated with this message
+        size_t _messageSize;
+        // offset + lengths to collate into the real stream
+        std::vector<std::pair<size_t, size_t>> _spans;
+    };
+
     /// Remove the first @count bytes from input buffer
-    void eraseFirstInputBytes(size_t count)
+    void eraseFirstInputBytes(const MessageMap &map)
     {
+        size_t count = map._headerSize;
         size_t toErase = std::min(count, _inBuffer.size());
         if (toErase < count)
             LOG_ERR("#" << getFD() << ": attempted to remove: " << count << " which is > size: " << _inBuffer.size() << " clamped to " << toErase);
@@ -959,12 +971,16 @@ public:
             _inBuffer.erase(_inBuffer.begin(), _inBuffer.begin() + count);
     }
 
+    /// Compacts chunk headers away leaving just the data we want
+    /// returns true if we did any re-sizing/movement of _inBuffer.
+    bool compactChunks(MessageMap *map);
+
     /// Detects if we have an HTTP header in the provided message and
     /// populates a request for that.
     bool parseHeader(const char *clientLoggingName,
                      Poco::MemoryInputStream &message,
                      Poco::Net::HTTPRequest &request,
-                     size_t *requestSize = nullptr);
+                     MessageMap *map = nullptr);
 
     /// Get input/output statistics on this stream
     void getIOStats(uint64_t &sent, uint64_t &recv)
@@ -985,6 +1001,8 @@ public:
 
 protected:
 
+    std::vector<std::pair<size_t, size_t>> findChunks(Poco::Net::HTTPRequest &request);
+
     /// Called when a polling event is received.
     /// @events is the mask of events that triggered the wake.
     void handlePoll(SocketDisposition &disposition,
diff --git a/net/WebSocketHandler.hpp b/net/WebSocketHandler.hpp
index c943a5bd1..f5f767e4f 100644
--- a/net/WebSocketHandler.hpp
+++ b/net/WebSocketHandler.hpp
@@ -692,7 +692,7 @@ protected:
         LOG_TRC("Incoming client websocket upgrade response: " << std::string(&socket->getInBuffer()[0], socket->getInBuffer().size()));
 
         bool bOk = false;
-        size_t responseSize = 0;
+        StreamSocket::MessageMap map;
 
         try
         {
@@ -708,7 +708,7 @@ protected:
                                           marker.begin(), marker.end());
 
                 if (itBody != socket->getInBuffer().end())
-                    responseSize = itBody - socket->getInBuffer().begin() + marker.size();
+                    map._headerSize = itBody - socket->getInBuffer().begin() + marker.size();
             }
 
             if (response.getStatus() == Poco::Net::HTTPResponse::HTTP_SWITCHING_PROTOCOLS &&
@@ -737,7 +737,7 @@ protected:
             LOG_DBG("handleClientUpgrade exception caught.");
         }
 
-        if (!bOk || responseSize == 0)
+        if (!bOk || map._headerSize == 0)
         {
             LOG_ERR("Bad websocker server response.");
 
@@ -746,7 +746,7 @@ protected:
         }
 
         setWebSocket();
-        socket->eraseFirstInputBytes(responseSize);
+        socket->eraseFirstInputBytes(map);
     }
 #endif
 
diff --git a/wsd/LOOLWSD.cpp b/wsd/LOOLWSD.cpp
index 25877811a..98a97fb9f 100644
--- a/wsd/LOOLWSD.cpp
+++ b/wsd/LOOLWSD.cpp
@@ -1836,9 +1836,7 @@ private:
         try
         {
 #if !MOBILEAPP
-            size_t requestSize = 0;
-
-            if (!socket->parseHeader("Prisoner", message, request, &requestSize))
+            if (!socket->parseHeader("Prisoner", message, request))
                 return;
 
             LOG_TRC("Child connection with URI [" << LOOLWSD::anonymizeUrl(request.getURI()) << "].");
@@ -2051,16 +2049,23 @@ private:
             return;
         }
 
-        Poco::MemoryInputStream message(&socket->getInBuffer()[0],
-                                        socket->getInBuffer().size());;
+        Poco::MemoryInputStream startmessage(&socket->getInBuffer()[0],
+                                             socket->getInBuffer().size());;
         Poco::Net::HTTPRequest request;
-        size_t requestSize = 0;
 
-        if (!socket->parseHeader("Client", message, request, &requestSize))
+        StreamSocket::MessageMap map;
+        if (!socket->parseHeader("Client", startmessage, request, &map))
             return;
 
         try
         {
+            // We may need to re-write the chunks moving the inBuffer.
+            socket->compactChunks(&map);
+            Poco::MemoryInputStream message(&socket->getInBuffer()[0],
+                                            socket->getInBuffer().size());
+            // update the read cursor - headers are not altered by chunks.
+            message.seekg(startmessage.tellg(), std::ios::beg);
+
             // Check and remove the ServiceRoot from the request.getURI()
             if (!Util::startsWith(request.getURI(), LOOLWSD::ServiceRoot))
                 throw BadRequestException("The request does not start with prefix: " + LOOLWSD::ServiceRoot);
@@ -2171,7 +2176,7 @@ private:
 
         // if we succeeded - remove the request from our input buffer
         // we expect one request per socket
-        socket->eraseFirstInputBytes(requestSize);
+        socket->eraseFirstInputBytes(map);
 #else
         Poco::Net::HTTPRequest request;
         handleClientWsUpgrade(request, std::string(socket->getInBuffer().data(), socket->getInBuffer().size()), disposition);
@@ -2336,7 +2341,8 @@ private:
         return "application/octet-stream";
     }
 
-    void handlePostRequest(const Poco::Net::HTTPRequest& request, Poco::MemoryInputStream& message,
+    void handlePostRequest(const Poco::Net::HTTPRequest& request,
+                           Poco::MemoryInputStream& message,
                            SocketDisposition &disposition)
     {
         LOG_INF("Post request: [" << LOOLWSD::anonymizeUrl(request.getURI()) << "]");


More information about the Libreoffice-commits mailing list