[Libreoffice-commits] core.git: Branch 'aoo/trunk' - 2 commits - connectivity/source tools/source

Damjan Jovanovic damjan at apache.org
Sun Apr 17 18:08:36 UTC 2016


 connectivity/source/drivers/flat/ETable.cxx |   62 +++++++++++++++++++++++++---
 tools/source/stream/stream.cxx              |   59 ++++++++++++++++++--------
 2 files changed, 97 insertions(+), 24 deletions(-)

New commits:
commit 39abfdb0779d252d916a72f5563e064764cd68b0
Author: Damjan Jovanovic <damjan at apache.org>
Date:   Sun Apr 17 16:56:00 2016 +0000

    Fix a string limit error in my previous patch.
    
    Patch by: me

diff --git a/connectivity/source/drivers/flat/ETable.cxx b/connectivity/source/drivers/flat/ETable.cxx
index a885f1f..6fdef44 100644
--- a/connectivity/source/drivers/flat/ETable.cxx
+++ b/connectivity/source/drivers/flat/ETable.cxx
@@ -910,7 +910,7 @@ sal_Bool OFlatTable::readLine(QuotedTokenizedString& line, sal_Int32& _rnCurrent
     xub_StrLen nLastOffset = 0;
     bool isQuoted = false;
     bool isFieldStarting = true;
-    while (true)
+    while (sLine.Len() < STRING_MAXLEN)
     {
         bool wasQuote = false;
         const sal_Unicode *p;
commit 60e93b8b5b6bc4220d66e95cd234a37f3c8f8fd7
Author: Damjan Jovanovic <damjan at apache.org>
Date:   Sun Apr 17 16:44:43 2016 +0000

    Make CSV line parsers consistent with CSV field parsers.
    
    Our CSV field parsing algorithms treat fields starting with a quote
    (immediately at the beginning of the row, or after the field delimiter) as
    quoted. A quoted field ends at the corresponding closing quote, and any
    remaining text between the closing quote and the next field delimiter or end
    of line is appended to the text already extracted from the field, but not
    processed further. Any quotes in this extra text are taken verbatim - they
    do not quote anything.
    
    Our CSV line parsers were big hacks - they essentially read and concatenate
    lines until an even number of quote characters is found, and then feed this
    through the CSV field parsers.
    
    This patch rewrites the line parsers to work exactly how the field parsers
    work. Text such as:
    "another" ",something else
    is now correctly parsed by both Calc and Base as:
    [another "],[something else]
    instead of breaking all further parsing.
    
    Patch by: me

diff --git a/connectivity/source/drivers/flat/ETable.cxx b/connectivity/source/drivers/flat/ETable.cxx
index 1620b64..a885f1f 100644
--- a/connectivity/source/drivers/flat/ETable.cxx
+++ b/connectivity/source/drivers/flat/ETable.cxx
@@ -907,14 +907,64 @@ sal_Bool OFlatTable::readLine(QuotedTokenizedString& line, sal_Int32& _rnCurrent
         return sal_False;
 
     QuotedTokenizedString sLine = line; // check if the string continues on next line
-    while( (sLine.GetString().GetTokenCount(m_cStringDelimiter) % 2) != 1 )
+    xub_StrLen nLastOffset = 0;
+    bool isQuoted = false;
+    bool isFieldStarting = true;
+    while (true)
     {
-        m_pFileStream->ReadByteStringLine(sLine,nEncoding);
-        if ( !m_pFileStream->IsEof() )
+        bool wasQuote = false;
+        const sal_Unicode *p;
+        p = sLine.GetString().GetBuffer();
+        p += nLastOffset;
+
+        while (*p)
         {
-            line.GetString().Append('\n');
-            line.GetString() += sLine.GetString();
-            sLine = line;
+            if (isQuoted)
+            {
+                if (*p == m_cStringDelimiter)
+                    wasQuote = !wasQuote;
+                else
+                {
+                    if (wasQuote)
+                    {
+                        wasQuote = false;
+                        isQuoted = false;
+                        if (*p == m_cFieldDelimiter)
+                            isFieldStarting = true;
+                    }
+                }
+            }
+            else
+            {
+                if (isFieldStarting)
+                {
+                    isFieldStarting = false;
+                    if (*p == m_cStringDelimiter)
+                        isQuoted = true;
+                    else if (*p == m_cFieldDelimiter)
+                        isFieldStarting = true;
+                }
+                else if (*p == m_cFieldDelimiter)
+                    isFieldStarting = true;
+            }
+            ++p;
+        }
+
+        if (wasQuote)
+            isQuoted = false;
+
+        if (isQuoted)
+        {
+            nLastOffset = sLine.Len();
+            m_pFileStream->ReadByteStringLine(sLine,nEncoding);
+            if ( !m_pFileStream->IsEof() )
+            {
+                line.GetString().Append('\n');
+                line.GetString() += sLine.GetString();
+                sLine = line;
+            }
+            else
+                break;
         }
         else
             break;
diff --git a/tools/source/stream/stream.cxx b/tools/source/stream/stream.cxx
index 9389794..a0c8428 100644
--- a/tools/source/stream/stream.cxx
+++ b/tools/source/stream/stream.cxx
@@ -1128,38 +1128,59 @@ sal_Bool SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
     {
         const sal_Unicode* pSeps = rFieldSeparators.GetBuffer();
         xub_StrLen nLastOffset = 0;
-        xub_StrLen nQuotes = 0;
+        bool isQuoted = false;
+        bool isFieldStarting = true;
         while (!IsEof() && rStr.Len() < STRING_MAXLEN)
         {
+            bool wasQuote = false;
             bool bBackslashEscaped = false;
-            const sal_Unicode *p, *pStart;
-            p = pStart = rStr.GetBuffer();
+            const sal_Unicode *p;
+            p = rStr.GetBuffer();
             p += nLastOffset;
             while (*p)
             {
-                if (nQuotes)
+                if (isQuoted)
                 {
                     if (*p == cFieldQuote && !bBackslashEscaped)
-                        ++nQuotes;
-                    else if (bAllowBackslashEscape)
+                        wasQuote = !wasQuote;
+                    else
                     {
-                        if (*p == '\\')
-                            bBackslashEscaped = !bBackslashEscaped;
-                        else
-                            bBackslashEscaped = false;
+                        if (bAllowBackslashEscape)
+                        {
+                            if (*p == '\\')
+                                bBackslashEscaped = !bBackslashEscaped;
+                            else
+                                bBackslashEscaped = false;
+                        }
+                        if (wasQuote)
+                        {
+                            wasQuote = false;
+                            isQuoted = false;
+                            if (lcl_UnicodeStrChr( pSeps, *p ))
+                                isFieldStarting = true;
+                        }
                     }
                 }
-                else if (*p == cFieldQuote && (p == pStart ||
-                            lcl_UnicodeStrChr( pSeps, p[-1])))
-                    nQuotes = 1;
-                // A quote character inside a field content does not start
-                // a quote.
+                else
+                {
+                    if (isFieldStarting)
+                    {
+                        isFieldStarting = false;
+                        if (*p == cFieldQuote)
+                            isQuoted = true;
+                        else if (lcl_UnicodeStrChr( pSeps, *p ))
+                            isFieldStarting = true;
+                    }
+                    else if (lcl_UnicodeStrChr( pSeps, *p ))
+                        isFieldStarting = true;
+                }
                 ++p;
             }
 
-            if (nQuotes % 2 == 0)
-                break;
-            else
+            if (wasQuote)
+                isQuoted = false;
+
+            if (isQuoted)
             {
                 nLastOffset = rStr.Len();
                 String aNext;
@@ -1167,6 +1188,8 @@ sal_Bool SvStream::ReadCsvLine( String& rStr, sal_Bool bEmbeddedLineBreak,
                 rStr += sal_Unicode(_LF);
                 rStr += aNext;
             }
+            else
+                break;
         }
     }
     return nError == SVSTREAM_OK;


More information about the Libreoffice-commits mailing list