[Libreoffice-commits] core.git: sc/source

Eike Rathke (via logerrit) logerrit at kemper.freedesktop.org
Sun Aug 29 20:10:02 UTC 2021


 sc/source/ui/dbgui/scuiasciiopt.cxx |   89 ++++++++++++++++++++++++++++--------
 sc/source/ui/docshell/docsh.cxx     |   43 +++++++++++++++++
 sc/source/ui/inc/scuiasciiopt.hxx   |    5 +-
 3 files changed, 117 insertions(+), 20 deletions(-)

New commits:
commit 451e4abb5377f6d923860e9adfce82d46f31e049
Author:     Eike Rathke <erack at redhat.com>
AuthorDate: Sun Aug 29 18:23:35 2021 +0200
Commit:     Eike Rathke <erack at redhat.com>
CommitDate: Sun Aug 29 22:09:29 2021 +0200

    Resolves: tdf#117868 CSV: support sep=; and "sep=;" separator setting
    
    When reading CSV, the separator (any BMP character) is taken from
    an initial
    
    sep=;
    or
    "sep=;"
    
    single field if that is the only content of the row.
    The quoted form is preserved as (unquoted) cell content; for the
    unquoted form the separator is discarded, as contextually it is a
    real field separator.
    
    When writing CSV, the content of an existing top left cell, if that
    is the only cell in its row and it holds such a sep= setting, is
    adapted to the current separator (any BMP character) and written in
    the quoted form
    
    "sep=;"
    
    (unless the text delimiter is set to empty, in which case no quotes
    are written) and always uses the ASCII '"' double quote character.
    
    Change-Id: I854477bd0f9d1cafaa51a2130b616292347519cf
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/121232
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Jenkins

diff --git a/sc/source/ui/dbgui/scuiasciiopt.cxx b/sc/source/ui/dbgui/scuiasciiopt.cxx
index 3ed0b05cf677..7390786c5cd9 100644
--- a/sc/source/ui/dbgui/scuiasciiopt.cxx
+++ b/sc/source/ui/dbgui/scuiasciiopt.cxx
@@ -381,14 +381,6 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, const OUString& aDatNa
     if (nFromRow != 1)
         mxNfRow->set_value(nFromRow);
 
-    if ( bIsTSV )
-        mxCkbTab->set_active(true);
-    else
-        SetSeparators(); // Set Separators in the dialog from maFieldSeparators (empty are not set)
-
-    // Get Separators from the dialog (empty are set from default)
-    maFieldSeparators = GetSeparators();
-
     // Clipboard is always Unicode, else detect.
     rtl_TextEncoding ePreselectUnicode = (meCall == SC_IMPORTFILE ?
             RTL_TEXTENCODING_DONTKNOW : RTL_TEXTENCODING_UNICODE);
@@ -438,6 +430,40 @@ ScImportAsciiDlg::ScImportAsciiDlg(weld::Window* pParent, const OUString& aDatNa
         mnStreamPos = mpDatStream->Tell();
     }
 
+    if (bIsTSV)
+        SetSeparators('\t');
+    else
+    {
+        // Some MS-Excel convention is the first line containing the field
+        // separator as "sep=|" (without quotes and any field separator
+        // character). The second possibility seems to be it is present *with*
+        // quotes so it shows up as cell content *including* the separator and
+        // can be preserved during round trips. Check for an exact match of
+        // any such and set separator.
+        /* TODO: it is debatable whether the unquoted form should rather be
+         * treated special to actually include the separator in the field data.
+         * Currently it does not. */
+        sal_Unicode cSep = 0;
+        OUString aLine;
+        // Try to read one more character, if more than 7 it can't be an exact
+        // match of any.
+        mpDatStream->ReadUniOrByteStringLine( aLine, mpDatStream->GetStreamCharSet(), 8);
+        mpDatStream->Seek(mnStreamPos);
+        if (aLine.getLength() == 8)
+            ;   // nothing
+        else if (aLine.getLength() == 5 && aLine.startsWithIgnoreAsciiCase("sep="))
+            cSep = aLine[4];
+        else if (aLine.getLength() == 7 && aLine[6] == '"' && aLine.startsWithIgnoreAsciiCase("\"sep="))
+            cSep = aLine[5];
+
+        // Set Separators in the dialog from maFieldSeparators (empty are not
+        // set) or an optionally defined by file content field separator.
+        SetSeparators(cSep);
+    }
+
+    // Get Separators from the dialog (empty are set from default)
+    maFieldSeparators = GetSeparators();
+
     mxNfRow->connect_value_changed( LINK( this, ScImportAsciiDlg, FirstRowHdl ) );
 
     // *** Separator characters ***
@@ -650,19 +676,46 @@ void ScImportAsciiDlg::SaveParameters()
                      mxCkbSkipEmptyCells->get_active(), mxCkbRemoveSpace->get_active(), meCall );
 }
 
-void ScImportAsciiDlg::SetSeparators()
+void ScImportAsciiDlg::SetSeparators( sal_Unicode cSep )
 {
-    for (sal_Int32 i = 0; i < maFieldSeparators.getLength(); ++i)
+    if (cSep)
     {
-        switch (maFieldSeparators[i])
+        // Exclusively set a separator, maFieldSeparators needs not be
+        // modified, it's obtained by GetSeparators() after this call.
+        constexpr sal_Unicode aSeps[] = { '\t', ';', ',', ' ' };
+        for (const sal_Unicode c : aSeps)
         {
-            case '\t':  mxCkbTab->set_active(true);        break;
-            case ';':   mxCkbSemicolon->set_active(true);  break;
-            case ',':   mxCkbComma->set_active(true);      break;
-            case ' ':   mxCkbSpace->set_active(true);      break;
-            default:
-                mxCkbOther->set_active(true);
-                mxEdOther->set_text(mxEdOther->get_text() + OUStringChar(maFieldSeparators[i]));
+            const bool bSet = (c == cSep);
+            switch (c)
+            {
+                case '\t':  mxCkbTab->set_active(bSet);        break;
+                case ';':   mxCkbSemicolon->set_active(bSet);  break;
+                case ',':   mxCkbComma->set_active(bSet);      break;
+                case ' ':   mxCkbSpace->set_active(bSet);      break;
+            }
+            if (bSet)
+                cSep = 0;
+        }
+        if (cSep)
+        {
+            mxCkbOther->set_active(true);
+            mxEdOther->set_text(OUStringChar(cSep));
+        }
+    }
+    else
+    {
+        for (sal_Int32 i = 0; i < maFieldSeparators.getLength(); ++i)
+        {
+            switch (maFieldSeparators[i])
+            {
+                case '\t':  mxCkbTab->set_active(true);        break;
+                case ';':   mxCkbSemicolon->set_active(true);  break;
+                case ',':   mxCkbComma->set_active(true);      break;
+                case ' ':   mxCkbSpace->set_active(true);      break;
+                default:
+                            mxCkbOther->set_active(true);
+                            mxEdOther->set_text(mxEdOther->get_text() + OUStringChar(maFieldSeparators[i]));
+            }
         }
     }
 }
diff --git a/sc/source/ui/docshell/docsh.cxx b/sc/source/ui/docshell/docsh.cxx
index 5801636d3cc2..47d8adc1843b 100644
--- a/sc/source/ui/docshell/docsh.cxx
+++ b/sc/source/ui/docshell/docsh.cxx
@@ -1968,6 +1968,49 @@ void ScDocShell::AsciiSave( SvStream& rStream, const ScImportOptions& rAsciiOpt,
 
     SCCOL nCol;
     SCROW nRow;
+
+    // Treat the top left cell separator "sep=" special.
+    // Here nStartRow == 0 && nStartCol == 0
+    if (!bFixedWidth && cDelim != 0)
+    {
+        // First row iterator.
+        ScHorizontalCellIterator aIter( m_aDocument, nTab, nStartCol, nStartRow, nEndCol, nStartRow);
+        ScRefCellValue* pCell;
+        // Must be first column and all following cells on this row must be
+        // empty to fiddle with "sep=".
+        if ((pCell = aIter.GetNext( nCol, nRow)) != nullptr && nCol == nStartCol && !aIter.GetNext( nCol, nRow))
+        {
+            if (pCell->meType == CELLTYPE_STRING)
+            {
+                aString = pCell->mpString->getString();
+                if (aString.getLength() <= 5 && aString.startsWithIgnoreAsciiCase("sep="))
+                {
+                    // Cell content is /^sep=.?$/ so write current separator.
+                    // Force the quote character to '"' regardless what is set
+                    // for export because that is the only one recognized on
+                    // import.
+                    aString = "sep=" + OUStringChar(cDelim);
+                    if (cStrDelim != 0)
+                        rStream.WriteUniOrByteChar( '"', eCharSet);
+                    if (eCharSet == RTL_TEXTENCODING_UNICODE)
+                    {
+                        write_uInt16s_FromOUString( rStream, aString);
+                    }
+                    else
+                    {
+                        OString aStrEnc = OUStringToOString( aString, eCharSet);
+                        // write byte encoded
+                        rStream.WriteBytes( aStrEnc.getStr(), aStrEnc.getLength());
+                    }
+                    if (cStrDelim != 0)
+                        rStream.WriteUniOrByteChar( '"', eCharSet);
+                    endlub( rStream );
+                    ++nStartRow;
+                }
+            }
+        }
+    }
+
     SCCOL nNextCol = nStartCol;
     SCROW nNextRow = nStartRow;
     SCCOL nEmptyCol;
diff --git a/sc/source/ui/inc/scuiasciiopt.hxx b/sc/source/ui/inc/scuiasciiopt.hxx
index eae2f2f06bc0..2bb7e23252b1 100644
--- a/sc/source/ui/inc/scuiasciiopt.hxx
+++ b/sc/source/ui/inc/scuiasciiopt.hxx
@@ -91,8 +91,9 @@ public:
 private:
     /** Sets the selected char set data to meCharSet and mbCharSetSystem. */
     void                        SetSelectedCharSet();
-    /** Set separators in ui from maFieldSeparators    */
-    void                        SetSeparators();
+    /** Set separators in ui from maFieldSeparators or an optionally defined
+        separator. */
+    void                        SetSeparators( sal_Unicode cSep );
     /** Returns all separator characters in a string. */
     OUString                    GetSeparators() const;
 


More information about the Libreoffice-commits mailing list