[Libreoffice-commits] core.git: formula/source include/formula offapi/com sc/inc sc/source

Eike Rathke (via logerrit) logerrit at kemper.freedesktop.org
Wed Jul 28 16:57:06 UTC 2021


 formula/source/core/api/FormulaCompiler.cxx                |   29 +++-
 formula/source/core/api/token.cxx                          |   39 +++++-
 formula/source/ui/dlg/formula.cxx                          |    8 -
 include/formula/compiler.hxx                               |   41 +++---
 include/formula/opcode.hxx                                 |    2 
 include/formula/token.hxx                                  |   20 +++
 offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl |   24 +++-
 sc/inc/compiler.hxx                                        |   20 +++
 sc/source/core/tool/compiler.cxx                           |   78 +++++++++----
 sc/source/core/tool/parclass.cxx                           |    1 
 sc/source/core/tool/token.cxx                              |   18 ++-
 sc/source/filter/excel/xeformula.cxx                       |    8 +
 sc/source/filter/excel/xlformula.cxx                       |    5 
 sc/source/ui/app/inputhdl.cxx                              |    2 
 sc/source/ui/unoobj/tokenuno.cxx                           |   13 ++
 sc/source/ui/view/viewfunc.cxx                             |    2 
 16 files changed, 240 insertions(+), 70 deletions(-)

New commits:
commit 516318113f0bd2b3c658aba9b285165e63a280e2
Author:     Eike Rathke <erack at redhat.com>
AuthorDate: Wed Jul 28 17:31:56 2021 +0200
Commit:     Eike Rathke <erack at redhat.com>
CommitDate: Wed Jul 28 18:56:29 2021 +0200

    Resolves: tdf#76310 Preserve whitespace TAB, CR, LF in formula expressions
    
    Allowed whitespace characters in ODFF and OOXML are:
    U+0020 SPACE
    U+0009 CHARACTER TABULATION
    U+000A LINE FEED
    U+000D CARRIAGE RETURN
    
    Line feed and carriage return look a bit odd in the Function Wizard when
    they are part of a function's argument, but they work. Once a formula is
    edited, however, CR characters are converted to LF, probably already in
    EditEngine; this was not investigated.
    
    Change-Id: I6278f6be48872e0710a3d74212db391dda249ed2
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119635
    Reviewed-by: Eike Rathke <erack at redhat.com>
    Tested-by: Jenkins

diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index be5ce09d132f..f7174807f0f4 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -475,7 +475,8 @@ uno::Sequence< sheet::FormulaOpCodeMapEntry > FormulaCompiler::OpCodeMap::create
             { FormulaMapGroupSpecialOffset::DB_AREA           , ocDBArea }         ,
             /* TODO: { FormulaMapGroupSpecialOffset::TABLE_REF         , ocTableRef }       , */
             { FormulaMapGroupSpecialOffset::MACRO             , ocMacro }          ,
-            { FormulaMapGroupSpecialOffset::COL_ROW_NAME      , ocColRowName }
+            { FormulaMapGroupSpecialOffset::COL_ROW_NAME      , ocColRowName }     ,
+            { FormulaMapGroupSpecialOffset::WHITESPACE        , ocWhitespace }
         };
         const size_t nCount = SAL_N_ELEMENTS(aMap);
         // Preallocate vector elements.
@@ -1267,14 +1268,18 @@ bool FormulaCompiler::GetToken()
              nWasColRowName = 1;
         else
              nWasColRowName = 0;
+        OpCode eTmpOp;
         mpToken = maArrIterator.Next();
-        while( mpToken && mpToken->GetOpCode() == ocSpaces )
+        while (mpToken && ((eTmpOp = mpToken->GetOpCode()) == ocSpaces || eTmpOp == ocWhitespace))
         {
-            // For significant whitespace remember last ocSpaces token. Usually
-            // there's only one even for multiple spaces.
-            pSpacesToken = mpToken;
-            if ( nWasColRowName )
-                nWasColRowName++;
+            if (eTmpOp == ocSpaces)
+            {
+                // For significant whitespace remember last ocSpaces token.
+                // Usually there's only one even for multiple spaces.
+                pSpacesToken = mpToken;
+                if ( nWasColRowName )
+                    nWasColRowName++;
+            }
             if ( bAutoCorrect && !pStack )
                 CreateStringFromToken( aCorrectedFormula, mpToken.get() );
             mpToken = maArrIterator.Next();
@@ -2272,10 +2277,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
     if( bSpaces )
         rBuffer.append( ' ');
 
-    if( eOp == ocSpaces )
+    if (eOp == ocSpaces || eOp == ocWhitespace)
     {
         bool bWriteSpaces = true;
-        if (mxSymbols->isODFF())
+        if (eOp == ocSpaces && mxSymbols->isODFF())
         {
             const FormulaToken* p = maArrIterator.PeekPrevNoSpaces();
             bool bIntersectionOp = (p && p->GetOpCode() == ocColRowName);
@@ -2316,7 +2321,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
             sal_uInt8 n = t->GetByte();
             for ( sal_uInt8 j=0; j<n; ++j )
             {
-                rBuffer.append( ' ');
+                if (eOp == ocWhitespace)
+                    rBuffer.append( t->GetChar());
+                else
+                    rBuffer.append( ' ');
             }
         }
     }
@@ -2403,6 +2411,7 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
                                 case ocPush:
                                 case ocRange:
                                 case ocSpaces:
+                                case ocWhitespace:
                                     break;
                                 default:
                                     nLevel = 0;
diff --git a/formula/source/core/api/token.cxx b/formula/source/core/api/token.cxx
index 0af1f63f0e5e..c5b69acf2c90 100644
--- a/formula/source/core/api/token.cxx
+++ b/formula/source/core/api/token.cxx
@@ -244,6 +244,13 @@ void FormulaToken::SetSheet( sal_Int16 )
     assert( !"virtual dummy called" );
 }
 
+sal_Unicode FormulaToken::GetChar() const
+{
+    // This Get is worth an assert.
+    assert( !"virtual dummy called" );
+    return 0;
+}
+
 short* FormulaToken::GetJump() const
 {
     SAL_WARN( "formula.core", "FormulaToken::GetJump: virtual dummy called" );
@@ -348,6 +355,15 @@ bool FormulaToken::TextEqual( const FormulaToken& rToken ) const
 // real implementations of virtual functions
 
 
+sal_uInt8   FormulaSpaceToken::GetByte() const  { return nByte; }
+sal_Unicode FormulaSpaceToken::GetChar() const  { return cChar; }
+bool FormulaSpaceToken::operator==( const FormulaToken& r ) const
+{
+    return FormulaToken::operator==( r ) && nByte == r.GetByte() &&
+        cChar == r.GetChar();
+}
+
+
 sal_uInt8   FormulaByteToken::GetByte() const           { return nByte; }
 void        FormulaByteToken::SetByte( sal_uInt8 n )    { nByte = n; }
 ParamClass  FormulaByteToken::GetInForceArray() const    { return eInForceArray; }
@@ -425,6 +441,13 @@ bool FormulaTokenArray::AddFormulaToken(
                     AddStringXML( aStrVal );
                 else if ( eOpCode == ocExternal || eOpCode == ocMacro )
                     Add( new formula::FormulaExternalToken( eOpCode, aStrVal ) );
+                else if ( eOpCode == ocWhitespace )
+                {
+                    // Simply ignore empty string.
+                    // Convention is one character repeated.
+                    if (!aStrVal.isEmpty())
+                        Add( new formula::FormulaSpaceToken( static_cast<sal_uInt8>(aStrVal.getLength()), aStrVal[0]));
+                }
                 else
                     bError = true;      // unexpected string: don't know what to do with it
             }
@@ -1472,17 +1495,21 @@ FormulaTokenArray * FormulaTokenArray::RewriteMissing( const MissingConvention &
     return pNewArr;
 }
 
+namespace {
+inline bool isWhitespace( OpCode eOp ) { return eOp == ocSpaces || eOp == ocWhitespace; }
+}
+
 bool FormulaTokenArray::MayReferenceFollow()
 {
     if ( pCode && nLen > 0 )
     {
         // ignore trailing spaces
         sal_uInt16 i = nLen - 1;
-        while ( i > 0 && pCode[i]->GetOpCode() == SC_OPCODE_SPACES )
+        while (i > 0 && isWhitespace( pCode[i]->GetOpCode()))
         {
             --i;
         }
-        if ( i > 0 || pCode[i]->GetOpCode() != SC_OPCODE_SPACES )
+        if (i > 0 || !isWhitespace( pCode[i]->GetOpCode()))
         {
             OpCode eOp = pCode[i]->GetOpCode();
             if ( (SC_OPCODE_START_BIN_OP <= eOp && eOp < SC_OPCODE_STOP_BIN_OP ) ||
@@ -1756,7 +1783,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::NextNoSpaces()
 {
     if( mpFTA->GetArray() )
     {
-        while( (mnIndex < mpFTA->GetLen()) && (mpFTA->GetArray()[ mnIndex ]->GetOpCode() == ocSpaces) )
+        while ((mnIndex < mpFTA->GetLen()) && isWhitespace( mpFTA->GetArray()[ mnIndex ]->GetOpCode()))
             ++mnIndex;
         if( mnIndex < mpFTA->GetLen() )
             return mpFTA->GetArray()[ mnIndex++ ];
@@ -1793,7 +1820,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekNextNoSpaces() const
     if( mpFTA->GetArray() && mnIndex < mpFTA->GetLen() )
     {
         sal_uInt16 j = mnIndex;
-        while ( j < mpFTA->GetLen() && mpFTA->GetArray()[j]->GetOpCode() == ocSpaces )
+        while (j < mpFTA->GetLen() && isWhitespace( mpFTA->GetArray()[j]->GetOpCode()))
             j++;
         if ( j < mpFTA->GetLen() )
             return mpFTA->GetArray()[ j ];
@@ -1809,9 +1836,9 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekPrevNoSpaces() const
     if( mpFTA->GetArray() && mnIndex > 1 )
     {
         sal_uInt16 j = mnIndex - 2;
-        while ( mpFTA->GetArray()[j]->GetOpCode() == ocSpaces && j > 0 )
+        while (isWhitespace( mpFTA->GetArray()[j]->GetOpCode()) && j > 0 )
             j--;
-        if ( j > 0 || mpFTA->GetArray()[j]->GetOpCode() != ocSpaces )
+        if (j > 0 || !isWhitespace( mpFTA->GetArray()[j]->GetOpCode()))
             return mpFTA->GetArray()[ j ];
         else
             return nullptr;
diff --git a/formula/source/ui/dlg/formula.cxx b/formula/source/ui/dlg/formula.cxx
index 81931d8d586b..36b59d5eb0ec 100644
--- a/formula/source/ui/dlg/formula.cxx
+++ b/formula/source/ui/dlg/formula.cxx
@@ -389,6 +389,9 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
         sal_Int32 nOldTokPos = 1;
         sal_Int32 nPrevFuncPos = 1;
         short nBracketCount = 0;
+        const sal_Int32 nOpPush = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode;
+        const sal_Int32 nOpSpaces = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode;
+        const sal_Int32 nOpWhitespace = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::WHITESPACE].Token.OpCode;
         while ( pIter != pEnd )
         {
             const sal_Int32 eOp = pIter->OpCode;
@@ -401,8 +404,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
                 m_xBtnMatrix->set_active(true);
             }
 
-            if (eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode ||
-                eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode)
+            if (eOp == nOpPush || eOp == nOpSpaces || eOp == nOpWhitespace)
             {
                 const sal_Int32 n1 = nTokPos < 0 ? -1 : aFormString.indexOf( sep, nTokPos);
                 const sal_Int32 n2 = nTokPos < 0 ? -1 : aFormString.indexOf( ')', nTokPos);
@@ -444,7 +446,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
                     m_pFunctionOpCodesEnd,
                     [&eOp](const sheet::FormulaOpCodeMapEntry& aEntry) { return aEntry.Token.OpCode == eOp; });
 
-            if ( bIsFunction && m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode != eOp )
+            if ( bIsFunction && nOpSpaces != eOp && nOpWhitespace != eOp )
             {
                 nPrevFuncPos = nFuncPos;
                 nFuncPos = nOldTokPos;
diff --git a/include/formula/compiler.hxx b/include/formula/compiler.hxx
index baf3e23f6161..fcf7326d3e0f 100644
--- a/include/formula/compiler.hxx
+++ b/include/formula/compiler.hxx
@@ -40,26 +40,27 @@
 #define SC_OPCODE_BAD                14
 #define SC_OPCODE_STRINGXML          15
 #define SC_OPCODE_SPACES             16
-#define SC_OPCODE_MAT_REF            17
-#define SC_OPCODE_DB_AREA            18     /* additional access operators */
-#define SC_OPCODE_TABLE_REF          19
-#define SC_OPCODE_MACRO              20
-#define SC_OPCODE_COL_ROW_NAME       21
-#define SC_OPCODE_COL_ROW_NAME_AUTO  22
-#define SC_OPCODE_PERCENT_SIGN       23     /* operator _follows_ value */
-#define SC_OPCODE_ARRAY_OPEN         24
-#define SC_OPCODE_ARRAY_CLOSE        25
-#define SC_OPCODE_ARRAY_ROW_SEP      26
-#define SC_OPCODE_ARRAY_COL_SEP      27     /* some convs use sep != col_sep */
-#define SC_OPCODE_TABLE_REF_OPEN     28
-#define SC_OPCODE_TABLE_REF_CLOSE    29
-#define SC_OPCODE_TABLE_REF_ITEM_ALL      30
-#define SC_OPCODE_TABLE_REF_ITEM_HEADERS  31
-#define SC_OPCODE_TABLE_REF_ITEM_DATA     32
-#define SC_OPCODE_TABLE_REF_ITEM_TOTALS   33
-#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 34
-#define SC_OPCODE_STOP_DIV           35
-#define SC_OPCODE_SKIP               36     /* used to skip raw tokens during string compilation */
+#define SC_OPCODE_WHITESPACE         17
+#define SC_OPCODE_MAT_REF            18
+#define SC_OPCODE_DB_AREA            19     /* additional access operators */
+#define SC_OPCODE_TABLE_REF          20
+#define SC_OPCODE_MACRO              21
+#define SC_OPCODE_COL_ROW_NAME       22
+#define SC_OPCODE_COL_ROW_NAME_AUTO  23
+#define SC_OPCODE_PERCENT_SIGN       24     /* operator _follows_ value */
+#define SC_OPCODE_ARRAY_OPEN         25
+#define SC_OPCODE_ARRAY_CLOSE        26
+#define SC_OPCODE_ARRAY_ROW_SEP      27
+#define SC_OPCODE_ARRAY_COL_SEP      28     /* some convs use sep != col_sep */
+#define SC_OPCODE_TABLE_REF_OPEN     29
+#define SC_OPCODE_TABLE_REF_CLOSE    30
+#define SC_OPCODE_TABLE_REF_ITEM_ALL      31
+#define SC_OPCODE_TABLE_REF_ITEM_HEADERS  32
+#define SC_OPCODE_TABLE_REF_ITEM_DATA     33
+#define SC_OPCODE_TABLE_REF_ITEM_TOTALS   34
+#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 35
+#define SC_OPCODE_STOP_DIV           36
+#define SC_OPCODE_SKIP               37     /* used to skip raw tokens during string compilation */
 
 /*** error constants #... ***/
 #define SC_OPCODE_START_ERRORS       40
diff --git a/include/formula/opcode.hxx b/include/formula/opcode.hxx
index 3123e8f3fa38..d92ae0b1d41d 100644
--- a/include/formula/opcode.hxx
+++ b/include/formula/opcode.hxx
@@ -53,6 +53,7 @@ enum OpCode : sal_uInt16
         ocBad               = SC_OPCODE_BAD,
         ocStringXML         = SC_OPCODE_STRINGXML,
         ocSpaces            = SC_OPCODE_SPACES,
+        ocWhitespace        = SC_OPCODE_WHITESPACE,
         ocMatRef            = SC_OPCODE_MAT_REF,
         ocTableRefItemAll     = SC_OPCODE_TABLE_REF_ITEM_ALL,
         ocTableRefItemHeaders = SC_OPCODE_TABLE_REF_ITEM_HEADERS,
@@ -545,6 +546,7 @@ inline std::string OpCodeEnumToString(OpCode eCode)
     case ocBad: return "Bad";
     case ocStringXML: return "StringXML";
     case ocSpaces: return "Spaces";
+    case ocWhitespace: return "Whitespace";
     case ocMatRef: return "MatRef";
     case ocTableRefItemAll: return "TableRefItemAll";
     case ocTableRefItemHeaders: return "TableRefItemHeaders";
diff --git a/include/formula/token.hxx b/include/formula/token.hxx
index 3fa00e89339f..77bf3eeb90ea 100644
--- a/include/formula/token.hxx
+++ b/include/formula/token.hxx
@@ -187,6 +187,7 @@ public:
     virtual void                SetIndex( sal_uInt16 n );
     virtual sal_Int16           GetSheet() const;
     virtual void                SetSheet( sal_Int16 n );
+    virtual sal_Unicode         GetChar() const;
     virtual short*              GetJump() const;
     virtual const OUString&     GetExternal() const;
     virtual FormulaToken*       GetFAPOrigToken() const;
@@ -225,6 +226,25 @@ inline void intrusive_ptr_release(const FormulaToken* p)
     p->DecRef();
 }
 
+class FORMULA_DLLPUBLIC FormulaSpaceToken : public FormulaToken
+{
+private:
+            sal_uInt8           nByte;
+            sal_Unicode         cChar;
+public:
+                                FormulaSpaceToken( sal_uInt8 n, sal_Unicode c ) :
+                                    FormulaToken( svByte, ocWhitespace ),
+                                    nByte( n ), cChar( c ) {}
+                                FormulaSpaceToken( const FormulaSpaceToken& r ) :
+                                    FormulaToken( r ),
+                                    nByte( r.nByte ), cChar( r.cChar ) {}
+
+    virtual FormulaToken*       Clone() const override { return new FormulaSpaceToken(*this); }
+    virtual sal_uInt8           GetByte() const override;
+    virtual sal_Unicode         GetChar() const override;
+    virtual bool                operator==( const FormulaToken& rToken ) const override;
+};
+
 class FORMULA_DLLPUBLIC FormulaByteToken : public FormulaToken
 {
 private:
diff --git a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
index 89c21dca4328..4cb2699e9af6 100644
--- a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
+++ b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
@@ -140,10 +140,6 @@ constants FormulaMapGroupSpecialOffset
         <p>The FormulaToken::Data member shall contain a
         positive integer value of type `long` specifying the number
         of space characters.</p>
-
-        <p>Attention: This may change in next versions to support other
-        characters than simple space characters (e.g. line feeds, horizontal
-        tabulators, non-breakable spaces).</p>
      */
     const long SPACES = 8;
 
@@ -176,6 +172,26 @@ constants FormulaMapGroupSpecialOffset
     const long COL_ROW_NAME       = 12;
 
 
+    /** Formula tokens containing the op-code obtained from this offset
+        describe whitespace characters within the string representation of a
+        formula.
+
+        <p>Whitespace characters in formulas are used for readability and do
+        not affect the result of the formula.</p>
+
+        <p>The FormulaToken::Data member shall contain a
+        `string` of one (repeated) whitespace character. The length of
+        the string determines the number of repetitions.</p>
+
+        <p>Allowed whitespace characters are SPACE (U+0020), CHARACTER
+        TABULATION (U+0009), LINE FEED (U+000A), and CARRIAGE RETURN
+        (U+000D). See also ODF v1.3 OpenFormula 5.14 Whitespace.</p>
+
+        @since LibreOffice 7.3
+     */
+    const long WHITESPACE = 13;
+
+
 };
 
 
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index d8935c7f7545..17e258dc3805 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -107,6 +107,10 @@ struct ScRawToken final
 public:
     union {
         double       nValue;
+        struct {
+            sal_uInt8           nCount;
+            sal_Unicode         cChar;
+        } whitespace;
         struct {
             sal_uInt8           cByte;
             formula::ParamClass eInForceArray;
@@ -326,7 +330,21 @@ private:
     bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const;
 
     virtual void SetError(FormulaError nError) override;
-    sal_Int32 NextSymbol(bool bInArray);
+
+    struct Whitespace final
+    {
+        sal_Int32   nCount;
+        sal_Unicode cChar;
+
+        Whitespace() : nCount(0), cChar(0x20) {}
+        void reset( sal_Unicode c ) { nCount = 0; cChar = c; }
+    };
+
+    static void addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces,
+            ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n = 1 );
+
+    std::vector<Whitespace> NextSymbol(bool bInArray);
+
     bool IsValue( const OUString& );
     bool IsOpCode( const OUString&, bool bInArray );
     bool IsOpCode2( const OUString& );
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index 0d1dc9d52aa6..83eb2f4ab7db 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -336,11 +336,8 @@ ScCompiler::Convention::Convention( FormulaGrammar::AddressConvention eConv )
     for (i = 0; i < 128; i++)
         t[i] = ScCharFlags::Illegal;
 
-// tdf#56036: Allow tabs/newlines in imported formulas (for now simply treat them as (and convert to) space)
-// TODO: tdf#76310: allow saving newlines as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace")
-// This is compliant with the OASIS decision (see https://issues.oasis-open.org/browse/OFFICE-701)
-// Also, this would enable correct roundtrip from/to OOXML without losing tabs/newlines
-// This requires saving actual space characters in ocSpaces token, using them in UI and saving
+// Allow tabs/newlines.
+// Allow saving whitespace as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace").
 /* tab */   t[ 9] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
 /* lf  */   t[10] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
 /* cr  */   t[13] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
@@ -2067,6 +2064,19 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz
     return true;
 }
 
+// static
+void ScCompiler::addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces,
+        ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n )
+{
+    if (rSpace.cChar != c)
+    {
+        if (rSpace.cChar && rSpace.nCount > 0)
+            rvSpaces.emplace_back(rSpace);
+        rSpace.reset(c);
+    }
+    rSpace.nCount += n;
+}
+
 // NextSymbol
 
 // Parses the formula into separate symbols for further processing.
@@ -2104,8 +2114,9 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz
 //               | other             | Symbol=Symbol+char    | GetString
 //---------------+-------------------+-----------------------+---------------
 
-sal_Int32 ScCompiler::NextSymbol(bool bInArray)
+std::vector<ScCompiler::Whitespace> ScCompiler::NextSymbol(bool bInArray)
 {
+    std::vector<Whitespace> vSpaces;
     cSymbol[MAXSTRLEN] = 0;       // end
     sal_Unicode* pSym = cSymbol;
     const sal_Unicode* const pStart = aFormula.getStr();
@@ -2116,7 +2127,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray)
     bool bQuote = false;
     mnRangeOpPosInSymbol = -1;
     ScanState eState = ssGetChar;
-    sal_Int32 nSpaces = 0;
+    Whitespace aSpace;
     sal_Unicode cSep = mxSymbols->getSymbolChar( ocSep);
     sal_Unicode cArrayColSep = mxSymbols->getSymbolChar( ocArrayColSep);
     sal_Unicode cArrayRowSep = mxSymbols->getSymbolChar( ocArrayRowSep);
@@ -2129,6 +2140,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray)
 
     int nDecSeps = 0;
     bool bAutoIntersection = false;
+    size_t nAutoIntersectionSpacesPos = 0;
     int nRefInName = 0;
     bool bErrorConstantHadSlash = false;
     mnPredetectedReference = 0;
@@ -2187,7 +2199,12 @@ Label_MaskStateMachine:
                         if (!bAutoIntersection)
                         {
                             ++pSrc;
-                            nSpaces += 2;   // must match the character count
+                            // Add 2 because it must match the character count
+                            // for bi18n.
+                            addWhitespace( vSpaces, aSpace, 0x20, 2);
+                            // Position of Whitespace where it will be added to
+                            // vector.
+                            nAutoIntersectionSpacesPos = vSpaces.size();
                             bAutoIntersection = true;
                         }
                         else
@@ -2267,7 +2284,7 @@ Label_MaskStateMachine:
                 }
                 else if( nMask & ScCharFlags::CharDontCare )
                 {
-                    nSpaces++;
+                    addWhitespace( vSpaces, aSpace, c);
                 }
                 else if( nMask & ScCharFlags::CharIdent )
                 {   // try to get a simple ASCII identifier before calling
@@ -2731,10 +2748,15 @@ Label_MaskStateMachine:
         cLast = c;
         c = *pSrc;
     }
+
+    if (aSpace.nCount && aSpace.cChar)
+        vSpaces.emplace_back(aSpace);
+
     if ( bi18n )
     {
         const sal_Int32 nOldSrcPos = nSrcPos;
-        nSrcPos = nSrcPos + nSpaces;
+        for (const auto& r : vSpaces)
+            nSrcPos += r.nCount;
         // If group separator is not a possible operator and not one of any
         // separators then it may be parsed away in numbers. This is
         // specifically the case with NO-BREAK SPACE, which actually triggers
@@ -2835,9 +2857,9 @@ Label_MaskStateMachine:
     }
     if ( bAutoCorrect )
         aCorrectedSymbol = OUString(cSymbol, pSym - cSymbol);
-    if (bAutoIntersection && nSpaces > 1)
-        --nSpaces;  // replace '!!' with only one space
-    return nSpaces;
+    if (bAutoIntersection && vSpaces[nAutoIntersectionSpacesPos].nCount > 1)
+        --vSpaces[nAutoIntersectionSpacesPos].nCount;   // replace '!!' with only one space
+    return vSpaces;
 }
 
 // Convert symbol to token
@@ -4246,7 +4268,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
     }
 
     bool bAllowBooleans = bInArray;
-    sal_Int32 nSpaces = NextSymbol(bInArray);
+    const std::vector<Whitespace> & vSpaces = NextSymbol(bInArray);
 
     if (!cSymbol[0])
     {
@@ -4266,15 +4288,31 @@ bool ScCompiler::NextNewToken( bool bInArray )
         return false;
     }
 
-    if( nSpaces )
+    if (!vSpaces.empty())
     {
         ScRawToken aToken;
-        aToken.SetOpCode( ocSpaces );
-        aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(nSpaces, 255) );
-        if( !static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ) )
+        for (const auto& rSpace : vSpaces)
         {
-            SetError(FormulaError::CodeOverflow);
-            return false;
+            if (rSpace.cChar == 0x20)
+            {
+                // For now keep this a FormulaByteToken for the nasty
+                // significant whitespace intersection. This probably can be
+                // changed to a FormulaSpaceToken but then other places may
+                // need to be adapted.
+                aToken.SetOpCode( ocSpaces );
+                aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) );
+            }
+            else
+            {
+                aToken.SetOpCode( ocWhitespace );
+                aToken.whitespace.nCount = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) );
+                aToken.whitespace.cChar = rSpace.cChar;
+            }
+            if (!static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ))
+            {
+                SetError(FormulaError::CodeOverflow);
+                return false;
+            }
         }
     }
 
diff --git a/sc/source/core/tool/parclass.cxx b/sc/source/core/tool/parclass.cxx
index 8dd39016cb48..6c560b07b42f 100644
--- a/sc/source/core/tool/parclass.cxx
+++ b/sc/source/core/tool/parclass.cxx
@@ -74,6 +74,7 @@ const ScParameterClassification::RawData ScParameterClassification::pRawData[] =
     { ocSep,             {{ Bounds                                               }, 0, Bounds }},
     { ocSkip,            {{ Bounds                                               }, 0, Bounds }},
     { ocSpaces,          {{ Bounds                                               }, 0, Bounds }},
+    { ocWhitespace,      {{ Bounds                                               }, 0, Bounds }},
     { ocStop,            {{ Bounds                                               }, 0, Bounds }},
     { ocStringXML,       {{ Bounds                                               }, 0, Bounds }},
     { ocTableRef,        {{ Bounds                                               }, 0, Value }},    // or Reference?
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx
index 04355b86a8ec..f17cd9ac27ad 100644
--- a/sc/source/core/tool/token.cxx
+++ b/sc/source/core/tool/token.cxx
@@ -235,6 +235,11 @@ void ScRawToken::SetOpCode( OpCode e )
         case ocTableRefClose:
             eType = svSep;
             break;
+        case ocWhitespace:
+            eType = svByte;
+            whitespace.nCount = 1;
+            whitespace.cChar = 0x20;
+            break;
         default:
             eType = svByte;
             sbyte.cByte = 0;
@@ -349,7 +354,10 @@ FormulaToken* ScRawToken::CreateToken(ScSheetLimits& rLimits) const
     switch ( GetType() )
     {
         case svByte :
-            return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray );
+            if (eOp == ocWhitespace)
+                return new FormulaSpaceToken( whitespace.nCount, whitespace.cChar );
+            else
+                return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray );
         case svDouble :
             IF_NOT_OPCODE_ERROR( ocPush, FormulaDoubleToken);
             return new FormulaDoubleToken( nValue );
@@ -1652,6 +1660,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
             case ocMissing:
             case ocBad:
             case ocSpaces:
+            case ocWhitespace:
             case ocSkip:
             case ocPercentSign:
             case ocErrNull:
@@ -2089,6 +2098,7 @@ FormulaToken* ScTokenArray::MergeArray( )
             break;
 
             case ocSpaces :
+            case ocWhitespace :
                 // ignore spaces
                 --nPrevRowSep;      // shorten this row by 1
             break;
@@ -5136,12 +5146,18 @@ OUString ScTokenArray::CreateString( sc::TokenStringContext& rCxt, const ScAddre
     {
         const FormulaToken* pToken = *p;
         OpCode eOp = pToken->GetOpCode();
+        /* FIXME: why does this ignore the count of spaces? */
         if (eOp == ocSpaces)
         {
             // TODO : Handle intersection operator '!!'.
             aBuf.append(' ');
             continue;
         }
+        else if (eOp == ocWhitespace)
+        {
+            aBuf.append( pToken->GetChar());
+            continue;
+        }
 
         if (eOp < rCxt.mxOpCodeMap->getSymbolCount())
             aBuf.append(rCxt.mxOpCodeMap->getSymbol(eOp));
diff --git a/sc/source/filter/excel/xeformula.cxx b/sc/source/filter/excel/xeformula.cxx
index f2edeffb263c..f829529ca0db 100644
--- a/sc/source/filter/excel/xeformula.cxx
+++ b/sc/source/filter/excel/xeformula.cxx
@@ -826,9 +826,13 @@ const FormulaToken* XclExpFmlaCompImpl::PeekNextRawToken() const
 bool XclExpFmlaCompImpl::GetNextToken( XclExpScToken& rTokData )
 {
     rTokData.mpScToken = GetNextRawToken();
-    rTokData.mnSpaces = (rTokData.GetOpCode() == ocSpaces) ? rTokData.mpScToken->GetByte() : 0;
-    while( rTokData.GetOpCode() == ocSpaces )
+    rTokData.mnSpaces = 0;
+    /* TODO: handle ocWhitespace characters? */
+    while (rTokData.GetOpCode() == ocSpaces || rTokData.GetOpCode() == ocWhitespace)
+    {
+        rTokData.mnSpaces += rTokData.mpScToken->GetByte();
         rTokData.mpScToken = GetNextRawToken();
+    }
     return rTokData.Is();
 }
 
diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx
index 1f974f47b38b..e2e082ac2651 100644
--- a/sc/source/filter/excel/xlformula.cxx
+++ b/sc/source/filter/excel/xlformula.cxx
@@ -867,8 +867,11 @@ void XclTokenArrayIterator::NextRawToken()
 void XclTokenArrayIterator::SkipSpaces()
 {
     if( mbSkipSpaces )
-        while( Is() && ((*this)->GetOpCode() == ocSpaces) )
+    {
+        OpCode eOp;
+        while( Is() && (((eOp = (*this)->GetOpCode()) == ocSpaces) || eOp == ocWhitespace) )
             NextRawToken();
+    }
 }
 
 // strings and string lists ---------------------------------------------------
diff --git a/sc/source/ui/app/inputhdl.cxx b/sc/source/ui/app/inputhdl.cxx
index f01f93d46d70..b3e644fc8620 100644
--- a/sc/source/ui/app/inputhdl.cxx
+++ b/sc/source/ui/app/inputhdl.cxx
@@ -654,7 +654,7 @@ void ScInputHandler::DeleteRangeFinder()
 
 static OUString GetEditText(const EditEngine* pEng)
 {
-    return ScEditUtil::GetSpaceDelimitedString(*pEng);
+    return ScEditUtil::GetMultilineString(*pEng);
 }
 
 static void lcl_RemoveTabs(OUString& rStr)
diff --git a/sc/source/ui/unoobj/tokenuno.cxx b/sc/source/ui/unoobj/tokenuno.cxx
index 33f005fff8d6..b07a04e12b04 100644
--- a/sc/source/ui/unoobj/tokenuno.cxx
+++ b/sc/source/ui/unoobj/tokenuno.cxx
@@ -32,6 +32,7 @@
 
 #include <svl/itemprop.hxx>
 #include <vcl/svapp.hxx>
+#include <comphelper/string.hxx>
 
 #include <miscuno.hxx>
 #include <convuno.hxx>
@@ -388,6 +389,18 @@ void ScTokenConversion::ConvertToTokenSequence( const ScDocument& rDoc,
                     // Only the count of spaces is stored as "long". Parameter count is ignored.
                     if ( eOpCode == ocSpaces )
                         rAPI.Data <<= static_cast<sal_Int32>(rToken.GetByte());
+                    else if (eOpCode == ocWhitespace)
+                    {
+                        // Convention is one character repeated.
+                        if (rToken.GetByte() == 1)
+                            rAPI.Data <<= OUString( rToken.GetChar());
+                        else
+                        {
+                            OUStringBuffer aBuf( rToken.GetByte());
+                            comphelper::string::padToLength( aBuf, rToken.GetByte(), rToken.GetChar());
+                            rAPI.Data <<= aBuf.makeStringAndClear();
+                        }
+                    }
                     else
                         rAPI.Data.clear();      // no data
                     break;
diff --git a/sc/source/ui/view/viewfunc.cxx b/sc/source/ui/view/viewfunc.cxx
index fbe8a0b3719c..4e5e149c9755 100644
--- a/sc/source/ui/view/viewfunc.cxx
+++ b/sc/source/ui/view/viewfunc.cxx
@@ -677,7 +677,7 @@ void ScViewFunc::EnterData( SCCOL nCol, SCROW nRow, SCTAB nTab,
         }
 
         // #i97726# always get text for "repeat" of undo action
-        aString = ScEditUtil::GetSpaceDelimitedString(aEngine);
+        aString = ScEditUtil::GetMultilineString(aEngine);
 
         //      undo
 


More information about the Libreoffice-commits mailing list