[Libreoffice-commits] core.git: cui/source lingucomponent/source linguistic/source

László Németh nemeth at numbertext.org
Thu Jan 30 05:58:01 PST 2014


 cui/source/options/optdict.cxx                        |   19 ++++
 lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx |   28 ++++---
 linguistic/source/dicimp.cxx                          |   29 ++++++-
 linguistic/source/hyphdsp.cxx                         |   70 +++++++++++++-----
 linguistic/source/misc.cxx                            |    4 -
 5 files changed, 118 insertions(+), 32 deletions(-)

New commits:
commit e63923b0334ae381e0fcc576a6b6e08a62e657cf
Author: László Németh <nemeth at numbertext.org>
Date:   Thu Jan 30 14:56:30 2014 +0100

    fdo#44314 non-standard hyphenation at soft hyphens + with pers. dic.
    
    Change-Id: I25e7c13036c6ce1948cc33d45901ef69a258fb03

diff --git a/cui/source/options/optdict.cxx b/cui/source/options/optdict.cxx
index ba626c4..af00987 100644
--- a/cui/source/options/optdict.cxx
+++ b/cui/source/options/optdict.cxx
@@ -54,6 +54,23 @@ static long nStaticTabs[]=
 static OUString getNormDicEntry_Impl(const OUString &rText)
 {
     OUString aTmp(comphelper::string::stripEnd(rText, '.'));
+    // non-standard hyphenation: drop every "[...]" pattern, brackets included
+    if (aTmp.indexOf('[') > -1)
+    {
+        OUStringBuffer aTmp2 ( aTmp.getLength() );
+        sal_Bool bSkip = sal_False; // true while inside a "[...]" pattern
+        for (sal_Int32 i = 0; i < aTmp.getLength(); i++)
+        {
+            sal_Unicode cTmp = aTmp[i];
+            if (cTmp == '[')
+                bSkip = sal_True;
+            else if (!bSkip)
+                aTmp2.append( cTmp );
+            else if (cTmp == ']') // only reached while skipping: closes the pattern
+                bSkip = sal_False;
+        }
+        aTmp = aTmp2.makeStringAndClear(); // an unterminated '[' drops the rest of the entry
+    }
     return comphelper::string::remove(aTmp, '=');
 }
 
@@ -68,7 +85,7 @@ static CDE_RESULT cmpDicEntry_Impl( const OUString &rText1, const OUString &rTex
         eRes = CDE_EQUAL;
     else
     {   // similar = equal up to trailing '.' and hyphenation positions
-        // marked with '='
+        // marked with '=' and '[' + alternative spelling pattern + ']'
         if (getNormDicEntry_Impl( rText1 ) == getNormDicEntry_Impl( rText2 ))
             eRes = CDE_SIMILAR;
     }
diff --git a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
index 64fe545..1cb7996 100644
--- a/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
+++ b/lingucomponent/source/hyphenator/hyphen/hyphenimp.cxx
@@ -508,16 +508,22 @@ Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWo
 
 
 Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
-        const OUString& /*aWord*/,
-        const ::com::sun::star::lang::Locale& /*aLocale*/,
-        sal_Int16 /*nIndex*/,
-        const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
+        const OUString& aWord,
+        const ::com::sun::star::lang::Locale& aLocale,
+        sal_Int16 nIndex,
+        const ::com::sun::star::beans::PropertyValues& aProperties )
         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
 {
-  /* alternative spelling isn't supported by tex dictionaries */
-  /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
-  /* TASK: implement queryAlternativeSpelling() */
-  return NULL;
+    // FIXME: multi-character changes, e.g. briddzsel -> bridzs-dzsel, are not supported,
+    // because Writer has a layout problem with them.
+    // For now we allow only one extra character before the hyphen, so that the right break point is not missed:
+    for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily extrachar < 2 instead of extrachar <= 2
+    {
+        Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
+        if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
+            return xRes;
+    }
+    return NULL; // no alternative spelling with its hyphen at nIndex was found
 }
 
 Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
@@ -658,7 +664,7 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const
 
         for ( i = 0; i < encWord.getLength(); i++)
         {
-            if (hyphens[i]&1 && (!rep || !rep[i]))
+            if (hyphens[i]&1)
                 nHyphCount++;
         }
 
@@ -670,8 +676,8 @@ Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const
         for (i = 0; i < nWord.getLength(); i++)
         {
             hyphenatedWordBuffer.append(aWord[i]);
-            // hyphenation position (not alternative)
-            if (hyphens[i]&1 && (!rep || !rep[i]))
+            // hyphenation position
+            if (hyphens[i]&1)
             {
                 pPos[nHyphCount] = i;
                 hyphenatedWordBuffer.append('=');
diff --git a/linguistic/source/dicimp.cxx b/linguistic/source/dicimp.cxx
index f50c942..1306253 100644
--- a/linguistic/source/dicimp.cxx
+++ b/linguistic/source/dicimp.cxx
@@ -550,24 +550,37 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1,
     }
 
     const sal_Unicode cIgnChar = '=';
+    const sal_Unicode cIgnBeg = '['; // for alternative hyphenation, eg. Schif[f]fahrt, Zuc[1k]ker
+    const sal_Unicode cIgnEnd = ']'; // planned: gee"[1-/e]rfde or ge[-/1e]e"rfde (gee"rfde -> ge=erfde)
     sal_Int32       nIdx1 = 0,
                   nIdx2 = 0,
                   nNumIgnChar1 = 0,
                   nNumIgnChar2 = 0;
 
+    bool IgnState;
     sal_Int32 nDiff = 0;
     sal_Unicode cChar1 = '\0';
     sal_Unicode cChar2 = '\0';
     do
     {
         // skip chars to be ignored
-        while (nIdx1 < nLen1  &&  (cChar1 = aWord1[ nIdx1 ]) == cIgnChar)
+        IgnState = false;
+        while (nIdx1 < nLen1  &&  ((cChar1 = aWord1[ nIdx1 ]) == cIgnChar || cChar1 == cIgnBeg || IgnState ))
         {
+            if ( cChar1 == cIgnBeg )
+                IgnState = true;
+            else if (cChar1 == cIgnEnd)
+                IgnState = false;
             nIdx1++;
             nNumIgnChar1++;
         }
-        while (nIdx2 < nLen2  &&  (cChar2 = aWord2[ nIdx2 ]) == cIgnChar)
+        IgnState = false;
+        while (nIdx2 < nLen2  &&  ((cChar2 = aWord2[ nIdx2 ]) == cIgnChar || cChar2 == cIgnBeg || IgnState ))
         {
+            if ( cChar2 == cIgnBeg )
+                IgnState = true;
+            else if (cChar2 == cIgnEnd)
+                IgnState = false;
             nIdx2++;
             nNumIgnChar2++;
         }
@@ -590,15 +603,25 @@ int DictionaryNeo::cmpDicEntry(const OUString& rWord1,
         // shorter one
 
         // count remaining IgnChars
+        IgnState = false;
         while (nIdx1 < nLen1 )
         {
-            if (aWord1[ nIdx1++ ] == cIgnChar)
+            if (aWord1[ nIdx1 ] == cIgnBeg)
+                IgnState = true;
+            if (IgnState || aWord1[ nIdx1 ] == cIgnChar) // no ++ here: '||' short-circuits and would skip it
                 nNumIgnChar1++;
+            if (aWord1[ nIdx1++ ] == cIgnEnd) // always advance; the ']' itself was counted above
+                IgnState = false;
         }
+        IgnState = false;
         while (nIdx2 < nLen2 )
         {
+            IgnState = IgnState || aWord2[ nIdx2 ] == cIgnBeg; // '[' opens an ignored "[...]" pattern
             if (aWord2[ nIdx2++ ] == cIgnChar)
                 nNumIgnChar2++;
+            else if (IgnState) // pattern chars (brackets included) are ignored just like '='
+                nNumIgnChar2++;
+            IgnState = IgnState && aWord2[ nIdx2 - 1 ] != cIgnEnd; // ']' (the char just consumed) closes it
         }
 
         nRes = ((sal_Int32) nLen1 - nNumIgnChar1) - ((sal_Int32) nLen2 - nNumIgnChar2);
diff --git a/linguistic/source/hyphdsp.cxx b/linguistic/source/hyphdsp.cxx
index 815312b..ecc9ec2 100644
--- a/linguistic/source/hyphdsp.cxx
+++ b/linguistic/source/hyphdsp.cxx
@@ -79,18 +79,23 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
         sal_Int32 nTextLen = aText.getLength();
 
         // trailing '=' means "hyphenation should not be possible"
-        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=')
+        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[')
         {
             sal_Int16 nHyphenationPos = -1;
+            sal_Int32 nHyphenPos = -1;
+            sal_Int16 nOrigHyphPos = -1;
 
             OUStringBuffer aTmp( nTextLen );
             sal_Bool  bSkip = sal_False;
+            sal_Bool  bSkip2 = sal_False;
             sal_Int32 nHyphIdx = -1;
             sal_Int32 nLeading = 0;
             for (sal_Int32 i = 0;  i < nTextLen;  i++)
             {
                 sal_Unicode cTmp = aText[i];
-                if (cTmp != '=')
+                if (cTmp == '[' || cTmp == ']')
+                    bSkip2 = !bSkip2;
+                if (cTmp != '=' && !bSkip2 && cTmp != ']')
                 {
                     aTmp.append( cTmp );
                     nLeading++;
@@ -101,8 +106,10 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
                 {
                     if (!bSkip  &&  nHyphIdx >= 0)
                     {
-                        if (nLeading <= nMaxLeading)
+                        if (nLeading <= nMaxLeading) {
                             nHyphenationPos = (sal_Int16) nHyphIdx;
+                            nOrigHyphPos = i;
+                        }
                     }
                     bSkip = sal_True;   //! multiple '=' should count as one only
                 }
@@ -110,24 +117,23 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
 
             if (nHyphenationPos > 0)
             {
-                aText = aTmp.makeStringAndClear();
 
 #if OSL_DEBUG_LEVEL > 1
                 {
-                    if (aText != rOrigWord)
+                    if (aTmp.toString() != rOrigWord)
                     {
                         // both words should only differ by a having a trailing '.'
                         // character or not...
                         OUString aShorter, aLonger;
-                        if (aText.getLength() <= rOrigWord.getLength())
+                        if (aTmp.getLength() <= rOrigWord.getLength())
                         {
-                            aShorter = aText;
+                            aShorter = aTmp.toString();
                             aLonger  = rOrigWord;
                         }
                         else
                         {
                             aShorter = rOrigWord;
-                            aLonger  = aText;
+                            aLonger  = aTmp.toString();
                         }
                         sal_Int32 nS = aShorter.getLength();
                         sal_Int32 nL = aLonger.getLength();
@@ -139,12 +145,33 @@ Reference<XHyphenatedWord>  HyphenatorDispatcher::buildHyphWord(
                     }
                 }
 #endif
-                //! take care of #i22591#
-                aText = rOrigWord;
+                if (aText[ nOrigHyphPos ] == '[')  // alternative hyphenation
+                {
+                    sal_Int16 split = 0;
+                    sal_Unicode c = aText [ nOrigHyphPos + 1 ];
+                    sal_Int32 endhyphpat = aText.indexOf( ']', nOrigHyphPos );
+                    if ('0' <= c && c <= '9')
+                    {
+                        split = c - '0';
+                        nOrigHyphPos++;
+                    }
+                    if (endhyphpat > -1)
+                    {
+                        OUStringBuffer aTmp2 ( aTmp.copy(0, std::max (nHyphenationPos + 1 - split, 0) ) );
+                        aTmp2.append( aText.copy( nOrigHyphPos + 1, endhyphpat - nOrigHyphPos - 1) );
+                        nHyphenPos = aTmp2.getLength();
+                        aTmp2.append( aTmp.copy( nHyphenationPos + 1 ) );
+                        //! take care of #i22591#
+                        if (rOrigWord[ rOrigWord.getLength() - 1 ] == '.')
+                            aTmp2.append( '.' );
+                        aText = aTmp2.makeStringAndClear();
+                    }
+                }
+                if (nHyphenPos == -1)
+                    aText = rOrigWord;
 
-                DBG_ASSERT( aText == rOrigWord, "failed to " );
-                xRes = new HyphenatedWord( aText, nLang, nHyphenationPos,
-                                aText, nHyphenationPos );
+                xRes = new HyphenatedWord( rOrigWord, nLang, nHyphenationPos,
+                                aText, (nHyphenPos > -1) ? nHyphenPos - 1 : nHyphenationPos);
             }
         }
     }
@@ -167,7 +194,7 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens(
         sal_Int32 nTextLen = aText.getLength();
 
         // trailing '=' means "hyphenation should not be possible"
-        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=')
+        if (nTextLen > 0  &&  aText[ nTextLen - 1 ] != '=' && aText[ nTextLen - 1 ] != '[')
         {
             // sequence to hold hyphenation positions
             Sequence< sal_Int16 > aHyphPos( nTextLen );
@@ -176,11 +203,14 @@ Reference< XPossibleHyphens > HyphenatorDispatcher::buildPossHyphens(
 
             OUStringBuffer aTmp( nTextLen );
             sal_Bool  bSkip = sal_False;
+            sal_Bool  bSkip2 = sal_False;
             sal_Int32 nHyphIdx = -1;
             for (sal_Int32 i = 0;  i < nTextLen;  i++)
             {
                 sal_Unicode cTmp = aText[i];
-                if (cTmp != '=')
+                if (cTmp == '[' || cTmp == ']')
+                    bSkip2 = !bSkip2;
+                if (cTmp != '=' && !bSkip2 && cTmp != ']')
                 {
                     aTmp.append( cTmp );
                     bSkip = sal_False;
@@ -426,7 +456,15 @@ Reference< XHyphenatedWord > SAL_CALL
 
         if (xEntry.is())
         {
-            //! alternative spellings not yet supported by dictionaries
+            // FIXME: multi-character changes, e.g. briddzsel -> bridzs-dzsel, are not supported,
+            // because Writer has a layout problem with them.
+            // For now we allow only one extra character before the hyphen, so that the right break point is not missed:
+            for (int extrachar = 1; extrachar < 2; extrachar++) // temporarily i < 2 instead of i <= 2
+            {
+                xRes = buildHyphWord(aChkWord, xEntry, nLanguage, nIndex + 1 + extrachar);
+                if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
+                    return xRes;
+            }
         }
         else
         {
diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx
index 6ed0510..737f4ae 100644
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@@ -262,9 +262,11 @@ static sal_Bool lcl_HasHyphInfo( const uno::Reference<XDictionaryEntry> &xEntry
     sal_Bool bRes = sal_False;
     if (xEntry.is())
     {
-        // there has to be (at least one) '=' denoting a hyphenation position
+        // there has to be (at least one) '=' or '[' denoting a hyphenation position
         // and it must not be before any character of the word
         sal_Int32 nIdx = xEntry->getDictionaryWord().indexOf( '=' );
+        if (nIdx == -1)
+            nIdx = xEntry->getDictionaryWord().indexOf( '[' );
         bRes = nIdx != -1  &&  nIdx != 0;
     }
     return bRes;


More information about the Libreoffice-commits mailing list