[Libreoffice-commits] core.git: sdext/source

Vort vvort at yandex.ru
Tue Feb 4 07:13:37 PST 2014


 sdext/source/pdfimport/tree/pdfiprocessor.cxx |  245 ++++----------------------
 sdext/source/pdfimport/tree/pdfiprocessor.hxx |   29 ---
 2 files changed, 42 insertions(+), 232 deletions(-)

New commits:
commit 9db3b5585c5fa7fff633672fd32510c4066d035a
Author: Vort <vvort at yandex.ru>
Date:   Tue Jan 21 09:27:46 2014 +0200

    fdo#35143 PDF import: Reimplementation of whitespace detection function
    
    Change-Id: I5b230aaebf72b70bbb7e206414a5ac0e01f01f86
    Reviewed-on: https://gerrit.libreoffice.org/7564
    Reviewed-by: Caolán McNamara <caolanm at redhat.com>
    Tested-by: Caolán McNamara <caolanm at redhat.com>

diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.cxx b/sdext/source/pdfimport/tree/pdfiprocessor.cxx
index 83be259..72cb753 100644
--- a/sdext/source/pdfimport/tree/pdfiprocessor.cxx
+++ b/sdext/source/pdfimport/tree/pdfiprocessor.cxx
@@ -75,7 +75,6 @@ namespace pdfi
     m_eTextDirection( LrTb ),
     m_nPages(0),
     m_nNextZOrder( 1 ),
-    m_bIsWhiteSpaceInLine( false ),
     m_xStatusIndicator( xStat ),
     m_bHaveTextOnDocLevel(false)
 {
@@ -210,221 +209,66 @@ sal_Int32 PDFIProcessor::getFontId( const FontAttributes& rAttr ) const
 // line diagnose block - start
 void PDFIProcessor::processGlyphLine()
 {
-    if( m_GlyphsList.empty() )
+    if (m_GlyphsList.empty())
         return;
 
-    double fPreAvarageSpaceValue= 0.0;
-    double fAvarageDiffCharSpaceValue= 0.0;
-    double fMinPreSpaceValue= 0.0;
-    double fMaxPreSpaceValue= 0.0;
-    double fNullSpaceBreakerAvaregeSpaceValue = 0.0;
+    double spaceDetectBoundary = 0.0;
 
-    unsigned int    nSpaceCount( 0 );
-    unsigned int    nDiffSpaceCount( 0 );
-    unsigned int    nNullSpaceBreakerCount=0;
-    bool preSpaceNull(true);
-
-    for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value
+    // Try to find the space glyph and its width
+    for (size_t i = 0; i < m_GlyphsList.size(); i++)
     {
-        if( m_GlyphsList[i].getPrevGlyphsSpace()>0.0 )
-        {
-           if( fMinPreSpaceValue>m_GlyphsList[i].getPrevGlyphsSpace() )
-               fMinPreSpaceValue=m_GlyphsList[i].getPrevGlyphsSpace();
+        OUString& glyph = m_GlyphsList[i].getGlyph();
 
-           if( fMaxPreSpaceValue<m_GlyphsList[i].getPrevGlyphsSpace() )
-               fMaxPreSpaceValue=m_GlyphsList[i].getPrevGlyphsSpace();
+        sal_Unicode ch = '\0';
+        if (!glyph.isEmpty())
+            ch = glyph[0];
 
-           fPreAvarageSpaceValue+= m_GlyphsList[i].getPrevGlyphsSpace();
-           nSpaceCount++;
-        }
-    }
-
-    if( nSpaceCount!=0 )
-     fPreAvarageSpaceValue= fPreAvarageSpaceValue/( nSpaceCount );
-
-    for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value
-    {
-       if ( m_GlyphsList[i].getPrevGlyphsSpace()==0.0 )
-       {
-            if (
-                 ( m_GlyphsList[i+1].getPrevGlyphsSpace()>0.0)&&
-                 ( fPreAvarageSpaceValue>m_GlyphsList[i+1].getPrevGlyphsSpace())
-               )
-            {
-              fNullSpaceBreakerAvaregeSpaceValue+=m_GlyphsList[i+1].getPrevGlyphsSpace();
-              nNullSpaceBreakerCount++;
-            }
+        if ((ch == 0x20) || (ch == 0xa0))
+        {
+            double spaceWidth =
+                m_GlyphsList[i].getRect().X2 -
+                m_GlyphsList[i].getRect().X1;
+            spaceDetectBoundary = spaceWidth * 0.5;
+            break;
         }
     }
 
-    if( ( fNullSpaceBreakerAvaregeSpaceValue!= 0.0 )&&
-        ( fNullSpaceBreakerAvaregeSpaceValue < fPreAvarageSpaceValue )
-      )
+    // If space glyph is not found, use average glyph width instead
+    if (spaceDetectBoundary == 0.0)
     {
-        fPreAvarageSpaceValue = fNullSpaceBreakerAvaregeSpaceValue;
-    }
-
-    for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 cose the first Glypth dont have prevGlyphSpace value
-    {
-        if  ( ( m_GlyphsList[i].getPrevGlyphsSpace()>0.0 )
-            )
+        double avgGlyphWidth = 0.0;
+        for (size_t i = 0; i < m_GlyphsList.size(); i++)
         {
-          if (
-              ( m_GlyphsList[i].getPrevGlyphsSpace()  <= fPreAvarageSpaceValue )&&
-              ( m_GlyphsList[i+1].getPrevGlyphsSpace()<= fPreAvarageSpaceValue )
-             )
-          {
-               double temp= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i+1].getPrevGlyphsSpace();
-
-               if(temp!=0.0)
-               {
-                 if( temp< 0.0)
-                  temp= temp* -1.0;
-
-                 fAvarageDiffCharSpaceValue+=temp;
-                 nDiffSpaceCount++;
-               }
-          }
+            avgGlyphWidth +=
+                m_GlyphsList[i].getRect().X2 -
+                m_GlyphsList[i].getRect().X1;
         }
-
-    }
-
-    if (
-         ( nNullSpaceBreakerCount>0 )
-       )
-    {
-       fNullSpaceBreakerAvaregeSpaceValue=fNullSpaceBreakerAvaregeSpaceValue/nNullSpaceBreakerCount;
-    }
-
-    if (
-         ( nDiffSpaceCount>0 )&&(fAvarageDiffCharSpaceValue>0)
-       )
-    {
-        fAvarageDiffCharSpaceValue= fAvarageDiffCharSpaceValue/ nDiffSpaceCount;
+        avgGlyphWidth /= m_GlyphsList.size();
+        spaceDetectBoundary = avgGlyphWidth * 0.2;
     }
 
-    ParagraphElement* pPara= NULL ;
-    FrameElement* pFrame= NULL ;
+    FrameElement* frame = m_pElFactory->createFrameElement(m_GlyphsList[0].getCurElement(),
+        getGCId(getTransformGlyphContext(m_GlyphsList[0])));
+    frame->ZOrder = m_nNextZOrder++;
+    ParagraphElement* para = m_pElFactory->createParagraphElement(frame);
 
-    if(!m_GlyphsList.empty())
+    for (size_t i = 0; i < m_GlyphsList.size(); i++)
     {
-        pFrame = m_pElFactory->createFrameElement( m_GlyphsList[0].getCurElement(), getGCId( getTransformGlyphContext( m_GlyphsList[0])) );
-        pFrame->ZOrder = m_nNextZOrder++;
-        pPara = m_pElFactory->createParagraphElement( pFrame );
-
-        processGlyph( 0,
-                  m_GlyphsList[0],
-                  pPara,
-                  pFrame,
-                  m_bIsWhiteSpaceInLine );
+        double spaceSize = 0.0;
+        if (i != 0)
+            spaceSize = m_GlyphsList[i].getRect().X1 - m_GlyphsList[i - 1].getRect().X2;
+        bool prependSpace = spaceSize > spaceDetectBoundary;
+        drawCharGlyphs(m_GlyphsList[i].getGlyph(),
+                       m_GlyphsList[i].getRect(),
+                       m_GlyphsList[i].getGC(),
+                       para,
+                       frame,
+                       prependSpace);
     }
 
-
-    preSpaceNull=false;
-
-    for ( unsigned int i=1; i<m_GlyphsList.size()-1; i++ )
-    {
-        double fPrevDiffCharSpace= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i-1].getPrevGlyphsSpace();
-        double fPostDiffCharSpace= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i+1].getPrevGlyphsSpace();
-
-
-         if(
-             preSpaceNull && (m_GlyphsList[i].getPrevGlyphsSpace()!= 0.0)
-            )
-         {
-               preSpaceNull=false;
-              if( fNullSpaceBreakerAvaregeSpaceValue > m_GlyphsList[i].getPrevGlyphsSpace() )
-              {
-                processGlyph( 0,
-                                      m_GlyphsList[i],
-                              pPara,
-                              pFrame,
-                              m_bIsWhiteSpaceInLine );
-
-              }
-              else
-              {
-                processGlyph( 1,
-                              m_GlyphsList[i],
-                              pPara,
-                              pFrame,
-                              m_bIsWhiteSpaceInLine );
-
-              }
-
-         }
-         else
-         {
-            if (
-                ( ( m_GlyphsList[i].getPrevGlyphsSpace()<= fPreAvarageSpaceValue )&&
-                  ( fPrevDiffCharSpace<=fAvarageDiffCharSpaceValue )&&
-                  ( fPostDiffCharSpace<=fAvarageDiffCharSpaceValue )
-                ) ||
-                ( m_GlyphsList[i].getPrevGlyphsSpace() == 0.0 )
-            )
-            {
-                preSpaceNull=true;
-
-            processGlyph( 0,
-                        m_GlyphsList[i],
-                        pPara,
-                        pFrame,
-                        m_bIsWhiteSpaceInLine );
-
-            }
-            else
-            {
-                processGlyph( 1,
-                        m_GlyphsList[i],
-                        pPara,
-                        pFrame,
-                        m_bIsWhiteSpaceInLine );
-
-            }
-
-         }
-
-    }
-
-    if(m_GlyphsList.size()>1)
-     processGlyph( 0,
-                  m_GlyphsList[m_GlyphsList.size()-1],
-                  pPara,
-                  pFrame,
-                  m_bIsWhiteSpaceInLine );
-
     m_GlyphsList.clear();
 }
 
-void PDFIProcessor::processGlyph( double       fPreAvarageSpaceValue,
-                                  CharGlyph&   aGlyph,
-                                  ParagraphElement* pPara,
-                                  FrameElement* pFrame,
-                                  bool         bIsWhiteSpaceInLine
-                                      )
-{
-    if( !bIsWhiteSpaceInLine )
-    {
-        bool flag=( 0 < fPreAvarageSpaceValue );
-
-        drawCharGlyphs(  aGlyph.getGlyph(),
-                         aGlyph.getRect(),
-                         aGlyph.getGC(),
-                         pPara,
-                         pFrame,
-                         flag);
-    }
-    else
-    {
-        drawCharGlyphs( aGlyph.getGlyph(),
-                        aGlyph.getRect(),
-                        aGlyph.getGC(),
-                        pPara,
-                        pFrame,
-                        false );
-    }
-}
-
 void PDFIProcessor::drawGlyphLine( const OUString&             rGlyphs,
                                    const geometry::RealRectangle2D& rRect,
                                    const geometry::Matrix2D&        rFontMatrix )
@@ -440,9 +284,7 @@ void PDFIProcessor::drawGlyphLine( const OUString&             rGlyphs,
         processGlyphLine();
     }
 
-    CharGlyph aGlyph(fXPrevTextPosition, fYPrevTextPosition, fPrevTextHeight, fPrevTextWidth,
-               m_pCurElement, getCurrentContext(), rFontMatrix, rRect, rGlyphs);
-
+    CharGlyph aGlyph(m_pCurElement, getCurrentContext(), rFontMatrix, rRect, rGlyphs);
 
     getGCId(getCurrentContext());
 
@@ -452,13 +294,6 @@ void PDFIProcessor::drawGlyphLine( const OUString&             rGlyphs,
     fXPrevTextPosition  = rRect.X2;
     fPrevTextHeight     = rRect.Y2-rRect.Y1;
     fPrevTextWidth      = rRect.X2-rRect.X1;
-
-    if( !m_bIsWhiteSpaceInLine )
-    {
-        static OUString tempWhiteSpaceStr( 0x20 );
-        static OUString tempWhiteSpaceNonBreakingStr( 0xa0 );
-        m_bIsWhiteSpaceInLine=(rGlyphs.equals( tempWhiteSpaceStr ) || rGlyphs.equals( tempWhiteSpaceNonBreakingStr ));
-    }
 }
 
 GraphicsContext& PDFIProcessor::getTransformGlyphContext( CharGlyph& rGlyph )
diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.hxx b/sdext/source/pdfimport/tree/pdfiprocessor.hxx
index 97c58aa..23861fa 100644
--- a/sdext/source/pdfimport/tree/pdfiprocessor.hxx
+++ b/sdext/source/pdfimport/tree/pdfiprocessor.hxx
@@ -103,11 +103,6 @@ namespace pdfi
 
     private:
         void processGlyphLine();
-        void processGlyph(   double       fPreAvarageSpaceValue,
-                             CharGlyph&   rGlyph,
-                             ParagraphElement* pPara,
-                             FrameElement* pFrame,
-                             bool         bIsWhiteSpaceInLine );
 
         void drawGlyphLine( const OUString&                               rGlyphs,
                             const ::com::sun::star::geometry::RealRectangle2D& rRect,
@@ -226,7 +221,6 @@ namespace pdfi
 
         sal_Int32                          m_nPages;
         sal_Int32                          m_nNextZOrder;
-        bool                               m_bIsWhiteSpaceInLine;
         com::sun::star::uno::Reference<
             com::sun::star::task::XStatusIndicator >
                                            m_xStatusIndicator;
@@ -236,11 +230,9 @@ namespace pdfi
     class CharGlyph
     {
         public:
-            CharGlyph(double fXPrevGlyphPosition, double fYPrevGlyphPosition, double fPrevGlyphHeight, double fPrevGlyphWidth,
-               Element* pCurElement, const GraphicsContext& rCurrentContext, const com::sun::star::geometry::Matrix2D& rFontMatrix,
+            CharGlyph(Element* pCurElement, const GraphicsContext& rCurrentContext, const com::sun::star::geometry::Matrix2D& rFontMatrix,
                const com::sun::star::geometry::RealRectangle2D& rRect, const OUString& rGlyphs  )
-               : m_fXPrevGlyphPosition(fXPrevGlyphPosition), m_fYPrevGlyphPosition(fYPrevGlyphPosition), m_fPrevGlyphHeight(fPrevGlyphHeight),
-                 m_fPrevGlyphWidth(fPrevGlyphWidth), m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext),
+               : m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext),
                  m_rFontMatrix(rFontMatrix), m_rRect(rRect), m_rGlyphs(rGlyphs) {};
 
             virtual ~CharGlyph(){};
@@ -250,24 +242,7 @@ namespace pdfi
             GraphicsContext&  getGC(){ return m_rCurrentContext; }
             Element*  getCurElement(){ return m_pCurElement; }
 
-            double getYPrevGlyphPosition() const { return m_fYPrevGlyphPosition; }
-            double getXPrevGlyphPosition() const { return m_fXPrevGlyphPosition; }
-            double getPrevGlyphHeight() const { return m_fPrevGlyphHeight; }
-            double getPrevGlyphWidth () const { return m_fPrevGlyphWidth; }
-            double getPrevGlyphsSpace() const
-            {
-                if( (m_rRect.X1-m_fXPrevGlyphPosition)<0 )
-                    return 0;
-                else
-                    return m_rRect.X1-m_fXPrevGlyphPosition;
-            }
-
         private:
-
-            double                      m_fXPrevGlyphPosition ;
-            double                      m_fYPrevGlyphPosition ;
-            double                      m_fPrevGlyphHeight ;
-            double                      m_fPrevGlyphWidth ;
             Element*                    m_pCurElement ;
             GraphicsContext             m_rCurrentContext ;
             com::sun::star::geometry::Matrix2D          m_rFontMatrix ;


More information about the Libreoffice-commits mailing list