[Libreoffice-commits] core.git: sdext/source
Vort
vvort at yandex.ru
Tue Feb 4 07:13:37 PST 2014
sdext/source/pdfimport/tree/pdfiprocessor.cxx | 245 ++++----------------------
sdext/source/pdfimport/tree/pdfiprocessor.hxx | 29 ---
2 files changed, 42 insertions(+), 232 deletions(-)
New commits:
commit 9db3b5585c5fa7fff633672fd32510c4066d035a
Author: Vort <vvort at yandex.ru>
Date: Tue Jan 21 09:27:46 2014 +0200
fdo#35143 PDF import: Reimplementation of whitespace detection function
Change-Id: I5b230aaebf72b70bbb7e206414a5ac0e01f01f86
Reviewed-on: https://gerrit.libreoffice.org/7564
Reviewed-by: Caolán McNamara <caolanm at redhat.com>
Tested-by: Caolán McNamara <caolanm at redhat.com>
diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.cxx b/sdext/source/pdfimport/tree/pdfiprocessor.cxx
index 83be259..72cb753 100644
--- a/sdext/source/pdfimport/tree/pdfiprocessor.cxx
+++ b/sdext/source/pdfimport/tree/pdfiprocessor.cxx
@@ -75,7 +75,6 @@ namespace pdfi
m_eTextDirection( LrTb ),
m_nPages(0),
m_nNextZOrder( 1 ),
- m_bIsWhiteSpaceInLine( false ),
m_xStatusIndicator( xStat ),
m_bHaveTextOnDocLevel(false)
{
@@ -210,221 +209,66 @@ sal_Int32 PDFIProcessor::getFontId( const FontAttributes& rAttr ) const
// line diagnose block - start
void PDFIProcessor::processGlyphLine()
{
- if( m_GlyphsList.empty() )
+ if (m_GlyphsList.empty())
return;
- double fPreAvarageSpaceValue= 0.0;
- double fAvarageDiffCharSpaceValue= 0.0;
- double fMinPreSpaceValue= 0.0;
- double fMaxPreSpaceValue= 0.0;
- double fNullSpaceBreakerAvaregeSpaceValue = 0.0;
+ double spaceDetectBoundary = 0.0;
- unsigned int nSpaceCount( 0 );
- unsigned int nDiffSpaceCount( 0 );
- unsigned int nNullSpaceBreakerCount=0;
- bool preSpaceNull(true);
-
- for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value
+ // Try to find space glyph and its width
+ for (size_t i = 0; i < m_GlyphsList.size(); i++)
{
- if( m_GlyphsList[i].getPrevGlyphsSpace()>0.0 )
- {
- if( fMinPreSpaceValue>m_GlyphsList[i].getPrevGlyphsSpace() )
- fMinPreSpaceValue=m_GlyphsList[i].getPrevGlyphsSpace();
+ OUString& glyph = m_GlyphsList[i].getGlyph();
- if( fMaxPreSpaceValue<m_GlyphsList[i].getPrevGlyphsSpace() )
- fMaxPreSpaceValue=m_GlyphsList[i].getPrevGlyphsSpace();
+ sal_Unicode ch = '\0';
+ if (!glyph.isEmpty())
+ ch = glyph[0];
- fPreAvarageSpaceValue+= m_GlyphsList[i].getPrevGlyphsSpace();
- nSpaceCount++;
- }
- }
-
- if( nSpaceCount!=0 )
- fPreAvarageSpaceValue= fPreAvarageSpaceValue/( nSpaceCount );
-
- for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value
- {
- if ( m_GlyphsList[i].getPrevGlyphsSpace()==0.0 )
- {
- if (
- ( m_GlyphsList[i+1].getPrevGlyphsSpace()>0.0)&&
- ( fPreAvarageSpaceValue>m_GlyphsList[i+1].getPrevGlyphsSpace())
- )
- {
- fNullSpaceBreakerAvaregeSpaceValue+=m_GlyphsList[i+1].getPrevGlyphsSpace();
- nNullSpaceBreakerCount++;
- }
+ if ((ch == 0x20) || (ch == 0xa0))
+ {
+ double spaceWidth =
+ m_GlyphsList[i].getRect().X2 -
+ m_GlyphsList[i].getRect().X1;
+ spaceDetectBoundary = spaceWidth * 0.5;
+ break;
}
}
- if( ( fNullSpaceBreakerAvaregeSpaceValue!= 0.0 )&&
- ( fNullSpaceBreakerAvaregeSpaceValue < fPreAvarageSpaceValue )
- )
+ // If space glyph is not found, use average glyph width instead
+ if (spaceDetectBoundary == 0.0)
{
- fPreAvarageSpaceValue = fNullSpaceBreakerAvaregeSpaceValue;
- }
-
- for ( unsigned int i=0; i<m_GlyphsList.size()-1; i++ ) // i=1 because the first glyph doesn't have a prevGlyphSpace value
- {
- if ( ( m_GlyphsList[i].getPrevGlyphsSpace()>0.0 )
- )
+ double avgGlyphWidth = 0.0;
+ for (size_t i = 0; i < m_GlyphsList.size(); i++)
{
- if (
- ( m_GlyphsList[i].getPrevGlyphsSpace() <= fPreAvarageSpaceValue )&&
- ( m_GlyphsList[i+1].getPrevGlyphsSpace()<= fPreAvarageSpaceValue )
- )
- {
- double temp= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i+1].getPrevGlyphsSpace();
-
- if(temp!=0.0)
- {
- if( temp< 0.0)
- temp= temp* -1.0;
-
- fAvarageDiffCharSpaceValue+=temp;
- nDiffSpaceCount++;
- }
- }
+ avgGlyphWidth +=
+ m_GlyphsList[i].getRect().X2 -
+ m_GlyphsList[i].getRect().X1;
}
-
- }
-
- if (
- ( nNullSpaceBreakerCount>0 )
- )
- {
- fNullSpaceBreakerAvaregeSpaceValue=fNullSpaceBreakerAvaregeSpaceValue/nNullSpaceBreakerCount;
- }
-
- if (
- ( nDiffSpaceCount>0 )&&(fAvarageDiffCharSpaceValue>0)
- )
- {
- fAvarageDiffCharSpaceValue= fAvarageDiffCharSpaceValue/ nDiffSpaceCount;
+ avgGlyphWidth /= m_GlyphsList.size();
+ spaceDetectBoundary = avgGlyphWidth * 0.2;
}
- ParagraphElement* pPara= NULL ;
- FrameElement* pFrame= NULL ;
+ FrameElement* frame = m_pElFactory->createFrameElement(m_GlyphsList[0].getCurElement(),
+ getGCId(getTransformGlyphContext(m_GlyphsList[0])));
+ frame->ZOrder = m_nNextZOrder++;
+ ParagraphElement* para = m_pElFactory->createParagraphElement(frame);
- if(!m_GlyphsList.empty())
+ for (size_t i = 0; i < m_GlyphsList.size(); i++)
{
- pFrame = m_pElFactory->createFrameElement( m_GlyphsList[0].getCurElement(), getGCId( getTransformGlyphContext( m_GlyphsList[0])) );
- pFrame->ZOrder = m_nNextZOrder++;
- pPara = m_pElFactory->createParagraphElement( pFrame );
-
- processGlyph( 0,
- m_GlyphsList[0],
- pPara,
- pFrame,
- m_bIsWhiteSpaceInLine );
+ double spaceSize = 0.0;
+ if (i != 0)
+ spaceSize = m_GlyphsList[i].getRect().X1 - m_GlyphsList[i - 1].getRect().X2;
+ bool prependSpace = spaceSize > spaceDetectBoundary;
+ drawCharGlyphs(m_GlyphsList[i].getGlyph(),
+ m_GlyphsList[i].getRect(),
+ m_GlyphsList[i].getGC(),
+ para,
+ frame,
+ prependSpace);
}
-
- preSpaceNull=false;
-
- for ( unsigned int i=1; i<m_GlyphsList.size()-1; i++ )
- {
- double fPrevDiffCharSpace= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i-1].getPrevGlyphsSpace();
- double fPostDiffCharSpace= m_GlyphsList[i].getPrevGlyphsSpace()-m_GlyphsList[i+1].getPrevGlyphsSpace();
-
-
- if(
- preSpaceNull && (m_GlyphsList[i].getPrevGlyphsSpace()!= 0.0)
- )
- {
- preSpaceNull=false;
- if( fNullSpaceBreakerAvaregeSpaceValue > m_GlyphsList[i].getPrevGlyphsSpace() )
- {
- processGlyph( 0,
- m_GlyphsList[i],
- pPara,
- pFrame,
- m_bIsWhiteSpaceInLine );
-
- }
- else
- {
- processGlyph( 1,
- m_GlyphsList[i],
- pPara,
- pFrame,
- m_bIsWhiteSpaceInLine );
-
- }
-
- }
- else
- {
- if (
- ( ( m_GlyphsList[i].getPrevGlyphsSpace()<= fPreAvarageSpaceValue )&&
- ( fPrevDiffCharSpace<=fAvarageDiffCharSpaceValue )&&
- ( fPostDiffCharSpace<=fAvarageDiffCharSpaceValue )
- ) ||
- ( m_GlyphsList[i].getPrevGlyphsSpace() == 0.0 )
- )
- {
- preSpaceNull=true;
-
- processGlyph( 0,
- m_GlyphsList[i],
- pPara,
- pFrame,
- m_bIsWhiteSpaceInLine );
-
- }
- else
- {
- processGlyph( 1,
- m_GlyphsList[i],
- pPara,
- pFrame,
- m_bIsWhiteSpaceInLine );
-
- }
-
- }
-
- }
-
- if(m_GlyphsList.size()>1)
- processGlyph( 0,
- m_GlyphsList[m_GlyphsList.size()-1],
- pPara,
- pFrame,
- m_bIsWhiteSpaceInLine );
-
m_GlyphsList.clear();
}
-void PDFIProcessor::processGlyph( double fPreAvarageSpaceValue,
- CharGlyph& aGlyph,
- ParagraphElement* pPara,
- FrameElement* pFrame,
- bool bIsWhiteSpaceInLine
- )
-{
- if( !bIsWhiteSpaceInLine )
- {
- bool flag=( 0 < fPreAvarageSpaceValue );
-
- drawCharGlyphs( aGlyph.getGlyph(),
- aGlyph.getRect(),
- aGlyph.getGC(),
- pPara,
- pFrame,
- flag);
- }
- else
- {
- drawCharGlyphs( aGlyph.getGlyph(),
- aGlyph.getRect(),
- aGlyph.getGC(),
- pPara,
- pFrame,
- false );
- }
-}
-
void PDFIProcessor::drawGlyphLine( const OUString& rGlyphs,
const geometry::RealRectangle2D& rRect,
const geometry::Matrix2D& rFontMatrix )
@@ -440,9 +284,7 @@ void PDFIProcessor::drawGlyphLine( const OUString& rGlyphs,
processGlyphLine();
}
- CharGlyph aGlyph(fXPrevTextPosition, fYPrevTextPosition, fPrevTextHeight, fPrevTextWidth,
- m_pCurElement, getCurrentContext(), rFontMatrix, rRect, rGlyphs);
-
+ CharGlyph aGlyph(m_pCurElement, getCurrentContext(), rFontMatrix, rRect, rGlyphs);
getGCId(getCurrentContext());
@@ -452,13 +294,6 @@ void PDFIProcessor::drawGlyphLine( const OUString& rGlyphs,
fXPrevTextPosition = rRect.X2;
fPrevTextHeight = rRect.Y2-rRect.Y1;
fPrevTextWidth = rRect.X2-rRect.X1;
-
- if( !m_bIsWhiteSpaceInLine )
- {
- static OUString tempWhiteSpaceStr( 0x20 );
- static OUString tempWhiteSpaceNonBreakingStr( 0xa0 );
- m_bIsWhiteSpaceInLine=(rGlyphs.equals( tempWhiteSpaceStr ) || rGlyphs.equals( tempWhiteSpaceNonBreakingStr ));
- }
}
GraphicsContext& PDFIProcessor::getTransformGlyphContext( CharGlyph& rGlyph )
diff --git a/sdext/source/pdfimport/tree/pdfiprocessor.hxx b/sdext/source/pdfimport/tree/pdfiprocessor.hxx
index 97c58aa..23861fa 100644
--- a/sdext/source/pdfimport/tree/pdfiprocessor.hxx
+++ b/sdext/source/pdfimport/tree/pdfiprocessor.hxx
@@ -103,11 +103,6 @@ namespace pdfi
private:
void processGlyphLine();
- void processGlyph( double fPreAvarageSpaceValue,
- CharGlyph& rGlyph,
- ParagraphElement* pPara,
- FrameElement* pFrame,
- bool bIsWhiteSpaceInLine );
void drawGlyphLine( const OUString& rGlyphs,
const ::com::sun::star::geometry::RealRectangle2D& rRect,
@@ -226,7 +221,6 @@ namespace pdfi
sal_Int32 m_nPages;
sal_Int32 m_nNextZOrder;
- bool m_bIsWhiteSpaceInLine;
com::sun::star::uno::Reference<
com::sun::star::task::XStatusIndicator >
m_xStatusIndicator;
@@ -236,11 +230,9 @@ namespace pdfi
class CharGlyph
{
public:
- CharGlyph(double fXPrevGlyphPosition, double fYPrevGlyphPosition, double fPrevGlyphHeight, double fPrevGlyphWidth,
- Element* pCurElement, const GraphicsContext& rCurrentContext, const com::sun::star::geometry::Matrix2D& rFontMatrix,
+ CharGlyph(Element* pCurElement, const GraphicsContext& rCurrentContext, const com::sun::star::geometry::Matrix2D& rFontMatrix,
const com::sun::star::geometry::RealRectangle2D& rRect, const OUString& rGlyphs )
- : m_fXPrevGlyphPosition(fXPrevGlyphPosition), m_fYPrevGlyphPosition(fYPrevGlyphPosition), m_fPrevGlyphHeight(fPrevGlyphHeight),
- m_fPrevGlyphWidth(fPrevGlyphWidth), m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext),
+ : m_pCurElement(pCurElement), m_rCurrentContext(rCurrentContext),
m_rFontMatrix(rFontMatrix), m_rRect(rRect), m_rGlyphs(rGlyphs) {};
virtual ~CharGlyph(){};
@@ -250,24 +242,7 @@ namespace pdfi
GraphicsContext& getGC(){ return m_rCurrentContext; }
Element* getCurElement(){ return m_pCurElement; }
- double getYPrevGlyphPosition() const { return m_fYPrevGlyphPosition; }
- double getXPrevGlyphPosition() const { return m_fXPrevGlyphPosition; }
- double getPrevGlyphHeight() const { return m_fPrevGlyphHeight; }
- double getPrevGlyphWidth () const { return m_fPrevGlyphWidth; }
- double getPrevGlyphsSpace() const
- {
- if( (m_rRect.X1-m_fXPrevGlyphPosition)<0 )
- return 0;
- else
- return m_rRect.X1-m_fXPrevGlyphPosition;
- }
-
private:
-
- double m_fXPrevGlyphPosition ;
- double m_fYPrevGlyphPosition ;
- double m_fPrevGlyphHeight ;
- double m_fPrevGlyphWidth ;
Element* m_pCurElement ;
GraphicsContext m_rCurrentContext ;
com::sun::star::geometry::Matrix2D m_rFontMatrix ;
More information about the Libreoffice-commits
mailing list