[poppler] 2 commits - glib/poppler-page.cc poppler/TextOutputDev.cc qt5/tests qt6/tests
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Apr 29 13:43:56 UTC 2022
glib/poppler-page.cc | 6 +++---
poppler/TextOutputDev.cc | 9 ++++++++-
qt5/tests/check_search.cpp | 27 +++++++++++++++++++++++++++
qt6/tests/check_search.cpp | 27 +++++++++++++++++++++++++++
4 files changed, 65 insertions(+), 4 deletions(-)
New commits:
commit 8ce9069f5dff8636373c546f33495908a963777e
Author: Nelson Benítez León <nbenitezl at gmail.com>
Date: Sat Apr 23 18:14:37 2022 -0400
fix multiline find_text() bug in two column docs
Fix for a bug in double column documents where some
single line matches are wrongly returned as being
multiline matches.
Includes test case for the bug.
diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index a9515f3b..8e361a58 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -864,7 +864,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text,
xMin = 0;
yMin = backwards ? height : 0;
- continueMatch.x1 = G_MAXDOUBLE; // we use this to detect valid returned values
+ continueMatch.x1 = std::numeric_limits<double>::max(); // we use this to detect valid returned values
while (text_dev->findText(ucs4, ucs4_len, false, true, // startAtTop, stopAtBottom
start_at_last,
@@ -881,7 +881,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text,
matches = g_list_prepend(matches, match);
start_at_last = TRUE;
- if (continueMatch.x1 != G_MAXDOUBLE) {
+ if (continueMatch.x1 != std::numeric_limits<double>::max()) {
// received rect for next-line part of a multi-line match, add it.
if (multiline) {
match->match_continued = true;
@@ -896,7 +896,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text,
matches = g_list_prepend(matches, match);
}
- continueMatch.x1 = G_MAXDOUBLE;
+ continueMatch.x1 = std::numeric_limits<double>::max();
}
}
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 23e0a7ae..4a37b29f 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4146,6 +4146,12 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
continueMatch->x2 = xMax2;
continueMatch->y2 = yMin2;
}
+ } else if (continueMatch && continueMatch->x1 != std::numeric_limits<double>::max()) {
+ if (ignoredHyphen) {
+ *ignoredHyphen = false;
+ }
+
+ continueMatch->x1 = std::numeric_limits<double>::max();
}
}
}
diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp
index 0ec67752..c9bb65e3 100644
--- a/qt5/tests/check_search.cpp
+++ b/qt5/tests/check_search.cpp
@@ -11,6 +11,7 @@ public:
explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { }
private slots:
void testAcrossLinesSearch(); // leave it first
+ void testAcrossLinesSearchDoubleColumn();
void bug7063();
void testNextAndPrevious();
void testWholeWordsOnly();
@@ -370,5 +371,26 @@ void TestSearch::testAcrossLinesSearch()
QCOMPARE(page->search(bug_str, mode2).size(), 1);
}
+void TestSearch::testAcrossLinesSearchDoubleColumn()
+{
+ // Test for searching across lines with new flag Poppler::Page::AcrossLines
+ // in a document with two columns of text.
+ QScopedPointer<Poppler::Document> document(Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf"));
+ QVERIFY(document);
+
+ QScopedPointer<Poppler::Page> page(document->page(0));
+ QVERIFY(page);
+
+ const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase;
+
+ // Test for a bug in double column documents where single line matches are
+ // wrongly returned as being multiline matches.
+ const QString bug_str = QString::fromUtf8("betw"); // clazy:exclude=qstring-allocations
+
+ // there's only 3 matches for 'betw' in document, where only the last
+ // one is a multiline match, so that's a total of 4 rects returned
+ QCOMPARE(page->search(bug_str, mode).size(), 4);
+}
+
QTEST_GUILESS_MAIN(TestSearch)
#include "check_search.moc"
diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp
index 2e55ba01..ede2d0c2 100644
--- a/qt6/tests/check_search.cpp
+++ b/qt6/tests/check_search.cpp
@@ -9,6 +9,7 @@ public:
explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { }
private slots:
void testAcrossLinesSearch(); // leave it first
+ void testAcrossLinesSearchDoubleColumn();
void bug7063();
void testNextAndPrevious();
void testWholeWordsOnly();
@@ -369,5 +370,26 @@ void TestSearch::testAcrossLinesSearch()
QCOMPARE(page->search(bug_str, mode2).size(), 1);
}
+void TestSearch::testAcrossLinesSearchDoubleColumn()
+{
+ // Test for searching across lines with new flag Poppler::Page::AcrossLines
+ // in a document with two columns of text.
+ std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf");
+ QVERIFY(document);
+
+ std::unique_ptr<Poppler::Page> page = document->page(0);
+ QVERIFY(page);
+
+ const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase;
+
+ // Test for a bug in double column documents where single line matches are
+ // wrongly returned as being multiline matches.
+ const QString bug_str = QString::fromUtf8("betw"); // clazy:exclude=qstring-allocations
+
+ // there's only 3 matches for 'betw' in document, where only the last
+ // one is a multiline match, so that's a total of 4 rects returned
+ QCOMPARE(page->search(bug_str, mode).size(), 4);
+}
+
QTEST_GUILESS_MAIN(TestSearch)
#include "check_search.moc"
commit 309004931712476b0ee751fc60224a87c14daf56
Author: Nelson Benítez León <nbenitezl at gmail.com>
Date: Mon Apr 18 20:03:49 2022 -0400
fix bug in multiline find_text()
which caused some false positives to be returned.
Includes test case for the bug.
See original comment about this bug:
https://gitlab.gnome.org/GNOME/evince/-/merge_requests/159#note_1431380
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 439143c7..23e0a7ae 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4044,6 +4044,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
for (k = 0; k < len; ++k) {
bool last_char_of_line = j + k == m - 1;
bool last_char_of_search_term = k == len - 1;
+ bool match_started = (bool)k;
if (p[k] != s2[k] || (nextline && last_char_of_line && !last_char_of_search_term)) {
// now check if the comparison failed at the end-of-line hyphen,
@@ -4055,7 +4056,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
break;
}
k++;
- } else if (p[k] != (Unicode)'-' || UnicodeIsWhitespace(s2[k])) {
+ } else if (!match_started || p[k] != (Unicode)'-' || !last_char_of_line || UnicodeIsWhitespace(s2[k])) {
break;
} else {
nextlineAfterHyphen = true;
diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp
index cf57c133..0ec67752 100644
--- a/qt5/tests/check_search.cpp
+++ b/qt5/tests/check_search.cpp
@@ -363,6 +363,11 @@ void TestSearch::testAcrossLinesSearch()
QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true);
QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true);
QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true);
+
+ // BUG about false positives at start of a line.
+ const QString bug_str = QString::fromUtf8("nes y"); // clazy:exclude=qstring-allocations
+ // there's only 1 match, check for that
+ QCOMPARE(page->search(bug_str, mode2).size(), 1);
}
QTEST_GUILESS_MAIN(TestSearch)
diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp
index b0e84482..2e55ba01 100644
--- a/qt6/tests/check_search.cpp
+++ b/qt6/tests/check_search.cpp
@@ -362,6 +362,11 @@ void TestSearch::testAcrossLinesSearch()
QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true);
QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true);
QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true);
+
+ // BUG about false positives at start of a line.
+ const QString bug_str = QString::fromUtf8("nes y"); // clazy:exclude=qstring-allocations
+ // there's only 1 match, check for that
+ QCOMPARE(page->search(bug_str, mode2).size(), 1);
}
QTEST_GUILESS_MAIN(TestSearch)
More information about the poppler
mailing list