[poppler] 2 commits - glib/poppler-page.cc poppler/TextOutputDev.cc qt5/tests qt6/tests

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Apr 29 13:43:56 UTC 2022


 glib/poppler-page.cc       |    6 +++---
 poppler/TextOutputDev.cc   |    9 ++++++++-
 qt5/tests/check_search.cpp |   27 +++++++++++++++++++++++++++
 qt6/tests/check_search.cpp |   27 +++++++++++++++++++++++++++
 4 files changed, 65 insertions(+), 4 deletions(-)

New commits:
commit 8ce9069f5dff8636373c546f33495908a963777e
Author: Nelson Benítez León <nbenitezl at gmail.com>
Date:   Sat Apr 23 18:14:37 2022 -0400

    fix multiline find_text() bug in two column docs
    
    Fix for a bug in double column documents where some
    single line matches are wrongly returned as being
    multiline matches.
    
    Includes test case for the bug.

diff --git a/glib/poppler-page.cc b/glib/poppler-page.cc
index a9515f3b..8e361a58 100644
--- a/glib/poppler-page.cc
+++ b/glib/poppler-page.cc
@@ -864,7 +864,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text,
     xMin = 0;
     yMin = backwards ? height : 0;
 
-    continueMatch.x1 = G_MAXDOUBLE; // we use this to detect valid returned values
+    continueMatch.x1 = std::numeric_limits<double>::max(); // we use this to detect valid returned values
 
     while (text_dev->findText(ucs4, ucs4_len, false, true, // startAtTop, stopAtBottom
                               start_at_last,
@@ -881,7 +881,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text,
         matches = g_list_prepend(matches, match);
         start_at_last = TRUE;
 
-        if (continueMatch.x1 != G_MAXDOUBLE) {
+        if (continueMatch.x1 != std::numeric_limits<double>::max()) {
             // received rect for next-line part of a multi-line match, add it.
             if (multiline) {
                 match->match_continued = true;
@@ -896,7 +896,7 @@ GList *poppler_page_find_text_with_options(PopplerPage *page, const char *text,
                 matches = g_list_prepend(matches, match);
             }
 
-            continueMatch.x1 = G_MAXDOUBLE;
+            continueMatch.x1 = std::numeric_limits<double>::max();
         }
     }
 
diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 23e0a7ae..4a37b29f 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4146,6 +4146,12 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
                                             continueMatch->x2 = xMax2;
                                             continueMatch->y2 = yMin2;
                                         }
+                                    } else if (continueMatch && continueMatch->x1 != std::numeric_limits<double>::max()) {
+                                        if (ignoredHyphen) {
+                                            *ignoredHyphen = false;
+                                        }
+
+                                        continueMatch->x1 = std::numeric_limits<double>::max();
                                     }
                                 }
                             }
diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp
index 0ec67752..c9bb65e3 100644
--- a/qt5/tests/check_search.cpp
+++ b/qt5/tests/check_search.cpp
@@ -11,6 +11,7 @@ public:
     explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { }
 private slots:
     void testAcrossLinesSearch(); // leave it first
+    void testAcrossLinesSearchDoubleColumn();
     void bug7063();
     void testNextAndPrevious();
     void testWholeWordsOnly();
@@ -370,5 +371,26 @@ void TestSearch::testAcrossLinesSearch()
     QCOMPARE(page->search(bug_str, mode2).size(), 1);
 }
 
+void TestSearch::testAcrossLinesSearchDoubleColumn()
+{
+    // Test for searching across lines with new flag Poppler::Page::AcrossLines
+    // in a document with two columns of text.
+    QScopedPointer<Poppler::Document> document(Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf"));
+    QVERIFY(document);
+
+    QScopedPointer<Poppler::Page> page(document->page(0));
+    QVERIFY(page);
+
+    const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase;
+
+    // Test for a bug in double column documents where single line matches are
+    // wrongly returned as being multiline matches.
+    const QString bug_str = QString::fromUtf8("betw"); // clazy:exclude=qstring-allocations
+
+    // there's only 3 matches for 'betw' in document, where only the last
+    // one is a multiline match, so that's a total of 4 rects returned
+    QCOMPARE(page->search(bug_str, mode).size(), 4);
+}
+
 QTEST_GUILESS_MAIN(TestSearch)
 #include "check_search.moc"
diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp
index 2e55ba01..ede2d0c2 100644
--- a/qt6/tests/check_search.cpp
+++ b/qt6/tests/check_search.cpp
@@ -9,6 +9,7 @@ public:
     explicit TestSearch(QObject *parent = nullptr) : QObject(parent) { }
 private slots:
     void testAcrossLinesSearch(); // leave it first
+    void testAcrossLinesSearchDoubleColumn();
     void bug7063();
     void testNextAndPrevious();
     void testWholeWordsOnly();
@@ -369,5 +370,26 @@ void TestSearch::testAcrossLinesSearch()
     QCOMPARE(page->search(bug_str, mode2).size(), 1);
 }
 
+void TestSearch::testAcrossLinesSearchDoubleColumn()
+{
+    // Test for searching across lines with new flag Poppler::Page::AcrossLines
+    // in a document with two columns of text.
+    std::unique_ptr<Poppler::Document> document = Poppler::Document::load(TESTDATADIR "/unittestcases/searchAcrossLinesDoubleColumn.pdf");
+    QVERIFY(document);
+
+    std::unique_ptr<Poppler::Page> page = document->page(0);
+    QVERIFY(page);
+
+    const Poppler::Page::SearchFlags mode = Poppler::Page::AcrossLines | Poppler::Page::IgnoreDiacritics | Poppler::Page::IgnoreCase;
+
+    // Test for a bug in double column documents where single line matches are
+    // wrongly returned as being multiline matches.
+    const QString bug_str = QString::fromUtf8("betw"); // clazy:exclude=qstring-allocations
+
+    // there's only 3 matches for 'betw' in document, where only the last
+    // one is a multiline match, so that's a total of 4 rects returned
+    QCOMPARE(page->search(bug_str, mode).size(), 4);
+}
+
 QTEST_GUILESS_MAIN(TestSearch)
 #include "check_search.moc"
commit 309004931712476b0ee751fc60224a87c14daf56
Author: Nelson Benítez León <nbenitezl at gmail.com>
Date:   Mon Apr 18 20:03:49 2022 -0400

    fix bug in multiline find_text()
    
    which caused some false positives to be returned.
    
    Includes test case for the bug.
    
    See original comment about this bug:
    https://gitlab.gnome.org/GNOME/evince/-/merge_requests/159#note_1431380

diff --git a/poppler/TextOutputDev.cc b/poppler/TextOutputDev.cc
index 439143c7..23e0a7ae 100644
--- a/poppler/TextOutputDev.cc
+++ b/poppler/TextOutputDev.cc
@@ -4044,6 +4044,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
                     for (k = 0; k < len; ++k) {
                         bool last_char_of_line = j + k == m - 1;
                         bool last_char_of_search_term = k == len - 1;
+                        bool match_started = (bool)k;
 
                         if (p[k] != s2[k] || (nextline && last_char_of_line && !last_char_of_search_term)) {
                             // now check if the comparison failed at the end-of-line hyphen,
@@ -4055,7 +4056,7 @@ bool TextPage::findText(const Unicode *s, int len, bool startAtTop, bool stopAtB
                                     break;
                                 }
                                 k++;
-                            } else if (p[k] != (Unicode)'-' || UnicodeIsWhitespace(s2[k])) {
+                            } else if (!match_started || p[k] != (Unicode)'-' || !last_char_of_line || UnicodeIsWhitespace(s2[k])) {
                                 break;
                             } else {
                                 nextlineAfterHyphen = true;
diff --git a/qt5/tests/check_search.cpp b/qt5/tests/check_search.cpp
index cf57c133..0ec67752 100644
--- a/qt5/tests/check_search.cpp
+++ b/qt5/tests/check_search.cpp
@@ -363,6 +363,11 @@ void TestSearch::testAcrossLinesSearch()
     QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true);
     QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true);
     QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true);
+
+    // BUG about false positives at start of a line.
+    const QString bug_str = QString::fromUtf8("nes y"); // clazy:exclude=qstring-allocations
+    // there's only 1 match, check for that
+    QCOMPARE(page->search(bug_str, mode2).size(), 1);
 }
 
 QTEST_GUILESS_MAIN(TestSearch)
diff --git a/qt6/tests/check_search.cpp b/qt6/tests/check_search.cpp
index b0e84482..2e55ba01 100644
--- a/qt6/tests/check_search.cpp
+++ b/qt6/tests/check_search.cpp
@@ -362,6 +362,11 @@ void TestSearch::testAcrossLinesSearch()
     QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode1), true);
     QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2), true);
     QCOMPARE(page->search(full2linesHyphenated, l, t, r, b, direction, mode2W), true);
+
+    // BUG about false positives at start of a line.
+    const QString bug_str = QString::fromUtf8("nes y"); // clazy:exclude=qstring-allocations
+    // there's only 1 match, check for that
+    QCOMPARE(page->search(bug_str, mode2).size(), 1);
 }
 
 QTEST_GUILESS_MAIN(TestSearch)


More information about the poppler mailing list