[poppler] 3 commits - cpp/poppler-page.cpp cpp/poppler-page.h cpp/tests
Pino Toscano
pino at kemper.freedesktop.org
Wed Sep 15 08:29:46 PDT 2010
cpp/poppler-page.cpp | 27 +++++++++++++++++++++-
cpp/poppler-page.h | 5 ++++
cpp/tests/poppler-dump.cpp | 53 +++++++++++++++++++++++++++++++++++++++------
3 files changed, 76 insertions(+), 9 deletions(-)
New commits:
commit b9333529bba43a71655fdbf1919ba515f7df9ca3
Author: Pino Toscano <pino at kde.org>
Date: Wed Sep 15 17:23:54 2010 +0200
[cpp/tests] poppler-dump: convert out_ustring() to an operator<<(std::ostream&)
so that we can better output the byte array of a string to the stream
diff --git a/cpp/tests/poppler-dump.cpp b/cpp/tests/poppler-dump.cpp
index e9a068c..cb0ee75 100644
--- a/cpp/tests/poppler-dump.cpp
+++ b/cpp/tests/poppler-dump.cpp
@@ -81,9 +81,13 @@ static void error(const std::string &msg)
exit(1);
}
-static std::string out_ustring(const poppler::ustring &str)
+std::ostream& operator<<(std::ostream& stream, const poppler::ustring &str)
{
- return str.to_latin1();
+ const poppler::byte_array ba = str.to_utf8();
+ for (unsigned int i = 0; i < ba.size(); ++i) {
+ stream << (char)(ba[i]);
+ }
+ return stream;
}
static std::string out_date(std::time_t date)
@@ -174,7 +178,7 @@ static void print_info(poppler::document *doc)
const std::vector<std::string> keys = doc->info_keys();
std::vector<std::string>::const_iterator key_it = keys.begin(), key_end = keys.end();
for (; key_it != key_end; ++key_it) {
- std::cout << std::setw(out_width) << *key_it << ": " << out_ustring(doc->info_key(*key_it)) << std::endl;
+ std::cout << std::setw(out_width) << *key_it << ": " << doc->info_key(*key_it) << std::endl;
}
std::cout << std::setw(out_width) << "Date (creation)" << ": " << out_date(doc->info_date("CreationDate")) << std::endl;
std::cout << std::setw(out_width) << "Date (modification)" << ": " << out_date(doc->info_date("ModDate")) << std::endl;
@@ -205,14 +209,14 @@ static void print_perm(poppler::document *doc)
static void print_metadata(poppler::document *doc)
{
std::cout << std::setw(out_width) << "Metadata" << ":" << std::endl
- << out_ustring(doc->metadata()) << std::endl;
+ << doc->metadata() << std::endl;
std::cout << std::endl;
}
static void print_toc_item(poppler::toc_item *item, int indent)
{
std::cout << std::setw(indent * 2) << " "
- << "+ " << out_ustring(item->title()) << " (" << item->is_open() << ")"
+ << "+ " << item->title() << " (" << item->is_open() << ")"
<< std::endl;
poppler::toc_item::iterator it = item->children_begin(), it_end = item->children_end();
for (; it != it_end; ++it) {
@@ -271,7 +275,13 @@ static void print_embedded_files(poppler::document *doc)
<< " " << std::setw(20) << out_date(f->creation_date())
<< " " << std::setw(20) << out_date(f->modification_date())
<< std::endl
- << " " << (f->description().empty() ? std::string("<no description>") : out_ustring(f->description()))
+ << " ";
+ if (f->description().empty()) {
+ std::cout << "<no description>";
+ } else {
+ std::cout << f->description();
+ }
+ std::cout
<< std::endl
<< " " << std::setw(35) << (f->checksum().empty() ? std::string("<no checksum>") : out_hex_string(f->checksum()))
<< " " << (f->mime_type().empty() ? std::string("<no mime type>") : f->mime_type())
@@ -287,7 +297,7 @@ static void print_embedded_files(poppler::document *doc)
static void print_page(poppler::page *p)
{
std::cout << std::setw(out_width) << "Rect" << ": " << p->page_rect() << std::endl;
- std::cout << std::setw(out_width) << "Label" << ": " << out_ustring(p->label()) << std::endl;
+ std::cout << std::setw(out_width) << "Label" << ": " << p->label() << std::endl;
std::cout << std::setw(out_width) << "Duration" << ": " << p->duration() << std::endl;
std::cout << std::setw(out_width) << "Orientation" << ": " << out_page_orientation(p->orientation()) << std::endl;
std::cout << std::endl;
@@ -295,7 +305,7 @@ static void print_page(poppler::page *p)
static void print_page_text(poppler::page *p)
{
- std::cout << out_ustring(p->text(p->page_rect(), show_text_layout)) << std::endl;
+ std::cout << p->text(p->page_rect(), show_text_layout) << std::endl;
std::cout << std::endl;
}
commit a44f711b4412332875337e9fb7509f18db806ddc
Author: Pino Toscano <pino at kde.org>
Date: Wed Sep 15 16:44:30 2010 +0200
[cpp/tests] poppler-dump: add a "--show-text <physical|raw>" option
... to show the text of a page in the specified layout
diff --git a/cpp/tests/poppler-dump.cpp b/cpp/tests/poppler-dump.cpp
index 104aaa4..e9a068c 100644
--- a/cpp/tests/poppler-dump.cpp
+++ b/cpp/tests/poppler-dump.cpp
@@ -23,6 +23,7 @@
#include <poppler-toc.h>
#include <cstdlib>
+#include <cstring>
#include <ctime>
#include <iomanip>
#include <iostream>
@@ -44,6 +45,8 @@ bool show_fonts = false;
bool show_embedded_files = false;
bool show_pages = false;
bool show_help = false;
+char show_text[32];
+poppler::page::text_layout_enum show_text_layout = poppler::page::physical_layout;
static const ArgDesc the_args[] = {
{ "--show-all", argFlag, &show_all, 0,
@@ -62,6 +65,8 @@ static const ArgDesc the_args[] = {
"show the document-level embedded files" },
{ "--show-pages", argFlag, &show_pages, 0,
"show pages information" },
+ { "--show-text", argString, &show_text, sizeof(show_text),
+ "show text (physical|raw) extracted from all pages" },
{ "-h", argFlag, &show_help, 0,
"print usage information" },
{ "--help", argFlag, &show_help, 0,
@@ -288,6 +293,12 @@ static void print_page(poppler::page *p)
std::cout << std::endl;
}
+static void print_page_text(poppler::page *p)
+{
+ std::cout << out_ustring(p->text(p->page_rect(), show_text_layout)) << std::endl;
+ std::cout << std::endl;
+}
+
int main(int argc, char *argv[])
{
if (!parseArgs(the_args, &argc, argv)
@@ -296,6 +307,16 @@ int main(int argc, char *argv[])
exit(1);
}
+ if (show_text[0]) {
+ if (!memcmp(show_text, "physical", 9)) {
+ show_text_layout = poppler::page::physical_layout;
+ } else if (!memcmp(show_text, "raw", 4)) {
+ show_text_layout = poppler::page::raw_order_layout;
+ } else {
+ error(std::string("unrecognized text mode: '") + show_text + "'");
+ }
+ }
+
std::string file_name(argv[1]);
std::auto_ptr<poppler::document> doc(poppler::document::load_from_file(file_name));
@@ -345,6 +366,14 @@ int main(int argc, char *argv[])
print_page(p.get());
}
}
+ if (show_text[0]) {
+ const int pages = doc->pages();
+ for (int i = 0; i < pages; ++i) {
+ std::cout << "Page " << (i + 1) << "/" << pages << ":" << std::endl;
+ std::auto_ptr<poppler::page> p(doc->create_page(i));
+ print_page_text(p.get());
+ }
+ }
return 0;
}
commit 0094c9372b5b439af2564d83d6fb7439f4bdba88
Author: Pino Toscano <pino at kde.org>
Date: Wed Sep 15 13:19:13 2010 +0200
[cpp] add a new page::text() for specifying a layout mode
add a new text_layout_enum enum for the layout mode, used by the new text()
make the old text() implementation call the new one with the old value (= physical)
add & adapt the apidox accordingly
diff --git a/cpp/poppler-page.cpp b/cpp/poppler-page.cpp
index 1bfb8d4..4e2f730 100644
--- a/cpp/poppler-page.cpp
+++ b/cpp/poppler-page.cpp
@@ -60,6 +60,12 @@ page_private::~page_private()
The direction/action to follow when performing a text search.
*/
+/**
+ \enum poppler::page::text_layout_enum
+
+ A layout of the text of a page.
+*/
+
page::page(document_private *doc, int index)
: d(new page_private(doc, index))
@@ -234,7 +240,7 @@ bool page::search(const ustring &text, rectf &r, search_direction_enum direction
}
/**
- Returns the text in the page.
+ Returns the text in the page, in its physical layout.
\param r if not empty, it will be extracted the text in it; otherwise, the
text of the whole page
@@ -243,8 +249,25 @@ bool page::search(const ustring &text, rectf &r, search_direction_enum direction
*/
ustring page::text(const rectf &r) const
{
+ return text(r, physical_layout);
+}
+
+/**
+ Returns the text in the page.
+
+ \param rect if not empty, the text inside it is extracted; otherwise, the
+ text of the whole page is extracted
+ \param layout_mode the layout of the text
+
+ \returns the text of the page in the specified rect or in the whole page
+
+ \since 0.16
+ */
+ustring page::text(const rectf &r, text_layout_enum layout_mode) const
+{
std::auto_ptr<GooString> s;
- TextOutputDev td(0, gFalse, gFalse, gFalse);
+ const GBool use_raw_order = (layout_mode == raw_order_layout);
+ TextOutputDev td(0, gFalse, use_raw_order, gFalse);
d->doc->doc->displayPage(&td, d->index + 1, 72, 72, 0, false, true, false);
if (r.is_empty()) {
const PDFRectangle *rect = d->page->getCropBox();
diff --git a/cpp/poppler-page.h b/cpp/poppler-page.h
index 89fdea6..7b4298a 100644
--- a/cpp/poppler-page.h
+++ b/cpp/poppler-page.h
@@ -44,6 +44,10 @@ public:
search_next_result,
search_previous_result
};
+ enum text_layout_enum {
+ physical_layout,
+ raw_order_layout
+ };
~page();
@@ -57,6 +61,7 @@ public:
bool search(const ustring &text, rectf &r, search_direction_enum direction,
case_sensitivity_enum case_sensitivity, rotation_enum rotation = rotate_0) const;
ustring text(const rectf &rect = rectf()) const;
+ ustring text(const rectf &rect, text_layout_enum layout_mode) const;
private:
page(document_private *doc, int index);
More information about the poppler
mailing list