[PATCH 2/3] Separate HelpIndexer into header, implementation, and main
Gert van Valkenhoef
g.h.m.van.valkenhoef at rug.nl
Tue Feb 14 11:19:37 PST 2012
---
l10ntools/source/help/HelpIndexer.cxx | 123 ++++++++++++++
l10ntools/source/help/HelpIndexer.hxx | 71 ++++++++
l10ntools/source/help/HelpIndexer_main.cxx | 66 ++++++++
l10ntools/source/help/helpindexer.cxx | 247 ----------------------------
l10ntools/source/help/makefile.mk | 8 +-
5 files changed, 265 insertions(+), 250 deletions(-)
create mode 100644 l10ntools/source/help/HelpIndexer.cxx
create mode 100644 l10ntools/source/help/HelpIndexer.hxx
create mode 100644 l10ntools/source/help/HelpIndexer_main.cxx
delete mode 100644 l10ntools/source/help/helpindexer.cxx
diff --git a/l10ntools/source/help/HelpIndexer.cxx b/l10ntools/source/help/HelpIndexer.cxx
new file mode 100644
index 0000000..ed0ce39
--- /dev/null
+++ b/l10ntools/source/help/HelpIndexer.cxx
@@ -0,0 +1,123 @@
+#include "HelpIndexer.hxx"
+
+#define TODO
+
+#ifdef TODO
+#include <CLucene/analysis/LanguageBasedAnalyzer.h>
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+
+#include <algorithm>
+
+using namespace lucene::document;
+
+/**
+ * Remember the indexing parameters. Nothing is read from or written to disk
+ * here; the error string and the set of help file names start out empty.
+ */
+HelpIndexer::HelpIndexer(std::string const &lang, std::string const &module,
+    std::string const &captionDir, std::string const &contentDir, std::string const &indexDir)
+    : d_lang(lang),
+      d_module(module),
+      d_captionDir(captionDir),
+      d_contentDir(contentDir),
+      d_indexDir(indexDir),
+      d_error(""),
+      d_files()
+{
+}
+
+/**
+ * Scan the caption and content directories, then write one index document
+ * per help file name found into the index at d_indexDir (the writer is
+ * opened with create == true) and optimize the index.
+ *
+ * @return true if every document was added and the index was optimized;
+ *         false if scanning failed, a document could not be built, or
+ *         CLucene raised an error (its message is then available through
+ *         getErrorMessage()).
+ */
+bool HelpIndexer::indexDocuments() {
+    if (!scanForFiles()) {
+        return false;
+    }
+
+#ifdef TODO
+    // Construct the analyzer appropriate for the given language
+    lucene::analysis::Analyzer *analyzer = (
+        d_lang.compare("ja") == 0 ?
+        (lucene::analysis::Analyzer*)new lucene::analysis::LanguageBasedAnalyzer(L"cjk") :
+        (lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
+#else
+    lucene::analysis::Analyzer *analyzer = (
+        (lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
+#endif
+
+    bool success = true;
+    try {
+        // The writer only borrows the analyzer. Keeping it inside this scope
+        // guarantees it is destroyed before the analyzer is deleted below.
+        lucene::index::IndexWriter writer(d_indexDir.c_str(), analyzer, true);
+
+        // Index the identified help files, reusing a single Document.
+        Document doc;
+        for (std::set<std::string>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
+            doc.clear();
+            if (!helpDocument(*i, &doc)) {
+                success = false;
+                break;
+            }
+            writer.addDocument(&doc);
+        }
+
+        // Optimize the index
+        if (success) {
+            writer.optimize();
+        }
+    } catch (CLuceneError &e) {
+        // Report CLucene failures through the bool/d_error contract instead
+        // of letting the exception escape (which also leaked the analyzer).
+        d_error = e.what();
+        success = false;
+    }
+
+    delete analyzer;
+    return success;
+}
+
+/**
+ * Return the stored error text by reference; it is empty until one of the
+ * indexing steps records a message in d_error.
+ */
+std::string const & HelpIndexer::getErrorMessage()
+{
+    return d_error;
+}
+
+/**
+ * Scan the content directory and then the caption directory. The caption
+ * directory is only scanned when the content scan reported success.
+ *
+ * @return true only if both scans returned true.
+ */
+bool HelpIndexer::scanForFiles() {
+    return scanForFiles(d_contentDir) && scanForFiles(d_captionDir);
+}
+
+/**
+ * Insert the name of every regular file directly inside `path` into d_files.
+ * Only the bare entry name is stored (not the full path), so a name present
+ * in both the caption and the content directory ends up in the set once.
+ *
+ * @param path Directory to list.
+ * @return Always true. When the directory cannot be opened, a message is
+ *         recorded in d_error and the scan is treated as having found
+ *         nothing.
+ */
+bool HelpIndexer::scanForFiles(std::string const & path) {
+    DIR *dir = opendir(path.c_str());
+    if (dir == 0) {
+        // Save errno first: building the message allocates, and library
+        // calls are allowed to modify errno even when they succeed.
+        const int openError = errno;
+        d_error = "Error reading directory " + path + ": " + strerror(openError);
+        // NOTE(review): this reports success although an error was recorded,
+        // so callers never print the message. It looks like a missing
+        // directory is meant to be tolerated - confirm, or return false.
+        return true;
+    }
+
+    struct dirent *ent;
+    struct stat info;
+    while ((ent = readdir(dir)) != 0) {
+        // Entries that cannot be stat'ed or are not regular files are skipped.
+        if (stat((path + "/" + ent->d_name).c_str(), &info) == 0 && S_ISREG(info.st_mode)) {
+            d_files.insert(ent->d_name);
+        }
+    }
+
+    closedir(dir);
+
+    return true;
+}
+
+/**
+ * Add three fields to `doc` for the help file `fileName`:
+ *   "path"    - stored, untokenized: "#HLP#" + module + "/" + fileName
+ *   "caption" - tokenized, not stored, read from d_captionDir/fileName
+ *   "content" - tokenized, not stored, read from d_contentDir/fileName
+ *
+ * @return Always true.
+ */
+bool HelpIndexer::helpDocument(std::string const & fileName, Document *doc) {
+    // Help path, indexed as a single untokenized term.
+    std::wstring const wideModule = string2wstring(d_module);
+    std::wstring const wideName = string2wstring(fileName);
+    std::wstring const helpPath = L"#HLP#" + wideModule + L"/" + wideName;
+    doc->add(*new Field(_T("path"), helpPath.c_str(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+
+    // FIXME: does the Document take responsibility for the readers handed to
+    // the two fields below, or should they be freed somewhere?
+
+    // Caption text.
+    lucene::util::Reader *captionReader = helpFileReader(d_captionDir + "/" + fileName);
+    doc->add(*new Field(_T("caption"), captionReader, Field::STORE_NO | Field::INDEX_TOKENIZED));
+
+    // Content text.
+    lucene::util::Reader *contentReader = helpFileReader(d_contentDir + "/" + fileName);
+    doc->add(*new Field(_T("content"), contentReader, Field::STORE_NO | Field::INDEX_TOKENIZED));
+
+    return true;
+}
+
+/**
+ * Return a newly allocated reader for `path`: a FileReader using "UTF-8"
+ * when access() grants read permission, otherwise a StringReader over an
+ * empty string.
+ */
+lucene::util::Reader *HelpIndexer::helpFileReader(std::string const & path) {
+    const bool readable = (access(path.c_str(), R_OK) == 0);
+    if (!readable) {
+        return new lucene::util::StringReader(L"");
+    }
+    return new lucene::util::FileReader(path.c_str(), "UTF-8");
+}
+
+/**
+ * Widen `source` element by element: each char is converted to one wchar_t.
+ * No character-set decoding is performed.
+ * NOTE(review): presumably only ASCII module and file names are expected;
+ * multi-byte input would not be decoded - confirm against callers.
+ */
+std::wstring HelpIndexer::string2wstring(std::string const &source) {
+    return std::wstring(source.begin(), source.end());
+}
diff --git a/l10ntools/source/help/HelpIndexer.hxx b/l10ntools/source/help/HelpIndexer.hxx
new file mode 100644
index 0000000..56122e7
--- /dev/null
+++ b/l10ntools/source/help/HelpIndexer.hxx
@@ -0,0 +1,71 @@
+#ifndef HELPINDEXER_HXX
+#define HELPINDEXER_HXX
+
+#include <CLucene/StdHeader.h>
+#include <CLucene.h>
+
+#include <string>
+#include <set>
+
+// I assume that TCHAR is defined as wchar_t throughout
+
+/**
+ * Builds a CLucene index for the help files of one module: file names are
+ * collected from a caption and a content directory, and one document per
+ * file name is written to the index directory.
+ */
+class HelpIndexer {
+ private:
+ std::string d_lang; // help files language, as passed to the constructor
+ std::string d_module; // help module name, used in the indexed "path" field
+ std::string d_captionDir; // directory holding the caption files
+ std::string d_contentDir; // directory holding the content files
+ std::string d_indexDir; // directory the index is written to
+ std::string d_error; // last recorded error message
+ std::set<std::string> d_files; // bare file names found by scanForFiles()
+
+ public:
+
+ /**
+ * @param lang Help files language.
+ * @param module The module of the helpfiles.
+ * @param captionDir The directory to scan for caption files.
+ * @param contentDir The directory to scan for content files.
+ * @param indexDir The directory to write the index to.
+ */
+ HelpIndexer(std::string const &lang, std::string const &module,
+ std::string const &captionDir, std::string const &contentDir,
+ std::string const &indexDir);
+
+ /**
+ * Run the indexer.
+ * @return true if index successfully generated.
+ */
+ bool indexDocuments();
+
+ /**
+ * Get the error string (empty if no error was recorded).
+ */
+ std::string const & getErrorMessage();
+
+ private:
+
+ /**
+ * Scan the caption & contents directories for help files.
+ * @return true if both directory scans returned true.
+ */
+ bool scanForFiles();
+
+ /**
+ * Scan for files in the given directory and add the names of the
+ * regular files found there to d_files.
+ */
+ bool scanForFiles(std::string const &path);
+
+ /**
+ * Fill the Document with information on the given help file.
+ */
+ bool helpDocument(std::string const & fileName, lucene::document::Document *doc);
+
+ /**
+ * Create a reader for the given file, and create an "empty" reader in case the file doesn't exist.
+ */
+ lucene::util::Reader *helpFileReader(std::string const & path);
+
+ /**
+ * Convert a narrow string to a wide string, one wchar_t per char.
+ */
+ std::wstring string2wstring(std::string const &source);
+};
+
+#endif
diff --git a/l10ntools/source/help/HelpIndexer_main.cxx b/l10ntools/source/help/HelpIndexer_main.cxx
new file mode 100644
index 0000000..a1dd50b
--- /dev/null
+++ b/l10ntools/source/help/HelpIndexer_main.cxx
@@ -0,0 +1,66 @@
+#include "HelpIndexer.hxx"
+
+#include <string>
+#include <iostream>
+
+/**
+ * Command-line entry point.
+ *
+ * Expects the switches -lang, -mod, -srcdir and -zipdir, each followed by a
+ * value. Captions are read from <srcdir>/caption, contents from
+ * <srcdir>/content, and the index is written to <zipdir>/<module>.idxl.
+ *
+ * @return 0 on success, 1 on a usage error, 2 if indexing failed.
+ */
+int main(int argc, char **argv) {
+    const std::string pLang("-lang");
+    const std::string pModule("-mod");
+    const std::string pOutDir("-zipdir");
+    const std::string pSrcDir("-srcdir");
+
+    std::string lang;
+    std::string module;
+    std::string srcDir;
+    std::string outDir;
+
+    bool error = false;
+    for (int i = 1; i < argc; ++i) {
+        // Pick the variable that receives the value of this switch.
+        std::string *value = 0;
+        if (pLang.compare(argv[i]) == 0) {
+            value = &lang;
+        } else if (pModule.compare(argv[i]) == 0) {
+            value = &module;
+        } else if (pOutDir.compare(argv[i]) == 0) {
+            value = &outDir;
+        } else if (pSrcDir.compare(argv[i]) == 0) {
+            value = &srcDir;
+        }
+
+        // Unknown switch, or a known switch with no value after it.
+        if (value == 0 || i + 1 >= argc) {
+            error = true;
+            continue;
+        }
+        *value = argv[++i];
+    }
+
+    if (error) {
+        std::cerr << "Error parsing command-line arguments" << std::endl;
+    }
+
+    const bool incomplete = lang.empty() || module.empty() || srcDir.empty() || outDir.empty();
+    if (error || incomplete) {
+        std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -srcdir SourceDir -zipdir OutputDir" << std::endl;
+        return 1;
+    }
+
+    const std::string captionDir = srcDir + "/caption";
+    const std::string contentDir = srcDir + "/content";
+    const std::string indexDir = outDir + "/" + module + ".idxl";
+
+    HelpIndexer indexer(lang, module, captionDir, contentDir, indexDir);
+    if (indexer.indexDocuments()) {
+        return 0;
+    }
+
+    std::cerr << indexer.getErrorMessage() << std::endl;
+    return 2;
+}
diff --git a/l10ntools/source/help/helpindexer.cxx b/l10ntools/source/help/helpindexer.cxx
deleted file mode 100644
index c327119..0000000
--- a/l10ntools/source/help/helpindexer.cxx
+++ /dev/null
@@ -1,247 +0,0 @@
-#include <CLucene/StdHeader.h>
-#include <CLucene.h>
-#ifdef TODO
-#include <CLucene/analysis/LanguageBasedAnalyzer.h>
-#endif
-
-#include <unistd.h>
-#include <sys/stat.h>
-#include <dirent.h>
-#include <errno.h>
-#include <string.h>
-
-#include <string>
-#include <iostream>
-#include <algorithm>
-#include <set>
-
-// I assume that TCHAR is defined as wchar_t throughout
-
-using namespace lucene::document;
-
-class HelpIndexer {
- private:
- std::string d_lang;
- std::string d_module;
- std::string d_captionDir;
- std::string d_contentDir;
- std::string d_indexDir;
- std::string d_error;
- std::set<std::string> d_files;
-
- public:
-
- /**
- * @param lang Help files language.
- * @param module The module of the helpfiles.
- * @param captionDir The directory to scan for caption files.
- * @param contentDir The directory to scan for content files.
- * @param indexDir The directory to write the index to.
- */
- HelpIndexer(std::string const &lang, std::string const &module,
- std::string const &captionDir, std::string const &contentDir,
- std::string const &indexDir);
-
- /**
- * Run the indexer.
- * @return true if index successfully generated.
- */
- bool indexDocuments();
-
- /**
- * Get the error string (empty if no error occurred).
- */
- std::string const & getErrorMessage();
-
- private:
-
- /**
- * Scan the caption & contents directories for help files.
- */
- bool scanForFiles();
-
- /**
- * Scan for files in the given directory.
- */
- bool scanForFiles(std::string const &path);
-
- /**
- * Fill the Document with information on the given help file.
- */
- bool helpDocument(std::string const & fileName, Document *doc);
-
- /**
- * Create a reader for the given file, and create an "empty" reader in case the file doesn't exist.
- */
- lucene::util::Reader *helpFileReader(std::string const & path);
-
- std::wstring string2wstring(std::string const &source);
-};
-
-HelpIndexer::HelpIndexer(std::string const &lang, std::string const &module,
- std::string const &captionDir, std::string const &contentDir, std::string const &indexDir) :
-d_lang(lang), d_module(module), d_captionDir(captionDir), d_contentDir(contentDir), d_indexDir(indexDir), d_error(""), d_files() {}
-
-bool HelpIndexer::indexDocuments() {
- if (!scanForFiles()) {
- return false;
- }
-
-#ifdef TODO
- // Construct the analyzer appropriate for the given language
- lucene::analysis::Analyzer *analyzer = (
- d_lang.compare("ja") == 0 ?
- (lucene::analysis::Analyzer*)new lucene::analysis::LanguageBasedAnalyzer(L"cjk") :
- (lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
-#else
- lucene::analysis::Analyzer *analyzer = (
- (lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
-#endif
-
- lucene::index::IndexWriter writer(d_indexDir.c_str(), analyzer, true);
-
- // Index the identified help files
- Document doc;
- for (std::set<std::string>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
- doc.clear();
- if (!helpDocument(*i, &doc)) {
- delete analyzer;
- return false;
- }
- writer.addDocument(&doc);
- }
-
- // Optimize the index
- writer.optimize();
-
- delete analyzer;
- return true;
-}
-
-std::string const & HelpIndexer::getErrorMessage() {
- return d_error;
-}
-
-bool HelpIndexer::scanForFiles() {
- if (!scanForFiles(d_contentDir)) {
- return false;
- }
- if (!scanForFiles(d_captionDir)) {
- return false;
- }
- return true;
-}
-
-bool HelpIndexer::scanForFiles(std::string const & path) {
- DIR *dir = opendir(path.c_str());
- if (dir == 0) {
- d_error = "Error reading directory " + path + strerror(errno);
- return true;
- }
-
- struct dirent *ent;
- struct stat info;
- while ((ent = readdir(dir)) != 0) {
- if (stat((path + "/" + ent->d_name).c_str(), &info) == 0 && S_ISREG(info.st_mode)) {
- d_files.insert(ent->d_name);
- }
- }
-
- closedir(dir);
-
- return true;
-}
-
-bool HelpIndexer::helpDocument(std::string const & fileName, Document *doc) {
- // Add the help path as an indexed, untokenized field.
- std::wstring path(L"#HLP#" + string2wstring(d_module) + L"/" + string2wstring(fileName));
- doc->add(*new Field(_T("path"), path.c_str(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
-
- // Add the caption as a field.
- std::string captionPath = d_captionDir + "/" + fileName;
- doc->add(*new Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
- // FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
-
- // Add the content as a field.
- std::string contentPath = d_contentDir + "/" + fileName;
- doc->add(*new Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
- // FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
-
- return true;
-}
-
-lucene::util::Reader *HelpIndexer::helpFileReader(std::string const & path) {
- if (access(path.c_str(), R_OK) == 0) {
- return new lucene::util::FileReader(path.c_str(), "UTF-8");
- } else {
- return new lucene::util::StringReader(L"");
- }
-}
-
-std::wstring HelpIndexer::string2wstring(std::string const &source) {
- std::wstring target(source.length(), L' ');
- std::copy(source.begin(), source.end(), target.begin());
- return target;
-}
-
-int main(int argc, char **argv) {
- const std::string pLang("-lang");
- const std::string pModule("-mod");
- const std::string pOutDir("-zipdir");
- const std::string pSrcDir("-srcdir");
-
- std::string lang;
- std::string module;
- std::string srcDir;
- std::string outDir;
-
- bool error = false;
- for (int i = 1; i < argc; ++i) {
- if (pLang.compare(argv[i]) == 0) {
- if (i + 1 < argc) {
- lang = argv[++i];
- } else {
- error = true;
- }
- } else if (pModule.compare(argv[i]) == 0) {
- if (i + 1 < argc) {
- module = argv[++i];
- } else {
- error = true;
- }
- } else if (pOutDir.compare(argv[i]) == 0) {
- if (i + 1 < argc) {
- outDir = argv[++i];
- } else {
- error = true;
- }
- } else if (pSrcDir.compare(argv[i]) == 0) {
- if (i + 1 < argc) {
- srcDir = argv[++i];
- } else {
- error = true;
- }
- } else {
- error = true;
- }
- }
-
- if (error) {
- std::cerr << "Error parsing command-line arguments" << std::endl;
- }
-
- if (error || lang.empty() || module.empty() || srcDir.empty() || outDir.empty()) {
- std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -srcdir SourceDir -zipdir OutputDir" << std::endl;
- return 1;
- }
-
- std::string captionDir(srcDir + "/caption");
- std::string contentDir(srcDir + "/content");
- std::string indexDir(outDir + "/" + module + ".idxl");
- HelpIndexer indexer(lang, module, captionDir, contentDir, indexDir);
- if (!indexer.indexDocuments()) {
- std::cerr << indexer.getErrorMessage() << std::endl;
- return 2;
- }
- return 0;
-}
diff --git a/l10ntools/source/help/makefile.mk b/l10ntools/source/help/makefile.mk
index e22c6a3..1283535 100644
--- a/l10ntools/source/help/makefile.mk
+++ b/l10ntools/source/help/makefile.mk
@@ -60,7 +60,8 @@ SLOFILES=\
EXCEPTIONSFILES=\
$(OBJ)$/HelpLinker.obj \
$(OBJ)$/HelpCompiler.obj \
- $(OBJ)$/helpindexer.obj \
+ $(OBJ)$/HelpIndexer.obj \
+ $(OBJ)$/HelpIndexer_main.obj \
$(SLO)$/HelpLinker.obj \
$(SLO)$/HelpCompiler.obj
@@ -74,7 +75,7 @@ NOOPTFILES=\
$(SLO)$/HelpLinker.obj
.ENDIF
-PKGCONFIG_MODULES=libclucene-core
+PKGCONFIG_MODULES=libclucene-core libclucene-contribs-lib
.INCLUDE : pkg_config.mk
APP1TARGET= $(TARGET)
@@ -86,7 +87,8 @@ APP1STDLIBS+=$(SALLIB) $(BERKELEYLIB) $(XSLTLIB) $(EXPATASCII3RDLIB)
APP2TARGET=HelpIndexer
APP2OBJS=\
- $(OBJ)$/helpindexer.obj
+ $(OBJ)$/HelpIndexer.obj \
+ $(OBJ)$/HelpIndexer_main.obj
APP2RPATH = NONE
APP2STDLIBS+=$(SALLIB) $(PKGCONFIG_LIBS)
--
1.7.0.4
--------------000405030600050500020300
Content-Type: text/x-patch;
name="core-0003-HelpIndexer-using-rtl-OUString-called-from-xmlhelp.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
filename*0="core-0003-HelpIndexer-using-rtl-OUString-called-from-xmlhelp";
filename*1=".patch"
More information about the LibreOffice
mailing list