[PATCH 2/3] Separate HelpIndexer into header, implementation, and main

Gert van Valkenhoef g.h.m.van.valkenhoef at rug.nl
Tue Feb 14 11:19:37 PST 2012


---
 l10ntools/source/help/HelpIndexer.cxx      |  123 ++++++++++++++
 l10ntools/source/help/HelpIndexer.hxx      |   71 ++++++++
 l10ntools/source/help/HelpIndexer_main.cxx |   66 ++++++++
 l10ntools/source/help/helpindexer.cxx      |  247 ----------------------------
 l10ntools/source/help/makefile.mk          |    8 +-
 5 files changed, 265 insertions(+), 250 deletions(-)
 create mode 100644 l10ntools/source/help/HelpIndexer.cxx
 create mode 100644 l10ntools/source/help/HelpIndexer.hxx
 create mode 100644 l10ntools/source/help/HelpIndexer_main.cxx
 delete mode 100644 l10ntools/source/help/helpindexer.cxx

diff --git a/l10ntools/source/help/HelpIndexer.cxx b/l10ntools/source/help/HelpIndexer.cxx
new file mode 100644
index 0000000..ed0ce39
--- /dev/null
+++ b/l10ntools/source/help/HelpIndexer.cxx
@@ -0,0 +1,123 @@
+#include "HelpIndexer.hxx"
+
+#define TODO
+
+#ifdef TODO
+#include <CLucene/analysis/LanguageBasedAnalyzer.h>
+#endif
+
+#include <unistd.h>
+#include <sys/stat.h>
+#include <dirent.h>
+#include <errno.h>
+#include <string.h>
+
+#include <algorithm>
+
+using namespace lucene::document;
+
+HelpIndexer::HelpIndexer(std::string const &lang, std::string const &module,
+	std::string const &captionDir, std::string const &contentDir, std::string const &indexDir) :
+d_lang(lang), d_module(module), d_captionDir(captionDir), d_contentDir(contentDir), d_indexDir(indexDir), d_error(""), d_files() {}
+
+bool HelpIndexer::indexDocuments() {
+	// Builds the index in d_indexDir from the files found by scanForFiles(); returns false if scanning or a document fails.
+	if (!scanForFiles()) {
+		return false;
+	}
+#ifdef TODO
+	// Construct the analyzer appropriate for the given language: the "cjk" LanguageBasedAnalyzer for "ja", StandardAnalyzer otherwise
+	lucene::analysis::Analyzer *analyzer = (
+		d_lang.compare("ja") == 0 ?
+		(lucene::analysis::Analyzer*)new lucene::analysis::LanguageBasedAnalyzer(L"cjk") :
+		(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
+#else
+	lucene::analysis::Analyzer *analyzer = (
+		(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
+#endif
+	// Open a writer on d_indexDir with that analyzer; the 'true' argument presumably requests a newly created index (TODO confirm against the CLucene API)
+	lucene::index::IndexWriter writer(d_indexDir.c_str(), analyzer, true);
+
+	// Index the identified help files, reusing a single Document that is cleared before each file
+	Document doc;
+	for (std::set<std::string>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
+		doc.clear();
+		if (!helpDocument(*i, &doc)) {
+			delete analyzer;
+			return false;
+		}
+		writer.addDocument(&doc);
+	}
+
+	// Optimize the index
+	writer.optimize();
+	// NOTE(review): the analyzer is deleted while 'writer' stays alive until the function returns; confirm the IndexWriter destructor does not touch it
+	delete analyzer;
+	return true;
+}
+
+std::string const & HelpIndexer::getErrorMessage() {
+	return this->d_error; // reference to the stored error text; empty while nothing has been recorded
+}
+
+bool HelpIndexer::scanForFiles() {
+	// Scan the content directory first and the caption directory second; both scans
+	// add to the same d_files set, and the caption scan is skipped if the first one fails.
+	bool const contentScanned = scanForFiles(d_contentDir);
+	if (!contentScanned) {
+		return false;
+	}
+	return scanForFiles(d_captionDir);
+}
+
+bool HelpIndexer::scanForFiles(std::string const & path) {
+	// Adds the name of every regular file directly inside 'path' to d_files.
+	// Returns false, with d_error describing the failure, if the directory cannot be opened.
+	DIR *dir = opendir(path.c_str());
+	if (dir == 0) {
+		d_error = "Error reading directory " + path + ": " + strerror(errno);
+		return false;
+	}
+	struct dirent *ent;
+	struct stat info;
+	while ((ent = readdir(dir)) != 0) {
+		// Entries that cannot be stat'ed or are not regular files (e.g. "." and "..") are skipped.
+		if (stat((path + "/" + ent->d_name).c_str(), &info) == 0 && S_ISREG(info.st_mode)) {
+			d_files.insert(ent->d_name);
+		}
+	}
+	closedir(dir);
+	return true;
+}
+
+bool HelpIndexer::helpDocument(std::string const & fileName, Document *doc) {
+	// Fills 'doc' with the path, caption and content fields for one help file; this implementation always returns true.
+	// Add the help path ("#HLP#<module>/<fileName>") as a stored, indexed, untokenized field.
+	std::wstring path(L"#HLP#" + string2wstring(d_module) + L"/" + string2wstring(fileName));
+	doc->add(*new Field(_T("path"), path.c_str(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
+
+	// Add the caption as a tokenized field that is not stored; helpFileReader supplies an empty reader if the file is unreadable.
+	std::string captionPath = d_captionDir + "/" + fileName;
+	doc->add(*new Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
+	// FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
+
+	// Add the content as a tokenized field that is not stored, read the same way from the content directory.
+	std::string contentPath = d_contentDir + "/" + fileName;
+	doc->add(*new Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
+	// FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
+	return true;
+}
+
+lucene::util::Reader *HelpIndexer::helpFileReader(std::string const & path) {
+	// A file that is not readable is represented by a reader over empty text instead of failing.
+	if (access(path.c_str(), R_OK) != 0) {
+		return new lucene::util::StringReader(L"");
+	}
+	return new lucene::util::FileReader(path.c_str(), "UTF-8");
+}
+
+std::wstring HelpIndexer::string2wstring(std::string const &source) {
+	// Element-wise char -> wchar_t conversion; no character-set decoding is performed.
+	std::wstring widened(source.begin(), source.end());
+	return widened;
+}
diff --git a/l10ntools/source/help/HelpIndexer.hxx b/l10ntools/source/help/HelpIndexer.hxx
new file mode 100644
index 0000000..56122e7
--- /dev/null
+++ b/l10ntools/source/help/HelpIndexer.hxx
@@ -0,0 +1,71 @@
+#ifndef HELPINDEXER_HXX
+#define HELPINDEXER_HXX
+
+#include <CLucene/StdHeader.h>
+#include <CLucene.h>
+
+#include <string>
+#include <set>
+
+// I assume that TCHAR is defined as wchar_t throughout
+
+class HelpIndexer {
+	private:
+		std::string d_lang; // language code; "ja" selects the CJK analyzer in indexDocuments()
+		std::string d_module; // module name, used to build the "#HLP#<module>/<file>" path field
+		std::string d_captionDir; // directory holding the caption files
+		std::string d_contentDir; // directory holding the content files
+		std::string d_indexDir; // directory the index is written to
+		std::string d_error; // error text, set when a directory cannot be opened
+		std::set<std::string> d_files; // names of the regular files found by scanForFiles()
+
+	public:
+
+	/**
+	 * @param lang Help files language.
+	 * @param module The module of the helpfiles.
+	 * @param captionDir The directory to scan for caption files.
+	 * @param contentDir The directory to scan for content files.
+	 * @param indexDir The directory to write the index to.
+	 */
+	HelpIndexer(std::string const &lang, std::string const &module,
+		std::string const &captionDir, std::string const &contentDir,
+		std::string const &indexDir);
+
+	/**
+	 * Run the indexer: scan both directories, then add one document per file name found.
+	 * @return true if index successfully generated.
+	 */
+	bool indexDocuments();
+
+	/**
+	 * Get the error string (empty if no error occurred).
+	 */
+	std::string const & getErrorMessage();
+
+	private:
+
+	/**
+	 * Scan the content directory and then the caption directory for help files.
+	 */
+	bool scanForFiles();
+
+	/**
+	 * Scan for regular files in the given directory and add their names to d_files.
+	 */
+	bool scanForFiles(std::string const &path);
+
+	/**
+	 * Fill the Document with the path, caption and content fields of the given help file.
+	 */
+	bool helpDocument(std::string const & fileName, lucene::document::Document *doc);
+
+	/**
+	 * Create a reader for the given file, and create an "empty" reader in case the file isn't readable.
+	 */
+	lucene::util::Reader *helpFileReader(std::string const & path);
+
+	std::wstring string2wstring(std::string const &source); // widens each char to wchar_t; no charset decoding
+};
+
+#endif
diff --git a/l10ntools/source/help/HelpIndexer_main.cxx b/l10ntools/source/help/HelpIndexer_main.cxx
new file mode 100644
index 0000000..a1dd50b
--- /dev/null
+++ b/l10ntools/source/help/HelpIndexer_main.cxx
@@ -0,0 +1,66 @@
+#include "HelpIndexer.hxx"
+
+#include <string>
+#include <iostream>
+
+int main(int argc, char **argv) {
+	// Values of the four mandatory options; each stays empty until it is seen on the command line.
+	std::string lang;
+	std::string module;
+	std::string srcDir;
+	std::string outDir;
+
+	// Walk over the arguments. Every recognised option consumes the argument that follows it
+	// as its value; if an option is repeated, the last value wins.
+	bool error = false;
+	int i = 1;
+	while (i < argc) {
+		std::string const option(argv[i]);
+		// Select the variable that receives the value of this option (0 if it is unknown).
+		std::string *value = 0;
+		if (option == "-lang") {
+			value = &lang;
+		} else if (option == "-mod") {
+			value = &module;
+		} else if (option == "-zipdir") {
+			value = &outDir;
+		} else if (option == "-srcdir") {
+			value = &srcDir;
+		}
+
+		if (value != 0 && i + 1 < argc) {
+			// Known option with a value: store it and skip both arguments.
+			*value = argv[i + 1];
+			i += 2;
+		} else {
+			// Unknown option, or a known option that is the last argument and so has no value.
+			error = true;
+			++i;
+		}
+	}
+
+	if (error) {
+		std::cerr << "Error parsing command-line arguments" << std::endl;
+	}
+
+	// A parse error or any missing option prints the usage text and ends with exit status 1.
+	bool const incomplete = lang.empty() || module.empty() || srcDir.empty() || outDir.empty();
+	if (error || incomplete) {
+		std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -srcdir SourceDir -zipdir OutputDir" << std::endl;
+		return 1;
+	}
+
+	// Captions and contents are read from sub-directories of the source directory; the index
+	// is written to "<module>.idxl" inside the output directory.
+	std::string const captionDir = srcDir + "/caption";
+	std::string const contentDir = srcDir + "/content";
+	std::string const indexDir = outDir + "/" + module + ".idxl";
+	HelpIndexer indexer(lang, module, captionDir, contentDir, indexDir);
+
+	// Exit status 0 on success; on failure the indexer's message goes to stderr and the status is 2.
+	if (indexer.indexDocuments()) {
+		return 0;
+	}
+	std::cerr << indexer.getErrorMessage() << std::endl;
+	return 2;
+}
diff --git a/l10ntools/source/help/helpindexer.cxx b/l10ntools/source/help/helpindexer.cxx
deleted file mode 100644
index c327119..0000000
--- a/l10ntools/source/help/helpindexer.cxx
+++ /dev/null
@@ -1,247 +0,0 @@
-#include <CLucene/StdHeader.h>
-#include <CLucene.h>
-#ifdef TODO
-#include <CLucene/analysis/LanguageBasedAnalyzer.h>
-#endif
-
-#include <unistd.h>
-#include <sys/stat.h>
-#include <dirent.h>
-#include <errno.h>
-#include <string.h>
-
-#include <string>
-#include <iostream>
-#include <algorithm>
-#include <set>
-
-// I assume that TCHAR is defined as wchar_t throughout
-
-using namespace lucene::document;
-
-class HelpIndexer {
-	private:
-		std::string d_lang;
-		std::string d_module;
-		std::string d_captionDir;
-		std::string d_contentDir;
-		std::string d_indexDir;
-		std::string d_error;
-		std::set<std::string> d_files;
-
-	public:
-
-	/**
-	 * @param lang Help files language.
-	 * @param module The module of the helpfiles.
-	 * @param captionDir The directory to scan for caption files.
-	 * @param contentDir The directory to scan for content files.
-	 * @param indexDir The directory to write the index to.
-	 */
-	HelpIndexer(std::string const &lang, std::string const &module,
-		std::string const &captionDir, std::string const &contentDir,
-		std::string const &indexDir);
-
-	/**
-	 * Run the indexer.
-	 * @return true if index successfully generated.
-	 */
-	bool indexDocuments();
-
-	/**
-	 * Get the error string (empty if no error occurred).
-	 */
-	std::string const & getErrorMessage();
-
-	private:
-
-	/**
-	 * Scan the caption & contents directories for help files.
-	 */
-	bool scanForFiles();
-
-	/**
-	 * Scan for files in the given directory.
-	 */
-	bool scanForFiles(std::string const &path);
-
-	/**
-	 * Fill the Document with information on the given help file.
-	 */
-	bool helpDocument(std::string const & fileName, Document *doc);
-
-	/**
-	 * Create a reader for the given file, and create an "empty" reader in case the file doesn't exist.
-	 */
-	lucene::util::Reader *helpFileReader(std::string const & path);
-
-	std::wstring string2wstring(std::string const &source);
-};
-
-HelpIndexer::HelpIndexer(std::string const &lang, std::string const &module,
-	std::string const &captionDir, std::string const &contentDir, std::string const &indexDir) :
-d_lang(lang), d_module(module), d_captionDir(captionDir), d_contentDir(contentDir), d_indexDir(indexDir), d_error(""), d_files() {}
-
-bool HelpIndexer::indexDocuments() {
-	if (!scanForFiles()) {
-		return false;
-	}
-
-#ifdef TODO
-	// Construct the analyzer appropriate for the given language
-	lucene::analysis::Analyzer *analyzer = (
-		d_lang.compare("ja") == 0 ?
-		(lucene::analysis::Analyzer*)new lucene::analysis::LanguageBasedAnalyzer(L"cjk") :
-		(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
-#else
-	lucene::analysis::Analyzer *analyzer = (
-		(lucene::analysis::Analyzer*)new lucene::analysis::standard::StandardAnalyzer());
-#endif
-
-	lucene::index::IndexWriter writer(d_indexDir.c_str(), analyzer, true);
-
-	// Index the identified help files
-	Document doc;
-	for (std::set<std::string>::iterator i = d_files.begin(); i != d_files.end(); ++i) {
-		doc.clear();
-		if (!helpDocument(*i, &doc)) {
-			delete analyzer;
-			return false;
-		}
-		writer.addDocument(&doc);
-	}
-
-	// Optimize the index
-	writer.optimize();
-
-	delete analyzer;
-	return true;
-}
-
-std::string const & HelpIndexer::getErrorMessage() {
-	return d_error;
-}
-
-bool HelpIndexer::scanForFiles() {
-	if (!scanForFiles(d_contentDir)) {
-		return false;
-	}
-	if (!scanForFiles(d_captionDir)) {
-		return false;
-	}
-	return true;
-}
-
-bool HelpIndexer::scanForFiles(std::string const & path) {
-	DIR *dir = opendir(path.c_str());
-	if (dir == 0) {
-		d_error = "Error reading directory " + path + strerror(errno);
-		return true;
-	}
-
-	struct dirent *ent;
-	struct stat info;
-	while ((ent = readdir(dir)) != 0) {
-		if (stat((path + "/" + ent->d_name).c_str(), &info) == 0 && S_ISREG(info.st_mode)) {
-			d_files.insert(ent->d_name);
-		}
-	}
-
-	closedir(dir);
-
-	return true;
-}
-
-bool HelpIndexer::helpDocument(std::string const & fileName, Document *doc) {
-	// Add the help path as an indexed, untokenized field.
-	std::wstring path(L"#HLP#" + string2wstring(d_module) + L"/" + string2wstring(fileName));
-	doc->add(*new Field(_T("path"), path.c_str(), Field::STORE_YES | Field::INDEX_UNTOKENIZED));
-
-	// Add the caption as a field.
-	std::string captionPath = d_captionDir + "/" + fileName;
-	doc->add(*new Field(_T("caption"), helpFileReader(captionPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
-	// FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
-
-	// Add the content as a field.
-	std::string contentPath = d_contentDir + "/" + fileName;
-	doc->add(*new Field(_T("content"), helpFileReader(contentPath), Field::STORE_NO | Field::INDEX_TOKENIZED));
-	// FIXME: does the Document take responsibility for the FileReader or should I free it somewhere?
-
-	return true;
-}
-
-lucene::util::Reader *HelpIndexer::helpFileReader(std::string const & path) {
-	if (access(path.c_str(), R_OK) == 0) {
-		return new lucene::util::FileReader(path.c_str(), "UTF-8");
-	} else {
-		return new lucene::util::StringReader(L"");
-	}
-}
-
-std::wstring HelpIndexer::string2wstring(std::string const &source) {
-	std::wstring target(source.length(), L' ');
-	std::copy(source.begin(), source.end(), target.begin());
-	return target;
-}
-
-int main(int argc, char **argv) {
-	const std::string pLang("-lang");
-	const std::string pModule("-mod");
-	const std::string pOutDir("-zipdir");
-	const std::string pSrcDir("-srcdir");
-
-	std::string lang;
-	std::string module;
-	std::string srcDir;
-	std::string outDir;
-
-	bool error = false;
-	for (int i = 1; i < argc; ++i) {
-		if (pLang.compare(argv[i]) == 0) {
-			if (i + 1 < argc) {
-				lang = argv[++i];
-			} else {
-				error = true;
-			}
-		} else if (pModule.compare(argv[i]) == 0) {
-			if (i + 1 < argc) {
-				module = argv[++i];
-			} else {
-				error = true;
-			}
-		} else if (pOutDir.compare(argv[i]) == 0) {
-			if (i + 1 < argc) {
-				outDir = argv[++i];
-			} else {
-				error = true;
-			}
-		} else if (pSrcDir.compare(argv[i]) == 0) {
-			if (i + 1 < argc) {
-				srcDir = argv[++i];
-			} else {
-				error = true;
-			}
-		} else {
-			error = true;
-		}
-	}
-
-	if (error) {
-		std::cerr << "Error parsing command-line arguments" << std::endl;
-	}
-
-	if (error || lang.empty() || module.empty() || srcDir.empty() || outDir.empty()) {
-		std::cerr << "Usage: HelpIndexer -lang ISOLangCode -mod HelpModule -srcdir SourceDir -zipdir OutputDir" << std::endl;
-		return 1;
-	}
-
-	std::string captionDir(srcDir + "/caption");
-	std::string contentDir(srcDir + "/content");
-	std::string indexDir(outDir + "/" + module + ".idxl");
-	HelpIndexer indexer(lang, module, captionDir, contentDir, indexDir);
-	if (!indexer.indexDocuments()) {
-		std::cerr << indexer.getErrorMessage() << std::endl;
-		return 2;
-	}
-	return 0;
-}
diff --git a/l10ntools/source/help/makefile.mk b/l10ntools/source/help/makefile.mk
index e22c6a3..1283535 100644
--- a/l10ntools/source/help/makefile.mk
+++ b/l10ntools/source/help/makefile.mk
@@ -60,7 +60,8 @@ SLOFILES=\
 EXCEPTIONSFILES=\
         $(OBJ)$/HelpLinker.obj \
         $(OBJ)$/HelpCompiler.obj \
-        $(OBJ)$/helpindexer.obj \
+        $(OBJ)$/HelpIndexer.obj \
+        $(OBJ)$/HelpIndexer_main.obj \
         $(SLO)$/HelpLinker.obj \
         $(SLO)$/HelpCompiler.obj
 
@@ -74,7 +75,7 @@ NOOPTFILES=\
         $(SLO)$/HelpLinker.obj
 .ENDIF
 
-PKGCONFIG_MODULES=libclucene-core
+PKGCONFIG_MODULES=libclucene-core libclucene-contribs-lib
 .INCLUDE : pkg_config.mk
 
 APP1TARGET= $(TARGET)
@@ -86,7 +87,8 @@ APP1STDLIBS+=$(SALLIB) $(BERKELEYLIB) $(XSLTLIB) $(EXPATASCII3RDLIB)
 
 APP2TARGET=HelpIndexer
 APP2OBJS=\
-      $(OBJ)$/helpindexer.obj
+      $(OBJ)$/HelpIndexer.obj \
+      $(OBJ)$/HelpIndexer_main.obj
 APP2RPATH = NONE
 APP2STDLIBS+=$(SALLIB) $(PKGCONFIG_LIBS)
 
-- 
1.7.0.4


--------------000405030600050500020300
Content-Type: text/x-patch;
 name="core-0003-HelpIndexer-using-rtl-OUString-called-from-xmlhelp.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: attachment;
 filename*0="core-0003-HelpIndexer-using-rtl-OUString-called-from-xmlhelp";
 filename*1=".patch"



More information about the LibreOffice mailing list