[Libreoffice-commits] core.git: sw/CppunitTest_sw_txtimport.mk sw/CppunitTest_sw_uwriter.mk sw/Library_sw.mk sw/Module_sw.mk sw/qa sw/source
Tomofumi Yagi (via logerrit)
logerrit at kemper.freedesktop.org
Wed Sep 30 08:19:04 UTC 2020
sw/CppunitTest_sw_txtimport.mk | 63 ++++++++
sw/CppunitTest_sw_uwriter.mk | 2
sw/Library_sw.mk | 1
sw/Module_sw.mk | 1
sw/qa/extras/txtexport/txtexport.cxx | 97 ++-----------
sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt |binary
sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt |binary
sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt |binary
sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt |binary
sw/qa/extras/txtimport/data/UTF8WITHBOM.txt | 2
sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt | 2
sw/qa/extras/txtimport/data/bullets.odt |binary
sw/qa/extras/txtimport/txtimport.cxx | 155 ++++++++++++++++++++++
sw/source/filter/basflt/iodetect.cxx | 27 +++
14 files changed, 269 insertions(+), 81 deletions(-)
New commits:
commit ef77a256de527f6d00212839e55f949024f2e7bc
Author: Tomofumi Yagi <yagitmknada at gmail.com>
AuthorDate: Wed Sep 16 18:11:22 2020 +0900
Commit: Michael Stahl <michael.stahl at cib.de>
CommitDate: Wed Sep 30 10:18:23 2020 +0200
tdf#60145 sw: fix UTF-8 encoding without BOM is not detected
Writer can now detect the Unicode encoding of an imported text file even if
the file does not have a BOM.
Change-Id: I70fa4145672d855f9ef9df040a930ff5d1564905
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/102884
Tested-by: Jenkins
Reviewed-by: Eike Rathke <erack at redhat.com>
Reviewed-by: Michael Stahl <michael.stahl at cib.de>
diff --git a/sw/CppunitTest_sw_txtimport.mk b/sw/CppunitTest_sw_txtimport.mk
new file mode 100644
index 000000000000..646a2ea83391
--- /dev/null
+++ b/sw/CppunitTest_sw_txtimport.mk
@@ -0,0 +1,63 @@
+# -*- Mode: makefile-gmake; tab-width: 4; indent-tabs-mode: t -*-
+#*************************************************************************
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+#*************************************************************************
+
+$(eval $(call gb_CppunitTest_CppunitTest,sw_txtimport))
+
+$(eval $(call gb_CppunitTest_use_common_precompiled_header,sw_txtimport))
+
+$(eval $(call gb_CppunitTest_add_exception_objects,sw_txtimport, \
+ sw/qa/extras/txtimport/txtimport \
+))
+
+$(eval $(call gb_CppunitTest_use_libraries,sw_txtimport, \
+ comphelper \
+ cppu \
+ cppuhelper \
+ i18nlangtag \
+ sal \
+ sfx \
+ sw \
+ swqahelper \
+ test \
+ tl \
+ unotest \
+ utl \
+ vcl \
+ $(gb_UWINAPI) \
+))
+
+$(eval $(call gb_CppunitTest_use_externals,sw_txtimport,\
+ boost_headers \
+ libxml2 \
+))
+
+$(eval $(call gb_CppunitTest_set_include,sw_txtimport,\
+ -I$(SRCDIR)/sw/inc \
+ -I$(SRCDIR)/sw/source/core/inc \
+ -I$(SRCDIR)/sw/source/uibase/inc \
+ -I$(SRCDIR)/sw/qa/inc \
+ $$(INCLUDE) \
+))
+
+$(eval $(call gb_CppunitTest_use_api,sw_txtimport,\
+ udkapi \
+ offapi \
+ oovbaapi \
+))
+
+$(eval $(call gb_CppunitTest_use_ure,sw_txtimport))
+$(eval $(call gb_CppunitTest_use_vcl,sw_txtimport))
+
+$(eval $(call gb_CppunitTest_use_rdb,sw_txtimport,services))
+
+$(eval $(call gb_CppunitTest_use_configuration,sw_txtimport))
+
+# vim: set noet sw=4 ts=4:
diff --git a/sw/CppunitTest_sw_uwriter.mk b/sw/CppunitTest_sw_uwriter.mk
index 6b9ffa4ba683..e6a490c5dff1 100644
--- a/sw/CppunitTest_sw_uwriter.mk
+++ b/sw/CppunitTest_sw_uwriter.mk
@@ -64,7 +64,9 @@ $(eval $(call gb_CppunitTest_use_libraries,sw_uwriter, \
$(eval $(call gb_CppunitTest_use_externals,sw_uwriter,\
boost_headers \
+ icui18n \
icuuc \
+ icu_headers \
libxml2 \
))
diff --git a/sw/Library_sw.mk b/sw/Library_sw.mk
index 2de7d6b0e4dc..65f4154bf12e 100644
--- a/sw/Library_sw.mk
+++ b/sw/Library_sw.mk
@@ -87,6 +87,7 @@ $(eval $(call gb_Library_use_libraries,sw,\
$(eval $(call gb_Library_use_externals,sw,\
boost_headers \
+ icui18n \
icuuc \
icu_headers \
libxml2 \
diff --git a/sw/Module_sw.mk b/sw/Module_sw.mk
index ec6e73fd2779..2f8a9a35ed01 100644
--- a/sw/Module_sw.mk
+++ b/sw/Module_sw.mk
@@ -97,6 +97,7 @@ $(eval $(call gb_Module_add_slowcheck_targets,sw,\
CppunitTest_sw_odfexport \
CppunitTest_sw_odfimport \
CppunitTest_sw_txtexport \
+ CppunitTest_sw_txtimport \
$(if $(filter-out MACOSX,$(OS)), \
CppunitTest_sw_uiwriter \
) \
diff --git a/sw/qa/extras/txtexport/txtexport.cxx b/sw/qa/extras/txtexport/txtexport.cxx
index d7246d05aced..0151ee289cc3 100644
--- a/sw/qa/extras/txtexport/txtexport.cxx
+++ b/sw/qa/extras/txtexport/txtexport.cxx
@@ -15,42 +15,13 @@
#include <unotxdoc.hxx>
#include <docsh.hxx>
-class TxtImportTest : public SwModelTestBase
+class TxtExportTest : public SwModelTestBase
{
public:
- TxtImportTest() :
- SwModelTestBase("/sw/qa/extras/txtexport/data/", "Text")
- {}
-
- // Export & assert part of the document (defined by SwPaM).
- void assertExportedRange(const OString& aExpected, SwPaM& rPaM)
+ TxtExportTest()
+ : SwModelTestBase("/sw/qa/extras/txtexport/data/", "Text")
{
- WriterRef rAsciiWriter;
- SwReaderWriter::GetWriter(FILTER_TEXT, OUString(), rAsciiWriter);
- CPPUNIT_ASSERT(rAsciiWriter.is());
-
- // no start char
- rAsciiWriter->m_bUCS2_WithStartChar = false;
-
- SvMemoryStream aMemoryStream;
-
- SwWriter aWriter(aMemoryStream, rPaM);
- ErrCode nError = aWriter.Write(rAsciiWriter);
- CPPUNIT_ASSERT_EQUAL(ERRCODE_NONE, nError);
-
- const char* pData = static_cast<const char*>(aMemoryStream.GetData());
- OString aResult(pData, aMemoryStream.GetSize());
-
- CPPUNIT_ASSERT_EQUAL(aExpected, aResult);
}
-};
-
-class TxtExportTest : public SwModelTestBase
-{
-public:
- TxtExportTest() :
- SwModelTestBase("/sw/qa/extras/txtexport/data/", "Text")
- {}
protected:
OString readExportedFile()
@@ -61,42 +32,35 @@ protected:
const char* pData = static_cast<const char*>(aMemoryStream.GetData());
int offset = 0;
- if (aMemoryStream.GetSize() > 2 && pData[0] == '\xEF' && pData[1] == '\xBB' && pData[2] == '\xBF')
+ if (aMemoryStream.GetSize() > 2 && pData[0] == '\xEF' && pData[1] == '\xBB'
+ && pData[2] == '\xBF')
offset = 3;
return OString(pData + offset, aMemoryStream.GetSize() - offset);
}
};
-#define DECLARE_TXTIMPORT_TEST(TestName, filename) DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, TxtImportTest)
-#define DECLARE_TXTEXPORT_TEST(TestName, filename) DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, TxtExportTest)
+#define DECLARE_TXTEXPORT_TEST(TestName, filename) \
+ DECLARE_SW_EXPORT_TEST(TestName, filename, nullptr, TxtExportTest)
DECLARE_TXTEXPORT_TEST(testBullets, "bullets.odt")
{
OString aData = readExportedFile();
OUString aString = OStringToOUString(
- "1 Heading 1" SAL_NEWLINE_STRING
- "1.A Heading 2" SAL_NEWLINE_STRING
- "Paragraph" SAL_NEWLINE_STRING
- "" SAL_NEWLINE_STRING
+ "1 Heading 1" SAL_NEWLINE_STRING "1.A Heading 2" SAL_NEWLINE_STRING
+ "Paragraph" SAL_NEWLINE_STRING "" SAL_NEWLINE_STRING
" \xe2\x80\xa2 First bullet" SAL_NEWLINE_STRING
" \xe2\x80\xa2 Second bullet" SAL_NEWLINE_STRING
" \xe2\x97\xa6 Sub-second bullet" SAL_NEWLINE_STRING
" Third bullet, but deleted" SAL_NEWLINE_STRING
- " \xe2\x80\xa2 Fourth bullet" SAL_NEWLINE_STRING
- "" SAL_NEWLINE_STRING
- "Numbering" SAL_NEWLINE_STRING
- "" SAL_NEWLINE_STRING
- " 1. First" SAL_NEWLINE_STRING
- " 2. Second" SAL_NEWLINE_STRING
- " 1. Second-first" SAL_NEWLINE_STRING
- " Third, but deleted" SAL_NEWLINE_STRING
- " 3. Actual third" SAL_NEWLINE_STRING
- "" SAL_NEWLINE_STRING
- "Paragraph after numbering" SAL_NEWLINE_STRING
- "Next paragraph" SAL_NEWLINE_STRING
- "Final paragraph" SAL_NEWLINE_STRING, RTL_TEXTENCODING_UTF8);
+ " \xe2\x80\xa2 Fourth bullet" SAL_NEWLINE_STRING "" SAL_NEWLINE_STRING
+ "Numbering" SAL_NEWLINE_STRING "" SAL_NEWLINE_STRING " 1. First" SAL_NEWLINE_STRING
+ " 2. Second" SAL_NEWLINE_STRING " 1. Second-first" SAL_NEWLINE_STRING
+ " Third, but deleted" SAL_NEWLINE_STRING " 3. Actual third" SAL_NEWLINE_STRING
+ "" SAL_NEWLINE_STRING "Paragraph after numbering" SAL_NEWLINE_STRING
+ "Next paragraph" SAL_NEWLINE_STRING "Final paragraph" SAL_NEWLINE_STRING,
+ RTL_TEXTENCODING_UTF8);
// To get the stuff back in the system's encoding
OString aExpected(OUStringToOString(aString, osl_getThreadTextEncoding()));
@@ -104,35 +68,6 @@ DECLARE_TXTEXPORT_TEST(testBullets, "bullets.odt")
CPPUNIT_ASSERT_EQUAL(aExpected, aData);
}
-DECLARE_TXTIMPORT_TEST(testTdf112191, "bullets.odt")
-{
- SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument *>(mxComponent.get());
- CPPUNIT_ASSERT(pTextDoc);
- SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
- CPPUNIT_ASSERT(pDoc);
-
- // just the 5th paragraph - no bullet
- uno::Reference<text::XTextRange> xPara(getParagraph(5));
- SwUnoInternalPaM aPaM(*pDoc);
- bool bSuccess = sw::XTextRangeToSwPaM(aPaM, xPara);
- CPPUNIT_ASSERT(bSuccess);
-
- assertExportedRange("First bullet", aPaM);
-
- // but when we extend to the next paragraph - now there are bullets
- xPara = getParagraph(6);
- SwUnoInternalPaM aPaM2(*pDoc);
- bSuccess = sw::XTextRangeToSwPaM(aPaM2, xPara);
- CPPUNIT_ASSERT(bSuccess);
-
- OUString aString = OStringToOUString(
- " \xe2\x80\xa2 First bullet" SAL_NEWLINE_STRING
- " \xe2\x80\xa2 Second bullet", RTL_TEXTENCODING_UTF8);
-
- SwPaM aPaM3(*aPaM2.GetMark(), *aPaM.GetPoint());
- assertExportedRange(OUStringToOString(aString, osl_getThreadTextEncoding()), aPaM3);
-}
-
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt b/sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt
new file mode 100755
index 000000000000..2e0455704058
Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16BEWITHBOM.txt differ
diff --git a/sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt b/sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt
new file mode 100755
index 000000000000..0551ae547521
Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16BEWITHOUTBOM.txt differ
diff --git a/sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt b/sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt
new file mode 100755
index 000000000000..43ed8143ef9b
Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16LEWITHBOM.txt differ
diff --git a/sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt b/sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt
new file mode 100755
index 000000000000..d2cc98e785ce
Binary files /dev/null and b/sw/qa/extras/txtimport/data/UTF16LEWITHOUTBOM.txt differ
diff --git a/sw/qa/extras/txtimport/data/UTF8WITHBOM.txt b/sw/qa/extras/txtimport/data/UTF8WITHBOM.txt
new file mode 100755
index 000000000000..afd812fe6ff3
--- /dev/null
+++ b/sw/qa/extras/txtimport/data/UTF8WITHBOM.txt
@@ -0,0 +1,2 @@
+漢a'
+å½¼ã¯èå¾ã«ã²ãããªè¶³é³ãèãããããã¯ãã¾ãè¯ãæå³ã示ããã®ã§ã¯ãªãã誰ããããªå¤æ´ãã«ãããããããªè¡ç¯ã®ãç²æ«ãªæ¸¯è¡ã®çãå°éã§å½¼ãã¤ãã¦æ¥ãã¨ããã®ã ã人çã®èªè·¯ãæ»ãæ²ãããã®ç²ç©ã¨å
±ã«ç«ã¡å»ããã¨ãã¦ããããã®ä¸åº¦ä»ã å½¼ã®ãã®ä»äºã¸ã®æããåãããããã«ãæ°å¤ã仲éã®ä¸ã«åãèããæ±ããå½¼ãè¦å®ããå¾
ã£ã¦ããè
ãããã¨ããã®ããããã¨ãèå¾ã®è¶³é³ã®ä¸»ã¯ããã®è¡ã«ç¡æ°ã«ããæ³ç£è¦å½¹ã§ãå¼·éãªç½°ãããã«ãå½¼ã®æé¦ã«ã¬ã·ã£ã³ã¨ä¸ãã¨ããã®ããå½¼ã¯è¶³é³ãæ¢ã¾ã£ããã¨ã«æ°ãçãããããã¦ã¦è¾ºããè¦åãããµã¨çãæãéã«ç®ãæ¢ã¾ãã å½¼ã¯ç´ æ©ãå³ã«èº«ãç¿»ãã建ç©ã®éã«æ¶ãå»ã£ãããã®æå½¼ã¯ãããå°ãã§éã®çä¸ã«è»¢ãã£ã¦ãããã¿ãã±ãã«èºã転ã¶ã¨ããã ã£ãã å
½¼ã¯æéã®ä¸ã§éã確ããããã¨ãã£ã¨è¦ã¤ãããã©ãããèªåã®éã£ã¦ããé以å¤ã«ãã®ä¸åºããã®åºéã¯ãªãããã ã 足é³ã¯ã ãã ãè¿ã¥ããå½¼ã«ã¯è§ãæ²ããé»ã人影ãè¦ãããå½¼ã®ç®ã¯å¤ã®éã®ä¸ãå¿
æ»ã«ãã¾ãããéãéãæ¢ãããããã¹ã¦ã¯çµãããªã®ãããã¹ã¦ã®è¦å´ã¨æºåã¯æ°´ã®æ³¡ã ã¨ããã®ãã çªç¶ãå½¼ã®æ¨ªã§æã風ã«æºãããã»ãã®ãããã«ãããã®ãèããæãå½¼ã¯èä¸ãå£ã«æ¼ãä»ãã追跡è
ã«è¦ä»ããããªããã¨ãé¡ã£ãããã®æã¯æã¿ã®ç¶±ã¨ãã¦æãããããããå½¼ã®ã¸ã¬ã³ãããã®åºå£ãªã®ã ããããèä¸ãå£ã«ã´ã£ããæ¼ãä»ããã¾ã¾ããã£ããã¨å½¼ã¯éãã¦ããæã®æ¹ã¸ã¨èº«ãåããã¦è¡ã£ãããã®æã¯å½¼ã®æãã¨ãªãã®ã ãããã
diff --git a/sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt b/sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt
new file mode 100755
index 000000000000..b8362e9e789f
--- /dev/null
+++ b/sw/qa/extras/txtimport/data/UTF8WITHOUTBOM.txt
@@ -0,0 +1,2 @@
+漢a'
+å½¼ã¯èå¾ã«ã²ãããªè¶³é³ãèãããããã¯ãã¾ãè¯ãæå³ã示ããã®ã§ã¯ãªãã誰ããããªå¤æ´ãã«ãããããããªè¡ç¯ã®ãç²æ«ãªæ¸¯è¡ã®çãå°éã§å½¼ãã¤ãã¦æ¥ãã¨ããã®ã ã人çã®èªè·¯ãæ»ãæ²ãããã®ç²ç©ã¨å
±ã«ç«ã¡å»ããã¨ãã¦ããããã®ä¸åº¦ä»ã å½¼ã®ãã®ä»äºã¸ã®æããåãããããã«ãæ°å¤ã仲éã®ä¸ã«åãèããæ±ããå½¼ãè¦å®ããå¾
ã£ã¦ããè
ãããã¨ããã®ããããã¨ãèå¾ã®è¶³é³ã®ä¸»ã¯ããã®è¡ã«ç¡æ°ã«ããæ³ç£è¦å½¹ã§ãå¼·éãªç½°ãããã«ãå½¼ã®æé¦ã«ã¬ã·ã£ã³ã¨ä¸ãã¨ããã®ããå½¼ã¯è¶³é³ãæ¢ã¾ã£ããã¨ã«æ°ãçãããããã¦ã¦è¾ºããè¦åãããµã¨çãæãéã«ç®ãæ¢ã¾ãã å½¼ã¯ç´ æ©ãå³ã«èº«ãç¿»ãã建ç©ã®éã«æ¶ãå»ã£ãããã®æå½¼ã¯ãããå°ãã§éã®çä¸ã«è»¢ãã£ã¦ãããã¿ãã±ãã«èºã転ã¶ã¨ããã ã£ãã å
½¼ã¯æéã®ä¸ã§éã確ããããã¨ãã£ã¨è¦ã¤ãããã©ãããèªåã®éã£ã¦ããé以å¤ã«ãã®ä¸åºããã®åºéã¯ãªãããã ã 足é³ã¯ã ãã ãè¿ã¥ããå½¼ã«ã¯è§ãæ²ããé»ã人影ãè¦ãããå½¼ã®ç®ã¯å¤ã®éã®ä¸ãå¿
æ»ã«ãã¾ãããéãéãæ¢ãããããã¹ã¦ã¯çµãããªã®ãããã¹ã¦ã®è¦å´ã¨æºåã¯æ°´ã®æ³¡ã ã¨ããã®ãã çªç¶ãå½¼ã®æ¨ªã§æã風ã«æºãããã»ãã®ãããã«ãããã®ãèããæãå½¼ã¯èä¸ãå£ã«æ¼ãä»ãã追跡è
ã«è¦ä»ããããªããã¨ãé¡ã£ãããã®æã¯æã¿ã®ç¶±ã¨ãã¦æãããããããå½¼ã®ã¸ã¬ã³ãããã®åºå£ãªã®ã ããããèä¸ãå£ã«ã´ã£ããæ¼ãä»ããã¾ã¾ããã£ããã¨å½¼ã¯éãã¦ããæã®æ¹ã¸ã¨èº«ãåããã¦è¡ã£ãããã®æã¯å½¼ã®æãã¨ãªãã®ã ãããã
diff --git a/sw/qa/extras/txtimport/data/bullets.odt b/sw/qa/extras/txtimport/data/bullets.odt
new file mode 100644
index 000000000000..43e0c2123ffc
Binary files /dev/null and b/sw/qa/extras/txtimport/data/bullets.odt differ
diff --git a/sw/qa/extras/txtimport/txtimport.cxx b/sw/qa/extras/txtimport/txtimport.cxx
new file mode 100644
index 000000000000..a97242641df8
--- /dev/null
+++ b/sw/qa/extras/txtimport/txtimport.cxx
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#include <swmodeltestbase.hxx>
+
+#include <shellio.hxx>
+#include <unotextrange.hxx>
+#include <iodetect.hxx>
+#include <unotxdoc.hxx>
+#include <docsh.hxx>
+
+class TxtImportTest : public SwModelTestBase
+{
+public:
+ TxtImportTest()
+ : SwModelTestBase("/sw/qa/extras/txtimport/data/", "Text")
+ {
+ }
+
+ // Export & assert part of the document (defined by SwPaM).
+ void assertExportedRange(const OString& aExpected, SwPaM& rPaM)
+ {
+ WriterRef rAsciiWriter;
+ SwReaderWriter::GetWriter(FILTER_TEXT, OUString(), rAsciiWriter);
+ CPPUNIT_ASSERT(rAsciiWriter.is());
+
+ // no start char
+ rAsciiWriter->m_bUCS2_WithStartChar = false;
+
+ SvMemoryStream aMemoryStream;
+
+ SwWriter aWriter(aMemoryStream, rPaM);
+ ErrCode nError = aWriter.Write(rAsciiWriter);
+ CPPUNIT_ASSERT_EQUAL(ERRCODE_NONE, nError);
+
+ const char* pData = static_cast<const char*>(aMemoryStream.GetData());
+ OString aResult(pData, aMemoryStream.GetSize());
+
+ CPPUNIT_ASSERT_EQUAL(aExpected, aResult);
+ }
+};
+
+#define DECLARE_TXTIMPORT_TEST(TestName, filename) \
+ DECLARE_SW_IMPORT_TEST(TestName, filename, nullptr, TxtImportTest)
+
+DECLARE_TXTIMPORT_TEST(testTdf112191, "bullets.odt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ // just the 5th paragraph - no bullet
+ uno::Reference<text::XTextRange> xPara(getParagraph(5));
+ SwUnoInternalPaM aPaM(*pDoc);
+ bool bSuccess = sw::XTextRangeToSwPaM(aPaM, xPara);
+ CPPUNIT_ASSERT(bSuccess);
+
+ assertExportedRange("First bullet", aPaM);
+
+ // but when we extend to the next paragraph - now there are bullets
+ xPara = getParagraph(6);
+ SwUnoInternalPaM aPaM2(*pDoc);
+ bSuccess = sw::XTextRangeToSwPaM(aPaM2, xPara);
+ CPPUNIT_ASSERT(bSuccess);
+
+ OUString aString = OStringToOUString(" \xe2\x80\xa2 First bullet" SAL_NEWLINE_STRING
+ " \xe2\x80\xa2 Second bullet",
+ RTL_TEXTENCODING_UTF8);
+
+ SwPaM aPaM3(*aPaM2.GetMark(), *aPaM.GetPoint());
+ assertExportedRange(OUStringToOString(aString, osl_getThreadTextEncoding()), aPaM3);
+}
+
+DECLARE_TXTIMPORT_TEST(testTdf60145_utf8withoutbom, "UTF8WITHOUTBOM.txt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ uno::Reference<text::XTextRange> xPara(getParagraph(1));
+
+ CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString());
+}
+
+DECLARE_TXTIMPORT_TEST(testTdf60145_utf8withbom, "UTF8WITHBOM.txt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ uno::Reference<text::XTextRange> xPara(getParagraph(1));
+
+ CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString());
+}
+
+DECLARE_TXTIMPORT_TEST(testTdf60145_utf16lewithoutbom, "UTF16LEWITHOUTBOM.txt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ uno::Reference<text::XTextRange> xPara(getParagraph(1));
+
+ CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString());
+}
+
+DECLARE_TXTIMPORT_TEST(testTdf60145_utf16lewithbom, "UTF16LEWITHBOM.txt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ uno::Reference<text::XTextRange> xPara(getParagraph(1));
+
+ CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString());
+}
+
+DECLARE_TXTIMPORT_TEST(testTdf60145_utf16bewithoutbom, "UTF16BEWITHOUTBOM.txt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ uno::Reference<text::XTextRange> xPara(getParagraph(1));
+
+ CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString());
+}
+
+DECLARE_TXTIMPORT_TEST(testTdf60145_utf16bewithbom, "UTF16BEWITHBOM.txt")
+{
+ SwXTextDocument* pTextDoc = dynamic_cast<SwXTextDocument*>(mxComponent.get());
+ CPPUNIT_ASSERT(pTextDoc);
+ SwDoc* pDoc = pTextDoc->GetDocShell()->GetDoc();
+ CPPUNIT_ASSERT(pDoc);
+
+ uno::Reference<text::XTextRange> xPara(getParagraph(1));
+
+ CPPUNIT_ASSERT_EQUAL(OUString(u"漢a'"), xPara->getString());
+}
+
+CPPUNIT_PLUGIN_IMPLEMENT();
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/basflt/iodetect.cxx b/sw/source/filter/basflt/iodetect.cxx
index 5f9929e8f2b7..a47bb9e82b8d 100644
--- a/sw/source/filter/basflt/iodetect.cxx
+++ b/sw/source/filter/basflt/iodetect.cxx
@@ -28,6 +28,8 @@
#include <sfx2/docfile.hxx>
#include <com/sun/star/ucb/ContentCreationException.hpp>
#include <com/sun/star/embed/XStorage.hpp>
+#include <unicode/ucsdet.h>
+#include <unicode/uclean.h>
using namespace ::com::sun::star;
@@ -267,6 +269,31 @@ bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
pBuf+=nHead;
rLen-=nHead;
}
+ /*See unicode type again without BOM*/
+ if (rLen >= 1 && eCharSet == RTL_TEXTENCODING_DONTKNOW)
+ {
+ UErrorCode uerr = U_ZERO_ERROR;
+ UCharsetDetector* ucd = ucsdet_open(&uerr);
+ ucsdet_setText(ucd, pBuf, rLen, &uerr);
+ const UCharsetMatch* match = ucsdet_detect(ucd, &uerr);
+ const char* pEncodingName = ucsdet_getName(match, &uerr);
+
+ if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName))
+ {
+ eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8
+ }
+ else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName))
+ {
+ eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE
+ bLE = false;
+ }
+ else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName))
+ {
+ eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE
+ }
+
+ ucsdet_close(ucd);
+ }
bool bCR = false, bLF = false, bIsBareUnicode = false;
More information about the Libreoffice-commits
mailing list