[Libreoffice-commits] core.git: Branch 'distro/mimo/mimo-5-4-7-2' - 4 commits - configure.ac external/liborcus shell/source
Andras Timar (via logerrit)
logerrit at kemper.freedesktop.org
Mon May 10 21:32:37 UTC 2021
Rebased ref, commits from common ancestor:
commit 12f2ddf58c4b4b7ce35814be09a8f0b99bde18fb
Author: Andras Timar <andras.timar at collabora.com>
AuthorDate: Mon May 10 22:48:59 2021 +0200
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Mon May 10 23:31:56 2021 +0200
Bump version to 5.4.7.2.M7
Change-Id: I0e5314312bbefd3bd59cd39f0f8b876de8463e25
diff --git a/configure.ac b/configure.ac
index 9c25ac076c8c..bab32d23e6fa 100644
--- a/configure.ac
+++ b/configure.ac
@@ -9,7 +9,7 @@ dnl in order to create a configure script.
# several non-alphanumeric characters, those are split off and used only for the
# ABOUTBOXPRODUCTVERSIONSUFFIX in openoffice.lst. Why that is necessary, no idea.
-AC_INIT([LibreOffice],[5.4.7.2.M6],[],[],[http://documentfoundation.org/])
+AC_INIT([LibreOffice],[5.4.7.2.M7],[],[],[http://documentfoundation.org/])
AC_PREREQ([2.59])
commit 036e62326d08cfdcda8b390720ab9c0cf9f8c3f7
Author: Luboš Luňák <l.lunak at collabora.com>
AuthorDate: Thu Apr 29 20:10:34 2021 +0200
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Mon May 10 23:31:56 2021 +0200
allow utf-8 in xml names (liborcus) (tdf#141672)
Change-Id: Ib150d55b588a572e4352396f18de2331983b2aae
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/114892
Tested-by: Jenkins
Reviewed-by: Luboš Luňák <l.lunak at collabora.com>
diff --git a/external/liborcus/UnpackedTarball_liborcus.mk b/external/liborcus/UnpackedTarball_liborcus.mk
index 791436e66016..6df45cb2fb39 100644
--- a/external/liborcus/UnpackedTarball_liborcus.mk
+++ b/external/liborcus/UnpackedTarball_liborcus.mk
@@ -21,6 +21,10 @@ $(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
external/liborcus/0001-protect-the-self-closing-xml-element-code-against-se.patch \
))
+$(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
+ external/liborcus/allow-utf-8-in-xml-names.patch \
+))
+
ifeq ($(OS),WNT)
$(eval $(call gb_UnpackedTarball_add_patches,liborcus,\
external/liborcus/windows-constants-hack.patch \
diff --git a/external/liborcus/allow-utf-8-in-xml-names.patch b/external/liborcus/allow-utf-8-in-xml-names.patch
new file mode 100644
index 000000000000..efef24b84053
--- /dev/null
+++ b/external/liborcus/allow-utf-8-in-xml-names.patch
@@ -0,0 +1,289 @@
+From 9889cb660372bc6c3da22fc274c73ea11040415f Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Lubo=C5=A1=20Lu=C5=88=C3=A1k?= <l.lunak at centrum.cz>
+Date: Thu, 29 Apr 2021 19:12:20 +0200
+Subject: [PATCH] allow utf-8 in xml names (#137)
+
+https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
+has a list of all allowed characters.
+---
+ include/orcus/sax_parser_base.hpp | 3 +
+ src/orcus_test_xml.cpp | 1 +
+ src/parser/sax_parser_base.cpp | 186 ++++++++++++++++++++++++++++--
+ test/xml/non-ascii/check.txt | 4 +
+ test/xml/non-ascii/input.xml | 4 +
+ 5 files changed, 190 insertions(+), 8 deletions(-)
+ create mode 100644 test/xml/non-ascii/check.txt
+ create mode 100644 test/xml/non-ascii/input.xml
+
+diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp
+index a117b3a2..1aeb8b38 100644
+--- a/include/orcus/sax_parser_base.hpp
++++ b/include/orcus/sax_parser_base.hpp
+@@ -217,6 +217,9 @@ protected:
+ void element_name(parser_element& elem, const char* begin_pos);
+ void attribute_name(pstring& attr_ns, pstring& attr_name);
+ void characters_with_encoded_char(cell_buffer& buf);
++
++ int is_name_char();
++ int is_name_start_char();
+ };
+
+ }}
+diff --git a/src/orcus_test_xml.cpp b/src/orcus_test_xml.cpp
+index 98e83297..89c8af72 100644
+--- a/src/orcus_test_xml.cpp
++++ b/src/orcus_test_xml.cpp
+@@ -73,6 +73,7 @@ const char* sax_parser_test_dirs[] = {
+ SRCDIR"/test/xml/bom/",
+ SRCDIR"/test/xml/custom-decl-1/",
+ SRCDIR"/test/xml/cdata-1/"
++ SRCDIR"/test/xml/non-ascii/",
+ };
+
+ const char* sax_parser_parse_only_test_dirs[] = {
+diff --git a/src/parser/sax_parser_base.cpp b/src/parser/sax_parser_base.cpp
+index 743130da..ecbd7f99 100644
+--- a/src/parser/sax_parser_base.cpp
++++ b/src/parser/sax_parser_base.cpp
+@@ -296,20 +296,22 @@ void parser_base::value_with_encoded_char(cell_buffer& buf, pstring& str)
+ str = pstring(buf.get(), buf.size());
+
+ // Skip the closing quote.
+- assert(cur_char() == '"');
++ assert(!has_char() || cur_char() == '"');
+ next();
+ }
+
+ bool parser_base::value(pstring& str, bool decode)
+ {
+ char c = cur_char();
+- if (c != '"')
++ if (c != '"' && c != '\'')
+ throw malformed_xml_error("value must be quoted", offset());
+
++ char quote_char = c;
++
+ c = next_char_checked();
+
+ const char* p0 = mp_char;
+- for (; c != '"'; c = next_char_checked())
++ for (; c != quote_char; c = next_char_checked())
+ {
+ if (decode && c == '&')
+ {
+@@ -330,19 +332,187 @@ bool parser_base::value(pstring& str, bool decode)
+ return false;
+ }
+
++// Classify the UTF-8 sequence at mp_char per https://www.w3.org/TR/2006/REC-xml11-20060816/#NT-NameStartChar
++// (NameStartChar if only_start_name, else NameChar). Return its length in bytes (1-4), or 0 if not allowed or truncated.
++template< bool only_start_name >
++static
++int is_name_char_helper(const char* mp_char, const char* mp_end)
++{
++ const unsigned char first = mp_char[0]; // precondition: at least one byte is readable at mp_char
++ // Note that ':' technically is an allowed name character, but it is handled separately
++ // e.g. in element_name(), so here pretend it isn't.
++ if (/*first == ':' ||*/ first == '_' || (first >= 'A' && first <= 'Z') || (first >= 'a' && first <= 'z'))
++ return 1;
++ if (!only_start_name && (first == '-' || first == '.' || (first >= '0' && first <= '9')))
++ return 1;
++
++ if (first < 0x80) // other ascii characters (including DEL, 0x7f) are not allowed
++ return 0;
++ if (mp_end - mp_char < 2) // a second byte must lie inside [mp_char, mp_end) before it is read
++ return 0;
++ const unsigned char second = mp_char[1];
++
++ // 0xb7 = 0xc2 0xb7 utf-8
++ if (!only_start_name && first == 0xc2 && second == 0xb7)
++ return 2;
++
++ // [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF]
++ // 0xc0 = 0xc3 0x80 utf-8
++ if (first < 0xc3)
++ return 0;
++ // xd7 = 0xc3 0x97 utf-8, 0xf7 = 0xc3 0xb7 utf-8; a continuation byte is at most 0xbf
++ if (first == 0xc3)
++ return second >= 0x80 && second <= 0xbf && second != 0x97 && second != 0xb7 ? 2 : 0;
++ // 0x2ff = 0xcb 0xbf utf-8, 0x300 = 0xcc 0x80 utf-8
++ if (first >= 0xc4 && first <= 0xcb)
++ return 2;
++
++ // [#x0300-#x036F]
++ // 0x0300 = 0xcc 0x80 utf-8, 0x36f = 0xcd 0xaf utf-8
++ if (!only_start_name && first == 0xcc)
++ return 2;
++ if (!only_start_name && first == 0xcd && second <= 0xaf)
++ return 2;
++
++ // [#x370-#x37D] | [#x37F-#x1FFF]
++ // 0x370 = 0xcd 0xb0 utf-8, 0x37e = 0xcd 0xbe
++ if (first < 0xcd)
++ return 0;
++ if (first == 0xcd)
++ return second >= 0xb0 && second != 0xbe ? 2 : 0;
++ // 0x07ff = 0xdf 0xbf utf-8 (the last 2-byte utf-8)
++ if (first <= 0xdf)
++ return 2;
++
++ if (first < 0xe0)
++ return 0;
++ if (mp_end - mp_char < 3) // a third byte must lie inside [mp_char, mp_end) before it is read
++ return 0;
++ const unsigned char third = mp_char[2];
++
++ // 0x0800 = 0xe0 0xa0 0x80 utf-8, 0x1fff = 0xe1 0xbf 0xbf utf-8, 0x2000 = 0xe2 0x80 0x80
++ if (first == 0xe0 || first == 0xe1)
++ return 3;
++
++ // [#x200C-#x200D]
++ // 0x200c = 0xe2 0x80 0x8c utf-8, 0x200d = 0xe2 0x80 0x8d utf-8
++ if (first < 0xe2)
++ return 0;
++ if (first == 0xe2 && second == 0x80 && (third == 0x8c || third == 0x8d))
++ return 3;
++
++ // [#x203F-#x2040]
++ // 0x203f = 0xe2 0x80 0xbf utf-8, 0x2040 = 0xe2 0x81 0x80 utf-8
++ if (!only_start_name && first == 0xe2 && second == 0x80 && third == 0xbf)
++ return 3;
++ if (!only_start_name && first == 0xe2 && second == 0x81 && third == 0x80)
++ return 3;
++
++ // [#x2070-#x218F]
++ // 0x2070 = 0xe2 0x81 0xb0 utf-8, 0x218f = 0xe2 0x86 0x8f utf-8
++ if (first == 0xe2)
++ {
++ if (second < 0x81)
++ return 0;
++ if ((second == 0x81 && third >= 0xb0) || (second > 0x81 && second < 0x86)) // 0xe2 0x81 with third < 0xb0 is below 0x2070
++ return 3;
++ if (second == 0x86 && third <= 0x8f)
++ return 3;
++ }
++
++ // [#x2C00-#x2FEF]
++ // 0x2c00 = 0xe2 0xb0 0x80 utf-8, 0x2fef = 0xe2 0xbf 0xaf utf-8
++ if (first == 0xe2)
++ {
++ if (second < 0xb0)
++ return 0;
++ if (second < 0xbf)
++ return 3;
++ if (second == 0xbf && third <= 0xaf)
++ return 3;
++ }
++
++ // [#x3001-#xD7FF]
++ // 0x3001 = 0xe3 0x80 0x81 utf-8, 0xd7ff = 0xed 0x9f 0xbf utf-8, 0xd800 = 0xed 0xa0 0x80 utf-8
++ if (first < 0xe3)
++ return 0;
++ if (first < 0xed)
++ return (first == 0xe3 && second == 0x80 && third == 0x80) ? 0 : 3; // 0x3000 = 0xe3 0x80 0x80 is just below the range
++ if (first == 0xed && second <= 0x9f)
++ return 3;
++
++ // [#xF900-#xFDCF]
++ // 0xf900 = 0xef 0xa4 0x80 utf-8, 0xfdcf = 0xef 0xb7 0x8f utf-8
++ if (first == 0xef)
++ {
++ if (second < 0xa4)
++ return 0;
++ if (second < 0xb7)
++ return 3;
++ if (second == 0xb7 && third <= 0x8f)
++ return 3;
++ }
++
++ // [#xFDF0-#xFFFD]
++ // 0xfdf0 = 0xef 0xb7 0xb0 utf-8, 0xfffd = 0xef 0xbf 0xbd utf-8
++ if (first == 0xef)
++ {
++ assert(second >= 0xb7);
++ if (second == 0xb7 && third < 0xb0)
++ return 0;
++ if (second < 0xbf) // 0xef 0xbe xx = 0xff80-0xffbf lies inside the range as well
++ return 3;
++ if (second == 0xbf && third <= 0xbd)
++ return 3;
++ }
++
++ if (first < 0xf0)
++ return 0;
++ if (mp_end - mp_char < 4) // all four bytes must lie inside [mp_char, mp_end)
++ return 0;
++ // const unsigned char fourth = mp_char[3];
++
++ // [#x10000-#xEFFFF]
++ // 0x10000 = 0xf0 0x90 0x80 0x80 utf-8, 0xeffff = 0xf3 0xaf 0xbf 0xbf utf-8,
++ // 0xf0000 = 0xf3 0xb0 0x80 0x80 utf-8
++ if (first >= 0xf0 && first <= 0xf2) // lead byte 0xf2 covers 0x80000-0xbffff
++ return 4;
++ if (first == 0xf3 && second < 0xb0)
++ return 4;
++
++ return 0;
++}
++
++int parser_base::is_name_char() // NameChar test at the current position: the helper's byte length, 0 if not a name character
++{
++ return is_name_char_helper<false>(mp_char, mp_end);
++}
++
++int parser_base::is_name_start_char() // NameStartChar test at the current position: the helper's byte length, 0 if a name cannot start here
++{
++ return is_name_char_helper<true>(mp_char, mp_end);
++}
++
+ void parser_base::name(pstring& str)
+ {
+ const char* p0 = mp_char;
+- char c = cur_char();
+- if (!is_alpha(c))
++ int skip = is_name_start_char();
++ if (skip == 0)
+ {
+ ::std::ostringstream os;
+- os << "name must begin with an alphabet, but got this instead '" << c << "'";
++ os << "name must begin with an alphabet, but got this instead '" << cur_char() << "'";
+ throw malformed_xml_error(os.str(), offset());
+ }
++ next(skip);
+
+- while (is_alpha(c) || is_numeric(c) || is_name_char(c))
+- c = next_char_checked();
++ for(;;)
++ {
++ cur_char_checked(); // check end of xml stream
++ skip = is_name_char();
++ if(skip == 0)
++ break;
++ next(skip);
++ }
+
+ str = pstring(p0, mp_char-p0);
+ }
+diff --git a/test/xml/non-ascii/check.txt b/test/xml/non-ascii/check.txt
+new file mode 100644
+index 00000000..77b7c003
+--- /dev/null
++++ b/test/xml/non-ascii/check.txt
+@@ -0,0 +1,4 @@
++/Myšička
++/Myšička at jméno="Žužla"
++/Myšička/Nožičky
++/Myšička/Nožičky"4"
+diff --git a/test/xml/non-ascii/input.xml b/test/xml/non-ascii/input.xml
+new file mode 100644
+index 00000000..c516744b
+--- /dev/null
++++ b/test/xml/non-ascii/input.xml
+@@ -0,0 +1,4 @@
++<?xml version="1.0" encoding="UTF-8"?>
++<Myšička jméno="Žužla">
++ <Nožičky>4</Nožičky>
++</Myšička>
+--
+2.26.2
+
commit 70acef544db618ded97e9550a2519930d30e3fba
Author: Stephan Bergmann <sbergman at redhat.com>
AuthorDate: Tue Feb 16 09:30:09 2021 +0100
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Mon May 10 23:31:54 2021 +0200
Improve checkExtension
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110970
Reviewed-by: Mike Kaganski <mike.kaganski at collabora.com>
Tested-by: Jenkins
(cherry picked from commit f456c4dacf700e064e112ef068ff7edb04239754)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/110922
Reviewed-by: Michael Stahl <michael.stahl at allotropia.de>
(cherry picked from commit f19d95986756412e5d72047656eec17a720c5e57)
Change-Id: Iff416a9c5930ad5903f7ee51a2abbc94d5f40800
diff --git a/shell/source/win32/SysShExec.cxx b/shell/source/win32/SysShExec.cxx
index 1d0932e5db8d..19e0223f37d0 100644
--- a/shell/source/win32/SysShExec.cxx
+++ b/shell/source/win32/SysShExec.cxx
@@ -389,21 +389,28 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa
}
}
pathname = OUString(SAL_U(path));
+ // ShellExecuteExW appears to ignore trailing dots, so remove them:
+ while (pathname.endsWith(".", &pathname)) {}
auto const n = pathname.lastIndexOf('.');
if (n > pathname.lastIndexOf('\\')) {
auto const ext = pathname.copy(n + 1);
- OUString env;
- if (osl_getEnvironment(OUString("PATHEXT").pData, &env.pData) != osl_Process_E_None)
- {
- SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed");
- }
- if (!(checkExtension(ext, env)
- && checkExtension(
- ext,
- ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;.JAR")))
- {
- throw css::lang::IllegalArgumentException(
- "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, 0);
+ if (!ext.isEmpty()) {
+ OUString env;
+ if (osl_getEnvironment(OUString("PATHEXT").pData, &env.pData)
+ != osl_Process_E_None)
+ {
+ SAL_INFO("shell", "osl_getEnvironment(PATHEXT) failed");
+ }
+ if (!(checkExtension(ext, env)
+ && checkExtension(
+ ext,
+ ".COM;.EXE;.BAT;.CMD;.VBS;.VBE;.JS;.JSE;.WSF;.WSH;.MSC;.PY;.CLASS;"
+ ".JAR;.APPLICATION;.LNK;.SCR")))
+ {
+ throw css::lang::IllegalArgumentException(
+ "XSystemShellExecute.execute, cannot process <" + aCommand + ">", {},
+ 0);
+ }
}
}
}
commit af34e2c563a1535e25c4ca9a933cb90299b8eef2
Author: Mike Kaganski <mike.kaganski at collabora.com>
AuthorDate: Tue Jan 28 01:28:24 2020 +0300
Commit: Andras Timar <andras.timar at collabora.com>
CommitDate: Mon May 10 23:31:24 2021 +0200
tdf#130216: normalize paths with .. segments
... which obviously are rejected by SHGetFileInfoW and SHParseDisplayName
that it calls internally.
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/87565
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski at collabora.com>
Signed-off-by: Xisco Fauli <xiscofauli at libreoffice.org>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/87737
Reviewed-by: Miklos Vajna <vmiklos at collabora.com>
(cherry picked from commit fc043d38c256243fb782cc48e7708feaeabba4ae)
Change-Id: I2f5f3c675ea6aa1c2d92eef30be4399a8d600255
diff --git a/shell/source/win32/SysShExec.cxx b/shell/source/win32/SysShExec.cxx
index a9e5a2c4ec7d..1d0932e5db8d 100644
--- a/shell/source/win32/SysShExec.cxx
+++ b/shell/source/win32/SysShExec.cxx
@@ -39,6 +39,7 @@
#endif
#include <windows.h>
#include <shellapi.h>
+#include <Shlobj.h>
#include <Shobjidl.h>
#include <objbase.h>
#if defined _MSC_VER
@@ -307,21 +308,33 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa
+ "> failed with " + OUString::number(e1)),
{}, 0);
}
+ const int MAX_LONG_PATH = 32767; // max longpath on WinNT
+ if (pathname.getLength() >= MAX_LONG_PATH)
+ {
+ throw css::lang::IllegalArgumentException(
+ "XSystemShellExecute.execute, path <" + pathname + "> too long", {}, 0);
+ }
+ wchar_t path[MAX_LONG_PATH];
+ wcscpy_s(path, SAL_W(pathname.getStr()));
for (int i = 0;; ++i) {
+ // tdf#130216: normalize c:\path\to\something\..\else into c:\path\to\else
+ if (PathResolve(path, nullptr, PRF_VERIFYEXISTS | PRF_REQUIREABSOLUTE) == 0)
+ {
+ throw css::lang::IllegalArgumentException(
+ "XSystemShellExecute.execute, PathResolve(" + OUString(SAL_U(path))
+ + ") failed",
+ {}, 0);
+ }
SHFILEINFOW info;
- if (SHGetFileInfoW(
- SAL_W(pathname.getStr()), 0, &info, sizeof info, SHGFI_EXETYPE)
- != 0)
+ if (SHGetFileInfoW(path, 0, &info, sizeof info, SHGFI_EXETYPE) != 0)
{
throw css::lang::IllegalArgumentException(
"XSystemShellExecute.execute, cannot process <" + aCommand + ">", {}, 0);
}
- if (SHGetFileInfoW(
- SAL_W(pathname.getStr()), 0, &info, sizeof info, SHGFI_ATTRIBUTES)
- == 0)
+ if (SHGetFileInfoW(path, 0, &info, sizeof info, SHGFI_ATTRIBUTES) == 0)
{
throw css::lang::IllegalArgumentException(
- "XSystemShellExecute.execute, SHGetFileInfoW(" + pathname + ") failed", {},
+ "XSystemShellExecute.execute, SHGetFileInfoW(" + OUString(SAL_U(path)) + ") failed", {},
0);
}
if ((info.dwAttributes & SFGAO_LINK) == 0) {
@@ -346,7 +359,7 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa
+ o3tl::runtimeToOUString(e3.what())),
{}, 0);
}
- e2 = file->Load(SAL_W(pathname.getStr()), STGM_READ);
+ e2 = file->Load(path, STGM_READ);
if (FAILED(e2)) {
throw css::lang::IllegalArgumentException(
("XSystemShellExecute.execute, IPersistFile.Load failed with "
@@ -360,16 +373,14 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa
+ OUString::number(e2)),
{}, 0);
}
- wchar_t path[MAX_PATH];
WIN32_FIND_DATAW wfd;
- e2 = link->GetPath(path, MAX_PATH, &wfd, SLGP_RAWPATH);
+ e2 = link->GetPath(path, SAL_N_ELEMENTS(path), &wfd, SLGP_RAWPATH);
if (FAILED(e2)) {
throw css::lang::IllegalArgumentException(
("XSystemShellExecute.execute, IShellLink.GetPath failed with "
+ OUString::number(e2)),
{}, 0);
}
- pathname = SAL_U(path);
// Fail at some arbitrary nesting depth, to avoid an infinite loop:
if (i == 30) {
throw css::lang::IllegalArgumentException(
@@ -377,6 +388,7 @@ void SAL_CALL CSysShExec::execute( const OUString& aCommand, const OUString& aPa
{}, 0);
}
}
+ pathname = OUString(SAL_U(path));
auto const n = pathname.lastIndexOf('.');
if (n > pathname.lastIndexOf('\\')) {
auto const ext = pathname.copy(n + 1);
More information about the Libreoffice-commits
mailing list