[Libreoffice-commits] dev-tools.git: 4 commits - prettyprint/formatrtf prettyprint/formatxml.cpp prettyprint/Makefile prettyprint/prettyprint
Miklos Vajna
vmiklos at collabora.co.uk
Sat Sep 21 11:26:56 PDT 2013
prettyprint/Makefile | 2
prettyprint/formatrtf | 60 ++++++++++
prettyprint/formatxml.cpp | 263 ++++++++++++++++++++++++++++++++++++++++++++++
prettyprint/prettyprint | 19 +++
4 files changed, 344 insertions(+)
New commits:
commit 9c7dbe3d46107225f35097fbfa5edc08f69f070b
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Sat Sep 21 20:26:12 2013 +0200
prettyprint: add shell wrapper that invokes the right sub-tool
Change-Id: Ifa52d88697fe87b42f31c5878e1a68c6d36b5935
diff --git a/prettyprint/prettyprint b/prettyprint/prettyprint
new file mode 100755
index 0000000..0f3e698
--- /dev/null
+++ b/prettyprint/prettyprint
@@ -0,0 +1,19 @@
+#!/bin/bash
+
+file=$(mktemp)
+cat > $file
+kind=$(file -i $file |sed 's/.*: \(.*\); .*/\1/')
+
+mydir=$(dirname $0)
+[ -h $0 ] && mydir=$(dirname $(readlink $0))
+
+if [ "$kind" == "text/rtf" ]; then
+ $mydir/formatrtf $file
+elif [ "$kind" == "application/xml" ]; then
+ #xmllint --format --recover $file 2>/dev/null
+ $mydir/formatxml $file
+else
+ echo "unknown type: $kind"
+fi
+
+rm -f $file
commit 9d8b7bea29769971db1f7f02de85855186aa866d
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date: Sat Sep 21 20:20:54 2013 +0200
prettyprint: add formatrtf tool
This was hosted as
http://cgit.freedesktop.org/~vmiklos/lo-test-files/plain/writer/prettyprint.py
previously.
Change-Id: I97c28caf71d65b0b03bfacb36644e34af599afb4
diff --git a/prettyprint/formatrtf b/prettyprint/formatrtf
new file mode 100755
index 0000000..95e2efe
--- /dev/null
+++ b/prettyprint/formatrtf
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import sys, getopt
+
+def usage(code):
+ print("Usage: %s [-i|--inline] input.rtf" % sys.argv[0])
+ print()
+ print("The pretty-printer acts as a filter: it just inserts newlines\n" + \
+ "around '{' and '}' characters for better readability.\n\n" + \
+ "The -i switch makes the filter in-place, instead of just\n" + \
+ "reading the input and outputting the result to stdout.")
+ sys.exit(code)
+
+# opt parsing
+inline = False
+argv = sys.argv[1:]
+try:
+ opts, args = getopt.getopt(argv, "i", ["inline"])
+except getopt.GetoptError:
+ usage(1)
+optind = 0
+for opt, arg in opts:
+ if opt in ("-i", "--inline"):
+ inline = True
+ optind += 1
+
+if optind < len(argv):
+ input = argv[optind]
+else:
+ usage(0)
+
+sock = open(input)
+
+out = []
+
+while True:
+ char = sock.read(1)
+ after = None
+ if not len(char):
+ break
+ if ord(char) == 0x0d:
+ continue
+ if char == "{" and len(out) and out[-1][-1] not in ("\n", "\\"):
+ out.append("\n")
+ elif char == "}" and out[-1][-1] != "\\":
+ after = "\n"
+ if char == "\n" and len(out) and out[-1][-1] == "\n":
+ pass
+ else:
+ out.append(char)
+ if after:
+ out.append(after)
+
+if not inline:
+ sys.stdout.write("".join(out))
+else:
+ sock.close()
+ sock = open(input, "w")
+ sock.write("".join(out))
+ sock.close()
commit 3bf9eb695d48a64624c0e47fc70cbc9610e69c19
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Wed Nov 14 15:20:51 2012 +0100
formatxml: always write to stdout
diff --git a/prettyprint/Makefile b/prettyprint/Makefile
new file mode 100644
index 0000000..04cbd23
--- /dev/null
+++ b/prettyprint/Makefile
@@ -0,0 +1,2 @@
+# Qt4 installation prefix (see the build note in formatxml.cpp); override with
+# e.g. `make QTDIR=/opt/qt4`.
+QTDIR ?= /usr
+
+formatxml: formatxml.cpp
+	g++ -Wall -I$(QTDIR)/include/QtCore -I$(QTDIR)/include formatxml.cpp -lQtCore -L$(QTDIR)/lib -o formatxml
diff --git a/prettyprint/formatxml.cpp b/prettyprint/formatxml.cpp
index d514d94..938cc32 100644
--- a/prettyprint/formatxml.cpp
+++ b/prettyprint/formatxml.cpp
@@ -255,18 +255,7 @@ int main( int argc, char* argv[] )
}
QTextStream in( &fin );
QFile fout;
- if( !isatty( 1 ))
- fout.open( stdout, QIODevice::WriteOnly );
- else
- {
- QString fname( QString( argv[ 1 ] ) + ".format.xml" );
- fout.setFileName( fname );
- if( !fout.open( QIODevice::WriteOnly ))
- {
- QTextStream( stderr ) << "Cannot write to " << fname << endl;
- return 4;
- }
- }
+ fout.open( stdout, QIODevice::WriteOnly );
QTextStream out( &fout );
in.setCodec( "UTF-8" );
out.setCodec( "UTF-8" );
commit e403bd8ceb1b058a1176b5f169e083a884b20d2d
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Wed Nov 14 15:18:59 2012 +0100
add Lubos' formatxml that fails to build ATM
diff --git a/prettyprint/formatxml.cpp b/prettyprint/formatxml.cpp
new file mode 100644
index 0000000..d514d94
--- /dev/null
+++ b/prettyprint/formatxml.cpp
@@ -0,0 +1,274 @@
+/*****************************************************************
+
+Copyright (C) 2010 Lubos Lunak <l.lunak at suse.cz>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+******************************************************************/
+
+/*
+
+This is a tool that nicely formats an XML file (e.g. the .docx or .odt formats
+have basically everything on a single line, which is a pain to analyze).
+Unlike 'xmllint --format', this works even if the XML is corrupt, and unlike
+xmllint's --recover it does not alter the XML itself in any way (or at least
+tries not to, complain if there's a problem).
+
+If there are problems with the XML, there is also a comment inserted in the output
+file that warns about it (so that the problem is easy to spot).
+
+To compile (libQtCore from Qt4 is required, $QTDIR is the location, usually /usr):
+g++ -Wall -I$QTDIR/include/QtCore -I$QTDIR/include formatxml.cpp -lQtCore -L$QTDIR/lib -o formatxml
+
+The given file is written to stdout if it's redirected, otherwise it's written
+to file with .format.xml appended.
+
+*/
+
+#include <assert.h>
+#include <qfile.h>
+#include <qstack.h>
+#include <qstringlist.h>
+#include <qtextstream.h>
+#include <stdio.h>
+
+enum TokenType // classification of a single token, as returned by analyzeToken()
+ { // the order must match the (disabled) types[] debug table in format()
+ Error, // parse error or whatever
+ OtherTag, // comments, <? ... ?>
+ OpeningTag, // <foo>
+ ClosingTag, // </foo>
+ StandaloneTag, // <foo/>
+ Text // whatever text outside of tags
+ };
+
+// Splits the whole input into a flat list of tokens. A token is either a tag
+// (from '<' up to and including the next '>', or up to the end of input) or a
+// run of text between tags. A newline met at a token boundary is dropped
+// together with the whitespace that follows it; everything else is kept verbatim.
+static QStringList readTokens( QTextStream& in )
+    {
+    QStringList ret;
+    while( !in.atEnd())
+        {
+        QChar c;
+        in >> c;
+        if( c == '\n' ) // strip line leading whitespace (otherwise keep it, may be empty text between tags)
+            {
+            in.skipWhiteSpace();
+            // Stop only if nothing follows the skipped whitespace. Testing atEnd()
+            // after the read instead would also throw away a valid character that
+            // happens to be the very last one of the input.
+            if( in.atEnd())
+                break;
+            in >> c;
+            }
+        if( c == '<' )
+            { // tag: collect everything up to and including the closing '>'
+            QString str = c;
+            while( !in.atEnd())
+                {
+                in >> c;
+                str.append( c );
+                if( c == '>' )
+                    break;
+                }
+            ret.append( str );
+            }
+        else
+            { // text: collect everything up to (but excluding) the next '<' or '>'
+            QString str = c;
+            while( !in.atEnd())
+                {
+//                if( c == '\n' )
+//                    break;
+                in >> c;
+                if( c == '<' || c == '>' )
+                    {
+                    in.seek( in.pos() - 1 ); // one char back
+                    break;
+                    }
+                str.append( c );
+                }
+            ret.append( str );
+            }
+        }
+    return ret;
+    }
+
+// Returns the element name of an opening or closing tag token: the text between
+// the leading '<' (or "</") and the first whitespace, or, if the token contains
+// no whitespace, the trailing '>' / "/>".
+static QString tagName( const QString& token )
+    {
+    assert( token.length() >= 3 && token[ 0 ] == '<' );
+    int start = ( token[ 1 ] == '/' ? 2 : 1 );
+    // The name ends at the first whitespace of any kind: attributes may follow
+    // after a tab or a line break, not only after a space.
+    int after = -1;
+    for( int i = start; i < token.length(); ++i )
+        {
+        if( token[ i ].isSpace())
+            {
+            after = i;
+            break;
+            }
+        }
+    if( after == -1 )
+        {
+        if( token[ token.length() - 2 ] == '/' )
+            after = token.length() - 2; // strip trailing />
+        else
+            after = token.length() - 1; // strip trailing >
+        }
+    return token.mid( start, after - start );
+    }
+
+// Classifies one token produced by readTokens(). Anything that does not start
+// with '<' is Text; a token starting with '<' that matches none of the tag
+// shapes below is an Error.
+static TokenType analyzeToken( const QString& token )
+    {
+    if( token.isEmpty())
+        return Error;
+    if( token[ 0 ] == '<' )
+        {
+        if( token.length() >= 4 // <??>
+            && ( token[ 1 ] == '?' || token[ 1 ] == '!' ))
+            {
+            if( token[ token.length() - 1 ] != '>' )
+                return Error;
+            if( token[ 1 ] == '?' ) // <? ... ?> must end with ?>
+                return token[ token.length() - 2 ] == '?' ? OtherTag : Error;
+            // <! ... > does not end with '!', so it cannot be checked like <? ... ?>.
+            if( token.startsWith( "<!--" )) // a comment must be terminated by -->
+                return ( token.length() >= 7 && token.endsWith( "-->" )) ? OtherTag : Error;
+            return OtherTag; // <!DOCTYPE ...>, <![CDATA[ ... ]]> and the like
+            }
+        if( token.length() >= 4 // <a/>
+            && token[ token.length() - 1 ] == '>' && token[ token.length() - 2 ] == '/' )
+            {
+            return StandaloneTag;
+            }
+        if( token.length() >= 4 // </a>
+            && token[ 1 ] == '/' && token[ token.length() - 1 ] == '>' )
+            {
+            return ClosingTag;
+            }
+        if( token.length() >= 3 // <a>
+            && token[ token.length() - 1 ] == '>' )
+            {
+            return OpeningTag;
+            }
+        return Error;
+        }
+    return Text;
+    }
+
+// Builds the indentation prefix: a string made of 'size' space characters.
+static QString indent( int size )
+    {
+    const QString padding( size, QLatin1Char( ' ' ));
+    return padding;
+    }
+
+// Terminates the current output line if one is still pending, and clears the
+// pending flag; does nothing otherwise.
+static void ensureNewLine( QTextStream& out, bool* needNewLine )
+    {
+    if( !*needNewLine )
+        return;
+    *needNewLine = false;
+    out << endl;
+    }
+
+static bool format( QTextStream& in, QTextStream& out ) // pretty-prints the tokens of 'in' to 'out'; returns false if opening tags are left unclosed at the end
+ {
+#define INDENT indent( stack.size() * 2 )
+ QStack< QString > stack; // names of the currently open tags; INDENT is two spaces per entry
+ QStringList tokens = readTokens( in );
+ bool needNewLine = false; // true while the current output line has not been terminated yet
+ while( !tokens.isEmpty())
+ {
+ QString token = tokens.takeFirst();
+#if 0
+ static const char* const types[] = { "Error", "Other", "Opening", "Closing", "Standalone", "Text" };
+ QTextStream( stderr ) << "TOKEN(" << types[ analyzeToken( token ) ] << "): " << token << endl;
+#endif
+ switch( analyzeToken( token ))
+ {
+ case OpeningTag: // starts a new line; the line is left open so text or the closing tag can follow on it
+ ensureNewLine( out, &needNewLine );
+ out << INDENT << token;
+ needNewLine = true;
+ stack.push( tagName( token )); // pushed after printing, so the tag itself is indented at the parent's depth
+ break;
+ case ClosingTag:
+ {
+ QString tag = tagName( token );
+ if( stack.isEmpty())
+ {
+ ensureNewLine( out, &needNewLine );
+ out << "<!-- ERROR: missing opening tag -->" << endl;
+ }
+ else if( stack.top() != tag )
+ { // TODO or try to find it in the stack?
+ ensureNewLine( out, &needNewLine ); // note: the stack is left untouched in this branch
+ out << "<!-- ERROR: opening/closing tag mismatch -->" << endl;
+ }
+ else
+ {
+ stack.pop(); // popped before printing, so the closing tag gets the same depth as its opening tag
+ }
+ if( !needNewLine ) // not line continuation
+ out << INDENT;
+ out << token << endl;
+ needNewLine = false;
+ break;
+ }
+ case StandaloneTag: // printed on a line of its own at the current depth
+ ensureNewLine( out, &needNewLine );
+ out << INDENT << token << endl;
+ break;
+ case OtherTag: // printed on a line of its own at the current depth
+ ensureNewLine( out, &needNewLine );
+ out << INDENT << token << endl;
+ break;
+ case Text: // appended to the pending line if there is one, otherwise starts an indented line
+ if( !needNewLine ) // not line continuation
+ out << INDENT;
+ out << token;
+ needNewLine = true;
+ break;
+ case Error: // the unparsable token is emitted inside an ERROR comment
+ ensureNewLine( out, &needNewLine );
+ out << "<!-- ERROR: cannot parse: " << token << "-->" << endl;
+ break;
+ }
+ }
+ if( needNewLine ) // terminate the last line
+ out << endl;
+ if( stack.size() == 0 ) // every opened tag has been closed
+ return true;
+ out << "<!-- ERROR: missing closing tags -->" << endl;
+ return false;
+#undef INDENT
+ }
+
+int main( int argc, char* argv[] ) // exit codes: 0 ok, 1 format() returned false, 2 usage, 3 input unreadable, 4 output unwritable
+ {
+ if( argc != 2 ) // exactly one argument: the file to format
+ {
+ QTextStream( stderr ) << "Usage: " << argv[ 0 ] << " <file>" << endl;
+ return 2;
+ }
+ QFile fin( argv[ 1 ] );
+ if( !fin.open( QIODevice::ReadOnly ))
+ {
+ QTextStream( stderr ) << "File " << argv[ 1 ] << " cannot be read" << endl;
+ return 3;
+ }
+ QTextStream in( &fin );
+ QFile fout;
+ if( !isatty( 1 )) // stdout is not a terminal: write there. NOTE(review): isatty() is declared in <unistd.h>, which this file does not include
+ fout.open( stdout, QIODevice::WriteOnly );
+ else
+ { // stdout is a terminal: write to "<input>.format.xml" instead
+ QString fname( QString( argv[ 1 ] ) + ".format.xml" );
+ fout.setFileName( fname );
+ if( !fout.open( QIODevice::WriteOnly ))
+ {
+ QTextStream( stderr ) << "Cannot write to " << fname << endl;
+ return 4;
+ }
+ }
+ QTextStream out( &fout );
+ in.setCodec( "UTF-8" ); // both streams are read and written as UTF-8
+ out.setCodec( "UTF-8" );
+ return format( in, out ) ? 0 : 1;
+ }
More information about the Libreoffice-commits
mailing list