[Libreoffice-commits] dev-tools.git: 4 commits - prettyprint/formatrtf prettyprint/formatxml.cpp prettyprint/Makefile prettyprint/prettyprint

Miklos Vajna vmiklos at collabora.co.uk
Sat Sep 21 11:26:56 PDT 2013


 prettyprint/Makefile      |    2 
 prettyprint/formatrtf     |   60 ++++++++++
 prettyprint/formatxml.cpp |  263 ++++++++++++++++++++++++++++++++++++++++++++++
 prettyprint/prettyprint   |   19 +++
 4 files changed, 344 insertions(+)

New commits:
commit 9c7dbe3d46107225f35097fbfa5edc08f69f070b
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Sat Sep 21 20:26:12 2013 +0200

    prettyprint: add shell wrapper that invokes the right sub-tool
    
    Change-Id: Ifa52d88697fe87b42f31c5878e1a68c6d36b5935

diff --git a/prettyprint/prettyprint b/prettyprint/prettyprint
new file mode 100755
index 0000000..0f3e698
--- /dev/null
+++ b/prettyprint/prettyprint
@@ -0,0 +1,19 @@
+#!/bin/bash
+# Filter: spool stdin to a temp file, detect its MIME type, run the matching formatter.
+file=$(mktemp)
+cat > "$file"
+kind=$(file -i "$file" |sed 's/.*: \(.*\); .*/\1/')
+
+mydir=$(dirname "$0")
+[ -h "$0" ] && mydir=$(dirname "$(readlink "$0")") # invoked via a symlink: use the link target's directory
+# Dispatch on the detected type; an unknown type is reported on stderr so stdout stays clean.
+if [ "$kind" == "text/rtf" ]; then
+	"$mydir/formatrtf" "$file"
+elif [ "$kind" == "application/xml" ]; then
+	#xmllint --format --recover $file 2>/dev/null
+	"$mydir/formatxml" "$file"
+else
+	echo "unknown type: $kind" >&2
+fi
+
+rm -f "$file"
commit 9d8b7bea29769971db1f7f02de85855186aa866d
Author: Miklos Vajna <vmiklos at collabora.co.uk>
Date:   Sat Sep 21 20:20:54 2013 +0200

    prettyprint: add formatrtf tool
    
    This was hosted as
    http://cgit.freedesktop.org/~vmiklos/lo-test-files/plain/writer/prettyprint.py
    previously.
    
    Change-Id: I97c28caf71d65b0b03bfacb36644e34af599afb4

diff --git a/prettyprint/formatrtf b/prettyprint/formatrtf
new file mode 100755
index 0000000..95e2efe
--- /dev/null
+++ b/prettyprint/formatrtf
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import sys, getopt
+
+def usage(code):
+	print("Usage: %s [-i|--inline] input.rtf" % sys.argv[0])
+	print()
+	print("The pretty-printer acts as a filter: it just inserts newlines\n" + \
+		"around '{' and '}' characters for better readability.\n\n" + \
+		"The -i switch makes the filter in-place, instead of just\n" + \
+		"reading the input and outputting the result to stdout.")
+	sys.exit(code)
+
+# opt parsing
+inline = False
+argv = sys.argv[1:]
+try:
+	opts, args = getopt.getopt(argv, "i", ["inline"])
+except getopt.GetoptError:
+	usage(1)
+optind = 0
+for opt, arg in opts:
+	if opt in ("-i", "--inline"):
+		inline = True
+	optind += 1
+
+if optind < len(argv):
+	input = argv[optind]
+else:
+	usage(0)
+
+sock = open(input)
+
+out = []
+
+while True:
+	char = sock.read(1)
+	after = None
+	if not len(char):
+		break
+	if ord(char) == 0x0d:
+		continue
+	if char == "{" and len(out) and out[-1][-1] not in ("\n", "\\"):
+		out.append("\n")
+	elif char == "}" and out[-1][-1] != "\\":
+		after = "\n"
+	if char == "\n" and len(out) and out[-1][-1] == "\n":
+		pass
+	else:
+		out.append(char)
+	if after:
+		out.append(after)
+
+if not inline:
+	sys.stdout.write("".join(out))
+else:
+	sock.close()
+	sock = open(input, "w")
+	sock.write("".join(out))
+	sock.close()
commit 3bf9eb695d48a64624c0e47fc70cbc9610e69c19
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Wed Nov 14 15:20:51 2012 +0100

    formatxml: always write to stdout

diff --git a/prettyprint/Makefile b/prettyprint/Makefile
new file mode 100644
index 0000000..04cbd23
--- /dev/null
+++ b/prettyprint/Makefile
@@ -0,0 +1,2 @@
+formatxml: formatxml.cpp # builds the XML pretty-printer; compiles and links against QtCore
+	g++ -Wall -I/usr/include/QtCore -I/usr/include formatxml.cpp -lQtCore -L/usr/lib -o formatxml
diff --git a/prettyprint/formatxml.cpp b/prettyprint/formatxml.cpp
index d514d94..938cc32 100644
--- a/prettyprint/formatxml.cpp
+++ b/prettyprint/formatxml.cpp
@@ -255,18 +255,7 @@ int main( int argc, char* argv[] )
         }
     QTextStream in( &fin );
     QFile fout;
-    if( !isatty( 1 ))
-        fout.open( stdout, QIODevice::WriteOnly );
-    else
-        {
-        QString fname( QString( argv[ 1 ] ) + ".format.xml" );
-        fout.setFileName( fname );
-        if( !fout.open( QIODevice::WriteOnly ))
-            {
-            QTextStream( stderr ) << "Cannot write to " << fname << endl;
-            return 4;
-            }
-        }
+    fout.open( stdout, QIODevice::WriteOnly );
     QTextStream out( &fout );
     in.setCodec( "UTF-8" );
     out.setCodec( "UTF-8" );
commit e403bd8ceb1b058a1176b5f169e083a884b20d2d
Author: Miklos Vajna <vmiklos at suse.cz>
Date:   Wed Nov 14 15:18:59 2012 +0100

    add Lubos' formatxml that fails to build ATM

diff --git a/prettyprint/formatxml.cpp b/prettyprint/formatxml.cpp
new file mode 100644
index 0000000..d514d94
--- /dev/null
+++ b/prettyprint/formatxml.cpp
@@ -0,0 +1,274 @@
+/*****************************************************************
+
+Copyright (C) 2010 Lubos Lunak <l.lunak at suse.cz>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+******************************************************************/
+
+/*
+
+This is a tool that nicely formats an XML file (e.g. the .docx or .odt formats
+are basically everything in a single line, which is a pain to analyze).
+Unlike 'xmllint --format', this works even if the XML is corrupt, and unlike
+xmllint's --recover it does not alter the XML itself in any way (or at least
+tries not to; complain if there's a problem).
+
+If there are problems with the XML, there is also a comment inserted in the output
+file that warns about it (so that the problem is easy to spot).
+
+To compile (libQtCore from Qt4 is required, $QTDIR is the location, usually /usr):
+g++ -Wall -I$QTDIR/include/QtCore -I$QTDIR/include formatxml.cpp -lQtCore -L$QTDIR/lib -o formatxml
+
+The given file is written to stdout if it's redirected, otherwise it's written
+to file with .format.xml appended.
+
+*/
+
+#include <assert.h>
+#include <qfile.h>
+#include <qstack.h>
+#include <qstringlist.h>
+#include <qtextstream.h>
+#include <stdio.h>
+
+enum TokenType // classification that analyzeToken() assigns to each token
+    {
+    Error, // empty token, or a tag that analyzeToken() cannot classify
+    OtherTag, // comments, <? ... ?>
+    OpeningTag, // <foo>
+    ClosingTag, // </foo>
+    StandaloneTag, // <foo/>
+    Text // whatever text outside of tags
+    };
+
+static QStringList readTokens( QTextStream& in ) // reads the stream and splits it into tag tokens ("<...>") and text tokens
+    {
+    QStringList ret;
+    while( !in.atEnd())
+        {
+        QChar c;
+        in >> c;
+        if( c == '\n' ) // drop the newline and the whitespace that follows it (other whitespace is kept, it may be empty text between tags)
+            {
+            in.skipWhiteSpace();
+            in >> c;
+            }
+        if( in.atEnd()) // NOTE(review): also true right after reading the final character, so that character is discarded - confirm intended
+            break;
+        if( c == '<' ) // tag token: collect up to and including the next '>'
+            {
+            QString str = c;
+            while( !in.atEnd())
+                {
+                in >> c;
+                str.append( c );
+                if( c == '>' )
+                    break;
+                }
+            ret.append( str );
+            }
+        else // text token: collect up to, but not including, the next '<' or '>'
+            {
+            QString str = c;
+            while( !in.atEnd())
+                {
+//                if( c == '\n' )
+//                    break;
+                in >> c;
+                if( c == '<' || c == '>' )
+                    {
+                    in.seek( in.pos() - 1 ); // one char back, so the next iteration sees the delimiter again
+                    break;
+                    }
+                str.append( c );
+                }
+            ret.append( str );
+            }
+        }
+    return ret;
+    }
+
+static QString tagName( const QString& token ) // returns the name part of a tag token: after "<" or "</", up to the first space or the closing "/>" or ">"
+    {
+    assert( token.length() >= 3 && token[ 0 ] == '<' );
+    int start = ( token[ 1 ] == '/' ? 2 : 1 ); // skip "</" for closing tags, "<" otherwise
+    int after = token.indexOf( ' ' ); // the name ends at the first space, if there is one
+    if( after == -1 )
+        {
+        if( token[ token.length() - 2 ] == '/' )
+            after = token.length() - 2; // strip trailing />
+        else
+            after = token.length() - 1; // strip trailing >
+        }
+    return token.mid( start, after - start );
+    }
+
+static TokenType analyzeToken( const QString& token ) // classifies one token; anything not starting with '<' is Text
+    {
+    if( token.isEmpty())
+        return Error;
+    if( token[ 0 ] == '<' )
+        {
+        if( token.length() >= 4 // <??>
+            && ( token[ 1 ] == '?' || token[ 1 ] == '!' ))
+            { // <?...?> must close with "?>"; <!...> (comments, DOCTYPE, CDATA) only needs the final '>'
+            if( token[ token.length() - 1 ] == '>' && ( token[ 1 ] == '!' || token[ token.length() - 2 ] == '?' ))
+                return OtherTag;
+            else
+                return Error;
+            }
+        if( token.length() >= 4 // <a/>
+            && token[ token.length() - 1 ] == '>' && token[ token.length() - 2 ] == '/' )
+            {
+            return StandaloneTag;
+            }
+        if( token.length() >= 4 // </a>
+            && token[ 1 ] == '/' && token[ token.length() - 1 ] == '>' )
+            {
+            return ClosingTag;
+            }
+        if( token.length() >= 3 // <a>
+            && token[ token.length() - 1 ] == '>' )
+            {
+            return OpeningTag;
+            }
+        return Error; // starts with '<' but is not terminated by '>'
+        }
+    return Text;
+    }
+
+static QString indent( int size ) // returns a string made of `size` space characters
+    {
+    return QString().fill( ' ', size );
+    }
+
+static void ensureNewLine( QTextStream& out, bool* needNewLine ) // terminates the pending output line, if any, and clears the flag
+    {
+    if( *needNewLine )
+        {
+        out << endl;
+        *needNewLine = false;
+        }
+    }
+
+static bool format( QTextStream& in, QTextStream& out ) // writes the tokens of `in` to `out`, indented by nesting depth; returns true when no opened tag is left unclosed at the end
+    {
+#define INDENT indent( stack.size() * 2 )
+    QStack< QString > stack; // names of the currently open tags, innermost on top
+    QStringList tokens = readTokens( in );
+    bool needNewLine = false; // true while the current output line has not been terminated yet
+    while( !tokens.isEmpty())
+        {
+        QString token = tokens.takeFirst();
+#if 0
+        static const char* const types[] = { "Error", "Other", "Opening", "Closing", "Standalone", "Text" };
+        QTextStream( stderr ) << "TOKEN(" << types[ analyzeToken( token ) ] << "): " << token << endl;
+#endif
+        switch( analyzeToken( token ))
+            {
+            case OpeningTag: // printed without a newline so that following text and the closing tag can share the line
+                ensureNewLine( out, &needNewLine );
+                out << INDENT << token;
+                needNewLine = true;
+                stack.push( tagName( token ));
+                break;
+            case ClosingTag:
+                {
+                QString tag = tagName( token );
+                if( stack.isEmpty())
+                    {
+                    ensureNewLine( out, &needNewLine );
+                    out << "<!-- ERROR: missing opening tag -->" << endl;
+                    }
+                else if( stack.top() != tag )
+                    { // TODO or try to find it in the stack? (the stack is left untouched on a mismatch)
+                    ensureNewLine( out, &needNewLine );
+                    out << "<!-- ERROR: opening/closing tag mismatch -->" << endl;
+                    }
+                else
+                    {
+                    stack.pop(); // pop before printing so the closing tag gets the same indent as its opening tag
+                    }
+                if( !needNewLine ) // not line continuation
+                    out << INDENT;
+                out << token << endl;
+                needNewLine = false;
+                break;
+                }
+            case StandaloneTag: // always on a line of its own
+                ensureNewLine( out, &needNewLine );
+                out << INDENT << token << endl;
+                break;
+            case OtherTag: // always on a line of its own
+                ensureNewLine( out, &needNewLine );
+                out << INDENT << token << endl;
+                break;
+            case Text: // appended to the pending line if there is one, otherwise starts an indented line
+                if( !needNewLine ) // not line continuation
+                    out << INDENT;
+                out << token;
+                needNewLine = true;
+                break;
+            case Error: // the offending token is echoed inside a warning comment
+                ensureNewLine( out, &needNewLine );
+                out << "<!-- ERROR: cannot parse: " << token << "-->" << endl;
+                break;
+            }
+        }
+    if( needNewLine )
+        out << endl;
+    if( stack.size() == 0 )
+        return true;
+    out << "<!-- ERROR: missing closing tags -->" << endl;
+    return false;
+#undef INDENT        
+    }
+
+int main( int argc, char* argv[] ) // exit status: 0 ok, 1 format() reported unclosed tags, 2 usage error, 3 input unreadable, 4 output unwritable
+    {
+    if( argc != 2 )
+        {
+        QTextStream( stderr ) << "Usage: " << argv[ 0 ] << " <file>" << endl;
+        return 2;
+        }
+    QFile fin( argv[ 1 ] );
+    if( !fin.open( QIODevice::ReadOnly ))
+        {
+        QTextStream( stderr ) << "File " << argv[ 1 ] << " cannot be read" << endl;
+        return 3;
+        }
+    QTextStream in( &fin );
+    QFile fout;
+    if( !isatty( 1 )) // NOTE(review): isatty() is declared in <unistd.h>, which is not among this file's includes
+        fout.open( stdout, QIODevice::WriteOnly ); // stdout is not a terminal: write the result there
+    else
+        { // stdout is a terminal: write to "<input>.format.xml" instead
+        QString fname( QString( argv[ 1 ] ) + ".format.xml" );
+        fout.setFileName( fname );
+        if( !fout.open( QIODevice::WriteOnly ))
+            {
+            QTextStream( stderr ) << "Cannot write to " << fname << endl;
+            return 4;
+            }
+        }
+    QTextStream out( &fout );
+    in.setCodec( "UTF-8" );
+    out.setCodec( "UTF-8" );
+    return format( in, out ) ? 0 : 1;
+    }


More information about the Libreoffice-commits mailing list