[Libreoffice-commits] dev-tools.git: scripts/rtf-anonymiser.py
Miklos Vajna
vmiklos at suse.cz
Mon Jun 10 09:22:56 PDT 2013
scripts/rtf-anonymiser.py | 113 ++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 113 insertions(+)
New commits:
commit d2dcf2d511d8d6c83fba8c1e33b09fc5e6242fb6
Author: Miklos Vajna <vmiklos at suse.cz>
Date: Mon Jun 10 18:21:39 2013 +0200
import rtf-anonymiser
Handy when someone mails you some bugdoc and you want to convert it to
an fdo attachment, but the content is sensitive.
Change-Id: I4f4615c0c37279064e4e0d814a04fc55525c73a5
diff --git a/scripts/rtf-anonymiser.py b/scripts/rtf-anonymiser.py
new file mode 100755
index 0000000..de8a596
--- /dev/null
+++ b/scripts/rtf-anonymiser.py
@@ -0,0 +1,113 @@
+#!/usr/bin/env python2
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+
+
+import sys, getopt
+
+def usage(code):
+ print("Usage: %s [-i|--inline] input.rtf" % sys.argv[0])
+ sys.exit(code)
+
+class RtfParser:
+ """This is meant to be a lightweight generic RTF parser. The purpose of
+ this class is to provide methods to be overloaded for subclasses."""
+ def __init__(self, input, inline):
+ self.sock = open(input)
+ self.out = []
+ self.hexCount = 0
+
+ while True:
+ ch = self.sock.read(1)
+ if not len(ch):
+ break
+ if ch in ("{", "}", chr(0x0d), chr(0x0a)):
+ self.out.append(ch)
+ elif ch == "\\":
+ self.handleKeyword()
+ else:
+ self.handleChar(ch)
+
+ self.sock.close()
+
+ if not inline:
+ sys.stdout.write("".join(self.out))
+ else:
+ self.sock = open(input, "w")
+ self.sock.write("".join(self.out))
+ self.sock.close()
+
+ def handleKeyword(self):
+ ch = self.sock.read(1)
+ if not len(ch):
+ return
+ self.out.append("\\")
+ if not ch.isalpha():
+ if ch == "'":
+ self.hexCount = 2
+ self.out.append(ch)
+ return
+ while ch.isalpha():
+ self.out.append(ch)
+ ch = self.sock.read(1)
+ if ch == "-":
+ self.out.append(ch)
+ ch = self.sock.read(1)
+ if ch.isdigit():
+ while ch.isdigit():
+ self.out.append(ch)
+ ch = self.sock.read(1)
+ if ch == " ":
+ self.handleChar(ch)
+ else:
+ self.sock.seek(self.sock.tell() - 1)
+
+ def handleHexChar(self, ch):
+ self.hexCount -= 1
+ self.out.append(ch)
+
+ def handleChar(self, ch):
+ if self.hexCount > 0:
+ self.handleHexChar(ch)
+ else:
+ self.out.append(ch)
+
+class RtfAnonymiser(RtfParser):
+ """This class only overloads handleChar() -- hopefully this removes all
+ sensitive contents."""
+ def __init__(self, input, inline):
+ RtfParser.__init__(self, input, inline)
+
+ def handleChar(self, ch):
+ if self.hexCount > 0:
+ self.handleHexChar(ch)
+ else:
+ if ch.isupper():
+ self.out.append("X")
+ else:
+ self.out.append("x")
+
+# opt parsing
+inline = False
+argv = sys.argv[1:]
+try:
+ opts, args = getopt.getopt(argv, "i", ["inline"])
+except getopt.GetoptError:
+ usage(0)
+optind = 0
+for opt, arg in opts:
+ if opt in ("-i", "--inline"):
+ inline = True
+ optind += 1
+
+if optind < len(argv):
+ input = argv[optind]
+else:
+ usage(0)
+
+RtfAnonymiser(input, inline)
+
+# vim:set filetype=python shiftwidth=4 softtabstop=4 expandtab:
More information about the Libreoffice-commits
mailing list