[Libreoffice-commits] dev-tools.git: qa/document_analyser.py
Ahlaam Rafiq (via logerrit)
logerrit at kemper.freedesktop.org
Fri Sep 17 11:25:37 UTC 2021
qa/document_analyser.py | 79 ++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 79 insertions(+)
New commits:
commit 71ffc7eba9137e94a96b72fed762cc1c9a82baeb
Author: Ahlaam Rafiq <ahlraf16 at gmail.com>
AuthorDate: Sun Apr 4 13:17:48 2021 +0300
Commit: Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>
CommitDate: Fri Sep 17 13:25:19 2021 +0200
tdf#124141 add document analyser
Co-authored-by: Sebastian O. <gelberwolf at posteo.de>
Change-Id: Ie1e3474d020721538d6618addf7094b3307d9f5c
Reviewed-on: https://gerrit.libreoffice.org/c/dev-tools/+/113567
Reviewed-by: Xisco Fauli <xiscofauli at libreoffice.org>
Reviewed-by: Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>
Tested-by: Ilmari Lauhakangas <ilmari.lauhakangas at libreoffice.org>
diff --git a/qa/document_analyser.py b/qa/document_analyser.py
new file mode 100644
index 0000000..06bc98a
--- /dev/null
+++ b/qa/document_analyser.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+#
+# This file is part of the LibreOffice project.
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at https://mozilla.org/MPL/2.0/.
+#
+
+"""
+Document analyser uses the odfpy module: https://pypi.org/project/odfpy/
+
+This script prints:
+bookmark count, cell count, changetracking count, character count,
+comment count, draw count, frame count, hyperlink count,
+image count, non-whitespace character count, object count, OLE object count,
+page count, paragraph count, row count, sentence count,
+syllable count, table count, textbox count, word count, and paragraph styles.
+
+"""
+
+import odf
+from odf.namespaces import TEXTNS
+from odf.element import Element
+from odf.opendocument import load
+from odf import text,meta,office,draw
+
+
+print("Enter filename: ")  # print() ends the prompt with a newline, so the name is typed on the next line
+filename=input()
+# load is odf.opendocument.load (imported above); the rest of the script queries doc.
+doc=load(filename)
+
+#--------------------document statistics from the odf.meta module--------------------
+# Nothing follows the heading when doc contains no meta.DocumentStatistic element.
+print("\nDOCUMENT STATISTICS\n")
+for stat in doc.getElementsByType(meta.DocumentStatistic):
+    print("Cell count:",stat.getAttribute('cellcount'))
+    print("Character count:",stat.getAttribute('charactercount'))
+    print("Draw count:",stat.getAttribute('drawcount'))
+    print("Frame count:",stat.getAttribute('framecount'))
+    print("Image count:",stat.getAttribute('imagecount'))
+    print("Non-whitespace character count:",stat.getAttribute('nonwhitespacecharactercount'))
+    print("Object count:",stat.getAttribute('objectcount'))
+    print("Object linking and embedding (OLE) object count:",stat.getAttribute('oleobjectcount'))
+    print("Page count:",stat.getAttribute('pagecount'))
+    print("Paragraph count:",stat.getAttribute('paragraphcount'))
+    print("Row count:",stat.getAttribute('rowcount'))
+    print("Sentence count:",stat.getAttribute('sentencecount'))
+    print("Syllable count:",stat.getAttribute('syllablecount'))
+    print("Table count:",stat.getAttribute('tablecount'))
+    print("Word count:",stat.getAttribute('wordcount'))
+
+#--------------------type counter for attributes not covered by odf.meta.DocumentStatistic--------------------
+def type_counter(doc,type):
+    """Return the number of elements that doc.getElementsByType(type) yields."""
+    # `type` shadows the builtin inside this function; the name is kept for keyword callers.
+    found=doc.getElementsByType(type)
+    return sum(1 for _ in found)
+
+types={  # printed label -> element class handed to type_counter
+    'Bookmark':text.Bookmark,
+    'Changetracking':text.FormatChange,  # NOTE(review): presumably matches format changes only; confirm text.ChangedRegion was not intended
+    'Comment':office.Annotation,
+    'Hyperlink':text.A,
+    'Textbox':draw.TextBox
+}
+# One "<label> count: <n>" line is printed per entry of the table above.
+for label,element_class in types.items():
+    print(label,'count:',type_counter(doc,element_class))
+
+#--------------------paragraph styles--------------------
+def paragraph_style(doc):
+    """Print the 'stylename' attribute of every text.P element in doc, numbered from 1."""
+    paragraphs=doc.getElementsByType(text.P)
+    for number,paragraph in enumerate(paragraphs,start=1):
+        print('Paragraph',number,'style:',paragraph.getAttribute('stylename'))
+
+paragraph_style(doc)
More information about the Libreoffice-commits
mailing list