[Libreoffice-commits] core.git: Branch 'feature/ooxml-analyze' - bin/ooxml-analyze.py

Gülşah Köse (via logerrit) logerrit at kemper.freedesktop.org
Wed May 26 13:58:05 UTC 2021


 bin/ooxml-analyze.py |   31 ++++++++++++++++++++++---------
 1 file changed, 22 insertions(+), 9 deletions(-)

New commits:
commit a8b521dc0f8e810f97630551406ccd8d1590371f
Author:     Gülşah Köse <gulsah.kose at collabora.com>
AuthorDate: Wed May 26 16:57:23 2021 +0300
Commit:     Gülşah Köse <gulsah.kose at collabora.com>
CommitDate: Wed May 26 16:57:23 2021 +0300

    Create a sub result for each ooxml document in output directory
    
    Change-Id: Ibbb366725d344f8e44c085ced60c35e190f98a9d

diff --git a/bin/ooxml-analyze.py b/bin/ooxml-analyze.py
index 3f9b0e8bdad1..8dbfe8cacd0b 100755
--- a/bin/ooxml-analyze.py
+++ b/bin/ooxml-analyze.py
@@ -35,13 +35,19 @@ def main(argv):
         extracted_files_dir = os.path.join(outputdir, 'extractedfiles')
         extract_files(inputdir, extracted_files_dir)
 
-        count_elements(extracted_files_dir, result_list)
+        # create separate result files for each ooxml document as <document name>.result in output directory
+        for ext_dir in get_list_of_subdir(extracted_files_dir):
+            i = ext_dir.rfind('/')
+            sub_result_name = ext_dir[i+1:] + ".result"
+            sub_result_list = []
+            count_elements(ext_dir, sub_result_list)
+            sub_result_path = os.path.join(outputdir, sub_result_name)
+            with open(sub_result_path, "w") as log_file:
+                pprint.pprint(sub_result_list, log_file)
     else:
         # use user defined directory path for extracted ooxml files.
         count_elements(extracted_files_dir_by_user, result_list)
 
-    pprint.pprint(result_list)
-
 # unzip all ooxml files into the given path
 def extract_files(inputdir, extracted_files_dir):
 
@@ -49,9 +55,6 @@ def extract_files(inputdir, extracted_files_dir):
     if(os.path.exists(extracted_files_dir)):
         shutil.rmtree(extracted_files_dir)
 
-    # holds directory names for each ooxml document in extracted files dir.
-    counter = 1
-
     # unzip files into the extracted files directory
     for filename in os.listdir(inputdir):
         if (filename.endswith(".pptx") or       \
@@ -59,12 +62,10 @@ def extract_files(inputdir, extracted_files_dir):
             filename.endswith(".xlsx")) and not \
             filename.startswith("~"):
             filepath = os.path.join(inputdir, filename)
-            extracted_file_path = os.path.join(extracted_files_dir, str(counter))
+            extracted_file_path = os.path.join(extracted_files_dir, filename)
 
             with ZipFile(filepath) as zipObj:
                 zipObj.extractall(extracted_file_path)
-
-            counter +=1
         else:
             continue
 
@@ -158,5 +159,17 @@ def get_list_of_files(directory_name):
 
     return all_files
 
+def get_list_of_subdir(directory_name):
+
+    list_of_file = os.listdir(directory_name)
+    subdirs = list()
+
+    for filename in list_of_file:
+        full_path = os.path.join(directory_name, filename)
+        if os.path.isdir(full_path):
+            subdirs.append(full_path)
+
+    return subdirs
+
 if __name__ == "__main__":
     main(sys.argv[1:])


More information about the Libreoffice-commits mailing list