[Libreoffice-commits] core.git: Branch 'feature/ooxml-analyze' - bin/compare-ooxml-analyze-results.py
Gülşah Köse (via logerrit)
logerrit at kemper.freedesktop.org
Tue Jun 8 05:51:21 UTC 2021
bin/compare-ooxml-analyze-results.py | 87 +++++++++++++++++++++++++++--------
1 file changed, 68 insertions(+), 19 deletions(-)
New commits:
commit 4bf4b28b2d6316a34d0966d1109b76162aa0b336
Author: Gülşah Köse <gulsah.kose at collabora.com>
AuthorDate: Tue Jun 8 08:50:37 2021 +0300
Commit: Gülşah Köse <gulsah.kose at collabora.com>
CommitDate: Tue Jun 8 08:50:37 2021 +0300
Compare texts
Change-Id: I0aee62ffa79dbf716bb9ec2af2bc06078e1eb566
diff --git a/bin/compare-ooxml-analyze-results.py b/bin/compare-ooxml-analyze-results.py
index d0d89fc30a31..fdd4fa8a0243 100644
--- a/bin/compare-ooxml-analyze-results.py
+++ b/bin/compare-ooxml-analyze-results.py
@@ -1,6 +1,6 @@
#!/usr/bin/python
-import sys, getopt, os, pprint, ast
+import sys, getopt, os, pprint, ast, difflib
original_results_dir = ''
saved_results_dir = ''
@@ -24,13 +24,18 @@ def main(argv):
global saved_results_dir
saved_results_dir = arg
- # takes file list produced by ooxml-analyze.py tool.
- original_result_files = get_list_of_files(original_results_dir)
- saved_result_files = get_list_of_files(saved_results_dir)
+ # takes result file list produced by ooxml-analyze.py tool. <filename>.result
+ original_result_files = get_list_of_result_files(original_results_dir)
+ saved_result_files = get_list_of_result_files(saved_results_dir)
+ compare_results(original_result_files, saved_result_files)
- compare(original_result_files, saved_result_files)
+ # takes concanated texts file list produced by ooxml-analyze.py tool. <filename>.text
+ original_text_files = get_list_of_text_files(original_results_dir)
+ saved_text_files = get_list_of_text_files(saved_results_dir)
+ compare_texts(original_text_files, saved_text_files)
-def get_list_of_files(directory_name):
+# collects <filename>.result files
+def get_list_of_result_files(directory_name):
list_of_file = os.listdir(directory_name)
all_files = list()
@@ -38,16 +43,32 @@ def get_list_of_files(directory_name):
for filename in list_of_file:
full_path = os.path.join(directory_name, filename)
if os.path.isdir(full_path):
- all_files = all_files + get_list_of_files(full_path)
+ all_files = all_files + get_list_of_result_files(full_path)
else:
- all_files.append(full_path)
+ if filename.endswith(".result"):
+ all_files.append(full_path)
+
+ return all_files
+
+# collecsts <filename>.text files
+def get_list_of_text_files(directory_name):
+
+ list_of_file = os.listdir(directory_name)
+ all_files = list()
+
+ for filename in list_of_file:
+ full_path = os.path.join(directory_name, filename)
+ if os.path.isdir(full_path):
+ all_files = all_files + get_list_of_result_files(full_path)
+ else:
+ if filename.endswith(".text"):
+ all_files.append(full_path)
return all_files
# compares the elements the original results and and after saved results.
-def compare(original_result_files, saved_result_files):
- ind = 1
+def compare_results(original_result_files, saved_result_files):
for original_filepath in original_result_files:
saved_filepath = get_corresponding_file(original_filepath)
if saved_filepath == '':
@@ -57,11 +78,31 @@ def compare(original_result_files, saved_result_files):
original_result_list = create_list_from_result_file(original_filepath)
saved_result_list = create_list_from_result_file(saved_filepath)
- check_text_contents(original_result_list, saved_result_list)
+ check_text_contents(original_result_list, saved_result_list, original_filepath)
+
+def compare_texts(original_texts_file, saved_texts_file):
+ for original_filepath in original_texts_file:
+ saved_filepath = get_corresponding_file(original_filepath)
+ if saved_filepath == '':
+ print("No result text file after roundtrip for " + original_filepath)
+ continue
+
+ with open(original_filepath) as file_1:
+ original_file_text = file_1.readlines()
+
+ with open(saved_filepath) as file_2:
+ saved_file_text = file_2.readlines()
+
+ # if os.path.exists("./result"):
+ # os.remove("result")
+ for line in difflib.unified_diff(original_file_text, saved_file_text, fromfile=original_filepath, tofile=saved_filepath, lineterm=''):
+ with open("result", "a") as log_file:
+ print(line, file=log_file)
+ log_file.close()
-# checks if we missed any text content after saving the file.
-def check_text_contents(original_result_list, saved_result_list):
+# checks if we missed any text content after saving the file. (except a:t, We are comparing them with compare_text function)
+def check_text_contents(original_result_list, saved_result_list, original_file_path):
# detect if we lost or added any text on existing texts of original version.
for line in original_result_list:
@@ -69,6 +110,10 @@ def check_text_contents(original_result_list, saved_result_list):
if not bool(text_dict): # check if text context is empty
continue
tag = list(line[0].keys())[0] #if there is a text context, find the owner tag.
+
+ if tag == "a:t": # exclude text, we are comparing them seperatly
+ continue
+
for sline in saved_result_list:
stag = list(sline[0].keys())[0]
if stag == tag: # check if saved results has same tag too.
@@ -76,18 +121,22 @@ def check_text_contents(original_result_list, saved_result_list):
if text_dict != saved_text_dict:
for key, val in text_dict.items():
if key not in saved_text_dict.keys():
- print ("We lost %d \"%s\" text in %s tag." % (val, key, tag))
+ print ("We lost %d \"%s\" text in %s tag in %s." % (val, key, tag, original_file_path))
elif val > saved_text_dict[key]:
- print ("We lost %d \"%s\" text in %s tag." % (val - saved_text_dict[key], key, tag))
+ print ("We lost %d \"%s\" text in %s tag in %s." % (val - saved_text_dict[key], key, tag, original_file_path))
elif val < saved_text_dict[key]:
- print("We added extra %d \"%s\" text in %s tag" % (saved_text_dict[key] - val, key, tag))
+ print("We added extra %d \"%s\" text in %s tag in %s" % (saved_text_dict[key] - val, key, tag, original_file_path))
- # detect if we add any new text that not existed in original version
+ # detect if we add any new text that not existed in original version (Reverse comparision)
for line in saved_result_list:
saved_text_dict = line[3]
if not bool(saved_text_dict): # check if text context is empty
continue
tag = list(line[0].keys())[0] #if there is a text context, find the owner tag.
+
+ if tag == "a:t": # exclude text, we are comparing them seperatly
+ continue
+
for sline in original_result_list:
stag = list(sline[0].keys())[0]
if stag == tag: # check if original results has same tag too.
@@ -95,9 +144,9 @@ def check_text_contents(original_result_list, saved_result_list):
if saved_text_dict != text_dict:
for key, val in saved_text_dict.items():
if key not in text_dict.keys():
- print ("We add extra %d \"%s\" text in %s tag." % (val, key, tag))
+ print ("We add extra %d \"%s\" text in %s tag in %s." % (val, key, tag, original_file_path))
-#reads the file context and create the result list structer from.
+#reads the file context and create the result list structure from.
# eg res_list[[{},{},{},{}],[{},{},{},{}]...] ]
def create_list_from_result_file(filepath):
result_list = []
More information about the Libreoffice-commits
mailing list