[Libreoffice-commits] core.git: sw/qa sw/source

Tomaž Vajngerl (via logerrit) logerrit at kemper.freedesktop.org
Mon Aug 23 00:51:58 UTC 2021


 sw/qa/extras/indexing/IndexingExportTest.cxx |  167 ++++++++++++++-------------
 sw/source/filter/indexing/IndexingExport.cxx |   28 ++--
 2 files changed, 109 insertions(+), 86 deletions(-)

New commits:
commit 50f0e8c7880122a05585a2233f6f35d0dfee0385
Author:     Tomaž Vajngerl <tomaz.vajngerl at collabora.co.uk>
AuthorDate: Sun Aug 22 11:43:10 2021 +0900
Commit:     Tomaž Vajngerl <quikee at gmail.com>
CommitDate: Mon Aug 23 02:51:23 2021 +0200

    indexing: make indexing XML flat and use simple element names
    
    This changes the indexing XML to be flatter and changes the element
    names to be either "paragraph" or "object", where an "object" has a
    "type" attribute that states exactly what kind of object it is.
    
    This makes it easier to convert the XML to a format accepted by
    an indexing engine.
    
    Change-Id: Ia8941cc9616a862c1bc980efea5ba2548217644e
    Reviewed-on: https://gerrit.libreoffice.org/c/core/+/120836
    Tested-by: Jenkins
    Reviewed-by: Tomaž Vajngerl <quikee at gmail.com>

diff --git a/sw/qa/extras/indexing/IndexingExportTest.cxx b/sw/qa/extras/indexing/IndexingExportTest.cxx
index 346ff783d2c4..9d40d887f30d 100644
--- a/sw/qa/extras/indexing/IndexingExportTest.cxx
+++ b/sw/qa/extras/indexing/IndexingExportTest.cxx
@@ -107,10 +107,12 @@ void IndexingExportTest::testIndexingExport_Images()
     CPPUNIT_ASSERT(pXmlDoc);
 
     assertXPath(pXmlDoc, "/indexing");
-    assertXPath(pXmlDoc, "/indexing/graphic[1]", "alt", "Image_NonCaption - Alternative text");
-    assertXPath(pXmlDoc, "/indexing/graphic[1]", "name", "Image_NonCaption");
-    assertXPath(pXmlDoc, "/indexing/graphic[2]", "alt", "Image_InCaption - Alternative text");
-    assertXPath(pXmlDoc, "/indexing/graphic[2]", "name", "Image_InCaption");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Image_NonCaption - Alternative text");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Image_NonCaption");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "graphic");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "alt", "Image_InCaption - Alternative text");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Image_InCaption");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "graphic");
 }
 
 void IndexingExportTest::testIndexingExport_OLE()
@@ -127,8 +129,9 @@ void IndexingExportTest::testIndexingExport_OLE()
     CPPUNIT_ASSERT(pXmlDoc);
 
     assertXPath(pXmlDoc, "/indexing");
-    assertXPath(pXmlDoc, "/indexing/ole[1]", "name", "Object - Chart");
-    assertXPath(pXmlDoc, "/indexing/ole[1]", "alt", "Alt Text");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Object - Chart");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "alt", "Alt Text");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "ole");
 }
 
 void IndexingExportTest::testIndexingExport_Shapes()
@@ -145,18 +148,22 @@ void IndexingExportTest::testIndexingExport_Shapes()
     CPPUNIT_ASSERT(pXmlDoc);
 
     assertXPath(pXmlDoc, "/indexing");
-
-    assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Circle");
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "This is a circle");
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "This is a second paragraph");
-
-    assertXPath(pXmlDoc, "/indexing/shape[2]", "name", "Diamond");
-    assertXPathContent(pXmlDoc, "/indexing/shape[2]/paragraph[1]", "This is a diamond");
-
-    assertXPath(pXmlDoc, "/indexing/shape[3]", "name", "Text Frame 1");
-    assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[1]", "This is a TextBox - Para1");
-    assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[2]", "Para2");
-    assertXPathContent(pXmlDoc, "/indexing/shape[3]/paragraph[3]", "Para3");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Drawing : Just a Diamond");
+
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Circle");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a circle");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "This is a second paragraph");
+
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Diamond");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "shape");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a diamond");
+
+    assertXPath(pXmlDoc, "/indexing/object[3]", "name", "Text Frame 1");
+    assertXPath(pXmlDoc, "/indexing/object[3]", "type", "shape");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "This is a TextBox - Para1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Para2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "Para3");
 }
 
 void IndexingExportTest::testIndexingExport_Tables()
@@ -174,51 +181,56 @@ void IndexingExportTest::testIndexingExport_Tables()
 
     assertXPath(pXmlDoc, "/indexing");
 
-    assertXPath(pXmlDoc, "/indexing/table[1]", "name", "Table1");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[1]", "A");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[2]", "B");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[3]", "1");
-    assertXPathContent(pXmlDoc, "/indexing/table[1]/paragraph[4]", "2");
-
-    assertXPath(pXmlDoc, "/indexing/table[2]", "name", "Table2");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[1]", "A");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[2]", "B");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[3]", "C");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[4]", "1");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[5]", "10");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[6]", "100");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[7]", "2");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[8]", "20");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[9]", "200");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[10]", "3");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[11]", "30");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[12]", "300");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[13]", "4");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[14]", "40");
-    assertXPathContent(pXmlDoc, "/indexing/table[2]/paragraph[15]", "400");
-
-    assertXPath(pXmlDoc, "/indexing/table[3]", "name", "WeirdTable");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[1]", "A1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[2]", "B1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[3]", "C1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[4]", "D1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[5]", "A2B2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[6]", "C2D2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[7]", "A3B3C3D3");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[8]", "A4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[9]", "A4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[10]", "B4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[11]", "C4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[12]", "D4-1");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[13]", "D4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[14]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[15]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[16]", "B4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[17]", "C4-2");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[18]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[19]", "");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[20]", "A5B5C5");
-    assertXPathContent(pXmlDoc, "/indexing/table[3]/paragraph[21]", "D5");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Table1");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "table");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "index", "9");
+    // Search paragraph with parent = 9
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][1]", "A");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][2]", "B");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][3]", "1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=9][4]", "2");
+
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Table2");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "table");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "index", "24");
+    // Search paragraph with parent = 24
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][1]", "A");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][2]", "B");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][3]", "C");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][4]", "1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][5]", "10");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][6]", "100");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][7]", "2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][8]", "20");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][9]", "200");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][10]", "3");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][11]", "30");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][12]", "300");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][13]", "4");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][14]", "40");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=24][15]", "400");
+
+    assertXPath(pXmlDoc, "/indexing/object[3]", "name", "WeirdTable");
+    assertXPath(pXmlDoc, "/indexing/object[3]", "type", "table");
+    assertXPath(pXmlDoc, "/indexing/object[3]", "index", "72");
+    // Search paragraph with parent = 72
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][1]", "A1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][2]", "B1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][3]", "C1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][4]", "D1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][5]", "A2B2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][6]", "C2D2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][7]", "A3B3C3D3");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][8]", "A4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][9]", "A4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][10]", "B4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][11]", "C4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][12]", "D4-1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][13]", "D4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][14]", "B4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][15]", "C4-2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][16]", "A5B5C5");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[@parent=72][17]", "D5");
 }
 
 void IndexingExportTest::testIndexingExport_Sections()
@@ -236,18 +248,20 @@ void IndexingExportTest::testIndexingExport_Sections()
 
     assertXPath(pXmlDoc, "/indexing");
 
-    assertXPath(pXmlDoc, "/indexing/section[1]", "name", "Section1");
-    assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[1]",
-                       "This is a paragraph in a Section1");
-    assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[2]", "Section1 - Paragraph 2");
-    assertXPathContent(pXmlDoc, "/indexing/section[1]/paragraph[3]", "Section1 - Paragraph 3");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Section1");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "section");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph in a Section1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Section1 - Paragraph 2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[3]", "Section1 - Paragraph 3");
+
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[4]", "This is a paragraph outside sections");
 
-    assertXPath(pXmlDoc, "/indexing/section[2]", "name", "Section2");
-    assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[1]", "Section2 - Paragraph 1");
-    assertXPathContent(pXmlDoc, "/indexing/section[2]/paragraph[2]", "Section2 - Paragraph 2");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "name", "Section2");
+    assertXPath(pXmlDoc, "/indexing/object[2]", "type", "section");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[5]", "Section2 - Paragraph 1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[6]", "Section2 - Paragraph 2");
 
-    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "This is a paragraph outside sections");
-    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "This is a paragraph outside sections");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[7]", "This is a paragraph outside sections");
 }
 
 void IndexingExportTest::testIndexingExport_Fontwork()
@@ -265,10 +279,11 @@ void IndexingExportTest::testIndexingExport_Fontwork()
 
     assertXPath(pXmlDoc, "/indexing");
 
-    assertXPath(pXmlDoc, "/indexing/shape[1]", "name", "Gray");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "name", "Gray");
+    assertXPath(pXmlDoc, "/indexing/object[1]", "type", "shape");
 
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[1]", "Fontwork Text 1");
-    assertXPathContent(pXmlDoc, "/indexing/shape[1]/paragraph[2]", "Fontwork Text 2");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[1]", "Fontwork Text 1");
+    assertXPathContent(pXmlDoc, "/indexing/paragraph[2]", "Fontwork Text 2");
 }
 
 void IndexingExportTest::testIndexingExport_Header_Footer()
diff --git a/sw/source/filter/indexing/IndexingExport.cxx b/sw/source/filter/indexing/IndexingExport.cxx
index 834111d88af7..946c4aaddc06 100644
--- a/sw/source/filter/indexing/IndexingExport.cxx
+++ b/sw/source/filter/indexing/IndexingExport.cxx
@@ -69,18 +69,20 @@ public:
     void handleOLENode(const SwOLENode* pOleNode)
     {
         auto pFrameFormat = pOleNode->GetFlyFormat();
-        m_rXmlWriter.startElement("ole");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("alt", pOleNode->GetTitle());
         m_rXmlWriter.attribute("name", pFrameFormat->GetName());
+        m_rXmlWriter.attribute("type", "ole");
         m_rXmlWriter.endElement();
     }
 
     void handleGraphicNode(const SwGrfNode* pGraphicNode)
     {
         auto pFrameFormat = pGraphicNode->GetFlyFormat();
-        m_rXmlWriter.startElement("graphic");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("alt", pGraphicNode->GetTitle());
         m_rXmlWriter.attribute("name", pFrameFormat->GetName());
+        m_rXmlWriter.attribute("type", "graphic");
         m_rXmlWriter.endElement();
     }
 
@@ -93,6 +95,8 @@ public:
         }
         const OUString& rString
             = pTextNode->GetText().replaceAll(OUStringChar(CH_TXTATR_BREAKWORD), "");
+        if (rString.isEmpty())
+            return;
         m_rXmlWriter.startElement("paragraph");
         m_rXmlWriter.attribute("index", pTextNode->GetIndex());
         m_rXmlWriter.attribute("type", "1");
@@ -106,11 +110,15 @@ public:
     {
         if (pObject->GetName().isEmpty())
             return;
-        m_rXmlWriter.startElement("shape");
+
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("name", pObject->GetName());
         m_rXmlWriter.attribute("alt", pObject->GetTitle());
+        m_rXmlWriter.attribute("type", "shape");
         m_rXmlWriter.attribute("description", pObject->GetDescription());
 
+        m_rXmlWriter.endElement();
+
         SdrTextObj* pTextObject = dynamic_cast<SdrTextObj*>(pObject);
         if (pTextObject)
         {
@@ -123,12 +131,11 @@ public:
                 m_rXmlWriter.startElement("paragraph");
                 m_rXmlWriter.attribute("index", nParagraph);
                 m_rXmlWriter.attribute("type", "2");
+                m_rXmlWriter.attribute("parent", pObject->GetName());
                 m_rXmlWriter.content(sText);
                 m_rXmlWriter.endElement();
             }
         }
-
-        m_rXmlWriter.endElement();
     }
 
     void handleTableNode(SwTableNode* pTableNode)
@@ -136,20 +143,22 @@ public:
         const SwTableFormat* pFormat = pTableNode->GetTable().GetFrameFormat();
         OUString sName = pFormat->GetName();
 
-        m_rXmlWriter.startElement("table");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("index", pTableNode->GetIndex());
-        m_rXmlWriter.attribute("type", "1");
         m_rXmlWriter.attribute("name", sName);
+        m_rXmlWriter.attribute("type", "table");
+        m_rXmlWriter.endElement();
 
         maNodeStack.push_back(pTableNode);
     }
 
     void handleSectionNode(SwSectionNode* pSectionNode)
     {
-        m_rXmlWriter.startElement("section");
+        m_rXmlWriter.startElement("object");
         m_rXmlWriter.attribute("index", pSectionNode->GetIndex());
-        m_rXmlWriter.attribute("type", "1");
         m_rXmlWriter.attribute("name", pSectionNode->GetSection().GetSectionName());
+        m_rXmlWriter.attribute("type", "section");
+        m_rXmlWriter.endElement();
 
         maNodeStack.push_back(pSectionNode);
     }
@@ -159,7 +168,6 @@ public:
         if (!maNodeStack.empty() && pEndNode->StartOfSectionNode() == maNodeStack.back())
         {
             maNodeStack.pop_back();
-            m_rXmlWriter.endElement();
         }
     }
 };


More information about the Libreoffice-commits mailing list