[Libreoffice-commits] .: 2 commits - filter/source

Kohei Yoshida kohei at kemper.freedesktop.org
Fri May 25 13:03:31 PDT 2012


 filter/source/config/cache/cacheitem.hxx     |    8 -
 filter/source/config/cache/typedetection.cxx |  147 +++++++++++++++++++++------
 2 files changed, 115 insertions(+), 40 deletions(-)

New commits:
commit 0c782558aee08bfc24e03c715a504a234ab30307
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Fri May 25 16:01:52 2012 -0400

    Rank format types in order of complexity.
    
    This way we can run detection services in the correct order; from
    more complex to less complex, since the more complex structure is
    much easier to test and reject.
    
    Change-Id: Iee2d3e2e4f2834f95e6e89975f646e3928114b11

diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index 93e8015..672a09f 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -109,8 +109,101 @@ TypeDetection::~TypeDetection()
 namespace {
 
 /**
- * Types with matching pattern first, then extension, then types that are
- * supported by the document service come next.
+ * Rank format types in order of complexity.  More complex formats are
+ * ranked higher so that they get tested sooner over simpler formats.
+ *
+ * Guidelines to determine how complex a format is (subject to change):
+ *
+ * 1) compressed text (XML, HTML, etc)
+ * 2) binary
+ * 3) non-compressed text
+ *   3.1) structured text
+ *     3.1.1) dialect of a structured text (e.g. docbook XML)
+ *     3.1.2) generic structured text (e.g. generic XML)
+ *   3.2) non-structured text
+ *
+ * In each category, rank them from strictly-structured to
+ * loosely-structured.
+ */
+int getFlatTypeRank(const rtl::OUString& rType)
+{
+    // Formats listed from more complex to less complex; earlier entries rank higher.
+    // TODO: Add more.
+    static const char* ranks[] = {
+        // Compressed XML
+        "writer8_template",
+        "writer8",
+        "calc8_template",
+        "calc8",
+        "writer_OOXML_Text_Template",
+        "writer_OOXML",
+        "writer_MS_Word_2007_Template",
+        "writer_MS_Word_2007",
+        "Office Open XML Spreadsheet Template",
+        "Office Open XML Spreadsheet",
+        "MS Excel 2007 XML Template",
+        "MS Excel 2007 XML",
+
+        // Compressed text
+        "pdf_Portable_Document_Format",
+
+        // Binary
+        "writer_T602_Document",
+        "writer_WordPerfect_Document",
+        "writer_MS_Works_Document",
+        "writer_MS_Word_97_Vorlage",
+        "writer_MS_Word_97",
+        "writer_MS_Word_95_Vorlage",
+        "writer_MS_Word_95",
+        "writer_MS_WinWord_60",
+        "writer_MS_WinWord_5",
+        "MS Excel 2007 Binary",
+        "calc_MS_Excel_97_VorlageTemplate",
+        "calc_MS_Excel_97",
+        "calc_MS_Excel_95_VorlageTemplate",
+        "calc_MS_Excel_95",
+        "calc_MS_Excel_5095_VorlageTemplate",
+        "calc_MS_Excel_5095",
+        "calc_MS_Excel_40_VorlageTemplate",
+        "calc_MS_Excel_40",
+        "calc_Pocket_Excel_File",
+        "calc_Lotus",
+        "calc_QPro",
+        "calc_SYLK",
+        "calc_DIF",
+        "calc_dBase",
+
+
+        // Non-compressed XML
+        "writer_ODT_FlatXML",
+        "calc_ODS_FlatXML",
+        "calc_MS_Excel_2003_XML",
+        "writer_MS_Word_2003_XML",
+        "writer_DocBook_File",
+        "XHTML_File",
+
+        // Non-compressed text
+        "writer_Rich_Text_Format",
+        "generic_HTML",
+        "generic_Text"
+    };
+
+    size_t n = SAL_N_ELEMENTS(ranks);
+
+    for (size_t i = 0; i < n; ++i)
+    {
+        if (rType.equalsAscii(ranks[i]))
+            return n - i - 1;
+    }
+
+    // Not ranked.  All unranked types share -1, which is below every ranked value.
+    return -1;
+}
+
+/**
+ * Types with matching pattern first, then extension, then custom ranks by
+ * types, then types that are supported by the document service come next.
+ * Lastly, sort them in descending order of type name.
  */
 struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool>
 {
@@ -122,7 +215,25 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec
         if (r1.bMatchByExtension != r2.bMatchByExtension)
             return r1.bMatchByExtension;
 
-        return r1.bPreselectedByDocumentService;
+        int rank1 = getFlatTypeRank(r1.sType);
+        int rank2 = getFlatTypeRank(r2.sType);
+
+        if (rank1 != rank2)
+            return rank1 > rank2;
+
+        if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService)
+            return r1.bPreselectedByDocumentService;
+
+        // All things being equal, sort them in descending order of type name.
+        return r1.sType > r2.sType;
+    }
+};
+
+struct EqualByName : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool>
+{
+    bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
+    {
+        return r1.sType == r2.sType;
     }
 };
 
@@ -177,6 +288,7 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec
 
     // Properly prioritize all candidate types.
     lFlatTypes.sort(SortByPriority());
+    lFlatTypes.unique(EqualByName());
 
     ::rtl::OUString sType      ;
     ::rtl::OUString sLastChance;
commit 58652054727a29701795f2849c87f320de05c4dd
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date:   Fri May 25 14:57:35 2012 -0400

    The logic behind these two flags no longer makes sense.
    
    Change-Id: Ie2fada1c641d2bc313ddb14903083beab08f8a98

diff --git a/filter/source/config/cache/cacheitem.hxx b/filter/source/config/cache/cacheitem.hxx
index d76aa92..486b299 100644
--- a/filter/source/config/cache/cacheitem.hxx
+++ b/filter/source/config/cache/cacheitem.hxx
@@ -235,12 +235,6 @@ struct  FlatDetectionInfo
     // this type was found by a matching URL Pattern
     sal_Bool bMatchByPattern;
 
-    // the user selected this type explicitly
-    sal_Bool bPreselectedAsType;
-
-    // the user selected this type implicit by selecting a corresponding filter
-    sal_Bool bPreselectedByFilter;
-
     // the user selected this type implicit by selecting a corresponding office module
     sal_Bool bPreselectedByDocumentService;
 
@@ -248,8 +242,6 @@ struct  FlatDetectionInfo
         : sType                        (::rtl::OUString())
         , bMatchByExtension            (sal_False        )
         , bMatchByPattern              (sal_False        )
-        , bPreselectedAsType           (sal_False        )
-        , bPreselectedByFilter         (sal_False        )
         , bPreselectedByDocumentService(sal_False        )
     {}
 };
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index 303ebfc..93e8015 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -505,7 +505,6 @@ sal_Bool TypeDetection::impl_getPreselectionForType(const ::rtl::OUString& sPreS
         aInfo.sType              = sType;
         aInfo.bMatchByExtension  = bMatchByExtension;
         aInfo.bMatchByPattern    = bMatchByPattern;
-        aInfo.bPreselectedAsType = sal_True;
 
         if (bPreferredPreselection)
             rFlatTypes.push_front(aInfo);
@@ -561,17 +560,6 @@ sal_Bool TypeDetection::impl_getPreselectionForFilter(const ::rtl::OUString& sPr
             sFilter = ::rtl::OUString();
     }
 
-    // We have to mark all retrieved preselection items as "preselected by filter"!
-    FlatDetection::iterator pIt;
-    for (  pIt  = rFlatTypes.begin();
-           pIt != rFlatTypes.end()  ;
-         ++pIt                      )
-    {
-        FlatDetectionInfo& rInfo = *pIt;
-        rInfo.bPreselectedAsType   = sal_False;
-        rInfo.bPreselectedByFilter = sal_True;
-    }
-
     if (!sFilter.isEmpty())
         return sal_True;
     else
@@ -630,8 +618,6 @@ sal_Bool TypeDetection::impl_getPreselectionForDocumentService(const ::rtl::OUSt
          ++pIt                          )
     {
         FlatDetectionInfo& rInfo = *pIt;
-        rInfo.bPreselectedAsType            = sal_False;
-        rInfo.bPreselectedByFilter          = sal_False;
         rInfo.bPreselectedByDocumentService = sal_True ;
         rFlatTypes.push_back(rInfo);
     }
@@ -733,21 +719,6 @@ void TypeDetection::impl_getPreselection(const css::util::URL&                aP
             // c)
             if (sDetectService.isEmpty())
             {
-                // accept or not accept flat types without deep detection: that's the question :-)
-                // May be there exists some states, where we have to use our LastChance feature instead
-                // of using the flat type directly.
-                // Here the list of task ID's, which wasrelated to these lines of code:
-                // #i47159#, #i43404#, #i46494#
-
-                // a flat detected type without the chance for a deep detection ... but preselected by the user
-                // explicitly (means preselected as type or filter ... not as documentservice!)
-                // should be accepted. So the user can overrule our detection.
-                if (
-                    (aFlatTypeInfo.bPreselectedAsType  ) ||
-                    (aFlatTypeInfo.bPreselectedByFilter)
-                   )
-                    return sFlatType;
-
                 // flat detected types without any registered deep detection service and not
                 // preselected by the user can be used as LAST CHANCE in case no other type could
                 // be detected. Of course only the first type without deep detector can be used.


More information about the Libreoffice-commits mailing list