[Libreoffice-commits] .: 2 commits - filter/source
Kohei Yoshida
kohei at kemper.freedesktop.org
Fri May 25 13:03:31 PDT 2012
filter/source/config/cache/cacheitem.hxx | 8 -
filter/source/config/cache/typedetection.cxx | 147 +++++++++++++++++++++------
2 files changed, 115 insertions(+), 40 deletions(-)
New commits:
commit 0c782558aee08bfc24e03c715a504a234ab30307
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date: Fri May 25 16:01:52 2012 -0400
Rank format types in order of complexity.
This way we can run detection services in the correct order; from
more complex to less complex, since the more complex structure is
much easier to test and reject.
Change-Id: Iee2d3e2e4f2834f95e6e89975f646e3928114b11
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index 93e8015..672a09f 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -109,8 +109,101 @@ TypeDetection::~TypeDetection()
namespace {
/**
- * Types with matching pattern first, then extension, then types that are
- * supported by the document service come next.
+ * Rank format types in order of complexity. More complex formats are
+ * ranked higher so that they get tested sooner over simpler formats.
+ *
+ * Guidelines to determine how complex a format is (subject to change):
+ *
+ * 1) compressed text (XML, HTML, etc)
+ * 2) binary
+ * 3) non-compressed text
+ * 3.1) structured text
+ * 3.1.1) dialect of a structured text (e.g. docbook XML)
+ * 3.1.2) generic structured text (e.g. generic XML)
+ * 3.2) non-structured text
+ *
+ * In each category, rank them from strictly-structured to
+ * loosely-structured.
+ */
+int getFlatTypeRank(const rtl::OUString& rType)
+{
+ // List formats from more complex to less complex.
+ // TODO: Add more.
+ static const char* ranks[] = {
+ // Compressed XML
+ "writer8_template",
+ "writer8",
+ "calc8_template",
+ "calc8",
+ "writer_OOXML_Text_Template",
+ "writer_OOXML",
+ "writer_MS_Word_2007_Template",
+ "writer_MS_Word_2007",
+ "Office Open XML Spreadsheet Template",
+ "Office Open XML Spreadsheet",
+ "MS Excel 2007 XML Template",
+ "MS Excel 2007 XML",
+
+ // Compressed text
+ "pdf_Portable_Document_Format",
+
+ // Binary
+ "writer_T602_Document",
+ "writer_WordPerfect_Document",
+ "writer_MS_Works_Document",
+ "writer_MS_Word_97_Vorlage",
+ "writer_MS_Word_97",
+ "writer_MS_Word_95_Vorlage",
+ "writer_MS_Word_95",
+ "writer_MS_WinWord_60",
+ "writer_MS_WinWord_5",
+ "MS Excel 2007 Binary",
+ "calc_MS_Excel_97_VorlageTemplate",
+ "calc_MS_Excel_97",
+ "calc_MS_Excel_95_VorlageTemplate",
+ "calc_MS_Excel_95",
+ "calc_MS_Excel_5095_VorlageTemplate",
+ "calc_MS_Excel_5095",
+ "calc_MS_Excel_40_VorlageTemplate",
+ "calc_MS_Excel_40",
+ "calc_Pocket_Excel_File",
+ "calc_Lotus",
+ "calc_QPro",
+ "calc_SYLK",
+ "calc_DIF",
+ "calc_dBase",
+
+
+ // Non-compressed XML
+ "writer_ODT_FlatXML",
+ "calc_ODS_FlatXML",
+ "calc_MS_Excel_2003_XML",
+ "writer_MS_Word_2003_XML",
+ "writer_DocBook_File",
+ "XHTML_File",
+
+ // Non-compressed text
+ "writer_Rich_Text_Format",
+ "generic_HTML",
+ "generic_Text"
+ };
+
+ size_t n = SAL_N_ELEMENTS(ranks);
+
+ for (size_t i = 0; i < n; ++i)
+ {
+ if (rType.equalsAscii(ranks[i]))
+ return n - i - 1;
+ }
+
+ // Not ranked. Treat them equally.
+ return -1;
+}
+
+/**
+ * Types with a matching pattern come first, then by extension, then by custom
+ * type rank, then types that are supported by the document service.
+ * Lastly, ties are broken by type name in descending order.
*/
struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool>
{
@@ -122,7 +215,25 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec
if (r1.bMatchByExtension != r2.bMatchByExtension)
return r1.bMatchByExtension;
- return r1.bPreselectedByDocumentService;
+ int rank1 = getFlatTypeRank(r1.sType);
+ int rank2 = getFlatTypeRank(r2.sType);
+
+ if (rank1 != rank2)
+ return rank1 > rank2;
+
+ if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService)
+ return r1.bPreselectedByDocumentService;
+
+ // All other things being equal, sort them by type name in descending order.
+ return r1.sType > r2.sType;
+ }
+};
+
+struct EqualByName : public std::binary_function<FlatDetectionInfo, FlatDetectionInfo, bool>
+{
+ bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
+ {
+ return r1.sType == r2.sType;
}
};
@@ -177,6 +288,7 @@ struct SortByPriority : public std::binary_function<FlatDetectionInfo, FlatDetec
// Properly prioritize all candidate types.
lFlatTypes.sort(SortByPriority());
+ lFlatTypes.unique(EqualByName());
::rtl::OUString sType ;
::rtl::OUString sLastChance;
commit 58652054727a29701795f2849c87f320de05c4dd
Author: Kohei Yoshida <kohei.yoshida at gmail.com>
Date: Fri May 25 14:57:35 2012 -0400
The logic behind these two flags no longer makes sense.
Change-Id: Ie2fada1c641d2bc313ddb14903083beab08f8a98
diff --git a/filter/source/config/cache/cacheitem.hxx b/filter/source/config/cache/cacheitem.hxx
index d76aa92..486b299 100644
--- a/filter/source/config/cache/cacheitem.hxx
+++ b/filter/source/config/cache/cacheitem.hxx
@@ -235,12 +235,6 @@ struct FlatDetectionInfo
// this type was found by a matching URL Pattern
sal_Bool bMatchByPattern;
- // the user selected this type explicitly
- sal_Bool bPreselectedAsType;
-
- // the user selected this type implicit by selecting a corresponding filter
- sal_Bool bPreselectedByFilter;
-
// the user selected this type implicit by selecting a corresponding office module
sal_Bool bPreselectedByDocumentService;
@@ -248,8 +242,6 @@ struct FlatDetectionInfo
: sType (::rtl::OUString())
, bMatchByExtension (sal_False )
, bMatchByPattern (sal_False )
- , bPreselectedAsType (sal_False )
- , bPreselectedByFilter (sal_False )
, bPreselectedByDocumentService(sal_False )
{}
};
diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index 303ebfc..93e8015 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -505,7 +505,6 @@ sal_Bool TypeDetection::impl_getPreselectionForType(const ::rtl::OUString& sPreS
aInfo.sType = sType;
aInfo.bMatchByExtension = bMatchByExtension;
aInfo.bMatchByPattern = bMatchByPattern;
- aInfo.bPreselectedAsType = sal_True;
if (bPreferredPreselection)
rFlatTypes.push_front(aInfo);
@@ -561,17 +560,6 @@ sal_Bool TypeDetection::impl_getPreselectionForFilter(const ::rtl::OUString& sPr
sFilter = ::rtl::OUString();
}
- // We have to mark all retrieved preselection items as "preselected by filter"!
- FlatDetection::iterator pIt;
- for ( pIt = rFlatTypes.begin();
- pIt != rFlatTypes.end() ;
- ++pIt )
- {
- FlatDetectionInfo& rInfo = *pIt;
- rInfo.bPreselectedAsType = sal_False;
- rInfo.bPreselectedByFilter = sal_True;
- }
-
if (!sFilter.isEmpty())
return sal_True;
else
@@ -630,8 +618,6 @@ sal_Bool TypeDetection::impl_getPreselectionForDocumentService(const ::rtl::OUSt
++pIt )
{
FlatDetectionInfo& rInfo = *pIt;
- rInfo.bPreselectedAsType = sal_False;
- rInfo.bPreselectedByFilter = sal_False;
rInfo.bPreselectedByDocumentService = sal_True ;
rFlatTypes.push_back(rInfo);
}
@@ -733,21 +719,6 @@ void TypeDetection::impl_getPreselection(const css::util::URL& aP
// c)
if (sDetectService.isEmpty())
{
- // accept or not accept flat types without deep detection: that's the question :-)
- // May be there exists some states, where we have to use our LastChance feature instead
- // of using the flat type directly.
- // Here the list of task ID's, which wasrelated to these lines of code:
- // #i47159#, #i43404#, #i46494#
-
- // a flat detected type without the chance for a deep detection ... but preselected by the user
- // explicitly (means preselected as type or filter ... not as documentservice!)
- // should be accepted. So the user can overrule our detection.
- if (
- (aFlatTypeInfo.bPreselectedAsType ) ||
- (aFlatTypeInfo.bPreselectedByFilter)
- )
- return sFlatType;
-
// flat detected types without any registered deep detection service and not
// preselected by the user can be used as LAST CHANCE in case no other type could
// be detected. Of course only the first type without deep detector can be used.
More information about the Libreoffice-commits
mailing list