[Libreoffice-commits] core.git: Branch 'aoo/trunk' - ooxml/source
Andre Fischer
af at apache.org
Tue Jun 10 03:10:18 PDT 2014
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java | 41
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java | 4
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java | 41
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java | 27
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java | 17
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java | 64
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java | 42
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java | 92 -
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java | 11
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java | 77 -
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java | 324 +++--
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java | 58
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java | 258 +++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java | 49
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java | 141 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java | 155 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/HopcroftMinimizer.java | 355 +++++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/NonValidatingCreator.java | 180 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/SkipData.java | 33
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/State.java | 214 +++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContainer.java | 52
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContext.java | 240 +++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateSet.java | 223 +++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/Transition.java | 97 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/ValidatingCreator.java | 147 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/ValidatingCreatorVisitor.java | 646 ++++++++++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/LogGenerator.java | 67 -
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/ParserTablesGenerator.java | 184 +-
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/IAction.java | 38
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/NonValidatingCreator.java | 198 ---
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ParseElementAction.java | 90 -
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/SkipElementAction.java | 67 -
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomaton.java | 110 -
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomatonOptimizer.java | 257 ---
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java | 196 ---
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StateContext.java | 179 --
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/Transition.java | 149 --
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ValidatingCreator.java | 122 -
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ValidatingCreatorVisitor.java | 556 --------
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/AttributeIterator.java | 12
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/DereferencingNodeIterator.java | 12
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/PermutationIterator.java | 116 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeGroupReference.java | 12
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeReference.java | 10
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/INodeReference.java | 4
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/Node.java | 8
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/QualifiedName.java | 8
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/All.java | 9
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Any.java | 45
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexContent.java | 9
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexTypeReference.java | 6
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Element.java | 1
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ElementReference.java | 11
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Extension.java | 26
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/GroupReference.java | 10
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/ProcessTypeVisitor.java | 30
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/RequestVisitor.java | 14
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/SchemaOptimizer.java | 41
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/NamespaceMap.java | 62
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/Schema.java | 53
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/SchemaBase.java | 99 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleContent.java | 9
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleTypeReference.java | 10
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/parser/SchemaParser.java | 39
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/parser/XmlNamespace.java | 10
65 files changed, 3996 insertions(+), 2471 deletions(-)
New commits:
commit 4710644c94f552347529e9852727373460f1f12a
Author: Andre Fischer <af at apache.org>
Date: Tue Jun 10 09:35:39 2014 +0000
125035: Can now create a validating parser.
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java
new file mode 100644
index 0000000..006c187
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AcceptingStateTable.java
@@ -0,0 +1,41 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/** List of all accepting states.
+ *
+ * The accepting status of states is important when a closing tag is seen.
+ * It denotes the end of the input stream for the state machine of the currently
+ * processed element. It is an error when the current state is not accepting
+ * when a closing tag is processed.
+ */
+public class AcceptingStateTable
+{
+ public AcceptingStateTable (final Iterable<String[]> aData)
+ {
+ maAcceptingStates = new HashSet<>();
+ // Each entry of aData is one already split line; only its second element is read.
+ for (final String[] aLine : aData)
+ {
+ // The second element is the id of an accepting state.
+ final int nStateId = Integer.parseInt(aLine[1]);
+
+ maAcceptingStates.add(nStateId);
+ }
+ Log.Std.printf("read %d accepting states\n", maAcceptingStates.size());
+ }
+
+
+
+ /** Return true when nStateId was added as an accepting state by the constructor. */
+ public boolean Contains (final int nStateId)
+ {
+ return maAcceptingStates.contains(nStateId);
+ }
+
+
+
+ /** Ids of all accepting states; filled once in the constructor. */
+ private final Set<Integer> maAcceptingStates;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
index 75caea7..5c65bdb 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
@@ -21,14 +21,14 @@
package org.apache.openoffice.ooxml.parser;
-import java.io.File;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
+import java.util.Vector;
public class AttributeManager
{
- public AttributeManager (final File aDataLocation)
+ public AttributeManager (final Vector<String[]> aData)
{
maStateIdToAttributesMap = new HashMap<>();
}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
index ee306c5..0fffe78 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
@@ -21,44 +21,26 @@
package org.apache.openoffice.ooxml.parser;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
import java.util.Vector;
public class NameMap
{
- NameMap (final File aDataLocation)
+ NameMap (final Vector<String[]> aData)
{
maNameToIdMap = new HashMap<>();
maIdToNameMap = new Vector<>();
- try
+ for (final String[] aLine : aData)
{
- final BufferedReader aReader = new BufferedReader(
- new FileReader(
- new File(aDataLocation, "names.lst")));
-
- while (true)
- {
- final String sLine = aReader.readLine();
- if (sLine == null)
- break;
- final String aParts[] = sLine.split("\\s+");
- final int nId = Integer.parseInt(aParts[0]);
- maNameToIdMap.put(aParts[1], nId);
- if (maIdToNameMap.size() <= nId)
- maIdToNameMap.setSize(nId+1);
- maIdToNameMap.set(nId, aParts[1]);
- }
-
- aReader.close();
- }
- catch (final Exception aException)
- {
- throw new RuntimeException(aException);
+ final int nId = Integer.parseInt(aLine[1]);
+
+ maNameToIdMap.put(aLine[2], nId);
+
+ if (maIdToNameMap.size() <= nId)
+ maIdToNameMap.setSize(nId+1);
+ maIdToNameMap.set(nId, aLine[2]);
}
if (Log.Dbg != null)
@@ -89,7 +71,10 @@ public class NameMap
public String GetNameForId (final int nId)
{
- return maIdToNameMap.get(nId);
+ if (nId == -1)
+ return "<none>";
+ else
+ return maIdToNameMap.get(nId);
}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
index 2f493bc..214a008 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
@@ -21,38 +21,19 @@
package org.apache.openoffice.ooxml.parser;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
import java.util.HashMap;
import java.util.Map;
+import java.util.Vector;
public class NamespaceMap
{
- NamespaceMap (final File aDataLocation)
+ NamespaceMap (final Vector<String[]> aData)
{
maUriToPrefixMap = new HashMap<>();
- try
+ for (final String[] aLine : aData)
{
- final BufferedReader aReader = new BufferedReader(
- new FileReader(
- new File(aDataLocation, "namespaces.lst")));
-
- while (true)
- {
- final String sLine = aReader.readLine();
- if (sLine == null)
- break;
- final String aParts[] = sLine.split("\\s+");
- maUriToPrefixMap.put(aParts[0], aParts[1]);
- }
-
- aReader.close();
- }
- catch (final Exception aException)
- {
- throw new RuntimeException(aException);
+ maUriToPrefixMap.put(aLine[2], aLine[1]);
}
if (Log.Dbg != null)
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
index 82232c4..64abf25 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
@@ -77,8 +77,6 @@ public class OOXMLParser
}
-
-
private static InputStream GetInputStream (final String sInputName)
{
final InputStream aIn;
@@ -165,13 +163,17 @@ public class OOXMLParser
case XMLStreamReader.START_ELEMENT:
++nElementCount;
if (aMachine.IsInSkipState())
+ {
+ Log.Dbg.printf("is skip state -> starting to skip\n");
nElementCount += Skip(aReader);
+ }
else if ( ! aMachine.ProcessStartElement(
aReader.getNamespaceURI(),
aReader.getLocalName(),
aReader.getLocation(),
aAttributeProvider))
{
+ Log.Dbg.printf("starting to skip to recover from error\n");
nElementCount += Skip(aReader);
}
break;
@@ -185,7 +187,7 @@ public class OOXMLParser
case XMLStreamReader.CHARACTERS:
final String sText = aReader.getText();
- Log.Dbg.printf("text [%s]\n", sText);
+ Log.Dbg.printf("text [%s]\n", sText.replace("\n", "\\n"));
aMachine.ProcessCharacters(sText);
break;
@@ -251,7 +253,7 @@ public class OOXMLParser
throw new RuntimeException("saw end of document while skipping elements\n");
case XMLStreamReader.CHARACTERS:
- Log.Dbg.printf("skipping text [%s]\n", aReader.getText());
+ SkipText(aReader.getText());
break;
default:
@@ -265,4 +267,11 @@ public class OOXMLParser
}
return nElementCount;
}
+
+
+
+ private static void SkipText (final String sText)
+ {
+ Log.Dbg.printf("skipping text [%s]\n", sText.replace("\n", "\\n"));
+ }
}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java
new file mode 100644
index 0000000..b38a2c8
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/ParseTableReader.java
@@ -0,0 +1,64 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+
+/** A simple reader for the parse table data that allows simple filtering on the
+ *  first word in each line.
+ *
+ *  Lines that only contain comments or whitespace are ignored.  Every other
+ *  line is split at whitespace and stored in the section named by its first word.
+ */
+public class ParseTableReader
+{
+    /** Read aFile and sort its lines into sections.
+     *  Any error while reading is rethrown wrapped in a RuntimeException.
+     */
+    public ParseTableReader (final File aFile)
+    {
+        maSections = new HashMap<>();
+
+        // try-with-resources closes the reader even when reading fails.
+        try (final BufferedReader aReader = new BufferedReader(new FileReader(aFile)))
+        {
+            while (true)
+            {
+                final String sRawLine = aReader.readLine();
+                if (sRawLine == null)
+                    break;
+                // Trim first: a whitespace-only line splits into zero parts and
+                // an indented line would get the empty string as its first word.
+                final String sLine = sRawLine.trim();
+                if (sLine.isEmpty() || sLine.startsWith("#"))
+                    continue;
+                final String aParts[] = sLine.split("\\s+");
+                GetSection(aParts[0]).add(aParts);
+            }
+        }
+        catch (final Exception aException)
+        {
+            throw new RuntimeException(aException);
+        }
+    }
+
+    /** Return the lines of the named section; each line keeps the section name
+     *  as element 0.  An unknown section is created empty, so never null.
+     */
+    public Vector<String[]> GetSection (final String sSectionName)
+    {
+        Vector<String[]> aSection = maSections.get(sSectionName);
+        if (aSection == null)
+        {
+            aSection = new Vector<>();
+            maSections.put(sSectionName, aSection);
+        }
+        return aSection;
+    }
+
+    /** Maps a section name to the lines that start with that name. */
+    private final Map<String,Vector<String[]>> maSections;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java
new file mode 100644
index 0000000..bc0c76b
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/SkipStateTable.java
@@ -0,0 +1,42 @@
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.HashSet;
+import java.util.Set;
+
+/** Table of all skip states.
+ *
+ * A skip state corresponds to the 'any' element in the schemas.
+ * It means that the content of the element is specified by an extension of the
+ * schema which may or may not be known at parse time.
+ * At the moment the whole element is skipped, i.e. ignored.
+ *
+ */
+public class SkipStateTable
+{
+ public SkipStateTable (final Iterable<String[]> aData)
+ {
+ maSkipStates = new HashSet<>();
+ // Each entry of aData is one already split line; only its second element is read.
+ for (final String[] aLine : aData)
+ {
+ // The second element is the id of a skip state.
+ final int nStateId = Integer.parseInt(aLine[1]);
+
+ maSkipStates.add(nStateId);
+ }
+ Log.Std.printf("read %d skip states\n", maSkipStates.size());
+ }
+
+
+
+ /** Return true when nStateId was added as a skip state by the constructor. */
+ public boolean Contains (final int nStateId)
+ {
+ return maSkipStates.contains(nStateId);
+ }
+
+
+
+ /** Ids of all skip states; filled once in the constructor. */
+ private final Set<Integer> maSkipStates;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
index ffdeafc..72a2a33 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
@@ -22,7 +22,6 @@
package org.apache.openoffice.ooxml.parser;
import java.io.File;
-import java.util.Set;
import java.util.Stack;
import javax.xml.stream.Location;
@@ -32,16 +31,19 @@ import javax.xml.stream.Location;
*/
public class StateMachine
{
- public StateMachine (final File aDataLocation)
+ public StateMachine (final File aParseTableFile)
{
- maNamespaceMap = new NamespaceMap(aDataLocation);
- maNameMap = new NameMap(aDataLocation);
- maTransitions = new TransitionTable(aDataLocation);
- maSkipStates = maTransitions.GetSkipStates();
- maAttributeManager = new AttributeManager(aDataLocation);
-
- mnStartStateId = maNameMap.GetIdForName(null, "_start_");
- mnEndStateId = maNameMap.GetIdForName(null, "_end_");
+ final ParseTableReader aReader = new ParseTableReader(aParseTableFile);
+ maNamespaceMap = new NamespaceMap(aReader.GetSection("namespace"));
+ maElementNameMap = new NameMap(aReader.GetSection("element-name"));
+ maStateNameMap = new NameMap(aReader.GetSection("state-name"));
+ maTransitions = new TransitionTable(aReader.GetSection("transition"));
+ maSkipStates = new SkipStateTable(aReader.GetSection("skip"));
+ maAcceptingStates = new AcceptingStateTable(aReader.GetSection("accepting-state"));
+ maAttributeManager = new AttributeManager(aReader.GetSection("attribute"));
+
+ mnStartStateId = Integer.parseInt(aReader.GetSection("start-state").firstElement()[1]);
+ mnEndStateId = Integer.parseInt(aReader.GetSection("end-state").firstElement()[1]);
mnCurrentStateId = mnStartStateId;
maStateStack = new Stack<>();
Log.Dbg.printf("starting in state _start_ (%d)\n", mnCurrentStateId);
@@ -56,12 +58,13 @@ public class StateMachine
final Location aLocation,
final AttributeProvider aAttributes)
{
- final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
boolean bResult = false;
+
try
{
- final int nElementId = maNameMap.GetIdForName(sPrefix, sElementName);
- Log.Dbg.printf("%s:%s(%d, aArgumentList) L%dC%d\n",
+ final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
+ final int nElementId = maElementNameMap.GetIdForName(sPrefix, sElementName);
+ Log.Dbg.printf("%s:%s(%d) L%dC%d\n",
sPrefix,
sElementName,
nElementId,
@@ -73,21 +76,25 @@ public class StateMachine
nElementId);
if (aTransition == null)
{
- Log.Err.printf(
- "can not find transition for state %s and element %s at L%dC%d\n",
- maNameMap.GetNameForId(mnCurrentStateId),
- maNameMap.GetNameForId(nElementId),
+ final String sText = String.format(
+ "can not find transition for state %s(%d) and element %s(%d) at L%dC%d\n",
+ maStateNameMap.GetNameForId(mnCurrentStateId),
+ mnCurrentStateId,
+ maElementNameMap.GetNameForId(nElementId),
+ nElementId,
aLocation.getLineNumber(),
aLocation.getColumnNumber());
+ Log.Err.printf(sText);
+ Log.Dbg.printf(sText);
}
else
{
Log.Dbg.printf(" %s(%d) -> %s(%d) via %s(%d)",
- maNameMap.GetNameForId(mnCurrentStateId),
+ maStateNameMap.GetNameForId(mnCurrentStateId),
mnCurrentStateId,
- maNameMap.GetNameForId(aTransition.GetEndStateId()),
+ maStateNameMap.GetNameForId(aTransition.GetEndStateId()),
aTransition.GetEndStateId(),
- maNameMap.GetNameForId(aTransition.GetActionId()),
+ maStateNameMap.GetNameForId(aTransition.GetActionId()),
aTransition.GetActionId());
Log.Dbg.printf("\n");
@@ -101,7 +108,10 @@ public class StateMachine
}
catch (RuntimeException aException)
{
- aException.printStackTrace();
+ System.err.printf("error at line %d and column %d\n",
+ aLocation.getLineNumber(),
+ aLocation.getColumnNumber());
+ throw aException;
}
return bResult;
}
@@ -114,6 +124,15 @@ public class StateMachine
final String sElementName,
final Location aLocation)
{
+ if ( ! maAcceptingStates.Contains(mnCurrentStateId)
+ && mnCurrentStateId!=-1)
+ {
+ Log.Dbg.printf("current state %s(%d) is not an accepting state\n",
+ maStateNameMap.GetNameForId(mnCurrentStateId),
+ mnCurrentStateId);
+ throw new RuntimeException("not expecting end element "+sElementName);
+ }
+
final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
final int nOldStateId = mnCurrentStateId;
@@ -126,9 +145,9 @@ public class StateMachine
aLocation.getLineNumber(),
aLocation.getColumnNumber());
Log.Dbg.printf(" %s(%d) <- %s(%d)\n",
- maNameMap.GetNameForId(nOldStateId),
+ maStateNameMap.GetNameForId(nOldStateId),
nOldStateId,
- maNameMap.GetNameForId(mnCurrentStateId),
+ maStateNameMap.GetNameForId(mnCurrentStateId),
mnCurrentStateId);
}
@@ -145,7 +164,7 @@ public class StateMachine
public boolean IsInSkipState ()
{
- return maSkipStates.contains(mnCurrentStateId);
+ return maSkipStates.Contains(mnCurrentStateId);
}
@@ -171,32 +190,25 @@ public class StateMachine
final int nOldState,
final int nNewState)
{
- switch(aTransition.GetAction())
- {
- case 'p' :
- // Parse action.
- maStateStack.push(mnCurrentStateId);
- Log.Dbg.IncreaseIndentation();
- final int nActionId = aTransition.GetActionId();
- SetCurrentState(nActionId);
- maAttributeManager.ParseAttributes(nActionId, aAttributes);
- break;
-
- default:
- throw new RuntimeException();
- }
+ maStateStack.push(mnCurrentStateId);
+ Log.Dbg.IncreaseIndentation();
+ final int nActionId = aTransition.GetActionId();
+ SetCurrentState(nActionId);
+ maAttributeManager.ParseAttributes(nActionId, aAttributes);
}
private final NamespaceMap maNamespaceMap;
- private final NameMap maNameMap;
+ private final NameMap maElementNameMap;
+ private final NameMap maStateNameMap;
private final TransitionTable maTransitions;
private final AttributeManager maAttributeManager;
private int mnCurrentStateId;
private Stack<Integer> maStateStack;
private final int mnStartStateId;
private final int mnEndStateId;
- private static Set<Integer> maSkipStates;
+ private SkipStateTable maSkipStates;
+ private AcceptingStateTable maAcceptingStates;
}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
index 69d4fa3..c5c9561 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
@@ -27,13 +27,11 @@ class Transition
final int nStartStateId,
final int nEndStateId,
final int nElementId,
- final String sAction,
final int nActionStateId)
{
mnStartStateId = nStartStateId;
mnEndStateId = nEndStateId;
mnElementId = nElementId;
- mcAction = sAction.charAt(0);
mnActionStateId = nActionStateId;
}
@@ -64,14 +62,6 @@ class Transition
- public char GetAction ()
- {
- return mcAction;
- }
-
-
-
-
public int GetActionId ()
{
return mnActionStateId;
@@ -83,6 +73,5 @@ class Transition
private final int mnStartStateId;
private final int mnEndStateId;
private final int mnElementId;
- private final char mcAction;
private final int mnActionStateId;
}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
index 908c6dd..3f0bb65 100644
--- a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
@@ -21,66 +21,38 @@
package org.apache.openoffice.ooxml.parser;
-import java.io.BufferedReader;
-import java.io.File;
-import java.io.FileReader;
import java.util.HashMap;
-import java.util.HashSet;
import java.util.Map;
-import java.util.Set;
+import java.util.Vector;
public class TransitionTable
{
- public TransitionTable (final File aDataLocation)
+ public TransitionTable (final Vector<String[]> aData)
{
maTransitions = new HashMap<>();
- maSkipStates = new HashSet<>();
int nTransitionCount = 0;
- try
+ for (final String[] aLine : aData)
{
- final BufferedReader aReader = new BufferedReader(new FileReader(new File(aDataLocation, "transitions.lst")));
- while(true)
+ // Create new transition.
+ final int nStartStateId = Integer.parseInt(aLine[1]);
+ final int nEndStateId = Integer.parseInt(aLine[2]);
+ final int nElementId = Integer.parseInt(aLine[3]);
+ final int nElementStateId = Integer.parseInt(aLine[4]);
+ final Transition aTransition = new Transition(
+ nStartStateId,
+ nEndStateId,
+ nElementId,
+ nElementStateId);
+ ++nTransitionCount;
+
+ Map<Integer,Transition> aPerElementTransitions = maTransitions.get(aTransition.GetStartStateId());
+ if (aPerElementTransitions == null)
{
- // Read line, ignore comments, split into parts at whitespace.
- final String sLine = aReader.readLine();
- if (sLine == null)
- break;
- if (sLine.startsWith("#"))
- continue;
- final String[] aParts = sLine.split("\\s+");
-
- // Create new transition.
- final int nStartStateId = Integer.parseInt(aParts[0]);
- final int nEndStateId = Integer.parseInt(aParts[1]);
- final int nElementId = Integer.parseInt(aParts[2]);
- final int nActionStateId = Integer.parseInt(aParts[4]);
- if (nElementId==-1 && nActionStateId==-1)
- maSkipStates.add(nStartStateId);
- else
- {
- final Transition aTransition = new Transition(
- nStartStateId,
- nEndStateId,
- nElementId,
- aParts[3],
- nActionStateId);
- ++nTransitionCount;
-
- Map<Integer,Transition> aPerElementTransitions = maTransitions.get(aTransition.GetStartStateId());
- if (aPerElementTransitions == null)
- {
- aPerElementTransitions = new HashMap<>();
- maTransitions.put(aTransition.GetStartStateId(), aPerElementTransitions);
- }
- aPerElementTransitions.put(aTransition.GetElementId(), aTransition);
- }
+ aPerElementTransitions = new HashMap<>();
+ maTransitions.put(aTransition.GetStartStateId(), aPerElementTransitions);
}
- aReader.close();
- }
- catch (final Exception aException)
- {
- aException.printStackTrace();
+ aPerElementTransitions.put(aTransition.GetElementId(), aTransition);
}
Log.Std.printf("read %d transitions\n", nTransitionCount);
}
@@ -102,14 +74,5 @@ public class TransitionTable
- public Set<Integer> GetSkipStates ()
- {
- return maSkipStates;
- }
-
-
-
-
private final Map<Integer,Map<Integer,Transition>> maTransitions;
- private final Set<Integer> maSkipStates;
}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
index 0c9ce1d..c770a34 100644
--- a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
@@ -24,23 +24,26 @@ package org.apache.openoffice.ooxml.schema;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
+import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.Vector;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import javax.xml.stream.XMLStreamException;
+import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomatonContainer;
+import org.apache.openoffice.ooxml.schema.automaton.NonValidatingCreator;
+import org.apache.openoffice.ooxml.schema.automaton.FiniteAutomaton;
+import org.apache.openoffice.ooxml.schema.automaton.ValidatingCreator;
import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
-import org.apache.openoffice.ooxml.schema.generator.automaton.NonValidatingCreator;
-import org.apache.openoffice.ooxml.schema.generator.automaton.StackAutomaton;
import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
-import org.apache.openoffice.ooxml.schema.parser.XmlNamespace;
public class SchemaReader
{
@@ -49,6 +52,15 @@ public class SchemaReader
if (aArgumentList.length != 1)
{
System.err.printf("usage: SchemaParser <driver-file>\n");
+ System.err.printf(" driver file can contain these lines:\n");
+ System.err.printf("# Comments\n");
+ System.err.printf(" are ignored\n");
+ System.err.printf("schema <mark> <file-name>\n");
+ System.err.printf(" specifies a top-level schema file to read\n");
+ System.err.printf("output-schema <file-name>\n");
+ System.err.printf(" write schema information to file\n");
+ System.err.printf("output-optimized-schema <file-name>\n");
+ System.err.printf(" write information about optimized schema to file\n");
System.exit(1);
}
@@ -61,15 +73,15 @@ public class SchemaReader
private SchemaReader (final File aDriverFile)
{
- maSchema = new Schema();
+ maSchemaBase = new SchemaBase();
+ maTopLevelSchemas = new HashMap<>();
maMainSchemaFiles = new Vector<>();
maSchemaFiles = new HashSet<>();
- maTodo = new LinkedList<String>();
+ maWorkList = new LinkedList<>();
+ maOutputOperations = new Vector<>();
mnTotalLineCount = 0;
mnTotalByteCount = 0;
- XmlNamespace.Apply(maSchema);
-
ParseDriverFile(aDriverFile);
}
@@ -87,43 +99,80 @@ public class SchemaReader
System.exit(1);
}
- final Pattern aSchemaPattern = Pattern.compile("^\\s*schema\\s+(.*)\\s+(.*)");
- final Pattern aOutputPattern = Pattern.compile("^\\s*output-directory\\s+(.*)");
try
{
final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile));
while(true)
{
- final String sLine = aIn.readLine();
+ String sLine = aIn.readLine();
if (sLine == null)
break;
// Lines starting with # are comment lines and are ignored.
- if (sLine.matches("^\\s*#"))
+ if (sLine.matches("^\\s*#.*"))
continue;
// Lines containing only whitespace are also ignored.
else if (sLine.matches("^\\s*$"))
continue;
- Matcher aMatcher = aSchemaPattern.matcher(sLine);
- if (aMatcher.matches())
- {
- maMainSchemaFiles.add(new String[]{aMatcher.group(1), aMatcher.group(2)});
- }
- else
+ // Handle line continuation.
+ while (sLine.endsWith("\\"))
+ sLine = sLine.substring(0, sLine.length()-1) + aIn.readLine();
+
+ final Vector<String> aParts = SplitLine(sLine);
+ switch (aParts.get(0))
{
- aMatcher = aOutputPattern.matcher(sLine);
- if (aMatcher.matches())
- {
- maOutputDirectory = new File(aMatcher.group(1));
- if (maOutputDirectory.exists() && ! maOutputDirectory.canWrite())
+ case "schema":
+ maMainSchemaFiles.add(new String[]{aParts.get(1), aParts.get(2)});
+ break;
+
+ case "output-schema":
+ maOutputOperations.add(new Runnable()
{
- System.err.printf("can not write output file '%s' \n", maOutputDirectory.toString());
- System.exit(1);
- }
- }
+ final File maFile = CreateCheckedOutputFile(aParts.get(1));
+ @Override public void run()
+ {
+ WriteSchema(maFile);
+ }
+ });
+ break;
+
+ case "output-optimized-schema":
+ maOutputOperations.add(new Runnable()
+ {
+ final File maFile = CreateCheckedOutputFile(aParts.get(1));
+ @Override public void run()
+ {
+ WriteOptimizedSchema(maFile);
+ }
+ });
+ break;
+
+ case "output-nonvalidating-parse-tables":
+ maOutputOperations.add(new Runnable()
+ {
+ final File maAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+ final File maParseTableFile = CreateCheckedOutputFile(aParts.get(2));
+ @Override public void run() {WriteNonValidatingParseTables(
+ maAutomatonLogFile,
+ maParseTableFile);}
+ });
+ break;
+
+ case "output-validating-parse-tables":
+ maOutputOperations.add(new Runnable()
+ {
+ final File maAutomatonLogFile = CreateCheckedOutputFile(aParts.get(1));
+ final File maParseTableFile = CreateCheckedOutputFile(aParts.get(2));
+ @Override public void run() {WriteValidatingParseTables(
+ maAutomatonLogFile,
+ maParseTableFile);}
+ });
+ break;
+
+ default:
+ System.err.printf("unknown command '%s' in driver file", aParts.get(0));
+ System.exit(1);
}
-
-
}
aIn.close();
}
@@ -146,20 +195,19 @@ public class SchemaReader
{
aException.printStackTrace();
}
- final Schema aOptimizedSchema = maSchema.GetOptimizedSchema();
- System.out.printf(" used are %d complex types, %d simple types, %d groups and %d top level elements\n",
- aOptimizedSchema.ComplexTypes.GetCount(),
- aOptimizedSchema.SimpleTypes.GetCount(),
- aOptimizedSchema.Groups.GetCount(),
- aOptimizedSchema.TopLevelElements.GetCount());
+ maOptimizedSchemaBase = maSchemaBase.GetOptimizedSchema(maTopLevelSchemas.values());
+ for (final Entry<String, Schema> aEntry : maTopLevelSchemas.entrySet())
+ aEntry.setValue(aEntry.getValue().GetOptimizedSchema(maOptimizedSchemaBase));
- LogGenerator.Write(maSchema, new File(maOutputDirectory, "original-schema.txt"));
- LogGenerator.Write(aOptimizedSchema, new File(maOutputDirectory, "bla.txt"));
+ System.out.printf(" optimization left %d complex types and %d simple types\n",
+ maOptimizedSchemaBase.ComplexTypes.GetCount(),
+ maOptimizedSchemaBase.SimpleTypes.GetCount());
- final StackAutomaton aAutomaton = CreateStackAutomaton(aOptimizedSchema);
-
- new ParserTablesGenerator(aAutomaton).Generate(new File("/tmp/ooxml-parser"));
+ for (final Runnable aOperation : maOutputOperations)
+ {
+ aOperation.run();
+ }
}
@@ -172,7 +220,7 @@ public class SchemaReader
for (final String[] aEntry : maMainSchemaFiles)
{
- final String sShortName = aEntry[0];
+ final String sMainSchemaShortname = aEntry[0];
final String sMainSchemaFile = aEntry[1];
final File aMainSchemaFile = new File(sMainSchemaFile);
if ( ! aMainSchemaFile.exists())
@@ -186,36 +234,53 @@ public class SchemaReader
System.exit(1);
}
- AddSchemaReference(sMainSchemaFile);
+ final Schema aSchema = new Schema(sMainSchemaShortname, maSchemaBase);
+ ParseSchemaFile(sMainSchemaFile, aSchema);
+ maTopLevelSchemas.put(sMainSchemaShortname, aSchema);
}
long nStartTime = System.currentTimeMillis();
-
- while ( ! maTodo.isEmpty())
+ while ( ! maWorkList.isEmpty())
{
- final String sSchemaName = maTodo.poll();
- System.out.printf("parsing %s\n", sSchemaName);
- maSchemaFiles.add(sSchemaName);
-
- final SchemaParser aParser = new SchemaParser(new File(sSchemaName), maSchema);
- aParser.Parse();
-
- mnTotalLineCount += aParser.GetLineCount();
- mnTotalByteCount += aParser.GetByteCount();
- for (final File aFile : aParser.GetImportedSchemaFilenames())
- AddSchemaReference(aFile.getAbsolutePath());
+ ParseSchemaFile(maWorkList.poll(), null);
}
long nEndTime = System.currentTimeMillis();
+
System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
maSchemaFiles.size(),
mnTotalLineCount,
mnTotalByteCount,
(nEndTime-nStartTime)/1000.0);
- System.out.printf(" found %d complex types, %d simple types, %d groups and %d top level elements\n",
- maSchema.ComplexTypes.GetCount(),
- maSchema.SimpleTypes.GetCount(),
- maSchema.Groups.GetCount(),
- maSchema.TopLevelElements.GetCount());
+ System.out.printf(" found %d complex types and %d simple types\n",
+ maSchemaBase.ComplexTypes.GetCount(),
+ maSchemaBase.SimpleTypes.GetCount());
+
+ int nTopLevelElementCount = 0;
+ for (final Schema aSchema : maTopLevelSchemas.values())
+ nTopLevelElementCount += aSchema.TopLevelElements.GetCount();
+ System.out.printf(" the %d top level schemas have %d elements\n",
+ maTopLevelSchemas.size(),
+ nTopLevelElementCount);
+ }
+
+
+
+
+    /** Parse a single schema file and register the schema files that it imports.
+     *  @param sSchemaFilename
+     *      Name of the schema file to parse.  It is recorded in the set of
+     *      schema files.
+     *  @param aSchema
+     *      Passed on to the SchemaParser.  Callers in this class pass the
+     *      top-level schema for main schema files and null for files that are
+     *      taken from the work list.
+     *  @throws XMLStreamException
+     *      Presumably raised by the parser for malformed input -- TODO confirm.
+     */
+    private void ParseSchemaFile (
+        final String sSchemaFilename,
+        final Schema aSchema)
+        throws XMLStreamException
+    {
+        final File aSchemaFile = new File(sSchemaFilename);
+        maSchemaFiles.add(sSchemaFilename);
+        System.out.printf("parsing %s\n", sSchemaFilename);
+
+        final SchemaParser aSchemaParser = new SchemaParser(aSchemaFile, aSchema, maSchemaBase);
+        aSchemaParser.Parse();
+
+        // Update the totals that are reported once all files have been parsed.
+        mnTotalByteCount += aSchemaParser.GetByteCount();
+        mnTotalLineCount += aSchemaParser.GetLineCount();
+
+        // Hand every imported schema file to AddSchemaReference() for later parsing.
+        for (final File aImportedFile : aSchemaParser.GetImportedSchemaFilenames())
+        {
+            AddSchemaReference(aImportedFile.getAbsolutePath());
+        }
     }
@@ -230,45 +295,148 @@ public class SchemaReader
// We don't know yet the file name of the schema, so just store null to mark the schema name as 'known'.
maSchemaFiles.add(sSchemaFilename);
- maTodo.add(sSchemaFilename);
+ maWorkList.add(sSchemaFilename);
}
}
- private static StackAutomaton CreateStackAutomaton (final Schema aSchema)
+    /** Split a line of the driver file into its parts.
+     *  Parts are separated by whitespace.  Text between double quotes is kept
+     *  as a single part, including any whitespace it contains; the quotes
+     *  themselves are removed.
+     *  @param sLine
+     *      The line to split.
+     *  @return
+     *      The parts in the order in which they occur in the line.
+     */
+    private Vector<String> SplitLine (final String sLine)
+    {
+        final Vector<String> aResult = new Vector<>();
+
+        // Splitting at the quotes yields segments that alternate between
+        // unquoted (even index) and quoted (odd index) text.
+        final String[] aSegments = sLine.split("\"");
+        for (int nIndex=0; nIndex<aSegments.length; ++nIndex)
+        {
+            final String sSegment = aSegments[nIndex];
+            if (nIndex%2 == 1)
+            {
+                // Quoted text is taken over verbatim.
+                aResult.add(sSegment);
+                continue;
+            }
+
+            // Unquoted text is split at whitespace, empty pieces are dropped.
+            for (final String sWord : sSegment.split("\\s+"))
+                if ( ! sWord.isEmpty())
+                    aResult.add(sWord);
+        }
+
+        return aResult;
+    }
+
+
+
+
+    /** Create a File object for a given file name.
+     *  Check that the file is writable, i.e. its directory exists and that if
+     *  the file already exists it can be replaced.
+     *  A file name without a directory part is resolved against the current
+     *  working directory before the directory check is made.
+     *  @param sFilename
+     *      Name of the output file, either relative or absolute.
+     *  @return
+     *      A File object for the unmodified sFilename.
+     *  @throws RuntimeException
+     *      when one of the checks fails.
+     */
+    private File CreateCheckedOutputFile (final String sFilename)
+    {
+        final File aFile = new File(sFilename);
+
+        // File.getParentFile() returns null for a name like "out.txt" that
+        // has no directory part.  Going through the absolute path avoids a
+        // NullPointerException for such names.
+        final File aDirectory = aFile.getAbsoluteFile().getParentFile();
+        if (aDirectory == null || ! aDirectory.exists())
+            throw new RuntimeException("directory of "+sFilename+" does not exist: can not create file");
+        if (aFile.exists() && ! aFile.canWrite())
+            throw new RuntimeException("file "+sFilename+" already exists and can not be replaced");
+        return aFile;
+    }
+
+
+
+
+ /** Write the schema base and the top-level schemas to the given file
+ * by delegating to LogGenerator.Write().
+ * NOTE(review): the output operations appear to run after the entries of
+ * maTopLevelSchemas have been replaced by their optimized versions, so the
+ * unoptimized base may be written together with optimized top-level
+ * schemas -- confirm that this is intended.
+ */
+ private void WriteSchema (final File aOutputFile)
+ {
+ LogGenerator.Write(aOutputFile, maSchemaBase, maTopLevelSchemas.values());
+ }
+
+
+
+
+ /** Write the optimized schema base and the top-level schemas to the given
+ * file by delegating to LogGenerator.Write().
+ */
+ private void WriteOptimizedSchema (final File aOutputFile)
+ {
+ LogGenerator.Write(aOutputFile, maOptimizedSchemaBase, maTopLevelSchemas.values());
+ }
+
+
+
+
+ private void WriteNonValidatingParseTables (
+ final File aAutomatonLogFile,
+ final File aParseTableFile)
{
long nStartTime = System.currentTimeMillis();
- StackAutomaton aAutomaton = new NonValidatingCreator(aSchema).Create(new File("/tmp/schema.log"));
+ final NonValidatingCreator aCreator = new NonValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
+ FiniteAutomatonContainer aAutomatons = aCreator.Create(maTopLevelSchemas.values());
long nEndTime = System.currentTimeMillis();
System.out.printf(
- "created stack automaton in %fs, it has %d states and %d transitions\n",
- (nEndTime-nStartTime)/1000.0,
- aAutomaton.GetStateCount(),
- aAutomaton.GetTransitionCount());
+ "created %d non-validating automatons with %d states and %d transitions in %fs\n",
+ aAutomatons.GetAutomatonCount(),
+ aAutomatons.GetStateCount(),
+ aAutomatons.GetTransitionCount(),
+ (nEndTime-nStartTime)/1000.0);
+
+ new ParserTablesGenerator(aAutomatons, maOptimizedSchemaBase.Namespaces)
+ .Generate(aParseTableFile);
+ }
+
+
+
+
+ private void WriteValidatingParseTables (
+ final File aAutomatonLogFile,
+ final File aParseTableFile)
+ {
+ long nStartTime = System.currentTimeMillis();
+ final ValidatingCreator aCreator = new ValidatingCreator(maOptimizedSchemaBase, aAutomatonLogFile);
+ FiniteAutomatonContainer aAutomatons = aCreator.Create();
+ long nEndTime = System.currentTimeMillis();
+ System.out.printf(
+ "created %d validating stack automatons with %d states and %d transitions in %fs\n",
+ aAutomatons.GetAutomatonCount(),
+ aAutomatons.GetStateCount(),
+ aAutomatons.GetTransitionCount(),
+ (nEndTime-nStartTime)/1000.0);
+
- /*
nStartTime = System.currentTimeMillis();
- aAutomaton = aAutomaton.Optimize();
+ aAutomatons = aAutomatons.CreateDFAs();
nEndTime = System.currentTimeMillis();
System.out.printf(
- "optimized stack automaton in %fs, it now has %d states and %d transitions\n",
+ "created %d deterministic automatons with %d states and %d transitions in %fs\n",
+ aAutomatons.GetAutomatonCount(),
+ aAutomatons.GetStateCount(),
+ aAutomatons.GetTransitionCount(),
+ (nEndTime-nStartTime)/1000.0);
+
+ nStartTime = System.currentTimeMillis();
+ aAutomatons = aAutomatons.MinimizeDFAs();
+ nEndTime = System.currentTimeMillis();
+ System.out.printf(
+ "minimized automaton in %fs, there are now %d states and %d transitions\n",
(nEndTime-nStartTime)/1000.0,
- aAutomaton.GetStateCount(),
- aAutomaton.GetTransitionCount());
- */
- return aAutomaton;
+ aAutomatons.GetStateCount(),
+ aAutomatons.GetTransitionCount());
+
+ new ParserTablesGenerator(aAutomatons, maOptimizedSchemaBase.Namespaces)
+ .Generate(aParseTableFile);
}
- private final Schema maSchema;
+ private final SchemaBase maSchemaBase;
+ private SchemaBase maOptimizedSchemaBase;
+ private final Map<String,Schema> maTopLevelSchemas;
private final Vector<String[]> maMainSchemaFiles;
- private File maOutputDirectory;
+ private final Queue<String> maWorkList;
+ private final Vector<Runnable> maOutputOperations;
private final Set<String> maSchemaFiles;
- private final Queue<String> maTodo;
private int mnTotalLineCount;
private int mnTotalByteCount;
}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java
new file mode 100644
index 0000000..dff3508
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/Test.java
@@ -0,0 +1,58 @@
+package org.apache.openoffice.ooxml.schema;
+
+import org.apache.openoffice.ooxml.schema.automaton.HopcroftMinimizer;
+import org.apache.openoffice.ooxml.schema.automaton.State;
+import org.apache.openoffice.ooxml.schema.automaton.StateContainer;
+import org.apache.openoffice.ooxml.schema.automaton.StateContext;
+import org.apache.openoffice.ooxml.schema.automaton.Transition;
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** A simple test of the minimization algorithm for DFAs.
+ *
+ *  It builds a small automaton from a table of transitions and runs the
+ *  HopcroftMinimizer on it with System.out as log.
+ *
+ *  May lead to the use of a testing framework in the future.
+ */
+public class Test
+{
+    public static void main (final String ... aArgumentList)
+    {
+        final String[] aAcceptingStateNames = new String[]{"E"};
+        // Each row is {start state, end state, triggering element}.
+        final String[][] aTransitionTable = new String[][]{
+            {"S", "A", "a"},
+            {"A", "B", "b"},
+            {"A", "C", "b"},
+            {"B", "E", "c"},
+            {"C", "E", "c"},
+        };
+        new Test("S", aAcceptingStateNames, aTransitionTable);
+    }
+
+    /** Build the automaton that is described by the arguments and minimize it.
+     *  @param sStartState
+     *      Name of the start state.
+     *  @param aAcceptingStates
+     *      Names of the states that are marked as accepting.
+     *  @param aTransitions
+     *      One row per transition: start state, end state, element name.
+     */
+    private Test (
+        final String sStartState,
+        final String[] aAcceptingStates,
+        final String[][] aTransitions)
+    {
+        final StateContext aContext = new StateContext(new StateContainer(), sStartState);
+
+        // Accepting states are created first and marked right away.
+        for (final String sName : aAcceptingStates)
+            aContext.CreateState(sName).SetIsAccepting();
+
+        // The type name of a transition is derived from its element name.
+        for (final String[] aRow : aTransitions)
+        {
+            final State aFrom = aContext.GetOrCreateState(new QualifiedName(aRow[0]), null);
+            final State aTo = aContext.GetOrCreateState(new QualifiedName(aRow[1]), null);
+            aFrom.AddTransition(
+                new Transition(aFrom, aTo, new QualifiedName(aRow[2]), "T_"+aRow[2]));
+        }
+
+        HopcroftMinimizer.MinimizeDFA(new StateContainer(), aContext, System.out);
+    }
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java
new file mode 100644
index 0000000..a0ba1c3
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/DFACreator.java
@@ -0,0 +1,258 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Queue;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** Convert an NFA into a DFA via the powerset construction (also called subset
+ *  construction).
+ *  Every state of the DFA stands for a set of NFA states.  New sets are
+ *  discovered by following the regular transitions of a known set and then
+ *  taking the epsilon closure of the states that are reached.
+ */
+public class DFACreator
+{
+    /** For a given non-deterministic finite automaton create an equivalent
+     *  deterministic finite automaton.
+     *  @param aDFAStateContainer
+     *      Container that is handed to the StateContext of the new DFA.
+     *  @param aNFAStateContext
+     *      The states of the NFA.
+     *  @param aTypeName
+     *      Used to name the start state of the DFA.  May be null, in which
+     *      case the name "<TOP-LEVEL>" is used.
+     */
+    public static FiniteAutomaton CreateDFAforNFA (
+        final StateContainer aDFAStateContainer,
+        final StateContext aNFAStateContext,
+        final QualifiedName aTypeName)
+    {
+        final DFACreator aInstance = new DFACreator(
+            aDFAStateContainer,
+            aNFAStateContext,
+            aTypeName);
+        aInstance.CreateDFAforNFA();
+        return new FiniteAutomaton(aInstance.maDFAStateContext);
+    }
+
+
+
+
+    private DFACreator (
+        final StateContainer aDFAStateContainer,
+        final StateContext aNFAStateContext,
+        final QualifiedName aTypeName)
+    {
+        final String sStartStateName;
+        if (aTypeName != null)
+            sStartStateName = aTypeName.GetStateName();
+        else
+            sStartStateName = "<TOP-LEVEL>";
+
+        maNFAStateContext = aNFAStateContext;
+
+        // Each key of this map is a set of NFA states that corresponds to
+        // one state in the DFA.
+        maNFASetToDFAStateMap = new TreeMap<>();
+        maDFAStateContext = new StateContext(aDFAStateContainer, sStartStateName);
+
+        maDFATransitions = new HashSet<>();
+        maAcceptingDFAStates = new Vector<>();
+    }
+
+
+
+
+    /** Run the powerset construction.  State sets whose outgoing transitions
+     *  have not yet been processed are kept in a work list.
+     */
+    private void CreateDFAforNFA ()
+    {
+        // The epsilon closure of the NFA start state is represented by the
+        // start state of the DFA.
+        final StateSet aStartClosure = GetEpsilonClosure(
+            new StateSet(maNFAStateContext.GetStartState()));
+        final State aDFAStartState = maDFAStateContext.GetStartState();
+        maNFASetToDFAStateMap.put(aStartClosure, aDFAStartState);
+        PropagateStateFlags(aStartClosure, aDFAStartState);
+
+        final Queue<StateSet> aPendingSets = new LinkedList<>();
+        aPendingSets.add(aStartClosure);
+
+        // Processing one set can reveal new sets which are appended to the queue.
+        while ( ! aPendingSets.isEmpty())
+            aPendingSets.addAll(ProcessTransitionFront(aPendingSets.poll()));
+    }
+
+
+
+
+    /** Create the DFA transitions that leave the DFA state of the given set
+     *  of NFA states.
+     *  @return
+     *      The state sets that have been seen for the first time and still
+     *      have to be processed.
+     */
+    private Collection<StateSet> ProcessTransitionFront (
+        final StateSet aSet)
+    {
+        final Set<StateSet> aNewStateSets = new TreeSet<>();
+
+        // All transitions that are created below start at the DFA state that
+        // represents the given set.
+        final State aSourceDFAState = maNFASetToDFAStateMap.get(aSet);
+
+        // The regular transitions that start from any state in the set are
+        // grouped by their element.  Each group leads to one DFA state.
+        for (final Vector<Transition> aGroup : GetTransitionFront(aSet).values())
+        {
+            // The target is made up of the end states of the group and all
+            // states reachable from them via epsilon transitions.
+            final StateSet aTargetSet = GetEpsilonClosure(GetEndStateSet(aGroup));
+
+            State aTargetDFAState = maNFASetToDFAStateMap.get(aTargetSet);
+            if (aTargetDFAState == null)
+            {
+                // First encounter of this set: create its DFA state and
+                // remember the set for later processing.
+                aNewStateSets.add(aTargetSet);
+                aTargetDFAState = aTargetSet.CreateStateForStateSet(maDFAStateContext);
+                PropagateStateFlags(aTargetSet, aTargetDFAState);
+                maNFASetToDFAStateMap.put(aTargetSet, aTargetDFAState);
+                if (aTargetDFAState.IsAccepting())
+                    maAcceptingDFAStates.add(aTargetDFAState);
+            }
+
+            final QualifiedName aElementName = GetElementName(aGroup);
+            final String sElementTypeName = GetElementTypeName(aGroup);
+            assert(aElementName != null);
+            final Transition aDFATransition = new Transition(
+                aSourceDFAState,
+                aTargetDFAState,
+                aElementName,
+                sElementTypeName);
+            aSourceDFAState.AddTransition(aDFATransition);
+            maDFATransitions.add(aDFATransition);
+        }
+
+        return aNewStateSets;
+    }
+
+
+
+
+    /** Return the element name of the first of the given transitions or null
+     *  when there are no transitions.
+     */
+    private QualifiedName GetElementName (final Vector<Transition> aTransitions)
+    {
+        return aTransitions.isEmpty()
+            ? null
+            : aTransitions.firstElement().GetElementName();
+    }
+
+
+
+
+    /** Return the element type name of the first of the given transitions or
+     *  null when there are no transitions.
+     */
+    private String GetElementTypeName (final Vector<Transition> aTransitions)
+    {
+        return aTransitions.isEmpty()
+            ? null
+            : aTransitions.firstElement().GetElementTypeName();
+    }
+
+
+
+
+    /** Return the epsilon closure of the given set of states.
+     *  The result is the set of all states that are reachable via zero, one or
+     *  more epsilon transitions from at least one state in the given set of
+     *  states.
+     */
+    private StateSet GetEpsilonClosure ( final StateSet aSet)
+    {
+        final StateSet aResult = new StateSet(aSet);
+
+        // Start with the members of the given set and follow epsilon
+        // transitions until no new state turns up.
+        final Queue<State> aPendingStates = new LinkedList<>();
+        for (final State aMember : aSet.GetStates())
+            aPendingStates.add(aMember);
+
+        while( ! aPendingStates.isEmpty())
+        {
+            final State aCurrent = aPendingStates.poll();
+            for (final EpsilonTransition aEpsilon : aCurrent.GetEpsilonTransitions())
+            {
+                final State aTarget = aEpsilon.GetEndState();
+                if (aResult.ContainsState(aTarget))
+                    continue;
+                aResult.AddState(aTarget);
+                aPendingStates.add(aTarget);
+            }
+        }
+
+        return aResult;
+    }
+
+
+
+
+    /** Return the list of regular transitions (i.e. not epsilon transitions)
+     *  that start from any of the states in the given set.
+     *  The returned map is a partition of the transitions according to their
+     *  triggering XML element.  Transitions without element name are stored
+     *  under the null key.
+     */
+    private Map<String, Vector<Transition>> GetTransitionFront (final StateSet aSet)
+    {
+        final Map<String, Vector<Transition>> aFront = new HashMap<>();
+
+        for (final State aMember : aSet.GetStates())
+        {
+            for (final Transition aTransition : aMember.GetTransitions())
+            {
+                // Skip transitions have no element name and use null as key.
+                final QualifiedName aElementName = aTransition.GetElementName();
+                final String sKey = aElementName != null
+                    ? aElementName.GetDisplayName()
+                    : null;
+
+                Vector<Transition> aGroup = aFront.get(sKey);
+                if (aGroup == null)
+                {
+                    aGroup = new Vector<>();
+                    aFront.put(sKey, aGroup);
+                }
+                aGroup.add(aTransition);
+            }
+        }
+        return aFront;
+    }
+
+
+
+
+    /** Return a state set that contains all end states of all the given transitions.
+     */
+    private StateSet GetEndStateSet (final Iterable<Transition> aTransitions)
+    {
+        final StateSet aEndStates = new StateSet();
+        for (final Transition aTransition : aTransitions)
+        {
+            aEndStates.AddState(aTransition.GetEndState());
+        }
+        return aEndStates;
+    }
+
+
+
+
+    /** Propagate accepting state flag and skip data.
+     *  The DFA state becomes accepting when at least one of the NFA states is
+     *  accepting.  The skip data of every NFA state is cloned for the DFA state.
+     */
+    private void PropagateStateFlags (
+        final StateSet aNFAStateSet,
+        final State aDFAState)
+    {
+        for (final State aMember : aNFAStateSet.GetStates())
+        {
+            if (aMember.IsAccepting())
+            {
+                aDFAState.SetIsAccepting();
+            }
+
+            for (final SkipData aData : aMember.GetSkipData())
+            {
+                aDFAState.AddSkipData(aData.Clone(aDFAState));
+            }
+        }
+    }
+
+
+
+
+    private final StateContext maNFAStateContext;
+
+    private final Map<StateSet,State> maNFASetToDFAStateMap;
+    private final StateContext maDFAStateContext;
+    private final Set<Transition> maDFATransitions;
+    private final Vector<State> maAcceptingDFAStates;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java
new file mode 100644
index 0000000..a14006f
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/EpsilonTransition.java
@@ -0,0 +1,49 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+/** Transition from one state to another that does not consume an input token.
+ *
+ * Used in the process of creating a validating parser.
+ */
+public class EpsilonTransition
+{
+ /** Create an epsilon transition that leads from the given start state to
+ * the given end state. The constructor is package-private.
+ */
+ EpsilonTransition (
+ final State aStartState,
+ final State aEndState)
+ {
+ maStartState = aStartState;
+ maEndState = aEndState;
+ }
+
+
+
+
+ /** Return the state at which this transition starts.
+ */
+ public State GetStartState ()
+ {
+ return maStartState;
+ }
+
+
+
+
+ /** Return the state at which this transition ends.
+ */
+ public State GetEndState ()
+ {
+ return maEndState;
+ }
+
+
+
+
+ /** Return the full names of start and end state, separated by an arrow.
+ */
+ @Override
+ public String toString ()
+ {
+ return String.format("%s -> %s",
+ maStartState.GetFullname(),
+ maEndState.GetFullname());
+ }
+
+
+
+
+ private final State maStartState;
+ private final State maEndState;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java
new file mode 100644
index 0000000..999824c
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomaton.java
@@ -0,0 +1,141 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+
+/** Represents a DFA (deterministic FA) or an NFA (non-deterministic FA).
+ * There is one automaton for each complex type and one for the top level elements.
+ * Transitions correspond to 'element' elements in the schema or a start tag in
+ * the input file. During parsing the current automaton is pushed on a stack
+ * and the automaton that represents the complex type associated with the
+ * starting element is made the current automaton. An end tag pops an automaton
+ * from the stack and replaces the current automaton with it.
+ *
+ * All state related queries are forwarded to the StateContext that is given
+ * to the constructor.
+ */
+public class FiniteAutomaton
+{
+ /** Create an automaton for the states in the given context.
+ * The constructor is package-private.
+ */
+ FiniteAutomaton (
+ final StateContext aContext)
+ {
+ maStateContext = aContext;
+ }
+
+
+
+
+ /** Return the number of states as reported by the state context.
+ */
+ public int GetStateCount ()
+ {
+ return maStateContext.GetStateCount();
+ }
+
+
+
+
+ /** Return the states of the state context.
+ */
+ public Iterable<State> GetStates()
+ {
+ return maStateContext.GetStates();
+ }
+
+
+
+
+ /** Return the result of StateContext.GetStatesSorted().
+ * NOTE(review): the sort order is defined by the state context and is not
+ * visible here.
+ */
+ public Iterable<State> GetStatesSorted ()
+ {
+ return maStateContext.GetStatesSorted();
+ }
+
+
+
+
+ /** Return the start state of the state context.
+ */
+ public State GetStartState ()
+ {
+ return maStateContext.GetStartState();
+ }
+
+
+
+
+ /** Return the accepting states of the state context.
+ */
+ public Iterable<State> GetAcceptingStates ()
+ {
+ return maStateContext.GetAcceptingStates();
+ }
+
+
+
+
+ /** Treat this automaton as NFA and create a DFA for it.
+ * The work is done by DFACreator.CreateDFAforNFA(), to which both
+ * arguments and the state context of this automaton are passed.
+ */
+ public FiniteAutomaton CreateDFA (
+ final StateContainer aDFAContainer,
+ final QualifiedName aTypeName)
+ {
+ return DFACreator.CreateDFAforNFA(
+ aDFAContainer,
+ maStateContext,
+ aTypeName);
+ }
+
+
+
+
+ /** Return the state context that was given to the constructor.
+ */
+ public StateContext GetStateContext()
+ {
+ return maStateContext;
+ }
+
+
+
+
+ /** Return the regular transitions of all states.
+ * The returned collection is created anew on every call.
+ */
+ public Iterable<Transition> GetTransitions ()
+ {
+ final Vector<Transition> aTransitions = new Vector<>();
+ for (final State aState : maStateContext.GetStates())
+ for (final Transition aTransition : aState.GetTransitions())
+ aTransitions.add(aTransition);
+ return aTransitions;
+ }
+
+
+
+
+ /** Return the sum of the transition counts of all states.
+ */
+ public int GetTransitionCount()
+ {
+ int nTransitionCount = 0;
+ for (final State aState : maStateContext.GetStates())
+ nTransitionCount += aState.GetTransitionCount();
+ return nTransitionCount;
+ }
+
+
+
+
+ /** Return the full name of the start state, which serves as type name of
+ * the automaton.
+ */
+ public String GetTypeName ()
+ {
+ return maStateContext.GetStartState().GetFullname();
+ }
+
+
+
+
+ private final StateContext maStateContext;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java
new file mode 100644
index 0000000..5ac2e66
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/FiniteAutomatonContainer.java
@@ -0,0 +1,155 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** As there is one FA for each complex type and one for the top level elements,
+ *  this container represents the whole set of schemas.
+ *  The automatons are stored in a map whose keys are the names under which
+ *  the automatons were added.
+ */
+public class FiniteAutomatonContainer
+{
+    /** Create an empty container.
+     *  @param aStateContainer
+     *      Not used by this class at the moment.
+     */
+    FiniteAutomatonContainer (final StateContainer aStateContainer)
+    {
+        maComplexTypeNameToAutomatonMap = new HashMap<>();
+    }
+
+
+
+
+    /** Add an automaton under the given name.  An automaton that was
+     *  previously added under the same name is replaced.
+     */
+    public void AddAutomaton (
+        final QualifiedName aElementName,
+        final FiniteAutomaton aAutomaton)
+    {
+        maComplexTypeNameToAutomatonMap.put(aElementName, aAutomaton);
+    }
+
+
+
+
+    /** Return all automatons in this container.
+     */
+    public Iterable<FiniteAutomaton> GetAutomatons()
+    {
+        return maComplexTypeNameToAutomatonMap.values();
+    }
+
+
+
+
+    /** Return the number of automatons in this container.
+     */
+    public int GetAutomatonCount ()
+    {
+        return maComplexTypeNameToAutomatonMap.size();
+    }
+
+
+
+
+    /** Return the states of all automatons in a newly created collection.
+     */
+    public Iterable<State> GetStates()
+    {
+        final Vector<State> aStates = new Vector<>();
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            for (final State aState : aAutomaton.GetStates())
+                aStates.add(aState);
+        return aStates;
+    }
+
+
+
+
+    /** Return the sum of the state counts of all automatons.
+     */
+    public int GetStateCount()
+    {
+        int nStateCount = 0;
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            nStateCount += aAutomaton.GetStateCount();
+        return nStateCount;
+    }
+
+
+
+
+    /** Return the transitions of all automatons in a newly created collection.
+     */
+    public Iterable<Transition> GetTransitions ()
+    {
+        final Vector<Transition> aTransitions = new Vector<>();
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            for (final Transition aTransition : aAutomaton.GetTransitions())
+                aTransitions.add(aTransition);
+        return aTransitions;
+    }
+
+
+
+
+    /** Return the sum of the transition counts of all automatons.
+     */
+    public int GetTransitionCount ()
+    {
+        int nTransitionCount = 0;
+        for (final FiniteAutomaton aAutomaton : maComplexTypeNameToAutomatonMap.values())
+            nTransitionCount += aAutomaton.GetTransitionCount();
+        return nTransitionCount;
+    }
+
+
+
+
+    /** Create a new container that holds, under the same names, a DFA for
+     *  every automaton in this container.  All DFAs share one new
+     *  StateContainer.
+     */
+    public FiniteAutomatonContainer CreateDFAs ()
+    {
+        final StateContainer aDFAStateContainer = new StateContainer();
+        final FiniteAutomatonContainer aDFAs = new FiniteAutomatonContainer(aDFAStateContainer);
+        for (final Entry<QualifiedName, FiniteAutomaton> aEntry : maComplexTypeNameToAutomatonMap.entrySet())
+        {
+            aDFAs.AddAutomaton(
+                aEntry.getKey(),
+                aEntry.getValue().CreateDFA(
+                    aDFAStateContainer,
+                    aEntry.getKey()));
+        }
+        return aDFAs;
+    }
+
+
+
+
+    /** Create a new container that holds, under the same names, a minimized
+     *  version of every automaton in this container.
+     *  A log of the minimization is written to /tmp/minimization.log.  The
+     *  log stream is closed before this method returns, also when the
+     *  minimization is aborted by an exception.
+     *  @return
+     *      The container of minimized automatons or null when the log file
+     *      can not be opened.  In the latter case the stack trace of the
+     *      cause is printed.
+     */
+    public FiniteAutomatonContainer MinimizeDFAs ()
+    {
+        final PrintStream aLog;
+        try
+        {
+            aLog = new PrintStream(new FileOutputStream(new File("/tmp/minimization.log")));
+        }
+        catch(Exception e)
+        {
+            e.printStackTrace();
+            return null;
+        }
+
+        try
+        {
+            final StateContainer aNewStateContainer = new StateContainer();
+            final FiniteAutomatonContainer aDFAs = new FiniteAutomatonContainer(aNewStateContainer);
+            for (final Entry<QualifiedName, FiniteAutomaton> aEntry : maComplexTypeNameToAutomatonMap.entrySet())
+            {
+                aDFAs.AddAutomaton(
+                    aEntry.getKey(),
+                    HopcroftMinimizer.MinimizeDFA(
+                        aNewStateContainer,
+                        aEntry.getValue().GetStateContext(),
+                        aLog));
+            }
+            return aDFAs;
+        }
+        finally
+        {
+            // Without this the file handle stays open and buffered log
+            // output may never reach the file.
+            aLog.close();
+        }
+    }
+
+
+
+
+    /** Return the automaton that was added under the null name or null when
+     *  there is none.
+     *  NOTE(review): presumably the automaton for the top level elements is
+     *  the one registered under null -- confirm against the callers of
+     *  AddAutomaton().
+     */
+    public FiniteAutomaton GetTopLevelAutomaton ()
+    {
+        return maComplexTypeNameToAutomatonMap.get(null);
+    }
+
+
+
+
+    private final Map<QualifiedName, FiniteAutomaton> maComplexTypeNameToAutomatonMap;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/HopcroftMinimizer.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/HopcroftMinimizer.java
new file mode 100644
index 0000000..6d50592
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/HopcroftMinimizer.java
@@ -0,0 +1,355 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.TreeMap;
+import java.util.TreeSet;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** Minimize a DFA with respect to its number of states.
+ * This is most important for the use of the 'all' element in the OOXML
+ * specification which leads to a lot of additional states and transitions.
+ */
+public class HopcroftMinimizer
+{
+ /** Create a DFA that is equivalent to a given DFA but has the minimal
+ * number of states.
+ *
+ * The states are partitioned into sets of states.  Starting from the split
+ * into accepting and non-accepting states, the sets are split repeatedly
+ * until one pass over all sets does not change the partition anymore.
+ * Each remaining set is then turned into one state of the new automaton.
+ *
+ * @param aNewStateContainer
+ *     The container that is handed to the StateContext of the new states.
+ * @param aOriginalStates
+ *     The states (and their transitions) of the DFA that is to be minimized.
+ * @param aLog
+ *     Receives a description of the automaton before and after the
+ *     minimization.  Can be null, in which case nothing is logged.
+ * @return
+ *     A new FiniteAutomaton that is built from the minimized states.
+ */
+ public static FiniteAutomaton MinimizeDFA (
+ final StateContainer aNewStateContainer,
+ final StateContext aOriginalStates,
+ final PrintStream aLog)
+ {
+ if (aLog != null)
+ {
+ aLog.printf("minimizing %d states and %d transitions\n",
+ aOriginalStates.GetStateCount(),
+ aOriginalStates.GetTransitionCount());
+ DisplayStates(aOriginalStates, aLog);
+ }
+
+ // aT is the partition that is currently being built, aP is the partition
+ // of the previous pass.  The maps give for each state the set it belongs to.
+ // aP starts out empty while aT gets at least one set, so the loop below
+ // is entered at least once.
+ TreeSet<StateSet> aT = new TreeSet<>();
+ TreeSet<StateSet> aP = new TreeSet<>();
+ Map<State,StateSet> aTMap = new HashMap<>();
+ Map<State,StateSet> aPMap = new HashMap<>();
+ InitializeMap(aT, aTMap, aOriginalStates.GetStates());
+
+ // Split partitions until there is nothing else to do.
+ while ( ! AreSetsOfStateSetsEqual(aP, aT))
+ {
+ if (aLog != null)
+ aLog.printf("T has %d members\n", aT.size());
+
+ // The result of the previous pass becomes the input of this pass.
+ aP = aT;
+ aPMap = aTMap;
+ aT = new TreeSet<>();
+ aTMap = new HashMap<>();
+
+ for (final StateSet aSet : aP)
+ {
+ final Iterable<StateSet> aParts = Split(aSet, aP, aPMap);
+ if (aParts == null)
+ {
+ // No split necessary.
+ assert( ! aSet.IsEmpty());
+ aT.add(aSet);
+ for (final State aState : aSet.GetStates())
+ aTMap.put(aState, aSet);
+ }
+ else
+ {
+ // Replace the set by its parts.
+ for (final StateSet aPart : aParts)
+ {
+ assert( ! aPart.IsEmpty());
+ aT.add(aPart);
+
+ for (final State aState : aPart.GetStates())
+ aTMap.put(aState, aPart);
+ }
+ }
+ }
+ }
+
+ // Create new states.
+ final StateContext aMinimizedStates = CreateNewStates(
+ aP,
+ aPMap,
+ aNewStateContainer,
+ aOriginalStates);
+
+ if (aLog != null)
+ {
+ aLog.printf("to %d states and %d transitions\n",
+ aMinimizedStates.GetStateCount(),
+ aMinimizedStates.GetTransitionCount());
+ DisplayStates(aMinimizedStates, aLog);
+ // When the loop above has finished, aT and aP compare as equal.
+ for (final StateSet aSet : aT)
+ aLog.printf(" %s\n", aSet.toString());
+ }
+
+ // Create and return the new minimized automaton.
+ return new FiniteAutomaton(
+ aMinimizedStates);
+ }
+
+
+
+
+ /** Set up the initial partition.  It consists of one set that contains all
+  *  accepting states and, when there are any, a second set that contains
+  *  all non-accepting states.
+  *  @param aSet
+  *      Receives the one or two state sets.
+  *  @param aMap
+  *      Receives for every state the state set that it has been put into.
+  *  @param aStates
+  *      The states to distribute.
+  *  @throws RuntimeException
+  *      When none of the given states is accepting.
+  */
+ private static void InitializeMap (
+     final Set<StateSet> aSet,
+     final Map<State,StateSet> aMap,
+     final Iterable<State> aStates)
+ {
+     final StateSet aAccepting = new StateSet();
+     final StateSet aRejecting = new StateSet();
+     for (final State aCurrent : aStates)
+     {
+         final StateSet aTarget = aCurrent.IsAccepting() ? aAccepting : aRejecting;
+         aTarget.AddState(aCurrent);
+         aMap.put(aCurrent, aTarget);
+     }
+
+     if (aAccepting.IsEmpty())
+         throw new RuntimeException("there should be at least one accepting state");
+     aSet.add(aAccepting);
+
+     // An empty set must not become part of the partition.
+     if ( ! aRejecting.IsEmpty())
+         aSet.add(aRejecting);
+ }
+
+
+
+
+ /** Try to split the given state set.  The element names of all transitions
+  *  that start in the set are tried one after the other.  The first name
+  *  that yields more than one part determines the result.
+  *  @return
+  *      The parts of the first successful split or null when the set has
+  *      exactly one state or when no element name leads to a split.
+  */
+ private static Iterable<StateSet> Split (
+     final StateSet aSet,
+     final Set<StateSet> aT,
+     final Map<State,StateSet> aTMap)
+ {
+     if (aSet.GetStateCount() != 1)
+     {
+         for (final QualifiedName aName : CollectElementNames(aSet))
+         {
+             final Collection<StateSet> aCandidate = Split(aSet, aT, aTMap, aName);
+             if (aCandidate != null && aCandidate.size() > 1)
+                 return aCandidate;
+         }
+     }
+     return null;
+ }
+
+
+
+
+ /** Create a partition of the given set of states according to their
+  *  transitions for one element name.
+  *  All states whose transition for aElementName ends in the same state set
+  *  (as looked up in aTMap) go into the same part.  States that have no
+  *  transition for aElementName form a part of their own.
+  *  @param aSet
+  *      The states to partition.
+  *  @param aT
+  *      The current partition.  It is not used by this method.
+  *  @param aTMap
+  *      Maps each state to the state set of the current partition that it
+  *      belongs to.
+  *  @param aElementName
+  *      Only transitions for this element name are regarded.
+  *  @return
+  *      The parts, or null when all states end up in the same part, i.e. when
+  *      no split is necessary.
+  */
+ private static Collection<StateSet> Split (
+     final StateSet aSet,
+     final Set<StateSet> aT,
+     final Map<State,StateSet> aTMap,
+     final QualifiedName aElementName)
+ {
+     // Do two steps for every state s of aSet:
+     //   from s via the transition regarding aElementName to s'
+     //   from s' to its state set under aTMap.
+     // Then collect all states that reach the same state set in one part.
+     // The null key collects the states that have no matching transition.
+     final Map<StateSet,StateSet> aTargetToPart = new HashMap<>();
+     for (final State aState : aSet.GetStates())
+     {
+         final Transition aTransition = GetTransition(aState, aElementName);
+         final StateSet aTarget = aTransition == null
+             ? null
+             : aTMap.get(aTransition.GetEndState());
+
+         StateSet aPart = aTargetToPart.get(aTarget);
+         if (aPart == null)
+         {
+             aPart = new StateSet();
+             aTargetToPart.put(aTarget, aPart);
+         }
+         aPart.AddState(aState);
+     }
+
+     // The number of parts, not the number of states, decides whether there
+     // is a split.
+     if (aTargetToPart.size() <= 1)
+     {
+         // No split necessary.
+         return null;
+     }
+     return aTargetToPart.values();
+ }
+
+
+
+
+ /** Look up the transition of the given state for the given element name.
+  *  @return
+  *      The matching transition or null when the state has none for
+  *      aElementName.
+  */
+ private static Transition GetTransition (
+     final State aState,
+     final QualifiedName aElementName)
+ {
+     Transition aMatch = null;
+     for (final Transition aCurrent : aState.GetTransitions())
+     {
+         if (aCurrent.GetElementName().compareTo(aElementName) != 0)
+             continue;
+
+         // The loop is not left after the first match so that the assertion
+         // can detect a second transition for the same element name.
+         assert(aMatch==null);
+         aMatch = aCurrent;
+     }
+     return aMatch;
+ }
+
+
+
+
+ /** Collect the element names of all transitions that start at any of the
+  *  states in the given set.
+  *  @return
+  *      A sorted set (TreeSet) of the names, without duplicates.
+  */
+ private static Set<QualifiedName> CollectElementNames (final StateSet aSet)
+ {
+     final Set<QualifiedName> aResult = new TreeSet<>();
+     for (final State aCurrentState : aSet.GetStates())
+     {
+         for (final Transition aCurrentTransition : aCurrentState.GetTransitions())
+         {
+             aResult.add(aCurrentTransition.GetElementName());
+         }
+     }
+     return aResult;
+ }
+
+
+
+
+ /** Compare two sorted sets of state sets.
+  *  @return
+  *      true when both sets have the same size and their members, visited
+  *      in iteration order, are pairwise equal according to compareTo().
+  */
+ private static boolean AreSetsOfStateSetsEqual (
+     final TreeSet<StateSet> aSetOfSetsA,
+     final TreeSet<StateSet> aSetOfSetsB)
+ {
+     if (aSetOfSetsA.size() != aSetOfSetsB.size())
+         return false;
+
+     final Iterator<StateSet> aLeft = aSetOfSetsA.iterator();
+     final Iterator<StateSet> aRight = aSetOfSetsB.iterator();
+     while (aLeft.hasNext() && aRight.hasNext())
+     {
+         final StateSet aLeftSet = aLeft.next();
+         final StateSet aRightSet = aRight.next();
+         if (aLeftSet.compareTo(aRightSet) != 0)
+             return false;
+     }
+     return true;
+ }
+
+
+
+
+ /** Create the states and transitions of the minimized automaton.
+  *  Every state set in aP becomes one new state.
+  *  @param aP
+  *      The final partition of the original states.
+  *  @param aPMap
+  *      Maps each original state to its state set.  It is not used by this
+  *      method.
+  *  @param aNewStateContainer
+  *      The container that is handed to the new StateContext.
+  *  @param aOriginalStates
+  *      The states of the original automaton.  Its start state provides the
+  *      name for the new StateContext.
+  *  @return
+  *      The StateContext that was created for the new states.
+  */
+ private static StateContext CreateNewStates (
+ final TreeSet<StateSet> aP,
+ final Map<State,StateSet> aPMap,
+ final StateContainer aNewStateContainer,
+ final StateContext aOriginalStates)
+ {
+ final StateContext aMinimizedStates = new StateContext(
+ aNewStateContainer,
+ aOriginalStates.GetStartState().GetFullname());
+
+ // Create the new states.
+ // For each set, the first state that the iteration returns is cloned.
+ // All states of the set are mapped to that one clone.
+ final Map<State,State> aOldStateToNewStateMap = new TreeMap<>();
+ for (final StateSet aSet : aP)
+ {
+ State aNewState = null;
+ for (final State aOldState : aSet.GetStates())
+ {
+ if (aNewState == null)
+ aNewState = aOldState.Clone(aMinimizedStates);
+ aOldStateToNewStateMap.put(aOldState, aNewState);
+ }
+ }
+
+ // Create the new transitions.
+ // Only the transitions of the first state of each set are regarded.
+ for (final StateSet aSet : aP)
+ {
+ final State aOldStartState = aSet.GetStates().iterator().next();
+ final State aNewStartState = aOldStateToNewStateMap.get(aOldStartState);
+
+ for (final Transition aTransition : aOldStartState.GetTransitions())
+ {
+ final State aOldEndState = aTransition.GetEndState();
+ final State aNewEndState = aOldStateToNewStateMap.get(aOldEndState);
+
+ // Check if the transition already exists.
+ if (HasTransition(aNewStartState, aTransition.GetElementName()))
+ continue;
+
+ aNewStartState.AddTransition(
+ new Transition(
+ aNewStartState,
+ aNewEndState,
+ aTransition.GetElementName(),
+ aTransition.GetElementTypeName()));
+ }
+ }
+
+ // Transfer skip data and accepting flags.
+ // This is done for all old states, not only for the first one of each
+ // set, so a new state receives the skip data of every old state that
+ // is mapped to it.
+ for (final State aOldState : aOriginalStates.GetStates())
+ {
+ final State aNewState = aOldStateToNewStateMap.get(aOldState);
+ if (aOldState.IsAccepting())
+ aNewState.SetIsAccepting();
+ for (final SkipData aSkipData : aOldState.GetSkipData())
+ aNewState.AddSkipData(aSkipData.Clone(aNewState));
+ }
+ return aMinimizedStates;
+ }
+
+
+
+
+ /** Tell whether the given state has a transition for the given element name.
+  *  @return
+  *      true when at least one transition of aState has an element name
+  *      that compares as equal to aElementName.
+  */
+ private static boolean HasTransition (
+     final State aState,
+     final QualifiedName aElementName)
+ {
+     boolean bFound = false;
+     for (final Transition aCandidate : aState.GetTransitions())
+     {
+         if (aCandidate.GetElementName().compareTo(aElementName) == 0)
+         {
+             bFound = true;
+             break;
+         }
+     }
+     return bFound;
+ }
+
+
+
+
+ /** Write a description of the given states to the log: one line per state
+  *  (name and whether it is accepting) followed by one line per transition
+  *  of that state (end state and element name).
+  *  @param aLog
+  *      Must not be null.
+  */
+ private static void DisplayStates (
+     final StateContext aStates,
+     final PrintStream aLog)
+ {
+     for (final State aCurrent : aStates.GetStates())
+     {
+         final String sName = aCurrent.GetFullname();
+         final String sAcceptingNote = aCurrent.IsAccepting() ? "is accepting" : "";
+         aLog.printf(" %s %s\n", sName, sAcceptingNote);
+
+         for (final Transition aOutgoing : aCurrent.GetTransitions())
+         {
+             aLog.printf(" -> %s via %s\n",
+                 aOutgoing.GetEndState().GetFullname(),
+                 aOutgoing.GetElementName().GetStateName());
+         }
+     }
+ }
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/NonValidatingCreator.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/NonValidatingCreator.java
new file mode 100644
index 0000000..2b7ec80
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/NonValidatingCreator.java
@@ -0,0 +1,180 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.iterator.DereferencingNodeIterator;
+import org.apache.openoffice.ooxml.schema.model.base.INode;
+import org.apache.openoffice.ooxml.schema.model.base.NodeType;
+import org.apache.openoffice.ooxml.schema.model.complex.Any;
+import org.apache.openoffice.ooxml.schema.model.complex.ComplexType;
+import org.apache.openoffice.ooxml.schema.model.complex.Element;
+import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.model.schema.SchemaBase;
+
+/** Create a set of stack automatons for a given set of schemas.
+ * Creates one automaton for each complex type and one for the top level elements.
+ *
+ * Input files are, however, not validated to conform to the schemas.
+ */
+public class NonValidatingCreator
+{
+ /** Create a new creator for the given schema base.
+  *  @param aSchemaBase
+  *      Provides the complex types for which automatons are created.
+  *  @param aLogFile
+  *      The file that is opened for log output.  When it can not be opened
+  *      then the stack trace is printed and the log remains null.
+  */
+ public NonValidatingCreator (
+     final SchemaBase aSchemaBase,
+     final File aLogFile)
+ {
+     maSchemaBase = aSchemaBase;
+     maStateContainer = new StateContainer();
+
+     // The final member can be assigned only once, therefore go via a local.
+     PrintStream aStream;
+     try
+     {
+         aStream = new PrintStream(new FileOutputStream(aLogFile));
+     }
+     catch (FileNotFoundException aException)
+     {
+         aException.printStackTrace();
+         aStream = null;
+     }
+     maLog = aStream;
+ }
+
+
+
+
+ /** Create the automatons for the top level elements of the given schemas
+  *  and for all complex types of the schema base.
+  *  The automaton for the top level elements is added under the null name,
+  *  the others under the names of their complex types.
+  *  The log is closed at the end.
+  *  @param aTopLevelSchemas
+  *      The schemas whose top level elements are regarded.
+  *  @return
+  *      A new container with the created automatons.
+  */
+ public FiniteAutomatonContainer Create (
+     final Iterable<Schema> aTopLevelSchemas)
+ {
+     final FiniteAutomatonContainer aAutomatons = new FiniteAutomatonContainer(maStateContainer);
+
+     aAutomatons.AddAutomaton(
+         null,
+         CreateForTopLevelElements(aTopLevelSchemas));
+
+     for (final ComplexType aComplexType : maSchemaBase.ComplexTypes.GetSorted())
+         aAutomatons.AddAutomaton(
+             aComplexType.GetName(),
+             CreateForComplexType(aComplexType));
+
+     // The log is null when the constructor could not open the log file.
+     if (maLog != null)
+         maLog.close();
+
+     return aAutomatons;
+ }
+
+
+
+ /** Create the automaton for the top level elements.  It has a start state
+  *  and an end state.  For every top level element of the given schemas
+  *  there is one transition from the start state to the end state.
+  */
+ private FiniteAutomaton CreateForTopLevelElements (
+     final Iterable<Schema> aTopLevelSchemas)
+ {
+     final StateContext aContext = new StateContext(
+         maStateContainer,
+         "<top-level>");
+     final State aEndState = aContext.CreateEndState();
+     final State aStartState = aContext.GetStartState();
+
+     // One transition per top level element.
+     for (final Schema aCurrentSchema : aTopLevelSchemas)
+     {
+         for (final Element aTopLevelElement : aCurrentSchema.TopLevelElements.GetSorted())
+         {
+             final Transition aTransition = new Transition(
+                 aStartState,
+                 aEndState,
+                 aTopLevelElement.GetElementName(),
+                 aTopLevelElement.GetTypeName().GetStateName());
+             aStartState.AddTransition(aTransition);
+         }
+     }
+
+     return new FiniteAutomaton(aContext);
+ }
+
+
+
+
+ /** Create the automaton for one complex type.  It uses only the start
+  *  state, which is made accepting.  Every element that is collected for
+  *  the type becomes a transition from the start state back to itself,
+  *  every 'any' becomes skip data of the start state.
+  */
+ private FiniteAutomaton CreateForComplexType (final ComplexType aComplexType)
+ {
+     final StateContext aContext = new StateContext(
+         maStateContainer,
+         aComplexType.GetName().GetStateName());
+     final State aStartState = aContext.GetStartState();
+
+     // Elements become loops at the start state.
+     for (final Element aChild : CollectElements(aComplexType))
+     {
+         final Transition aLoop = new Transition(
+             aStartState,
+             aStartState,
+             aChild.GetElementName(),
+             aChild.GetTypeName().GetStateName());
+         aStartState.AddTransition(aLoop);
+     }
+
+     // Anys become skip data.
+     for (final Any aCurrentAny : CollectAnys(aComplexType))
+     {
+         final SkipData aSkip = new SkipData(
+             aCurrentAny.GetProcessContentsFlag(),
+             aCurrentAny.GetNamespaces());
+         aStartState.AddSkipData(aSkip);
+     }
+
+     aStartState.SetIsAccepting();
+
+     return new FiniteAutomaton(aContext);
+ }
+
+
+
+
+ /** Collect all elements inside the type tree that is rooted in the given
+ * complex type.
+ */
+ private Vector<Element> CollectElements (final ComplexType aType)
+ {
+     final Vector<Element> aResult = new Vector<>();
+     // Visit the nodes that the DereferencingNodeIterator yields for aType
+     // and keep only the ones of type Element.
+     for (final INode aCandidate : new DereferencingNodeIterator(aType, maSchemaBase, false))
+     {
+         if (aCandidate.GetNodeType() != NodeType.Element)
+             continue;
+         aResult.add((Element)aCandidate);
+     }
+     return aResult;
+ }
+
+
+
+
+ /** Collect all nodes of type Any that the DereferencingNodeIterator yields
+  *  for the given complex type.
+  */
+ private Vector<Any> CollectAnys (final ComplexType aType)
+ {
+     final Vector<Any> aResult = new Vector<>();
+     for (final INode aCandidate : new DereferencingNodeIterator(aType, maSchemaBase, false))
+     {
+         if (aCandidate.GetNodeType() != NodeType.Any)
+             continue;
+         aResult.add((Any)aCandidate);
+     }
+     return aResult;
+ }
+
+
+
+
+ private final SchemaBase maSchemaBase;
+ private final StateContainer maStateContainer;
+ private final PrintStream maLog;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/SkipData.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/SkipData.java
new file mode 100644
index 0000000..9a530af
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/SkipData.java
@@ -0,0 +1,33 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import org.apache.openoffice.ooxml.schema.model.complex.Any;
+
+/** Description of optional content that can be skipped when not supported.
+ * Corresponds to the 'any' schema element.
+ *
+ * Holds the process-contents flag and the namespaces of one 'any' element.
+ */
+public class SkipData
+{
+ /** Store the given values.  The namespace array is stored as is, it is
+ * not copied.
+ */
+ public SkipData (
+ final Any.ProcessContents aProcessContents,
+ final String[] aNamespaces)
+ {
+ maProcessContents = aProcessContents;
+ maNamespaces = aNamespaces;
+ }
+
+
+
+
+ /** Create a new SkipData object with the same values as this one.
+ * The namespace array is shared between the two objects.
+ * @param aState
+ *     Not used.
+ */
+ public SkipData Clone (final State aState)
+ {
+ return new SkipData(
+ maProcessContents,
+ maNamespaces);
+ }
+
+
+
+
+ // Both members have package visibility.
+ final Any.ProcessContents maProcessContents;
+ final String[] maNamespaces;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/State.java
similarity index 70%
rename from ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java
rename to ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/State.java
index 7a898a9..d5f297e 100644
--- a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/State.java
@@ -19,7 +19,7 @@
*
*************************************************************/
-package org.apache.openoffice.ooxml.schema.generator.automaton;
+package org.apache.openoffice.ooxml.schema.automaton;
import java.util.Vector;
@@ -34,13 +34,9 @@ public class State
implements Comparable<State>
{
/** Create a new state from a basename and an optional suffix.
- * When a state with the resulting name already exists in the state context
- * then a RuntimeException is thrown.
*
- * Call this method instead of GetState() when it is clear from the context
- * that the state must not yet exist.
- */
- /** State objects can only be created via the GetState... methods.
+ * Don't call this constructor directly. Use methods in StateContext instead.
+ * They ensure that states are unique per context.
*/
State (
final QualifiedName aBasename,
@@ -50,6 +46,9 @@ public class State
msSuffix = sSuffix;
msFullname = GetStateName(aBasename, msSuffix);
maTransitions = new Vector<>();
+ maEpsilonTransitions = new Vector<>();
+ maSkipData = new Vector<>();
+ mbIsAccepting = false;
}
@@ -116,35 +115,58 @@ public class State
- /** Define how the optimizer will handle short circuits (epsilon paths)
- * to the end state of an OOXML type.
- * When there is an epsilon path from this state to aEndState then replace
- * it with aReplacementState.
- * It is an error to set two short circuits.
- */
- public void SetShortCircuit (
- final State aEndState,
- final State aReplacementState)
+ public int GetTransitionCount ()
+ {
+ return maTransitions.size();
+ }
+
+
+
+
+ public void AddEpsilonTransition (final EpsilonTransition aTransition)
+ {
+ assert(this == aTransition.GetStartState());
+ maEpsilonTransitions.add(aTransition);
+ }
+
+
+
+
+ public Iterable<EpsilonTransition> GetEpsilonTransitions()
+ {
+ return maEpsilonTransitions;
+ }
+
+
+
+
+ public void AddSkipData (final SkipData aSkipData)
{
- assert(maShortCircuitEnd == null);
- maShortCircuitEnd = aEndState;
- maShortCircuitReplacement = aReplacementState;
+ maSkipData.add(aSkipData);
}
- public State GetShortCircuitEnd ()
+ public Iterable<SkipData> GetSkipData ()
{
- return maShortCircuitEnd;
+ return maSkipData;
}
- public State GetShortCircuitReplacement ()
+ public void SetIsAccepting ()
{
- return maShortCircuitReplacement;
+ mbIsAccepting = true;
+ }
+
+
+
+
+ public boolean IsAccepting ()
+ {
+ return mbIsAccepting;
}
@@ -186,11 +208,7 @@ public class State
private final String msSuffix;
private final String msFullname;
private final Vector<Transition> maTransitions;
- /** Used in the optimization phase.
- * When there is a way to reach the short circuit end only via epsilon transitions
- * then replace it with the replacement state.
- * This is to avoid jumps in sequences (or occurrences, etc.) when transitions become effectively optional.
- */
- private State maShortCircuitEnd;
- private State maShortCircuitReplacement;
+ private final Vector<EpsilonTransition> maEpsilonTransitions;
+ private final Vector<SkipData> maSkipData;
+ private boolean mbIsAccepting;
}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContainer.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContainer.java
new file mode 100644
index 0000000..509f7d2
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContainer.java
@@ -0,0 +1,52 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/** A container of states that spans all StateContext objects, each of which
+ *  represents a single complex type.
+ *
+ *  States are stored under their full names.
+ */
+public class StateContainer
+{
+    /** Create an empty container.
+     */
+    public StateContainer ()
+    {
+        maNameToStateMap = new HashMap<>();
+    }
+
+
+
+
+    /** Return whether a state is stored under the given full name.
+     */
+    boolean HasState (final String sFullname)
+    {
+        return maNameToStateMap.containsKey(sFullname);
+    }
+
+
+
+
+    /** Return the state that is stored under the given full name or null
+     *  when there is none.
+     */
+    State GetStateForFullname (final String sFullname)
+    {
+        return maNameToStateMap.get(sFullname);
+    }
+
+
+
+
+    /** Store the given state under its full name.  A state that was stored
+     *  under the same name before is replaced.
+     */
+    public void AddState (final State aState)
+    {
+        maNameToStateMap.put(aState.GetFullname(), aState);
+    }
+
+
+
+
+    /** Remove the entry for the full name of the given state.
+     *  Nothing happens when there is no such entry.
+     */
+    public void RemoveState (final State aState)
+    {
+        // The map is keyed by name.  Passing the state object itself to
+        // remove() would never match a key and would leave the entry in place.
+        maNameToStateMap.remove(aState.GetFullname());
+    }
+
+
+
+
+    private final Map<String,State> maNameToStateMap;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StateContext.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContext.java
similarity index 56%
rename from ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StateContext.java
rename to ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContext.java
index 0d5c73a..ba33d09 100644
--- a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StateContext.java
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateContext.java
@@ -19,31 +19,36 @@
*
*************************************************************/
-package org.apache.openoffice.ooxml.schema.generator.automaton;
+package org.apache.openoffice.ooxml.schema.automaton;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
+import java.util.Vector;
import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
-/** Represents a set of state.
- * There is a single start state.
+/** Represents the set of states of a single complex type.
+ *
+ * Because states have to be unique, the state container is an object shared
+ * by all StateContext objects.
+ *
+ * There is a single start state but there can be more than one accepting state.
*/
public class StateContext
{
public StateContext (
- final String sStartStateName,
- final String sEndStateName)
+ final StateContainer aStateContainer,
+ final String sBaseStateName)
{
- maStates = new HashMap<>();
- maStartState = CreateState(new QualifiedName(null, null, sStartStateName), null);
- maEndState = CreateState(new QualifiedName(null, null, sEndStateName), null);
+ maStateContainer = aStateContainer;
+ maStates = new HashSet<>();
+ maStartState = GetOrCreateState(new QualifiedName(null, null, sBaseStateName), null);
}
+
public State CreateState (
final QualifiedName aBasename,
final String sSuffix)
@@ -58,6 +63,13 @@ public class StateContext
+ public State CreateState (final String sBasename)
+ {
+ return CreateState(new QualifiedName(sBasename), null);
+ }
+
+
+
public State CreateState (
final State aState,
@@ -80,7 +92,7 @@ public class StateContext
final QualifiedName aBasename,
final String sSuffix)
{
- return maStates.get(State.GetStateName(aBasename, sSuffix));
+ return maStateContainer.GetStateForFullname(State.GetStateName(aBasename, sSuffix));
}
@@ -90,10 +102,10 @@ public class StateContext
final QualifiedName aBasename,
final String sSuffix)
{
- State aState = maStates.get(State.GetStateName(aBasename, sSuffix));
+ State aState = GetState(aBasename, sSuffix);
if (aState == null)
{
- aState = new State(aBasename, sSuffix);
+ aState = CreateState(aBasename, sSuffix);
AddState(aState);
}
return aState;
@@ -102,33 +114,31 @@ public class StateContext
- public State GetStateForTypeName (final QualifiedName aName)
+ public State GetStartStateForTypeName (final QualifiedName aName)
{
- State aState = maStates.get(aName.GetStateName());
- if (aState == null)
- {
- aState = new State(aName, null);
- AddState(aState);
- }
- return aState;
+ return GetOrCreateState(aName, null);
}
- /** The start state is the state a parser is in initially.
- */
- public State GetStartState ()
+ public State CreateEndState ()
{
- return maStartState;
+ final State aEndState = CreateState(
+ maStartState.GetBasename(),
+ "end");
+ aEndState.SetIsAccepting();
+ return aEndState;
}
- public State GetEndState ()
+ public boolean HasState (
+ final QualifiedName aBasename,
+ final String sSuffix)
{
- return maEndState;
+ return maStateContainer.HasState(State.GetStateName(aBasename, sSuffix));
}
@@ -139,7 +149,29 @@ public class StateContext
*/
public boolean HasState (final String sFullname)
{
- return maStates.containsKey(sFullname);
+ return maStateContainer.HasState(sFullname);
+ }
+
+
+
+
+ /** The start state is the state a parser is in initially.
+ */
+ public State GetStartState ()
+ {
+ return maStartState;
+ }
+
+
+
+
+ public Iterable<State> GetAcceptingStates ()
+ {
+ final Vector<State> aAcceptingStates = new Vector<>();
+ for (final State aState : maStates)
+ if (aState.IsAccepting())
+ aAcceptingStates.add(aState);
+ return aAcceptingStates;
}
@@ -149,7 +181,17 @@ public class StateContext
*/
public void AddState (final State aState)
{
- maStates.put(aState.GetFullname(), aState);
+ maStateContainer.AddState(aState);
+ maStates.add(aState);
+ }
+
+
+
+
+ public void RemoveState (final State aState)
+ {
+ maStateContainer.RemoveState(aState);
+ maStates.remove(aState);
}
@@ -166,14 +208,33 @@ public class StateContext
public Iterable<State> GetStatesSorted()
{
final Set<State> aSortedStates = new TreeSet<>();
- aSortedStates.addAll(maStates.values());
+ aSortedStates.addAll(maStates);
return aSortedStates;
}
- private final Map<String,State> maStates;
+ public Iterable<State> GetStates()
+ {
+ return maStates;
+ }
+
+
+
+
+ public int GetTransitionCount ()
+ {
+ int nStateCount = 0;
+ for (final State aState : maStates)
+ nStateCount += aState.GetTransitionCount();
+ return nStateCount;
+ }
+
+
+
+
+ private final StateContainer maStateContainer;
+ private final Set<State> maStates;
private final State maStartState;
- private final State maEndState;
}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateSet.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateSet.java
new file mode 100644
index 0000000..d094027
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/automaton/StateSet.java
@@ -0,0 +1,223 @@
+package org.apache.openoffice.ooxml.schema.automaton;
+
+import java.util.Iterator;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** Used in the transformation of NFA to DFA and in the minimization of DFAs.
+ * References a set of regular states.
+ */
+public class StateSet
+ implements Comparable<StateSet>
+{
+ public StateSet ()
+ {
+ maStates = new TreeSet<>();
+ }
+
+
+
+
+ public StateSet (final State aState)
+ {
+ this();
+ maStates.add(aState);
+ }
+
+
+
+
+ public StateSet (final StateSet aSet)
+ {
+ this();
+ maStates.addAll(aSet.maStates);
+ }
+
+
+
+
+ public StateSet (final Iterable<State> aStates)
+ {
+ this();
+ for (final State aState : aStates)
+ maStates.add(aState);
+ }
+
+
+
+
+ public void AddState (final State aState)
+ {
+ maStates.add(aState);
+ }
+
+
+
+
+ public void AddStates (final StateSet aStates)
+ {
+ maStates.addAll(aStates.maStates);
+ }
+
+
+
+
+ public boolean IsDisjoint (final StateSet aOther)
+ {
+ for (final State aState : aOther.maStates)
+ if (maStates.contains(aState))
+ return false;
+ for (final State aState : maStates)
+ if (aOther.maStates.contains(aState))
+ return false;
+ return true;
+ }
+
+
+
+
+ public void RemoveState (final State aState)
+ {
+ maStates.remove(aState);
+ }
+
+
+
+
+ public Iterable<State> GetStates ()
+ {
+ return maStates;
+ }
+
+
+
+
+ public boolean ContainsState (final State aState)
+ {
+ return maStates.contains(aState);
+ }
+
+
+
+
+ public int GetStateCount ()
+ {
+ return maStates.size();
+ }
+
+
+
+
+ public boolean HasStates ()
+ {
+ return ! maStates.isEmpty();
+ }
+
+
+
+
+ public State CreateStateForStateSet (final StateContext aContext)
+ {
+ // Find a name for the new state. If there is type state in the given
+ // set then use its name.
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list