[Libreoffice-commits] core.git: Branch 'aoo/trunk' - 2 commits - ooxml/source solenv/inc
Jürgen Schmidt
jsc at apache.org
Tue Jun 3 05:08:41 PDT 2014
ooxml/source/framework/JavaOOXMLParser/.classpath | 6
ooxml/source/framework/JavaOOXMLParser/.project | 17
ooxml/source/framework/JavaOOXMLParser/.settings/org.eclipse.jdt.core.prefs | 11
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java | 78
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeProvider.java | 101
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java | 103
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java | 100
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java | 76
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java | 268 ++
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java | 202 +
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java | 88
ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java | 115
ooxml/source/framework/SchemaParser/.classpath | 6
ooxml/source/framework/SchemaParser/.project | 17
ooxml/source/framework/SchemaParser/.settings/org.eclipse.jdt.core.prefs | 11
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java | 274 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/LogGenerator.java | 323 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/ParserTablesGenerator.java | 193 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/IAction.java | 38
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/NonValidatingCreator.java | 198 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ParseElementAction.java | 90
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/SkipElementAction.java | 67
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomaton.java | 110
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomatonOptimizer.java | 257 ++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java | 196 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StateContext.java | 179 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/Transition.java | 149 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ValidatingCreator.java | 122 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ValidatingCreatorVisitor.java | 556 ++++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/AttributeIterator.java | 109
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/DereferencingNodeIterator.java | 111
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/iterator/NodeIterator.java | 68
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/Attribute.java | 93
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeBase.java | 89
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeGroup.java | 65
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeGroupReference.java | 112
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/attribute/AttributeReference.java | 102
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/INode.java | 38
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/INodeReference.java | 32
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/INodeVisitor.java | 85
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/Location.java | 61
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/Node.java | 156 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/NodeType.java | 53
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/NodeVisitorAdapter.java | 203 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/base/QualifiedName.java | 141 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/All.java | 87
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Any.java | 75
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Choice.java | 68
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexContent.java | 56
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexType.java | 72
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ComplexTypeReference.java | 91
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Element.java | 108
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/ElementReference.java | 103
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Extension.java | 152 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Group.java | 70
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/GroupReference.java | 109
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/OccurrenceIndicator.java | 152 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/complex/Sequence.java | 73
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/ProcessTypeVisitor.java | 110
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/RequestVisitor.java | 119 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/optimize/SchemaOptimizer.java | 119 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/NamespaceMap.java | 144 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/Schema.java | 96
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/schema/TypeContainer.java | 100
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/BuiltIn.java | 146 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/List.java | 84
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/Restriction.java | 210 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleContent.java | 56
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleType.java | 121 +
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/SimpleTypeReference.java | 107
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/model/simple/Union.java | 68
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/parser/SchemaParser.java | 1185 ++++++++++
ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/parser/XmlNamespace.java | 60
solenv/inc/_tg_app.mk | 2
solenv/inc/tg_app.mk | 2
75 files changed, 9412 insertions(+), 2 deletions(-)
New commits:
commit 5e880a2209b11a2828c71e9ae1784dcacb53f34b
Author: Jürgen Schmidt <jsc at apache.org>
Date: Tue Jun 3 08:25:22 2014 +0000
#125003# change LIBRARY path for test run target to use local output dir
diff --git a/solenv/inc/_tg_app.mk b/solenv/inc/_tg_app.mk
index c9295b8..7c2698d 100644
--- a/solenv/inc/_tg_app.mk
+++ b/solenv/inc/_tg_app.mk
@@ -283,7 +283,7 @@ $(APP1TARGETN): $(APP1OBJS) $(APP1LIBS) \
.IF "$(APP1TEST)" == "enabled" && "$(APP1TARGET)" != ""
$(APP1TARGET)_run: $(APP1TARGETN)
- $(COMMAND_ECHO) $(AUGMENT_LIBRARY_PATH) $(APP1TARGETN) --gtest_output="xml:$(BIN)/$(APP1TARGET)_result.xml"
+ $(COMMAND_ECHO) $(AUGMENT_LIBRARY_PATH_LOCAL) $(APP1TARGETN) --gtest_output="xml:$(BIN)/$(APP1TARGET)_result.xml"
.ENDIF
diff --git a/solenv/inc/tg_app.mk b/solenv/inc/tg_app.mk
index 7ce4730..0054c98 100644
--- a/solenv/inc/tg_app.mk
+++ b/solenv/inc/tg_app.mk
@@ -285,7 +285,7 @@ $(APP$(TNR)TARGETN): $(APP$(TNR)OBJS) $(APP$(TNR)LIBS) \
.IF "$(APP$(TNR)TEST)" == "enabled" && "$(APP$(TNR)TARGET)" != ""
$(APP$(TNR)TARGET)_run: $(APP$(TNR)TARGETN)
- $(COMMAND_ECHO) $(AUGMENT_LIBRARY_PATH) $(APP$(TNR)TARGETN) --gtest_output="xml:$(BIN)/$(APP$(TNR)TARGET)_result.xml"
+ $(COMMAND_ECHO) $(AUGMENT_LIBRARY_PATH_LOCAL) $(APP$(TNR)TARGETN) --gtest_output="xml:$(BIN)/$(APP$(TNR)TARGET)_result.xml"
.ENDIF
commit dd0e2e5caa8ff260cb9aa3e8ab07529203e7998b
Author: Andre Fischer <af at apache.org>
Date: Tue Jun 3 06:57:13 2014 +0000
125035: Initial commit for the new OOXML framework.
Created new main/ooxml module.
Created schema parser at main/ooxml/source/framework/SchemaParser.
Created demo parser for OOXML files that is based on output of the schema parser.
diff --git a/ooxml/source/framework/JavaOOXMLParser/.classpath b/ooxml/source/framework/JavaOOXMLParser/.classpath
new file mode 100644
index 0000000..fb565a5
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
+ <classpathentry kind="output" path="bin"/>
+</classpath>
diff --git a/ooxml/source/framework/JavaOOXMLParser/.project b/ooxml/source/framework/JavaOOXMLParser/.project
new file mode 100644
index 0000000..409d549
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>JavaOOXMLParser</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
diff --git a/ooxml/source/framework/JavaOOXMLParser/.settings/org.eclipse.jdt.core.prefs b/ooxml/source/framework/JavaOOXMLParser/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..7341ab1
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
new file mode 100644
index 0000000..75caea7
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeManager.java
@@ -0,0 +1,78 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Map.Entry;
+
+/** Processes the attributes of an element according to the attribute
+ *  definitions that are registered for the parser state of that element.
+ *
+ *  Nothing in this class fills the definition table yet, so ParseAttributes()
+ *  currently finds no definitions for any state.
+ */
+public class AttributeManager
+{
+    /** Create a manager with an empty definition table.
+     *  @param aDataLocation
+     *      Directory of the parser tables.  It is accepted for symmetry with
+     *      the other table classes but is not read by this implementation.
+     */
+    public AttributeManager (final File aDataLocation)
+    {
+        maStateIdToAttributesMap = new HashMap<>();
+    }
+
+
+
+
+    /** Process every attribute of the current element.
+     *  @param nStateId
+     *      Id of the state whose attribute definitions are looked up.
+     *  @param aAttributeProvider
+     *      Gives access to the attributes of the current element.  It is only
+     *      iterated when definitions exist for the given state.
+     */
+    public void ParseAttributes (
+        final int nStateId,
+        final AttributeProvider aAttributeProvider)
+    {
+        final Map<String,String> aAttributeDefinitions = maStateIdToAttributesMap.get(nStateId);
+        if (aAttributeDefinitions == null)
+        {
+            // No definitions are known for this state.  Attributes on such an
+            // element are tolerated for now instead of being reported as errors.
+            return;
+        }
+
+        for (final Entry<String,String> aEntry : aAttributeProvider)
+        {
+            ParseAttributeValue(
+                aEntry.getKey(),
+                aEntry.getValue(),
+                aAttributeDefinitions.get(aEntry.getKey()));
+        }
+    }
+
+
+
+
+    /** Write name, simple type name and value of one attribute to the debug log.
+     *  @param sSimpleTypeName
+     *      Can be null when the attribute has no entry in the definitions.
+     */
+    private void ParseAttributeValue (
+        final String sName,
+        final String sValue,
+        final String sSimpleTypeName)
+    {
+        Log.Dbg.printf("attribute %s has type %s and value %s\n",
+            sName,
+            sSimpleTypeName,
+            sValue);
+    }
+
+
+
+
+    /** Maps state ids to maps from attribute name to simple type name.
+     *  The key type is Integer because lookups are made with int state ids;
+     *  with String keys the autoboxed id could never match an entry.
+     */
+    private final Map<Integer,Map<String,String>> maStateIdToAttributesMap;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeProvider.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeProvider.java
new file mode 100644
index 0000000..bf9d24c
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/AttributeProvider.java
@@ -0,0 +1,101 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.util.Iterator;
+import java.util.Map.Entry;
+
+import javax.xml.stream.XMLStreamReader;
+
+/** Gives access to the attributes of the element at which the wrapped
+ *  XMLStreamReader is currently positioned.
+ *
+ *  The provider does not copy anything up front: every call is forwarded to
+ *  the reader, so results always reflect the reader's position at call time.
+ */
+public class AttributeProvider
+    implements Iterable<Entry<String,String>>
+{
+    public AttributeProvider(XMLStreamReader aReader)
+    {
+        maReader = aReader;
+    }
+
+
+
+
+    /** Return true when the current element has at least one attribute.
+     */
+    public boolean HasAttributes ()
+    {
+        return maReader.getAttributeCount() > 0;
+    }
+
+
+
+
+    /** Return the value of the attribute with the given local name.
+     *  The namespace of the attribute is not taken into account.
+     */
+    public String GetValue (final String sKey)
+    {
+        return maReader.getAttributeValue(null, sKey);
+    }
+
+
+
+
+    /** Iterate over the attributes of the current element as read-only
+     *  (local name, value) pairs.
+     *  The number of attributes is determined when the iterator is created.
+     */
+    @Override
+    public Iterator<Entry<String,String>> iterator ()
+    {
+        return new Iterator<Entry<String,String>> ()
+        {
+            int nIndex = 0;
+            final int nCount = maReader.getAttributeCount();
+
+            @Override public boolean hasNext()
+            {
+                return nIndex < nCount;
+            }
+
+            @Override public Entry<String, String> next()
+            {
+                // Honor the Iterator contract instead of forwarding an
+                // out-of-range index to the reader.
+                if (nIndex >= nCount)
+                    throw new java.util.NoSuchElementException();
+
+                // Name and value are captured now so that the entry stays
+                // valid after the reader has moved on.
+                final Entry<String,String> aEntry =
+                    new java.util.AbstractMap.SimpleImmutableEntry<String,String>(
+                        maReader.getAttributeLocalName(nIndex),
+                        maReader.getAttributeValue(nIndex));
+                ++nIndex;
+                return aEntry;
+            }
+
+            @Override public void remove()
+            {
+                // Attributes of the XML stream can not be modified.
+                throw new UnsupportedOperationException("attributes are read-only");
+            }
+        };
+    }
+
+    private final XMLStreamReader maReader;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java
new file mode 100644
index 0000000..843b233
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Log.java
@@ -0,0 +1,103 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/** Minimal logger that writes formatted and indented text to an output stream.
+ *
+ *  Three shared instances exist: Std (System.out), Err (System.err) and Dbg,
+ *  which initially is the same object as Std and can be replaced by callers.
+ */
+public class Log
+{
+    public static Log Std = new Log(System.out);
+    public static Log Err = new Log(System.err);
+    public static Log Dbg = Std;
+
+    /** Text that is added by IncreaseIndentation() and removed by
+     *  DecreaseIndentation().  Both methods use this one constant so that they
+     *  can not get out of step.
+     */
+    private static final String INDENTATION_STEP = "    ";
+
+
+    /** Create a log that writes to the given stream.
+     *  @param aOut
+     *      Stream to write to.  When it is null then all output is dropped.
+     */
+    public Log (final OutputStream aOut)
+    {
+        maOut = aOut;
+        msIndentation = "";
+    }
+
+
+
+
+    /** Create a log that writes to the given file.
+     *  When the file can not be opened then a stack trace is printed and the
+     *  new log drops all output.
+     */
+    public Log (final File aFile)
+    {
+        this(CreateFileOutputStream(aFile));
+    }
+
+
+
+
+    /** Open the given file for writing.
+     *  @return
+     *      The new stream or null when the file could not be opened.
+     */
+    private static OutputStream CreateFileOutputStream (final File aFile)
+    {
+        try
+        {
+            return new FileOutputStream(aFile);
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+            return null;
+        }
+    }
+
+
+
+
+    /** Write the current indentation followed by the formatted message.
+     *  No line break is appended; callers put "\n" into the format string.
+     *  I/O errors are reported as stack trace and otherwise ignored.
+     */
+    public void printf (final String sFormat, final Object ... aArgumentList)
+    {
+        // A log without stream (its file could not be opened) is silent
+        // instead of failing with a NullPointerException.
+        if (maOut == null)
+            return;
+
+        try
+        {
+            maOut.write(msIndentation.getBytes());
+            maOut.write(String.format(sFormat, aArgumentList).getBytes());
+        }
+        catch (IOException e)
+        {
+            e.printStackTrace();
+        }
+    }
+
+
+
+
+    /** Indent all following output by one more step.
+     */
+    public void IncreaseIndentation ()
+    {
+        msIndentation += INDENTATION_STEP;
+    }
+
+
+
+
+    /** Undo one call to IncreaseIndentation().
+     *  Calls that have no matching IncreaseIndentation() are ignored.
+     */
+    public void DecreaseIndentation ()
+    {
+        if (msIndentation.length() >= INDENTATION_STEP.length())
+            msIndentation = msIndentation.substring(INDENTATION_STEP.length());
+        else
+            msIndentation = "";
+    }
+
+
+
+
+    private final OutputStream maOut;
+    private String msIndentation;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
new file mode 100644
index 0000000..ee306c5
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NameMap.java
@@ -0,0 +1,100 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Vector;
+
+/** Bidirectional map between names and their integer ids.
+ *  The map is read from the file "names.lst" in which every line consists of
+ *  an id followed by a name, separated by white space.
+ */
+public class NameMap
+{
+    /** Read the name table.
+     *  @param aDataLocation
+     *      Directory that contains the file "names.lst".
+     *  @throws RuntimeException
+     *      when the file can not be read or contains a malformed line.
+     */
+    NameMap (final File aDataLocation)
+    {
+        maNameToIdMap = new HashMap<>();
+        maIdToNameMap = new Vector<>();
+
+        // try-with-resources closes the reader also when a line can not be parsed.
+        try (final BufferedReader aReader = new BufferedReader(
+                new FileReader(
+                    new File(aDataLocation, "names.lst"))))
+        {
+            String sLine;
+            while ((sLine = aReader.readLine()) != null)
+            {
+                // Tolerate blank lines, e.g. at the end of the file.
+                if (sLine.trim().isEmpty())
+                    continue;
+
+                final String aParts[] = sLine.split("\\s+");
+                final int nId = Integer.parseInt(aParts[0]);
+                maNameToIdMap.put(aParts[1], nId);
+
+                // Ids can come in any order.  Grow the vector on demand; ids
+                // that are not defined in the file keep a null name.
+                if (maIdToNameMap.size() <= nId)
+                    maIdToNameMap.setSize(nId+1);
+                maIdToNameMap.set(nId, aParts[1]);
+            }
+        }
+        catch (final Exception aException)
+        {
+            throw new RuntimeException(aException);
+        }
+
+        if (Log.Dbg != null)
+            Log.Dbg.printf("initialized name map with %d definitions\n", maNameToIdMap.size());
+    }
+
+
+
+
+    /** Return the id of a name.
+     *  @param sPrefix
+     *      Namespace prefix of the name.  When not null then the name that is
+     *      looked up is prefix, underscore and element name.
+     *  @param sElementName
+     *      Local part of the name.
+     *  @throws RuntimeException
+     *      when the name is not known.
+     */
+    public int GetIdForName (
+        final String sPrefix,
+        final String sElementName)
+    {
+        final String sName;
+        if (sPrefix == null)
+            sName = sElementName;
+        else
+            sName = sPrefix+"_"+sElementName;
+
+        final Integer nId = maNameToIdMap.get(sName);
+        if (nId == null)
+            throw new RuntimeException("token '"+sName+"' is not known");
+
+        return nId;
+    }
+
+
+
+
+    /** Return the name for an id.
+     *  @return
+     *      The name or null when the id lies in a gap of the id range.
+     *  @throws ArrayIndexOutOfBoundsException
+     *      when the id is negative or larger than the largest known id.
+     */
+    public String GetNameForId (final int nId)
+    {
+        return maIdToNameMap.get(nId);
+    }
+
+
+
+
+    private final Map<String,Integer> maNameToIdMap;
+    private final Vector<String> maIdToNameMap;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
new file mode 100644
index 0000000..2f493bc
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/NamespaceMap.java
@@ -0,0 +1,76 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Map from namespace URIs to the prefixes that the parser tables use for them.
+ *  The map is read from the file "namespaces.lst" in which every line consists
+ *  of a URI followed by a prefix, separated by white space.
+ */
+public class NamespaceMap
+{
+    /** Read the namespace table.
+     *  @param aDataLocation
+     *      Directory that contains the file "namespaces.lst".
+     *  @throws RuntimeException
+     *      when the file can not be read or contains a malformed line.
+     */
+    NamespaceMap (final File aDataLocation)
+    {
+        maUriToPrefixMap = new HashMap<>();
+
+        // try-with-resources closes the reader also when a line can not be parsed.
+        try (final BufferedReader aReader = new BufferedReader(
+                new FileReader(
+                    new File(aDataLocation, "namespaces.lst"))))
+        {
+            String sLine;
+            while ((sLine = aReader.readLine()) != null)
+            {
+                // Tolerate blank lines, e.g. at the end of the file.
+                if (sLine.trim().isEmpty())
+                    continue;
+
+                final String aParts[] = sLine.split("\\s+");
+                maUriToPrefixMap.put(aParts[0], aParts[1]);
+            }
+        }
+        catch (final Exception aException)
+        {
+            throw new RuntimeException(aException);
+        }
+
+        if (Log.Dbg != null)
+            Log.Dbg.printf("initialized namespace map with %d definitions\n", maUriToPrefixMap.size());
+    }
+
+
+
+
+    /** Return the prefix that is registered for a namespace URI.
+     *  @throws RuntimeException
+     *      when the URI is not known.
+     */
+    public String GetPrefixForURI (final String sURI)
+    {
+        final String sPrefix = maUriToPrefixMap.get(sURI);
+        if (sPrefix == null)
+            throw new RuntimeException("namespace '"+sURI+"' is not known");
+        return sPrefix;
+    }
+
+
+
+
+    private final Map<String,String> maUriToPrefixMap;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
new file mode 100644
index 0000000..82232c4
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/OOXMLParser.java
@@ -0,0 +1,268 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import javax.xml.stream.XMLInputFactory;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+
+/** This OOXML parser is based on the output of the schema parser.
+ *  It exists to debug the schema parser and as illustration and preparation of
+ *  the C++ parser (yet to come).
+ *  Because of this, the parser data (set of states and transitions) are
+ *  read at runtime while a real parser would do that at compile time.
+ */
+public class OOXMLParser
+{
+    /** The parser is called with three arguments:
+     *  - A path to where the parser tables with the states and transitions can
+     *    be found.
+     *  - The XML input file or Zip stream to parse.
+     *    The syntax for a Zip stream contains a '#' that separates the filename
+     *    to its left from the entry name to its right.
+     *  - The file to which the debug log is written.
+     */
+    public static void main (final String ... aArgumentList)
+    {
+        if (aArgumentList.length != 3)
+            throw new RuntimeException("usage: OOXMLParser <parser-tables-path> <XML-input-file> <log-file>");
+
+        long nStartTime = System.currentTimeMillis();
+        final StateMachine aMachine = new StateMachine(new File(aArgumentList[0]));
+
+        // Closing the XMLStreamReader does not close the stream it reads from,
+        // therefore the stream is closed here.  A null stream (the input could
+        // not be opened) is accepted by try-with-resources.
+        try (final InputStream aIn = GetInputStream(aArgumentList[1]))
+        {
+            final XMLStreamReader aReader = GetStreamReader(aIn, aArgumentList[1]);
+            long nEndTime = System.currentTimeMillis();
+            System.out.printf("initialzed parser in %fs\n", (nEndTime-nStartTime)/1000.0);
+
+            if (aReader != null)
+            {
+                nStartTime = System.currentTimeMillis();
+                final int nElementCount = Parse(aReader, aMachine, new File(aArgumentList[2]));
+                nEndTime = System.currentTimeMillis();
+                System.out.printf("parsed %d elements in %fs\n",
+                    nElementCount,
+                    (nEndTime-nStartTime)/1000.0);
+            }
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+        }
+    }
+
+
+
+
+    /** Open the input that is described by the given name.
+     *  @param sInputName
+     *      Either the name of a plain XML file or, when it contains a '#', the
+     *      name of a zip archive followed by '#' and the name of a zip entry.
+     *  @return
+     *      The opened stream or null when the input could not be opened.  In
+     *      the latter case a stack trace has been printed.
+     */
+    private static InputStream GetInputStream (final String sInputName)
+    {
+        try
+        {
+            final int nSeparator = sInputName.indexOf('#');
+            if (nSeparator < 0)
+            {
+                // The input name points to a plain XML file.
+                return new FileInputStream(sInputName);
+            }
+
+            // Split the input name into the file name of the archive and the
+            // name of a zip entry.
+            final String sArchiveName = sInputName.substring(0, nSeparator);
+            String sEntryName = sInputName.substring(nSeparator+1);
+
+            // Normalize and cleanup the entry name.
+            sEntryName = sEntryName.replace('\\', '/');
+            if (sEntryName.startsWith("/"))
+                sEntryName = sEntryName.substring(1);
+
+            return OpenZipEntry(new File(sArchiveName), sEntryName);
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+            return null;
+        }
+    }
+
+
+
+
+    /** Open one entry of a zip archive.
+     *  The archive stays open for as long as the returned stream is in use
+     *  and is closed together with it.
+     *  @throws java.io.IOException
+     *      when the archive can not be read or does not contain the entry.
+     */
+    private static InputStream OpenZipEntry (
+        final File aArchive,
+        final String sEntryName)
+        throws java.io.IOException
+    {
+        final ZipFile aZipFile = new ZipFile(aArchive);
+        try
+        {
+            // getEntry() returns null for unknown names.  Report that
+            // explicitly instead of running into a NullPointerException.
+            final ZipEntry aZipEntry = aZipFile.getEntry(sEntryName);
+            if (aZipEntry == null)
+                throw new java.io.FileNotFoundException(
+                    "zip entry '"+sEntryName+"' not found in '"+aArchive+"'");
+
+            return new java.io.FilterInputStream(aZipFile.getInputStream(aZipEntry))
+            {
+                @Override public void close () throws java.io.IOException
+                {
+                    try
+                    {
+                        super.close();
+                    }
+                    finally
+                    {
+                        aZipFile.close();
+                    }
+                }
+            };
+        }
+        catch (final java.io.IOException | RuntimeException aException)
+        {
+            // Do not leak the archive when the entry can not be opened.
+            aZipFile.close();
+            throw aException;
+        }
+    }
+
+
+
+
+    /** Create an XML stream reader for the given input stream.
+     *  Entity references are not replaced, external entities are not
+     *  supported and adjacent character data is not coalesced.
+     *  @param sDescription
+     *      Passed to the reader as system id of the stream.
+     *  @return
+     *      The new reader or null when aIn is null or the reader could not be
+     *      created.
+     */
+    private static XMLStreamReader GetStreamReader (
+        final InputStream aIn,
+        final String sDescription)
+    {
+        if (aIn == null)
+            return null;
+
+        try
+        {
+            final XMLInputFactory aFactory = XMLInputFactory.newInstance();
+            aFactory.setProperty(XMLInputFactory.IS_REPLACING_ENTITY_REFERENCES, false);
+            aFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
+            aFactory.setProperty(XMLInputFactory.IS_COALESCING, false);
+
+            return aFactory.createXMLStreamReader(
+                sDescription,
+                aIn);
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+            return null;
+        }
+    }
+
+
+
+
+    /** Feed all events of the reader into the state machine.
+     *  As a side effect Log.Dbg is redirected to the given log file.
+     *  Elements that the state machine does not accept are skipped together
+     *  with their content.
+     *  @return
+     *      The number of start elements that have been seen, including the
+     *      skipped ones.
+     */
+    private static int Parse (
+        final XMLStreamReader aReader,
+        final StateMachine aMachine,
+        final File aLogFile)
+    {
+        Log.Dbg = new Log(aLogFile);
+
+        int nElementCount = 0;
+        try
+        {
+            final AttributeProvider aAttributeProvider = new AttributeProvider(aReader);
+            while (aReader.hasNext())
+            {
+                final int nCode = aReader.next();
+                switch(nCode)
+                {
+                    case XMLStreamReader.START_ELEMENT:
+                        ++nElementCount;
+                        if (aMachine.IsInSkipState())
+                            nElementCount += Skip(aReader);
+                        else if ( ! aMachine.ProcessStartElement(
+                            aReader.getNamespaceURI(),
+                            aReader.getLocalName(),
+                            aReader.getLocation(),
+                            aAttributeProvider))
+                        {
+                            nElementCount += Skip(aReader);
+                        }
+                        break;
+
+                    case XMLStreamReader.END_ELEMENT:
+                        aMachine.ProcessEndElement(
+                            aReader.getNamespaceURI(),
+                            aReader.getLocalName(),
+                            aReader.getLocation());
+                        break;
+
+                    case XMLStreamReader.CHARACTERS:
+                        final String sText = aReader.getText();
+                        Log.Dbg.printf("text [%s]\n", sText);
+                        aMachine.ProcessCharacters(sText);
+                        break;
+
+                    case XMLStreamReader.END_DOCUMENT:
+                        Log.Std.printf("--- end of document ---\n");
+                        break;
+
+                    default:
+                        Log.Err.printf("can't handle XML event of type %d\n", nCode);
+                }
+            }
+        }
+        catch (final XMLStreamException aException)
+        {
+            aException.printStackTrace();
+        }
+        finally
+        {
+            // Release the reader also when parsing was aborted by an exception.
+            try
+            {
+                aReader.close();
+            }
+            catch (final XMLStreamException aException)
+            {
+                aException.printStackTrace();
+            }
+        }
+
+        return nElementCount;
+    }
+
+
+
+
+    /** Skip the element at whose start tag the reader is positioned, together
+     *  with all of its content.  Everything that is skipped is written to the
+     *  debug log.
+     *  @return
+     *      The number of nested start elements that have been skipped.  The
+     *      element on which skipping started is not counted.
+     *  @throws RuntimeException
+     *      when the document ends before the element is closed.
+     */
+    private static int Skip (final XMLStreamReader aReader)
+    {
+        Log.Dbg.printf("starting to skip on %s at L%dC%d\n",
+            aReader.getLocalName(),
+            aReader.getLocation().getLineNumber(),
+            aReader.getLocation().getColumnNumber());
+        Log.Dbg.IncreaseIndentation();
+
+        // We are called when processing a start element.  This means that we are
+        // already at relative depth 1.
+        int nRelativeDepth = 1;
+        int nElementCount = 0;
+        try
+        {
+            while (aReader.hasNext())
+            {
+                final int nCode = aReader.next();
+                switch (nCode)
+                {
+                    case XMLStreamReader.START_ELEMENT:
+                        ++nRelativeDepth;
+                        ++nElementCount;
+                        Log.Dbg.printf("skipping start element %s\n", aReader.getLocalName());
+                        Log.Dbg.IncreaseIndentation();
+                        break;
+
+                    case XMLStreamReader.END_ELEMENT:
+                        --nRelativeDepth;
+                        Log.Dbg.DecreaseIndentation();
+                        if (nRelativeDepth <= 0)
+                        {
+                            // This end tag closes the element on which
+                            // skipping started.
+                            Log.Dbg.printf("leaving skip mode on %s\n", aReader.getLocalName());
+                            return nElementCount;
+                        }
+                        break;
+
+                    case XMLStreamReader.END_DOCUMENT:
+                        throw new RuntimeException("saw end of document while skipping elements\n");
+
+                    case XMLStreamReader.CHARACTERS:
+                        Log.Dbg.printf("skipping text [%s]\n", aReader.getText());
+                        break;
+
+                    default:
+                        // Other events are only noted by their numerical code.
+                        Log.Dbg.printf("%s\n", nCode);
+                }
+            }
+        }
+        catch (final XMLStreamException aException)
+        {
+            aException.printStackTrace();
+        }
+        return nElementCount;
+    }
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
new file mode 100644
index 0000000..ffdeafc
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/StateMachine.java
@@ -0,0 +1,202 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.File;
+import java.util.Set;
+import java.util.Stack;
+
+import javax.xml.stream.Location;
+
+/** The state machine is initialized at creation from the data tables
+ *  created previously by a stack automaton.
+ *
+ *  It keeps a current state id and a stack of state ids.  A start element
+ *  selects a transition by current state and element id; an end element
+ *  restores the state that is on top of the stack.
+ */
+public class StateMachine
+{
+    /** Create the namespace, name, transition and attribute tables from the
+     *  given location and enter the state named "_start_".
+     *  @param aDataLocation
+     *      Passed on unchanged to the constructors of the individual tables.
+     */
+    public StateMachine (final File aDataLocation)
+    {
+        maNamespaceMap = new NamespaceMap(aDataLocation);
+        maNameMap = new NameMap(aDataLocation);
+        maTransitions = new TransitionTable(aDataLocation);
+        maSkipStates = maTransitions.GetSkipStates();
+        maAttributeManager = new AttributeManager(aDataLocation);
+
+        mnStartStateId = maNameMap.GetIdForName(null, "_start_");
+        mnEndStateId = maNameMap.GetIdForName(null, "_end_");
+        mnCurrentStateId = mnStartStateId;
+        maStateStack = new Stack<>();
+        Log.Dbg.printf("starting in state _start_ (%d)\n", mnCurrentStateId);
+    }
+
+
+
+
+    /** Look up the transition for the current state and the given element
+     *  and, when there is one, follow it and execute its action.
+     *  @param sNamespaceURI
+     *      Namespace URI of the element; it is mapped to a prefix via the
+     *      namespace map.
+     *  @param sElementName
+     *      Local name of the element.
+     *  @param aLocation
+     *      Position of the element in the input, used for log output only.
+     *  @param aAttributes
+     *      Attributes of the element, handed to the attribute manager.
+     *  @return
+     *      true when a transition was found and its action was executed
+     *      without a RuntimeException; false otherwise.
+     */
+    public boolean ProcessStartElement (
+        final String sNamespaceURI,
+        final String sElementName,
+        final Location aLocation,
+        final AttributeProvider aAttributes)
+    {
+        final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
+        boolean bResult = false;
+        try
+        {
+            final int nElementId = maNameMap.GetIdForName(sPrefix, sElementName);
+            Log.Dbg.printf("%s:%s(%d, aArgumentList) L%dC%d\n",
+                sPrefix,
+                sElementName,
+                nElementId,
+                aLocation.getLineNumber(),
+                aLocation.getColumnNumber());
+
+            final Transition aTransition = maTransitions.GetTransition(
+                mnCurrentStateId,
+                nElementId);
+            if (aTransition == null)
+            {
+                Log.Err.printf(
+                    "can not find transition for state %s and element %s at L%dC%d\n",
+                    maNameMap.GetNameForId(mnCurrentStateId),
+                    maNameMap.GetNameForId(nElementId),
+                    aLocation.getLineNumber(),
+                    aLocation.getColumnNumber());
+            }
+            else
+            {
+                Log.Dbg.printf(" %s(%d) -> %s(%d) via %s(%d)",
+                    maNameMap.GetNameForId(mnCurrentStateId),
+                    mnCurrentStateId,
+                    maNameMap.GetNameForId(aTransition.GetEndStateId()),
+                    aTransition.GetEndStateId(),
+                    maNameMap.GetNameForId(aTransition.GetActionId()),
+                    aTransition.GetActionId());
+                Log.Dbg.printf("\n");
+
+                final int nOldState = mnCurrentStateId;
+                SetCurrentState(aTransition.GetEndStateId());
+
+                ExecuteActions(aTransition, aAttributes, nOldState, mnCurrentStateId);
+
+                bResult = true;
+            }
+        }
+        catch (RuntimeException aException)
+        {
+            // Errors are reported but do not abort the caller; it is told
+            // about the failure via the return value.
+            aException.printStackTrace();
+        }
+        return bResult;
+    }
+
+
+
+
+    /** Return to the state that is on top of the state stack.
+     *  @param sNamespaceURI
+     *      Namespace URI of the element, used for log output only.
+     *  @param sElementName
+     *      Local name of the element, used for log output only.
+     *  @param aLocation
+     *      Position of the end tag in the input, used for log output only.
+     *  @throws java.util.EmptyStackException
+     *      When the state stack is empty.
+     */
+    public void ProcessEndElement (
+        final String sNamespaceURI,
+        final String sElementName,
+        final Location aLocation)
+    {
+        final String sPrefix = maNamespaceMap.GetPrefixForURI(sNamespaceURI);
+
+        final int nOldStateId = mnCurrentStateId;
+        SetCurrentState(maStateStack.pop());
+
+        Log.Dbg.DecreaseIndentation();
+        // Use the same L<line>C<column> notation as the start element log.
+        Log.Dbg.printf("/%s:%s L%dC%d\n",
+            sPrefix,
+            sElementName,
+            aLocation.getLineNumber(),
+            aLocation.getColumnNumber());
+        Log.Dbg.printf(" %s(%d) <- %s(%d)\n",
+            maNameMap.GetNameForId(nOldStateId),
+            nOldStateId,
+            maNameMap.GetNameForId(mnCurrentStateId),
+            mnCurrentStateId);
+    }
+
+
+
+
+    /** Text content is currently ignored.
+     */
+    public void ProcessCharacters (
+        final String sText)
+    {
+    }
+
+
+
+
+    /** Return whether the current state is one of the skip states that
+     *  the transition table reported at construction time.
+     */
+    public boolean IsInSkipState ()
+    {
+        return maSkipStates.contains(mnCurrentStateId);
+    }
+
+
+
+
+    /** Make nState the current state.  Entering the end state is replaced
+     *  by entering the start state.
+     */
+    private void SetCurrentState (final int nState)
+    {
+        if (mnCurrentStateId != nState)
+        {
+            if (nState == mnEndStateId)
+                mnCurrentStateId = mnStartStateId;
+            else
+                mnCurrentStateId = nState;
+        }
+    }
+
+
+
+
+    /** Execute the action of a transition that has just been followed.
+     *  The only supported action is 'p' (parse): the current state is
+     *  pushed on the state stack, the action state becomes the current
+     *  state and the attributes are parsed for that state.
+     *  @throws RuntimeException
+     *      When the action of the transition is not supported.
+     */
+    private void ExecuteActions (
+        final Transition aTransition,
+        final AttributeProvider aAttributes,
+        final int nOldState,
+        final int nNewState)
+    {
+        switch(aTransition.GetAction())
+        {
+            case 'p' :
+                // Parse action.  The pushed state is the one that
+                // ProcessEndElement() returns to.
+                maStateStack.push(mnCurrentStateId);
+                Log.Dbg.IncreaseIndentation();
+                final int nActionId = aTransition.GetActionId();
+                SetCurrentState(nActionId);
+                maAttributeManager.ParseAttributes(nActionId, aAttributes);
+                break;
+
+            default:
+                throw new RuntimeException(
+                    "unsupported transition action '" + aTransition.GetAction() + "'");
+        }
+    }
+
+
+
+
+    private final NamespaceMap maNamespaceMap;
+    private final NameMap maNameMap;
+    private final TransitionTable maTransitions;
+    private final AttributeManager maAttributeManager;
+    private int mnCurrentStateId;
+    private final Stack<Integer> maStateStack;
+    private final int mnStartStateId;
+    private final int mnEndStateId;
+    // Per instance: the set belongs to this machine's own transition table.
+    private final Set<Integer> maSkipStates;
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
new file mode 100644
index 0000000..69d4fa3
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/Transition.java
@@ -0,0 +1,88 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+/** Immutable description of one transition: the start and end state, the
+ *  element that triggers it, and the action (with its action state) that
+ *  is associated with it.
+ */
+class Transition
+{
+    private final int mnStartStateId;
+    private final int mnEndStateId;
+    private final int mnElementId;
+    private final char mcAction;
+    private final int mnActionStateId;
+
+
+
+
+    /** Store the given values.  Of sAction only the first character is kept.
+     */
+    Transition (
+        final int nStartStateId,
+        final int nEndStateId,
+        final int nElementId,
+        final String sAction,
+        final int nActionStateId)
+    {
+        this.mnStartStateId = nStartStateId;
+        this.mnEndStateId = nEndStateId;
+        this.mnElementId = nElementId;
+        final char cFirstActionCharacter = sAction.charAt(0);
+        this.mcAction = cFirstActionCharacter;
+        this.mnActionStateId = nActionStateId;
+    }
+
+
+
+
+    /** Id of the state in which the transition starts. */
+    public int GetStartStateId () { return this.mnStartStateId; }
+
+
+
+
+    /** Id of the state in which the transition ends. */
+    public int GetEndStateId () { return this.mnEndStateId; }
+
+
+
+
+    /** Id of the element that triggers the transition. */
+    public int GetElementId () { return this.mnElementId; }
+
+
+
+
+    /** First character of the action string given to the constructor. */
+    public char GetAction () { return this.mcAction; }
+
+
+
+
+    /** Id of the action state given to the constructor. */
+    public int GetActionId () { return this.mnActionStateId; }
+}
diff --git a/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
new file mode 100644
index 0000000..908c6dd
--- /dev/null
+++ b/ooxml/source/framework/JavaOOXMLParser/src/org/apache/openoffice/ooxml/parser/TransitionTable.java
@@ -0,0 +1,115 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.parser;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+
+/** Table of transitions, indexed by start state id and element id, that is
+ *  read from the file "transitions.lst".  States that are marked as skip
+ *  states in that file are collected separately.
+ */
+public class TransitionTable
+{
+    /** Read "transitions.lst" from the given directory.
+     *
+     *  Every line that is neither blank nor a comment (first non-space
+     *  character is '#') is split at whitespace into five or more parts:
+     *  start state id, end state id, element id, action, action state id.
+     *  A line whose element id and action state id are both -1 marks its
+     *  start state as skip state; every other line defines a transition.
+     *
+     *  Errors while reading are reported on stderr and end the reading;
+     *  transitions that were read up to that point are kept.
+     *  @param aDataLocation
+     *      Directory that contains "transitions.lst".
+     */
+    public TransitionTable (final File aDataLocation)
+    {
+        maTransitions = new HashMap<>();
+        maSkipStates = new HashSet<>();
+        int nTransitionCount = 0;
+
+        // try-with-resources closes the reader also when a line can not be parsed.
+        try (final BufferedReader aReader = new BufferedReader(
+                new FileReader(new File(aDataLocation, "transitions.lst"))))
+        {
+            String sLine;
+            while ((sLine = aReader.readLine()) != null)
+            {
+                // Ignore blank lines and comments, split the rest into
+                // parts at whitespace.  Trimming first prevents an empty
+                // leading part for indented lines.
+                final String sTrimmedLine = sLine.trim();
+                if (sTrimmedLine.isEmpty() || sTrimmedLine.startsWith("#"))
+                    continue;
+                final String[] aParts = sTrimmedLine.split("\\s+");
+
+                // Create new transition.
+                final int nStartStateId = Integer.parseInt(aParts[0]);
+                final int nEndStateId = Integer.parseInt(aParts[1]);
+                final int nElementId = Integer.parseInt(aParts[2]);
+                final int nActionStateId = Integer.parseInt(aParts[4]);
+                if (nElementId==-1 && nActionStateId==-1)
+                    maSkipStates.add(nStartStateId);
+                else
+                {
+                    final Transition aTransition = new Transition(
+                        nStartStateId,
+                        nEndStateId,
+                        nElementId,
+                        aParts[3],
+                        nActionStateId);
+                    ++nTransitionCount;
+
+                    Map<Integer,Transition> aPerElementTransitions = maTransitions.get(aTransition.GetStartStateId());
+                    if (aPerElementTransitions == null)
+                    {
+                        aPerElementTransitions = new HashMap<>();
+                        maTransitions.put(aTransition.GetStartStateId(), aPerElementTransitions);
+                    }
+                    aPerElementTransitions.put(aTransition.GetElementId(), aTransition);
+                }
+            }
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+        }
+        Log.Std.printf("read %d transitions\n", nTransitionCount);
+    }
+
+
+
+
+    /** Return the transition that starts in the given state and is
+     *  triggered by the given element.
+     *  @return
+     *      The transition or null when there is none.
+     */
+    public Transition GetTransition (
+        final int nStateId,
+        final int nElementId)
+    {
+        Map<Integer,Transition> aPerElementTransitions = maTransitions.get(nStateId);
+        if (aPerElementTransitions == null)
+            return null;
+        else
+            return aPerElementTransitions.get(nElementId);
+    }
+
+
+
+
+    /** Return the set of ids of the states that have been marked as skip
+     *  states.  The returned set is the internal one, not a copy.
+     */
+    public Set<Integer> GetSkipStates ()
+    {
+        return maSkipStates;
+    }
+
+
+
+
+    private final Map<Integer,Map<Integer,Transition>> maTransitions;
+    private final Set<Integer> maSkipStates;
+}
diff --git a/ooxml/source/framework/SchemaParser/.classpath b/ooxml/source/framework/SchemaParser/.classpath
new file mode 100644
index 0000000..fb565a5
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/.classpath
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<classpath>
+ <classpathentry kind="src" path="src"/>
+ <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7"/>
+ <classpathentry kind="output" path="bin"/>
+</classpath>
diff --git a/ooxml/source/framework/SchemaParser/.project b/ooxml/source/framework/SchemaParser/.project
new file mode 100644
index 0000000..05958e1
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/.project
@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+ <name>SchemaParser</name>
+ <comment></comment>
+ <projects>
+ </projects>
+ <buildSpec>
+ <buildCommand>
+ <name>org.eclipse.jdt.core.javabuilder</name>
+ <arguments>
+ </arguments>
+ </buildCommand>
+ </buildSpec>
+ <natures>
+ <nature>org.eclipse.jdt.core.javanature</nature>
+ </natures>
+</projectDescription>
diff --git a/ooxml/source/framework/SchemaParser/.settings/org.eclipse.jdt.core.prefs b/ooxml/source/framework/SchemaParser/.settings/org.eclipse.jdt.core.prefs
new file mode 100644
index 0000000..7341ab1
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/.settings/org.eclipse.jdt.core.prefs
@@ -0,0 +1,11 @@
+eclipse.preferences.version=1
+org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
+org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
+org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
+org.eclipse.jdt.core.compiler.compliance=1.7
+org.eclipse.jdt.core.compiler.debug.lineNumber=generate
+org.eclipse.jdt.core.compiler.debug.localVariable=generate
+org.eclipse.jdt.core.compiler.debug.sourceFile=generate
+org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
+org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
+org.eclipse.jdt.core.compiler.source=1.7
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
new file mode 100644
index 0000000..0c9ce1d
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/SchemaReader.java
@@ -0,0 +1,274 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileReader;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.Queue;
+import java.util.Set;
+import java.util.Vector;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.stream.XMLStreamException;
+
+import org.apache.openoffice.ooxml.schema.generator.LogGenerator;
+import org.apache.openoffice.ooxml.schema.generator.ParserTablesGenerator;
+import org.apache.openoffice.ooxml.schema.generator.automaton.NonValidatingCreator;
+import org.apache.openoffice.ooxml.schema.generator.automaton.StackAutomaton;
+import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.parser.SchemaParser;
+import org.apache.openoffice.ooxml.schema.parser.XmlNamespace;
+
+/** Command line tool that reads a driver file, parses the schema files that
+ *  are listed in it (and the schema files these import), writes log files
+ *  for the original and the optimized schema, and generates the parser
+ *  tables from a stack automaton that is created for the optimized schema.
+ */
+public class SchemaReader
+{
+    /** Entry point.  Expects exactly one argument, the path of the driver
+     *  file; otherwise a usage message is printed and the process exits
+     *  with status 1.
+     */
+    public static void main (final String ... aArgumentList)
+    {
+        if (aArgumentList.length != 1)
+        {
+            System.err.printf("usage: SchemaParser <driver-file>\n");
+            System.exit(1);
+        }
+
+        final SchemaReader aReader = new SchemaReader(new File(aArgumentList[0]));
+        aReader.Run();
+    }
+
+
+
+
+    /** Set up empty containers and read the driver file.
+     */
+    private SchemaReader (final File aDriverFile)
+    {
+        maSchema = new Schema();
+        maMainSchemaFiles = new Vector<>();
+        maSchemaFiles = new HashSet<>();
+        maTodo = new LinkedList<String>();
+        mnTotalLineCount = 0;
+        mnTotalByteCount = 0;
+
+        XmlNamespace.Apply(maSchema);
+
+        ParseDriverFile(aDriverFile);
+    }
+
+
+
+
+    /** Read and parse the driver file that specifies which schema files to read
+     *  and where the output should go.
+     *
+     *  Recognized lines are "schema <short-name> <file>" and
+     *  "output-directory <directory>".  Comment lines (first non-space
+     *  character is '#'), blank lines and all other lines are ignored.
+     *  The process exits with status 1 when the driver file can not be read
+     *  or when an existing output directory is not writable.
+     */
+    private void ParseDriverFile (final File aDriverFile)
+    {
+        if (aDriverFile == null || ! aDriverFile.exists() || ! aDriverFile.canRead())
+        {
+            System.err.printf("can not read driver file\n");
+            System.exit(1);
+        }
+
+        // matches() has to cover the whole line, therefore the comment
+        // pattern needs the trailing ".*" to accept text after the '#'.
+        final Pattern aCommentPattern = Pattern.compile("^\\s*#.*");
+        final Pattern aBlankPattern = Pattern.compile("^\\s*$");
+        final Pattern aSchemaPattern = Pattern.compile("^\\s*schema\\s+(.*)\\s+(.*)");
+        final Pattern aOutputPattern = Pattern.compile("^\\s*output-directory\\s+(.*)");
+        // try-with-resources closes the reader also when reading fails.
+        try (final BufferedReader aIn = new BufferedReader(new FileReader(aDriverFile)))
+        {
+            String sLine;
+            while ((sLine = aIn.readLine()) != null)
+            {
+                // Lines starting with # are comment lines and are ignored.
+                if (aCommentPattern.matcher(sLine).matches())
+                    continue;
+                // Lines containing only whitespace are also ignored.
+                if (aBlankPattern.matcher(sLine).matches())
+                    continue;
+
+                Matcher aMatcher = aSchemaPattern.matcher(sLine);
+                if (aMatcher.matches())
+                {
+                    maMainSchemaFiles.add(new String[]{aMatcher.group(1), aMatcher.group(2)});
+                }
+                else
+                {
+                    aMatcher = aOutputPattern.matcher(sLine);
+                    if (aMatcher.matches())
+                    {
+                        maOutputDirectory = new File(aMatcher.group(1));
+                        if (maOutputDirectory.exists() && ! maOutputDirectory.canWrite())
+                        {
+                            System.err.printf("can not write output file '%s' \n", maOutputDirectory.toString());
+                            System.exit(1);
+                        }
+                    }
+                }
+            }
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+        }
+    }
+
+
+
+
+    /** Parse all schema files, write the log files and generate the parser
+     *  tables.  An exception while parsing is reported on stderr; processing
+     *  then continues with what has been parsed so far.
+     */
+    private void Run ()
+    {
+        try
+        {
+            ParseSchemaFiles();
+        }
+        catch (final Exception aException)
+        {
+            aException.printStackTrace();
+        }
+        final Schema aOptimizedSchema = maSchema.GetOptimizedSchema();
+
+        System.out.printf(" used are %d complex types, %d simple types, %d groups and %d top level elements\n",
+            aOptimizedSchema.ComplexTypes.GetCount(),
+            aOptimizedSchema.SimpleTypes.GetCount(),
+            aOptimizedSchema.Groups.GetCount(),
+            aOptimizedSchema.TopLevelElements.GetCount());
+
+        LogGenerator.Write(maSchema, new File(maOutputDirectory, "original-schema.txt"));
+        LogGenerator.Write(aOptimizedSchema, new File(maOutputDirectory, "bla.txt"));
+
+        final StackAutomaton aAutomaton = CreateStackAutomaton(aOptimizedSchema);
+
+        // NOTE(review): the parser tables go to a fixed directory, not to
+        // the output directory from the driver file -- confirm that this
+        // is intended.
+        new ParserTablesGenerator(aAutomaton).Generate(new File("/tmp/ooxml-parser"));
+    }
+
+
+
+
+    /** Parse the main schema files from the driver file and, transitively,
+     *  all schema files that are imported by them.  Each file is parsed
+     *  once.  The process exits with status 1 when a main schema file does
+     *  not exist or can not be read.
+     */
+    private void ParseSchemaFiles ()
+        throws XMLStreamException
+    {
+        System.out.printf("parsing %d main schema files\n", maMainSchemaFiles.size());
+
+        for (final String[] aEntry : maMainSchemaFiles)
+        {
+            // aEntry[0] is the short name from the driver file.  It is not
+            // used here.
+            final String sMainSchemaFile = aEntry[1];
+            final File aMainSchemaFile = new File(sMainSchemaFile);
+            if ( ! aMainSchemaFile.exists())
+            {
+                System.err.printf(" schema file does not exist\n");
+                System.exit(1);
+            }
+            if ( ! aMainSchemaFile.canRead())
+            {
+                System.err.printf("can not read schema file\n");
+                System.exit(1);
+            }
+
+            // Imported schema files are registered by their absolute path
+            // (see below).  Do the same here so that a main schema file
+            // that is also imported by another one is not parsed twice.
+            AddSchemaReference(aMainSchemaFile.getAbsolutePath());
+        }
+
+        long nStartTime = System.currentTimeMillis();
+
+        while ( ! maTodo.isEmpty())
+        {
+            final String sSchemaName = maTodo.poll();
+            System.out.printf("parsing %s\n", sSchemaName);
+            maSchemaFiles.add(sSchemaName);
+
+            final SchemaParser aParser = new SchemaParser(new File(sSchemaName), maSchema);
+            aParser.Parse();
+
+            mnTotalLineCount += aParser.GetLineCount();
+            mnTotalByteCount += aParser.GetByteCount();
+            for (final File aFile : aParser.GetImportedSchemaFilenames())
+                AddSchemaReference(aFile.getAbsolutePath());
+        }
+        long nEndTime = System.currentTimeMillis();
+        System.out.printf("parsed %d schema files with a total of %d lines and %d bytes in %fs\n",
+            maSchemaFiles.size(),
+            mnTotalLineCount,
+            mnTotalByteCount,
+            (nEndTime-nStartTime)/1000.0);
+        System.out.printf(" found %d complex types, %d simple types, %d groups and %d top level elements\n",
+            maSchema.ComplexTypes.GetCount(),
+            maSchema.SimpleTypes.GetCount(),
+            maSchema.Groups.GetCount(),
+            maSchema.TopLevelElements.GetCount());
+    }
+
+
+
+
+    /** Schedule the given schema file for parsing unless it is already known.
+     *  @throws RuntimeException
+     *      When sSchemaFilename is null.
+     */
+    private void AddSchemaReference (final String sSchemaFilename)
+    {
+        if (sSchemaFilename == null)
+            throw new RuntimeException("schema file name must not be null");
+
+        if ( ! maSchemaFiles.contains(sSchemaFilename))
+        {
+            // Mark the file as 'known' right away so that further
+            // references to it do not add it to the todo list again.
+            maSchemaFiles.add(sSchemaFilename);
+            maTodo.add(sSchemaFilename);
+        }
+    }
+
+
+
+
+    /** Create the (non-validating) stack automaton for the given schema and
+     *  print how long that took and how large the automaton is.
+     */
+    private static StackAutomaton CreateStackAutomaton (final Schema aSchema)
+    {
+        long nStartTime = System.currentTimeMillis();
+        StackAutomaton aAutomaton = new NonValidatingCreator(aSchema).Create(new File("/tmp/schema.log"));
+        long nEndTime = System.currentTimeMillis();
+        System.out.printf(
+            "created stack automaton in %fs, it has %d states and %d transitions\n",
+            (nEndTime-nStartTime)/1000.0,
+            aAutomaton.GetStateCount(),
+            aAutomaton.GetTransitionCount());
+
+        /*
+        nStartTime = System.currentTimeMillis();
+        aAutomaton = aAutomaton.Optimize();
+        nEndTime = System.currentTimeMillis();
+        System.out.printf(
+            "optimized stack automaton in %fs, it now has %d states and %d transitions\n",
+            (nEndTime-nStartTime)/1000.0,
+            aAutomaton.GetStateCount(),
+            aAutomaton.GetTransitionCount());
+        */
+        return aAutomaton;
+    }
+
+
+
+
+    private final Schema maSchema;
+    private final Vector<String[]> maMainSchemaFiles;
+    private File maOutputDirectory;
+    private final Set<String> maSchemaFiles;
+    private final Queue<String> maTodo;
+    private int mnTotalLineCount;
+    private int mnTotalByteCount;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/LogGenerator.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/LogGenerator.java
new file mode 100644
index 0000000..bc997d8
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/LogGenerator.java
@@ -0,0 +1,323 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.PrintStream;
+import java.util.Map.Entry;
+
+import org.apache.openoffice.ooxml.schema.model.attribute.Attribute;
+import org.apache.openoffice.ooxml.schema.model.attribute.AttributeGroup;
+import org.apache.openoffice.ooxml.schema.model.attribute.AttributeGroupReference;
+import org.apache.openoffice.ooxml.schema.model.attribute.AttributeReference;
+import org.apache.openoffice.ooxml.schema.model.base.INode;
+import org.apache.openoffice.ooxml.schema.model.base.Node;
+import org.apache.openoffice.ooxml.schema.model.complex.ComplexType;
+import org.apache.openoffice.ooxml.schema.model.complex.Element;
+import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+import org.apache.openoffice.ooxml.schema.model.simple.Restriction;
+import org.apache.openoffice.ooxml.schema.model.simple.SimpleType;
+import org.apache.openoffice.ooxml.schema.model.simple.SimpleTypeReference;
+
+/** Writes a textual description of a schema (namespaces, top level
+ *  elements, complex types, groups, simple types, attribute groups and
+ *  attributes) to a file.
+ */
+public class LogGenerator
+{
+    /** Write the description of the given schema to the given file and
+     *  print the elapsed time to stdout.  When the file can not be opened
+     *  the exception is reported on stderr.
+     *  @param aSchema
+     *      The schema to describe.
+     *  @param aOutputFile
+     *      The file to write to.
+     */
+    public static void Write (
+        final Schema aSchema,
+        final File aOutputFile)
+    {
+        final long nStartTime = System.currentTimeMillis();
+
+        // try-with-resources closes the output file when writing is done,
+        // also when one of the writers throws.
+        try (final PrintStream aOut = new PrintStream(aOutputFile))
+        {
+            final LogGenerator aGenerator = new LogGenerator(
+                aOut,
+                aSchema);
+
+            aGenerator.WriteNamespaces(aSchema);
+            aGenerator.WriteTopLevelElements(aSchema);
+            aGenerator.WriteComplexTypes(aSchema);
+            aGenerator.WriteGroups(aSchema);
+            aGenerator.WriteSimpleTypes(aSchema);
+            aGenerator.WriteAttributeGroups(aSchema);
+            aGenerator.WriteAttributes(aSchema);
+        }
+        catch (final FileNotFoundException aException)
+        {
+            aException.printStackTrace();
+        }
+
+        final long nEndTime = System.currentTimeMillis();
+        System.out.printf("wrote log output to '%s' in %fs\n",
+            aOutputFile.toString(),
+            (nEndTime-nStartTime)/1000.0f);
+    }
+
+
+
+
+    /** Remember the output stream and the schema that is used to resolve
+     *  references.
+     */
+    private LogGenerator (
+        final PrintStream aOut,
+        final Schema aSchema)
+    {
+        maSchema = aSchema;
+        maOut = aOut;
+    }
+
+
+
+
+    /** Write a formatted line that is prefixed with "// ".
+     */
+    private void WriteComment (final String sFormat, final Object ... aArgumentList)
+    {
+        maOut.printf("// "+sFormat+"\n", aArgumentList);
+    }
+
+
+
+
+    /** Write one line per namespace; a missing prefix is written as
+     *  "<no-prefix>".
+     */
+    private void WriteNamespaces (final Schema aSchema)
+    {
+        // Write namespace definitions.
+        WriteComment("%d Namespaces.", aSchema.Namespaces.GetCount());
+        for (final Entry<String,String> aEntry : aSchema.Namespaces)
+        {
+            maOut.printf(" %s -> %s\n",
+                aEntry.getValue()==null ? "<no-prefix>" : aEntry.getValue(),
+                aEntry.getKey());
+        }
+    }
+
+
+
+    /** Write one line per top level element with its name and type name.
+     */
+    private void WriteTopLevelElements (final Schema aSchema)
+    {
+        // Write top level elements.
+        WriteComment("Top-level elements.");
+        for (final Element aElement : aSchema.TopLevelElements.GetSorted())
+            maOut.printf(" \"%s\" -> %s\n",
+                aElement.GetElementName().GetDisplayName(),
+                aElement.GetTypeName().GetDisplayName());
+    }
+
+
+
+
+    /** Write all complex types of the schema in sorted order.
+     */
+    private void WriteComplexTypes (final Schema aSchema)
+    {
+        WriteComment(" %d Complex Types.", aSchema.ComplexTypes.GetCount());
+        for (final ComplexType aType : aSchema.ComplexTypes.GetSorted())
+        {
+            WriteType(" ", aType, true);
+        }
+    }
+
+
+
+
+    /** Write all simple types of the schema in sorted order.
+     */
+    private void WriteSimpleTypes (final Schema aSchema)
+    {
+        WriteComment(" %d Simple Types.", aSchema.SimpleTypes.GetCount());
+        for (final SimpleType aType : aSchema.SimpleTypes.GetSorted())
+        {
+            WriteType(" ", aType, true);
+        }
+    }
+
+
+
+
+    /** Write all groups of the schema in sorted order.
+     */
+    private void WriteGroups (final Schema aSchema)
+    {
+        WriteComment(" %d Groups.", aSchema.Groups.GetCount());
+        for (final Node aType : aSchema.Groups.GetSorted())
+        {
+            WriteType(" ", aType, true);
+        }
+    }
+
+
+
+
+    /** Write all attribute groups of the schema in sorted order.
+     */
+    private void WriteAttributeGroups (final Schema aSchema)
+    {
+        WriteComment(" %d Attribute Groups.", aSchema.AttributeGroups.GetCount());
+        for (final Node aType : aSchema.AttributeGroups.GetSorted())
+        {
+            WriteType(" ", aType, true);
+        }
+    }
+
+
+
+
+    /** Write all attributes of the schema in sorted order.
+     */
+    private void WriteAttributes (final Schema aSchema)
+    {
+        WriteComment(" %d Attributes.", aSchema.Attributes.GetCount());
+        for (final Node aType : aSchema.Attributes.GetSorted())
+        {
+            WriteType(" ", aType, true);
+        }
+    }
+
+
+
+
+    /** Write the given node and, recursively and with increased
+     *  indentation, its attributes, restriction or referenced type and its
+     *  children.
+     *  @param sIndentation
+     *      Prefix of every line that is written for the node itself.
+     *  @param aType
+     *      The node to write.
+     *  @param bIsTopLevel
+     *      When true then aType has to be a Node and its location is
+     *      written as well.
+     */
+    private void WriteType (
+        final String sIndentation,
+        final INode aType,
+        final boolean bIsTopLevel)
+    {
+        maOut.printf("%s%s", sIndentation, aType.toString());
+
+        if (bIsTopLevel)
+        {
+            final Node aNode = (Node)aType;
+            maOut.printf(" defined at %s",
+                aNode.GetLocation());
+        }
+        if ( ! HasChild(aType))
+        {
+            maOut.printf(" {}\n");
+        }
+        else
+        {
+            maOut.printf(" {\n");
+
+            // Write attributes.
+            switch(aType.GetNodeType())
+            {
+                case ComplexType:
+                    for (final INode aAttribute : ((ComplexType)aType).GetAttributes())
+                        WriteAttribute(sIndentation+" ", aAttribute);
+                    break;
+
+                case SimpleType:
+                    final Restriction aRestriction = ((SimpleType)aType).GetRestriction();
+                    if (aRestriction != null)
+                        WriteRestriction(sIndentation+" ", aRestriction);
+                    break;
+
+                case SimpleTypeReference:
+                    WriteType(sIndentation+" ", ((SimpleTypeReference)aType).GetReferencedSimpleType(maSchema), false);
+                    break;
+
+                default:
+                    break;
+            }
+
+
+            // Write child types.
+            for (final INode aChild : aType.GetChildren())
+                WriteType(sIndentation+" ", aChild, false);
+
+            maOut.printf("%s}\n", sIndentation);
+        }
+    }
+
+
+
+
+    /** Write an attribute, an attribute group or a reference to one of
+     *  these.  Groups and references are expanded recursively.
+     *  @throws RuntimeException
+     *      When the node is of any other type.
+     */
+    private void WriteAttribute (
+        final String sIndentation,
+        final INode aAttribute)
+    {
+        switch(aAttribute.GetNodeType())
+        {
+            case Attribute:
+                maOut.printf(
+                    "%sattribute %s of type %s\n",
+                    sIndentation,
+                    ((Attribute)aAttribute).GetName().GetDisplayName(),
+                    ((Attribute)aAttribute).GetTypeName().GetDisplayName());
+                break;
+
+            case AttributeGroup:
+                maOut.printf(
+                    "%sattribute group %s {\n",
+                    sIndentation,
+                    ((AttributeGroup)aAttribute).GetName().GetDisplayName());
+                for (final INode aChildAttribute : ((AttributeGroup)aAttribute).GetChildren())
+                    WriteAttribute(sIndentation+" ", aChildAttribute);
+                maOut.printf("%s}\n", sIndentation);
+                break;
+            case AttributeGroupReference:
+                maOut.printf(
+                    "%sreference to attribute group %s {\n",
+                    sIndentation,
+                    ((AttributeGroupReference)aAttribute).GetReferencedName().GetDisplayName());
+                WriteAttribute(sIndentation+" ", ((AttributeGroupReference)aAttribute).GetReferencedAttributeGroup(maSchema));
+                maOut.printf("%s}\n", sIndentation);
+                break;
+
+            case AttributeReference:
+                maOut.printf(
+                    "%sreference to attribute %s {\n",
+                    sIndentation,
+                    ((AttributeReference)aAttribute).GetReferencedName().GetDisplayName());
+                WriteAttribute(sIndentation+" ", ((AttributeReference)aAttribute).GetReferencedAttribute(maSchema));
+                maOut.printf("%s}\n", sIndentation);
+                break;
+            default:
+                throw new RuntimeException();
+        }
+    }
+
+
+
+
+    /** Return whether WriteType() has anything to write between the braces
+     *  for the given node: children, attributes of a complex type, the
+     *  restriction of a simple type or the target of a simple type
+     *  reference.
+     */
+    private boolean HasChild (final INode aType)
+    {
+        if (aType.GetChildren().iterator().hasNext())
+            return true;
+
+        switch (aType.GetNodeType())
+        {
+            case ComplexType:
+                return ((ComplexType)aType).GetAttributes().iterator().hasNext();
+
+            case SimpleType:
+                return ((SimpleType)aType).GetRestriction() != null;
+
+            case SimpleTypeReference:
+                return true;
+
+            default:
+                return false;
+        }
+    }
+
+
+
+
+    /** Write the string representation of the restriction on its own line.
+     */
+    private void WriteRestriction (
+        final String sIndentation,
+        final Restriction aRestriction)
+    {
+        maOut.printf("%s%s\n", sIndentation, aRestriction.toString());
+    }
+
+
+
+
+    private final Schema maSchema;
+    private final PrintStream maOut;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/ParserTablesGenerator.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/ParserTablesGenerator.java
new file mode 100644
index 0000000..207ddb9
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/ParserTablesGenerator.java
@@ -0,0 +1,193 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.TreeMap;
+
+import org.apache.openoffice.ooxml.schema.generator.automaton.IAction;
+import org.apache.openoffice.ooxml.schema.generator.automaton.StackAutomaton;
+import org.apache.openoffice.ooxml.schema.generator.automaton.State;
+import org.apache.openoffice.ooxml.schema.generator.automaton.Transition;
+
+/** Write the tables that describe a stack automaton as three text files:
+ *  a list of namespaces, a list of names with their numerical ids and
+ *  a list of transitions.
+ */
+public class ParserTablesGenerator
+{
+    public ParserTablesGenerator (final StackAutomaton aAutomaton)
+    {
+        maAutomaton = aAutomaton;
+        maNameToIdMap = new TreeMap<>();
+    }
+
+
+
+
+    /** Write the files namespaces.lst, names.lst and transitions.lst into the
+     *  given directory.
+     *  Every file is closed as soon as it has been written, also when writing
+     *  it fails with an exception.
+     *  When one of the files can not be opened then the stack trace is printed
+     *  and the remaining files are not written.
+     *  @param aOutputDirectory
+     *      An existing and writable directory.
+     *  @throws RuntimeException
+     *      when the directory does not exist or is not writable.
+     */
+    public void Generate (
+        final File aOutputDirectory)
+    {
+        if ( ! aOutputDirectory.exists())
+            throw new RuntimeException("output directory "+aOutputDirectory+" does not exist");
+        if ( ! aOutputDirectory.canWrite())
+            throw new RuntimeException("output directory "+aOutputDirectory+" is not writable");
+
+        AssignNameIds();
+
+        try
+        {
+            // The files are opened one after the other so that a failure to open
+            // one of them does not leave empty files for the ones that follow.
+            try (final PrintStream aOut = OpenOutputFile(aOutputDirectory, "namespaces.lst"))
+            {
+                WriteNamespaceList(aOut);
+            }
+
+            try (final PrintStream aOut = OpenOutputFile(aOutputDirectory, "names.lst"))
+            {
+                WriteNameList(aOut);
+            }
+
+            try (final PrintStream aOut = OpenOutputFile(aOutputDirectory, "transitions.lst"))
+            {
+                WriteTransitionList(aOut);
+            }
+        }
+        catch (final FileNotFoundException aException)
+        {
+            // Reported but not propagated, callers do not expect an exception here.
+            aException.printStackTrace();
+        }
+    }
+
+
+
+
+    /** Open the file with the given name in the given directory for writing.
+     *  The caller is responsible for closing the returned stream.
+     */
+    private static PrintStream OpenOutputFile (
+        final File aDirectory,
+        final String sFilename)
+        throws FileNotFoundException
+    {
+        return new PrintStream(
+            new FileOutputStream(
+                new File(aDirectory, sFilename)));
+    }
+
+
+
+
+    /** During the largest part of the parsing process, states and elements are
+     *  identified not via their name but via a unique id.
+     *  That allows a fast lookup.
+     *  Ids are assigned first to all states, in the order of GetStatesSorted(),
+     *  and then to the element names of the transitions that are not already
+     *  in the map.
+     */
+    private void AssignNameIds ()
+    {
+        maNameToIdMap.clear();
+        int nIndex = 0;
+        for (final State aState : maAutomaton.GetStatesSorted())
+            maNameToIdMap.put(aState.GetFullname(), nIndex++);
+        for (final Transition aTransition : maAutomaton.GetTransitions())
+        {
+            if (aTransition.GetElementName() == null)
+                continue;
+            // Element names are not necessarily unique.
+            final String sElementName = aTransition.GetElementName().GetStateName();
+            if ( ! maNameToIdMap.containsKey(sElementName))
+                maNameToIdMap.put(sElementName, nIndex++);
+        }
+    }
+
+
+
+
+    /** Write one line per namespace: the key and the value of the namespace entry.
+     *  The stream is not closed, that is left to the caller.
+     */
+    private void WriteNamespaceList (final PrintStream aOut)
+    {
+        for (final Entry<String, String> aEntry : maAutomaton.GetNamespaces())
+        {
+            aOut.printf("%s %s\n",
+                aEntry.getKey(),
+                aEntry.getValue());
+        }
+    }
+
+
+
+
+    /** Write one line per name: the id followed by the name.
+     *  Lines are sorted by name because the name map is a sorted map.
+     *  The stream is not closed, that is left to the caller.
+     */
+    private void WriteNameList (final PrintStream aOut)
+    {
+        for (final Entry<String, Integer> aEntry : maNameToIdMap.entrySet())
+        {
+            aOut.printf("%d %s\n",
+                aEntry.getValue(),
+                aEntry.getKey());
+        }
+    }
+
+
+
+
+    /** Write one line per transition.  Transitions with an element name are
+     *  written first, followed by the transitions without one (skip transitions).
+     *  Each line starts with the ids of start state, end state and element
+     *  (-1 for skip transitions), the action name and the id of the action
+     *  start state (-1 when there is none).  The rest of the line repeats
+     *  the same information in readable form.
+     *  The stream is not closed, that is left to the caller.
+     */
+    private void WriteTransitionList (final PrintStream aOut)
+    {
+        // Write regular transitions.
+        for (final Transition aTransition : maAutomaton.GetTransitions())
+        {
+            final IAction aAction = aTransition.GetAction();
+            final State aActionStartState = aAction.GetStartState();
+            if (aTransition.GetElementName() != null)
+            {
+                aOut.printf("%d %d %d %s %d %s %s \"%s\" %s\n",
+                    maNameToIdMap.get(aTransition.GetStartState().GetFullname()),
+                    maNameToIdMap.get(aTransition.GetEndState().GetFullname()),
+                    maNameToIdMap.get(aTransition.GetElementName().GetStateName()),
+                    aAction.GetActionName(),
+                    GetActionStateId(aActionStartState),
+                    aTransition.GetStartState(),
+                    aTransition.GetEndState(),
+                    aTransition.GetElementName().GetStateName(),
+                    GetActionStateName(aActionStartState));
+            }
+        }
+
+        // Write skip transitions.
+        for (final Transition aTransition : maAutomaton.GetTransitions())
+        {
+            final IAction aAction = aTransition.GetAction();
+            final State aActionStartState = aAction.GetStartState();
+            if (aTransition.GetElementName() == null)
+            {
+                aOut.printf("%d %d %d %s %d %s %s %s\n",
+                    maNameToIdMap.get(aTransition.GetStartState().GetFullname()),
+                    maNameToIdMap.get(aTransition.GetEndState().GetFullname()),
+                    -1,
+                    aAction.GetActionName(),
+                    GetActionStateId(aActionStartState),
+                    aTransition.GetStartState(),
+                    aTransition.GetEndState(),
+                    GetActionStateName(aActionStartState));
+            }
+        }
+    }
+
+
+
+
+    /** Return the id of the given action start state or -1 when the action
+     *  has no start state.
+     */
+    private int GetActionStateId (final State aActionStartState)
+    {
+        if (aActionStartState == null)
+            return -1;
+        return maNameToIdMap.get(aActionStartState.GetFullname());
+    }
+
+
+
+
+    /** Return the full name of the given action start state or "<none>" when
+     *  the action has no start state.
+     */
+    private static String GetActionStateName (final State aActionStartState)
+    {
+        if (aActionStartState == null)
+            return "<none>";
+        return aActionStartState.GetFullname();
+    }
+
+
+
+
+    private final StackAutomaton maAutomaton;
+    private final Map<String,Integer> maNameToIdMap;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/IAction.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/IAction.java
new file mode 100644
index 0000000..85ecd22
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/IAction.java
@@ -0,0 +1,38 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+/** Interface for actions that are tied to transitions.
+ */
+public interface IAction
+{
+ /** Execute the action.
+ * NOTE(review): the implementations visible next to this interface
+ * (ParseElementAction, SkipElementAction) leave this method empty.
+ */
+ void Run ();
+
+ /** Return the short name of the action, e.g. "parse" or "skip".
+ * ParserTablesGenerator writes this name into the transition list.
+ */
+ String GetActionName ();
+ /** Return a text that describes the action for display purposes.
+ */
+ String GetActionDisplayText ();
+
+ /** Return the state that is associated with the action.
+ * May be null when the action has no such state (see SkipElementAction).
+ */
+ State GetStartState ();
+
+ /** Create a clone of the action that uses states in the given context.
+ */
+ IAction Clone (final StateContext aStateContext);
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/NonValidatingCreator.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/NonValidatingCreator.java
new file mode 100644
index 0000000..9bb1689
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/NonValidatingCreator.java
@@ -0,0 +1,198 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.iterator.AttributeIterator;
+import org.apache.openoffice.ooxml.schema.iterator.DereferencingNodeIterator;
+import org.apache.openoffice.ooxml.schema.model.attribute.Attribute;
+import org.apache.openoffice.ooxml.schema.model.base.INode;
+import org.apache.openoffice.ooxml.schema.model.base.NodeType;
+import org.apache.openoffice.ooxml.schema.model.complex.Any;
+import org.apache.openoffice.ooxml.schema.model.complex.ComplexType;
+import org.apache.openoffice.ooxml.schema.model.complex.Element;
+import org.apache.openoffice.ooxml.schema.model.schema.Schema;
+
+/** Create a stack automaton for a given Schema object that parses
+ *  its input files but does not validate them.
+ *  Every complex type becomes one state.  The elements of a complex type
+ *  become transitions that start and end at that state.
+ */
+public class NonValidatingCreator
+{
+    public NonValidatingCreator (
+        final Schema aSchema)
+    {
+        maSchema = aSchema;
+        maStateContext = new StateContext("_start_", "_end_");
+        maTransitions = new HashSet<Transition>();
+        maAttributeMap = new HashMap<>();
+    }
+
+
+
+
+    /** Create the automaton for the schema that was given to the constructor.
+     *  @param aLogFile
+     *      This file is created (or truncated).  Nothing is written to it at
+     *      the moment.  When it can not be opened then the stack trace is
+     *      printed and the automaton is created without a log.
+     *  @return
+     *      A new automaton that shares the state context, the transitions
+     *      and the per-state attributes with this creator.
+     */
+    public StackAutomaton Create (final File aLogFile)
+    {
+        PrintStream aLog = null;
+        try
+        {
+            aLog = new PrintStream(new FileOutputStream(aLogFile));
+        }
+        catch (FileNotFoundException e)
+        {
+            e.printStackTrace();
+        }
+
+        try
+        {
+            // top level elements
+            for (final Element aElement : maSchema.TopLevelElements.GetSorted())
+                maTransitions.add(
+                    Transition.CreateElementTransition(
+                        maStateContext.GetStartState(),
+                        maStateContext.GetEndState(),
+                        aElement.GetElementName(),
+                        new ParseElementAction(
+                            maStateContext.GetStateForTypeName(
+                                aElement.GetTypeName()))));
+
+            // Complex types.
+            System.out.printf("processing %d complex types\n", maSchema.ComplexTypes.GetCount());
+            for (final ComplexType aComplexType : maSchema.ComplexTypes.GetSorted())
+            {
+                ProcessType(
+                    aComplexType,
+                    CollectElements(aComplexType),
+                    CollectAnys(aComplexType),
+                    CollectAttributes(aComplexType));
+            }
+        }
+        finally
+        {
+            // The log is missing when its file could not be opened.
+            if (aLog != null)
+                aLog.close();
+        }
+
+        // Pass on the attributes that ProcessType() has collected.
+        return new StackAutomaton(
+            maSchema.Namespaces,
+            maStateContext,
+            maTransitions,
+            maAttributeMap);
+    }
+
+
+
+
+    /** Add the transitions and attributes of one complex type.
+     *  @param aComplexType
+     *      The type whose state is both start and end of the new transitions.
+     *  @param aElements
+     *      For each element a transition with a parse action is added.
+     *  @param aAnys
+     *      For each any node a transition without element name and with
+     *      a skip action is added.
+     *  @param aAttributes
+     *      The attributes that are stored for the state of the type.
+     *      May be null, which is treated like an empty collection.
+     */
+    private void ProcessType (
+        final ComplexType aComplexType,
+        final Iterable<Element> aElements,
+        final Iterable<Any> aAnys,
+        final Iterable<Attribute> aAttributes)
+    {
+        final State aTypeState = maStateContext.GetStateForTypeName(aComplexType.GetName());
+
+        for (final Element aElement : aElements)
+            maTransitions.add(
+                Transition.CreateElementTransition(
+                    aTypeState,
+                    aTypeState,
+                    aElement.GetElementName(),
+                    new ParseElementAction(
+                        maStateContext.GetStateForTypeName(aElement.GetTypeName()))));
+
+        for (final Any aAny : aAnys)
+            maTransitions.add(
+                Transition.CreateElementTransition(
+                    aTypeState,
+                    aTypeState,
+                    null,
+                    new SkipElementAction()));
+
+        final Vector<Attribute> aAttributeVector = new Vector<Attribute>();
+        if (aAttributes != null)
+            for (final Attribute aAttribute : aAttributes)
+                aAttributeVector.add(aAttribute);
+        maAttributeMap.put(aTypeState, aAttributeVector);
+    }
+
+
+
+
+    /** Collect all elements inside the type tree that is rooted in the given
+     *  complex type.
+     */
+    private Vector<Element> CollectElements (final ComplexType aType)
+    {
+        final Vector<Element> aElements = new Vector<>();
+        for (final INode aNode : new DereferencingNodeIterator(aType, maSchema, false))
+        {
+            if (aNode.GetNodeType() == NodeType.Element)
+                aElements.add((Element)aNode);
+        }
+        return aElements;
+    }
+
+
+
+
+    /** Collect all any nodes inside the type tree that is rooted in the given
+     *  complex type.
+     */
+    private Vector<Any> CollectAnys (final ComplexType aType)
+    {
+        final Vector<Any> aAnys = new Vector<>();
+        for (final INode aNode : new DereferencingNodeIterator(aType, maSchema, false))
+        {
+            if (aNode.GetNodeType() == NodeType.Any)
+                aAnys.add((Any)aNode);
+        }
+        return aAnys;
+    }
+
+
+
+
+    /** Collect the attributes of all nodes inside the type tree that is rooted
+     *  in the given complex type.
+     */
+    private Vector<Attribute> CollectAttributes (final ComplexType aComplexType)
+    {
+        final Vector<Attribute> aAttributes = new Vector<>();
+        for (final INode aNode : new DereferencingNodeIterator(aComplexType, maSchema, false))
+            for (final Attribute aAttribute : new AttributeIterator(aNode, maSchema))
+                aAttributes.add(aAttribute);
+        return aAttributes;
+    }
+
+
+
+    private final Schema maSchema;
+    private final StateContext maStateContext;
+    private final Set<Transition> maTransitions;
+    private final Map<State,Vector<Attribute>> maAttributeMap;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ParseElementAction.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ParseElementAction.java
new file mode 100644
index 0000000..7d83781
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/ParseElementAction.java
@@ -0,0 +1,90 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+/** Action with the name "parse".  It remembers the state that was given
+ *  to its constructor and reports it as its start state.
+ */
+public class ParseElementAction
+    implements IAction
+{
+    private final State maStartState;
+
+
+
+
+    ParseElementAction (final State aState)
+    {
+        maStartState = aState;
+    }
+
+
+
+
+    /** Nothing to do at the moment.
+     */
+    @Override
+    public void Run ()
+    {
+    }
+
+
+
+
+    @Override
+    public String GetActionName ()
+    {
+        return "parse";
+    }
+
+
+
+
+    /** Return "parse-" followed by the full name of the start state.
+     */
+    @Override
+    public String GetActionDisplayText ()
+    {
+        final String sStateName = maStartState.GetFullname();
+        return "parse-" + sStateName;
+    }
+
+
+
+
+    @Override
+    public State GetStartState ()
+    {
+        return maStartState;
+    }
+
+
+
+
+    /** Return a new action whose start state is the clone of this action's
+     *  start state in the given context.
+     */
+    @Override
+    public IAction Clone (final StateContext aStateContext)
+    {
+        final State aClonedStartState = maStartState.Clone(aStateContext);
+        return new ParseElementAction(aClonedStartState);
+    }
+
+
+
+
+    /** Return "parse " followed by the full name of the start state.
+     */
+    @Override
+    public String toString ()
+    {
+        final String sStateName = maStartState.GetFullname();
+        return "parse " + sStateName;
+    }
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/SkipElementAction.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/SkipElementAction.java
new file mode 100644
index 0000000..47d0191
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/SkipElementAction.java
@@ -0,0 +1,67 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+/** Action with the name "skip".  It has no state of its own and therefore
+ *  no start state.
+ */
+public class SkipElementAction
+    implements IAction
+{
+    /** Used both as action name and as display text. */
+    private static final String msName = "skip";
+
+
+
+
+    /** Nothing to do at the moment.
+     */
+    @Override
+    public void Run ()
+    {
+    }
+
+
+
+
+    @Override
+    public String GetActionName ()
+    {
+        return msName;
+    }
+
+
+
+
+    @Override
+    public String GetActionDisplayText ()
+    {
+        return msName;
+    }
+
+
+
+
+    /** A skip action is not associated with a state.
+     *  @return
+     *      Always null.
+     */
+    @Override
+    public State GetStartState ()
+    {
+        return null;
+    }
+
+
+
+
+    /** The given context is not used because the action does not reference
+     *  any state.
+     *  @return
+     *      A new skip action.
+     */
+    @Override
+    public IAction Clone (final StateContext aStateContext)
+    {
+        final IAction aClone = new SkipElementAction();
+        return aClone;
+    }
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomaton.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomaton.java
new file mode 100644
index 0000000..8ab1107
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomaton.java
@@ -0,0 +1,110 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Set;
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.attribute.Attribute;
+import org.apache.openoffice.ooxml.schema.model.schema.NamespaceMap;
+
+/** The model of the parser generators is a (nested) stack automaton.
+ *  States correspond to complex types, transitions to elements.
+ *  An automaton bundles the namespaces, the state context, the set of
+ *  transitions and the attributes per state that are given to its constructor.
+ */
+public class StackAutomaton
+{
+    private final NamespaceMap maNamespaces;
+    private final StateContext maStateContext;
+    private final Set<Transition> maTransitions;
+    private final Map<State,Vector<Attribute>> maAttributes;
+
+
+
+
+    /** Store the given objects.  No copies are made.
+     */
+    StackAutomaton (
+        final NamespaceMap aNamespaces,
+        final StateContext aContext,
+        final Set<Transition> aTransitions,
+        final Map<State,Vector<Attribute>> aAttributes)
+    {
+        maNamespaces = aNamespaces;
+        maStateContext = aContext;
+        maTransitions = aTransitions;
+        maAttributes = aAttributes;
+    }
+
+
+
+
+    /** Return the namespace map as sequence of its entries.
+     */
+    public Iterable<Entry<String,String>> GetNamespaces ()
+    {
+        return maNamespaces;
+    }
+
+
+
+
+    /** Return the number of states as reported by the state context.
+     */
+    public int GetStateCount ()
+    {
+        final int nStateCount = maStateContext.GetStateCount();
+        return nStateCount;
+    }
+
+
+
+
+    /** Return the sorted states of the state context.
+     */
+    public Iterable<State> GetStatesSorted ()
+    {
+        final Iterable<State> aSortedStates = maStateContext.GetStatesSorted();
+        return aSortedStates;
+    }
+
+
+
+
+    /** Return the size of the transition set.
+     */
+    public int GetTransitionCount ()
+    {
+        final int nTransitionCount = maTransitions.size();
+        return nTransitionCount;
+    }
+
+
+
+
+    /** Return the transition set itself, not a copy.
+     */
+    public Iterable<Transition> GetTransitions()
+    {
+        return maTransitions;
+    }
+
+
+
+
+    /** Run a StackAutomatonOptimizer on the state context of this automaton.
+     *  @return
+     *      A new automaton that consists of the optimized state context and
+     *      transitions together with the namespaces and attributes of this
+     *      automaton.
+     */
+    public StackAutomaton Optimize ()
+    {
+        final StackAutomatonOptimizer aOptimizer = new StackAutomatonOptimizer(maStateContext);
+        aOptimizer.Optimize();
+
+        final StateContext aOptimizedContext = aOptimizer.GetOptimizedStateContext();
+        final Set<Transition> aOptimizedTransitions = aOptimizer.GetOptimizedTransitions();
+        return new StackAutomaton(
+            maNamespaces,
+            aOptimizedContext,
+            aOptimizedTransitions,
+            maAttributes);
+    }
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomatonOptimizer.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomatonOptimizer.java
new file mode 100644
index 0000000..688888a
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/StackAutomatonOptimizer.java
@@ -0,0 +1,257 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.PrintStream;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.TreeSet;
+import java.util.Vector;
+
+/** Create an optimized copy of the states and transitions of a source state
+ *  context.  The copy does not have epsilon transitions or unused states.
+ *  The optimization is logged to a file when that file can be opened.
+ *  Without the log file the optimization runs unlogged.
+ */
+public class StackAutomatonOptimizer
+{
+    StackAutomatonOptimizer (
+        final StateContext aStateContext)
+    {
+        maSourceStateContext = aStateContext;
+        maTargetStateContext = new StateContext(
+            maSourceStateContext.GetStartState().GetFullname(),
+            maSourceStateContext.GetEndState().GetFullname());
+        maTargetTransitions = new HashSet<Transition>();
+    }
+
+
+
+
+    /** Run the optimization.  The results are available afterwards via
+     *  GetOptimizedStateContext() and GetOptimizedTransitions().
+     *  When the log file can not be opened (for example because its
+     *  directory does not exist) then the stack trace is printed and the
+     *  optimization runs without logging.
+     */
+    void Optimize ()
+    {
+        try
+        {
+            maLog = new PrintStream(new FileOutputStream("/tmp/ooxml-parser/automaton-optimization.log"));
+        }
+        catch (FileNotFoundException e)
+        {
+            e.printStackTrace();
+        }
+
+        try
+        {
+            OptimizeStates();
+        }
+        finally
+        {
+            // Close the log also when the optimization fails.
+            if (maLog != null)
+            {
+                maLog.close();
+                maLog = null;
+            }
+        }
+    }
+
+
+
+
+    StateContext GetOptimizedStateContext ()
+    {
+        return maTargetStateContext;
+    }
+
+
+
+
+    Set<Transition> GetOptimizedTransitions ()
+    {
+        return maTargetTransitions;
+    }
+
+
+
+
+    /** Write a formatted message to the log.  Does nothing when there is no log.
+     */
+    private void Log (final String sFormat, final Object... aArguments)
+    {
+        if (maLog != null)
+            maLog.printf(sFormat, aArguments);
+    }
+
+
+
+
+    /** Optimize the given set of states and transitions so that the optimized set
+     *  does not have epsilon transitions or unused states.
+     *  The duration and the size of the result are printed to stdout.
+     */
+    private void OptimizeStates ()
+    {
+        final long nStartTime = System.currentTimeMillis();
+
+        OptimizeState(
+            maSourceStateContext.GetStartState(),
+            maTargetStateContext.GetStartState(),
+            new HashSet<State>());
+
+        final long nEndTime = System.currentTimeMillis();
+        System.out.printf("optimized automaton in %fs, it has %d states and %d transitions\n",
+            (nEndTime-nStartTime)/1000.0,
+            maTargetStateContext.GetStateCount(),
+            maTargetTransitions.size());
+    }
+
+
+
+
+    /** Process a single pair of source state and corresponding target state.
+     *  First create a set of non-epsilon transitions that start at the source
+     *  state or a state that is reachable by epsilon transitions from the source
+     *  state.
+     *  Then create a target transition for each of these source transitions
+     *  that starts at the target state but are otherwise identical to the
+     *  source transition.
+     *  Finally process the end states and action states of these transitions
+     *  recursively, unless they have been visited already.
+     */
+    private void OptimizeState (
+        final State aSourceState,
+        final State aTargetState,
+        final Set<State> aVisitedSourceStates)
+    {
+        if (aVisitedSourceStates.contains(aSourceState))
+            return;
+
+        Log("optimizing state %s\n", aSourceState);
+
+        aVisitedSourceStates.add(aSourceState);
+
+        final Set<State> aTodo = new TreeSet<>();
+
+        final Vector<Transition> aSourceTransitions = GetReachableStates(aSourceState);
+        for (final Transition aSourceTransition : aSourceTransitions)
+        {
+            aTodo.add(aSourceTransition.GetEndState());
+            if (aSourceTransition.GetActionState() != null)
+                aTodo.add(aSourceTransition.GetActionState());
+
+            final Transition aTargetTransition = Transition.CreateElementTransition(
+                aTargetState,
+                aSourceTransition.GetEndState().Clone(maTargetStateContext),
+                aSourceTransition.GetElementName(),
+                aSourceTransition.GetAction().Clone(maTargetStateContext));
+            aTargetState.AddTransition(aTargetTransition);
+            maTargetTransitions.add(aTargetTransition);
+        }
+
+        for (final State aOtherSourceState : aTodo)
+        {
+            // Defensive check, states in the todo set are not expected to be null.
+            if (aOtherSourceState == null)
+                continue;
+            if (aVisitedSourceStates.contains(aOtherSourceState))
+                continue;
+
+            OptimizeState(
+                aOtherSourceState,
+                aOtherSourceState.Clone(maTargetStateContext),
+                aVisitedSourceStates);
+        }
+    }
+
+
+
+
+    /** Return the non-epsilon transitions that start at the given state or
+     *  at a state in its epsilon closure.
+     */
+    private Vector<Transition> GetReachableStates (final State aSourceState)
+    {
+        final Set<State> aEpsilonFront = GetEpsilonClosure(aSourceState);
+        if (maLog != null)
+        {
+            maLog.printf(" states reachable via epsilon transitions:\n");
+            for (final State aState : aEpsilonFront)
+                maLog.printf(" %s\n", aState);
+        }
+
+        final Vector<Transition> aSourceTransitions = new Vector<>();
+        for (final State aEpsilonReachableState : aEpsilonFront)
+            for (final Transition aTransition : aEpsilonReachableState.GetTransitions())
+                if ( ! aTransition.IsEpsilonTransition())
+                    aSourceTransitions.add(aTransition);
+
+        if (maLog != null)
+        {
+            maLog.printf(" joined transitions are:\n");
+            for (final Transition aTransition : aSourceTransitions)
+                maLog.printf(" %s (via %s)\n",
+                    aTransition.GetEndState(),
+                    aTransition.GetActionState());
+        }
+
+        return aSourceTransitions;
+    }
+
+
+
+
+    /** Collect all states that are reachable via zero or more epsilon transitions
+     *  from the given start state.
+     */
+    private Set<State> GetEpsilonClosure (final State aState)
+    {
+        final Set<State> aEpsilonFront = new TreeSet<State>();
+
+        AddToEpsilonClosure(aState, aEpsilonFront, new HashSet<State>(), " ");
+
+        return aEpsilonFront;
+    }
+
+
+
+
+    /** Add the given state and the states that are reachable from it via
+     *  epsilon transitions to the given closure.
+     *  @param aState
+     *      The state to add.  It is ignored when it has been visited before.
+     *  @param aEpsilonClosure
+     *      The set to which the states are added.
+     *  @param aVisitedStates
+     *      The states that have been processed already.  Shared between all
+     *      recursive calls.
+     *  @param sIndentation
+     *      Prefix of the log messages.  Grows with the recursion depth.
+     */
+    private void AddToEpsilonClosure (
+        final State aState,
+        final Set<State> aEpsilonClosure,
+        final Set<State> aVisitedStates,
+        final String sIndentation)
+    {
+        if (aVisitedStates.contains(aState))
+            return;
+        aVisitedStates.add(aState);
+        aEpsilonClosure.add(aState);
+
+        // Determine the local epsilon front that originates at aState.
+        final Set<State> aLocalFront = new HashSet<>();
+        for (final Transition aTransition : aState.GetTransitions())
+            if (aTransition.IsEpsilonTransition())
+            {
+                Log("%sadding state %s (from %s)\n",
+                    sIndentation,
+                    aTransition.GetEndState(),
+                    aTransition.GetStartState());
+                AddToEpsilonClosure(aTransition.GetEndState(), aLocalFront, aVisitedStates, sIndentation+" ");
+            }
+
+        // Process the short circuit.
+        final State aShortCircuitEnd = aState.GetShortCircuitEnd();
+        if (aShortCircuitEnd != null)
+        {
+            if (aLocalFront.contains(aShortCircuitEnd))
+            {
+                Log(" replacing short circuit from %s to %s with %s\n",
+                    aState,
+                    aShortCircuitEnd,
+                    aState.GetShortCircuitReplacement());
+                aLocalFront.remove(aShortCircuitEnd);
+                aLocalFront.add(aState.GetShortCircuitReplacement());
+            }
+        }
+
+        aEpsilonClosure.addAll(aLocalFront);
+    }
+
+
+
+
+    private final StateContext maSourceStateContext;
+    private final StateContext maTargetStateContext;
+    private final Set<Transition> maTargetTransitions;
+    /** Log of the current optimization run.  Null outside of Optimize() and
+     *  when the log file could not be opened.
+     */
+    private PrintStream maLog;
+}
diff --git a/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java
new file mode 100644
index 0000000..7a898a9
--- /dev/null
+++ b/ooxml/source/framework/SchemaParser/src/org/apache/openoffice/ooxml/schema/generator/automaton/State.java
@@ -0,0 +1,196 @@
+/**************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+
+package org.apache.openoffice.ooxml.schema.generator.automaton;
+
+import java.util.Vector;
+
+import org.apache.openoffice.ooxml.schema.model.base.QualifiedName;
+
+/** Each complex type is represented by a State object (primary state).
+ * For a validating parser additional states are created for sequences, choices, etc. (secondary states).
+ * Secondary states have the same basename as primary states and suffixes to make their names unique.
+ * Full names of states contain both the basename and the suffix.
+ */
+public class State
+ implements Comparable<State>
+{
... etc. - the rest is truncated
More information about the Libreoffice-commits
mailing list