[Libreoffice-commits] .: sc/inc

Kohei Yoshida kohei at kemper.freedesktop.org
Wed Sep 7 20:18:08 PDT 2011


 sc/inc/orcus/css_parser.hpp |   17 ++
 sc/inc/orcus/csv_parser.hpp |  280 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 293 insertions(+), 4 deletions(-)

New commits:
commit 7015bad5a7c767c51e7a89a005b5315669a954bc
Author: Kohei Yoshida <kohei.yoshida at suse.com>
Date:   Wed Sep 7 23:14:52 2011 -0400

    Updated css_parser from orcus, plus added experimental csv_parser.

diff --git a/sc/inc/orcus/css_parser.hpp b/sc/inc/orcus/css_parser.hpp
index 7a1b3e5..c211cca 100644
--- a/sc/inc/orcus/css_parser.hpp
+++ b/sc/inc/orcus/css_parser.hpp
@@ -143,7 +143,7 @@ void css_parser<_Handler>::parse()
     std::cout << "'" << std::endl;
 #endif
     m_handler.begin_parse();
-    for (; has_char(); next())
+    while (has_char())
         rule();
     m_handler.end_parse();
 }
@@ -151,7 +151,7 @@ void css_parser<_Handler>::parse()
 template<typename _Handler>
 void css_parser<_Handler>::rule()
 {
-    // <name> , ... , <name> { <properties> }
+    // <selector name> , ... , <selector name> <block>
     while (has_char())
     {
         char c = cur_char();
@@ -201,7 +201,11 @@ void css_parser<_Handler>::at_rule_name()
 template<typename _Handler>
 void css_parser<_Handler>::selector_name()
 {
+    // <element name>
+    // '.' <class name>
     // <element name> '.' <class name>
+    //
+    // Both element and class names are identifiers.
 
     assert(has_char());
     char c = cur_char();
@@ -239,6 +243,8 @@ void css_parser<_Handler>::selector_name()
 template<typename _Handler>
 void css_parser<_Handler>::property_name()
 {
+    // <identifier>
+
     assert(has_char());
     char c = cur_char();
     if (!is_alpha(c) && c != '.')
@@ -259,7 +265,8 @@ void css_parser<_Handler>::property_name()
 template<typename _Handler>
 void css_parser<_Handler>::property()
 {
-    // <name> : <value> , ... , <value>
+    // <property name> : <value> , ... , <value>
+
     m_handler.begin_property();
     property_name();
     if (cur_char() != ':')
@@ -286,6 +293,8 @@ void css_parser<_Handler>::property()
 template<typename _Handler>
 void css_parser<_Handler>::quoted_value()
 {
+    // Parse until the end quote is reached.
+
     assert(cur_char() == '"');
     next();
     const char* p = mp_char;
@@ -373,7 +382,7 @@ void css_parser<_Handler>::property_sep()
 template<typename _Handler>
 void css_parser<_Handler>::block()
 {
-    // '{' <property> ';' ... ';' <property> '}'
+    // '{' <property> ';' ... ';' <property> ';'(optional) '}'
 
     assert(cur_char() == '{');
 #if ORCUS_DEBUG_CSS
diff --git a/sc/inc/orcus/csv_parser.hpp b/sc/inc/orcus/csv_parser.hpp
new file mode 100644
index 0000000..828a8b6
--- /dev/null
+++ b/sc/inc/orcus/csv_parser.hpp
@@ -0,0 +1,280 @@
+/*************************************************************************
+ *
+ * Copyright (c) 2011 Kohei Yoshida
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ ************************************************************************/
+
+#ifndef __ORCUS_CSV_PARSER_HPP__
+#define __ORCUS_CSV_PARSER_HPP__
+
+#define ORCUS_DEBUG_CSV 0
+
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <string>
+#include <cassert>
+#include <sstream>
+
+#if ORCUS_DEBUG_CSV
+#include <iostream>
+using std::cout;
+using std::endl;
+#endif
+
+namespace orcus {
+
+/**
+ * Settings that control how csv_parser splits a buffer into cells.
+ */
+struct csv_parser_config
+{
+    /** Set of characters, each of which separates two cells in a row. */
+    std::string delimiters;
+
+    /** Character that opens and closes a quoted cell; '\0' by default. */
+    char text_qualifier;
+
+    /** When true, leading and trailing blanks are stripped from cell values. */
+    bool trim_cell_value:1;
+
+    // text_qualifier is initialized explicitly so that a default-constructed
+    // config never hands an indeterminate value to the parser.
+    csv_parser_config() :
+        text_qualifier('\0'),
+        trim_cell_value(true) {}
+};
+
+/**
+ * Exception thrown by csv_parser when the input is malformed.  The message
+ * given at construction is returned unchanged by what().
+ */
+class csv_parse_error : public std::exception
+{
+public:
+    csv_parse_error(const std::string& msg) :
+        m_msg(msg)
+    {
+    }
+
+    virtual ~csv_parse_error() throw()
+    {
+    }
+
+    virtual const char* what() const throw()
+    {
+        return m_msg.c_str();
+    }
+
+private:
+    std::string m_msg;
+};
+// Parser that walks a character buffer as CSV and reports rows and cells to a handler of type _Handler.
+template<typename _Handler>
+class csv_parser
+{
+public:
+    typedef _Handler handler_type;
+    // p: start of the buffer, n: its length in chars; hdl and config are held by reference, not copied.
+    csv_parser(const char* p, size_t n, handler_type& hdl, const csv_parser_config& config);
+    void parse(); // Sends begin_parse(), consumes rows while input remains, then sends end_parse().
+
+private:
+    bool has_char() const { return m_pos < m_length; } // True while the current offset is inside the buffer.
+    void next(); // Advance the current position by one character.
+    char cur_char() const; // Character at the current position; performs no bounds check.
+    // Character classification against the active configuration.
+    bool is_delim(char c) const;
+    bool is_text_qualifier(char c) const;
+
+    // Grammar handlers: a row is a sequence of plain or quoted cells.
+    void row();
+    void cell();
+    void quoted_cell();
+
+    /**
+     * Push cell value to the handler, trimming blanks first when enabled.
+     */
+    void push_cell_value(const char* p, size_t n);
+    // Only space and horizontal tab count as blank.
+    static bool is_blank(char c)
+    {
+        return c == ' ' || c == '\t';
+    }
+
+private:
+    handler_type& m_handler; // Receiver of the parse events.
+    const csv_parser_config& m_config; // Delimiters, text qualifier and trim flag.
+    const char* mp_char; // Points at the current character.
+    size_t m_pos; // Offset of the current character from the start of the buffer.
+    size_t m_length; // Total number of characters in the buffer.
+};
+
+// Record the buffer start and length together with references to the
+// handler and the configuration; the current offset starts at 0.
+template<typename _Handler>
+csv_parser<_Handler>::csv_parser(
+    const char* p, size_t n, handler_type& hdl, const csv_parser_config& config) :
+    m_handler(hdl),
+    m_config(config),
+    mp_char(p),
+    m_pos(0),
+    m_length(n)
+{
+}
+
+/**
+ * Run the parser over the buffer: begin_parse() is sent to the handler,
+ * rows are consumed for as long as input remains, and then end_parse() is
+ * sent.
+ */
+template<typename _Handler>
+void csv_parser<_Handler>::parse()
+{
+#if ORCUS_DEBUG_CSV
+    // Echo the not-yet-parsed part of the buffer.
+    for (size_t i = m_pos; i < m_length; ++i)
+        std::cout << mp_char[i - m_pos];
+    std::cout << std::endl;
+#endif
+
+    m_handler.begin_parse();
+    while (true)
+    {
+        if (!has_char())
+            break;
+        row();
+    }
+    m_handler.end_parse();
+}
+
+// Step forward one character, moving the pointer and the offset together.
+template<typename _Handler>
+void csv_parser<_Handler>::next()
+{
+    mp_char += 1;
+    m_pos += 1;
+}
+
+// Return the character at the current position.  No bounds check is
+// performed here.
+template<typename _Handler>
+char csv_parser<_Handler>::cur_char() const
+{
+    return mp_char[0];
+}
+
+// True when c is one of the characters listed in the configured delimiters.
+template<typename _Handler>
+bool csv_parser<_Handler>::is_delim(char c) const
+{
+    const std::string& delims = m_config.delimiters;
+    return delims.find(c) != std::string::npos;
+}
+
+// True when c equals the configured text qualifier (quote) character.
+template<typename _Handler>
+bool csv_parser<_Handler>::is_text_qualifier(char c) const
+{
+    return c == m_config.text_qualifier;
+}
+
+/**
+ * Parse one row: begin_row() is sent to the handler, then cells separated
+ * by delimiters are parsed, then end_row() is sent.  The row ends at a line
+ * feed (which is consumed) or at the end of the buffer.
+ *
+ * This reads the current character on entry, so it must only be called
+ * while has_char() is true.
+ */
+template<typename _Handler>
+void csv_parser<_Handler>::row()
+{
+    m_handler.begin_row();
+    while (true)
+    {
+        if (is_text_qualifier(cur_char()))
+            quoted_cell();
+        else
+            cell();
+
+        if (!has_char())
+        {
+            m_handler.end_row();
+            return;
+        }
+
+        char c = cur_char();
+        if (c == '\n')
+        {
+            next();
+#if ORCUS_DEBUG_CSV
+            cout << "(LF)" << endl;
+#endif
+            m_handler.end_row();
+            return;
+        }
+
+        assert(is_delim(c));
+        next();
+
+        if (!has_char())
+        {
+            // The buffer ends right after a delimiter.  Report the empty
+            // trailing cell here instead of looping, because the next
+            // iteration would read one character past the end.
+            push_cell_value(NULL, 0);
+            m_handler.end_row();
+            return;
+        }
+    }
+}
+
+/**
+ * Parse an unquoted cell.  Characters are consumed up to, but not
+ * including, the next delimiter or line feed, or up to the end of the
+ * buffer.  The value is then passed to push_cell_value(); an empty cell is
+ * passed as a NULL pointer with length 0.
+ */
+template<typename _Handler>
+void csv_parser<_Handler>::cell()
+{
+    const char* p = mp_char;
+    size_t len = 0;
+
+    // has_char() is tested before every read, including the first one, so
+    // an empty remainder yields an empty cell rather than an over-read.
+    while (has_char())
+    {
+        char c = cur_char();
+        if (c == '\n' || is_delim(c))
+            break;
+
+        ++len;
+        next();
+    }
+
+    if (!len)
+        p = NULL;
+
+    push_cell_value(p, len);
+}
+
+/**
+ * Parse a cell enclosed in text qualifiers.  The current character must be
+ * the opening qualifier.  The characters between the qualifiers are passed
+ * to push_cell_value(); an empty value is passed as a NULL pointer with
+ * length 0.
+ *
+ * If the buffer ends before the closing qualifier is found, whatever was
+ * read so far is reported as the cell value.
+ *
+ * @throws csv_parse_error when the closing qualifier is followed by a
+ *         character that is neither a delimiter nor a line feed.
+ */
+template<typename _Handler>
+void csv_parser<_Handler>::quoted_cell()
+{
+    assert(is_text_qualifier(cur_char()));
+    next(); // Skip the opening quote.
+
+    const char* p = mp_char;
+    size_t len = 0;
+    while (has_char() && !is_text_qualifier(cur_char()))
+    {
+        ++len;
+        next();
+    }
+
+    if (has_char())
+    {
+        next(); // Skip the closing quote.
+
+        // Only inspect the following character when there is one; a quoted
+        // cell may legitimately be the last thing in the buffer.
+        if (has_char())
+        {
+            char c = cur_char();
+
+            // A line feed is accepted as well, since row() treats it as the
+            // end of the row.
+            if (c != '\n' && !is_delim(c))
+            {
+                std::ostringstream os;
+                os << "A quoted cell value must be immediately followed by a delimiter. ";
+                os << "'" << c << "' is found instead.";
+                throw csv_parse_error(os.str());
+            }
+        }
+    }
+    // Otherwise the stream ended prematurely.  Handle it gracefully by
+    // reporting what has been read.
+
+    if (!len)
+        p = NULL;
+
+    push_cell_value(p, len);
+}
+
+/**
+ * Hand a cell value to the handler's cell() callback.  When the
+ * configuration asks for trimming, blanks at the front and the back of the
+ * value are excluded first.
+ *
+ * @param p start of the cell value.
+ * @param n number of characters in the cell value.
+ */
+template<typename _Handler>
+void csv_parser<_Handler>::push_cell_value(const char* p, size_t n)
+{
+    size_t len = n;
+
+    if (m_config.trim_cell_value)
+    {
+        // Drop blanks from the front.
+        while (len && is_blank(*p))
+        {
+            ++p;
+            --len;
+        }
+
+        // Drop blanks from the back.  The first remaining character is
+        // never examined here.
+        while (len > 1 && is_blank(p[len-1]))
+            --len;
+    }
+
+    m_handler.cell(p, len);
+#if ORCUS_DEBUG_CSV
+    cout << "(cell:'" << std::string(p, len) << "')";
+#endif
+}
+
+}
+
+#endif


More information about the Libreoffice-commits mailing list