Orcus
sax_parser_base.hpp
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6  */
7 
8 #ifndef INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
9 #define INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
10 
11 #include "env.hpp"
12 #include "pstring.hpp"
13 #include "cell_buffer.hpp"
14 #include "parser_global.hpp"
15 #include "parser_base.hpp"
16 
17 #include <cassert>
18 #include <cstdlib>
19 #include <exception>
20 #include <sstream>
21 #include <memory>
22 
23 #define ORCUS_DEBUG_SAX_PARSER 0
24 
25 #if ORCUS_DEBUG_SAX_PARSER
26 #include <iostream>
27 using std::cout;
28 using std::endl;
29 #endif
30 
31 namespace orcus { namespace sax {
32 
33 class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public ::orcus::parse_error
34 {
35 public:
36  malformed_xml_error() = delete;
37  malformed_xml_error(const std::string& msg, std::ptrdiff_t offset);
38  virtual ~malformed_xml_error() throw();
39 };
40 
41 /**
42  * Parts of an XML DOCTYPE declaration. The header on line 45 was absent from
43  * this listing and is restored from its index entry; 'struct' is assumed.
44  */
45 struct doctype_declaration
46 {
47  enum class keyword_type { dtd_public, dtd_private }; // the two keyword kinds a declaration can carry
48 
49  keyword_type keyword; // which of the two keyword kinds applies
50  pstring root_element; // NOTE(review): presumably the root element name given in the DOCTYPE - confirm
51  pstring fpi; // NOTE(review): presumably the formal public identifier - confirm
52  pstring uri; // NOTE(review): presumably the location of the DTD - confirm
53 };
54 
66 ORCUS_PSR_DLLPUBLIC char decode_xml_encoded_char(const char* p, size_t n); // Declared only. NOTE(review): presumably maps an encoded name of n chars at p (e.g. "amp") to the character it stands for - confirm input form and failure value against the implementation.
67 
79 ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n); // Declared only. NOTE(review): presumably decodes a numeric character reference of n chars at p into a string - confirm the output encoding and the failure value against the implementation.
80 
81 /**
82  * One XML element: its qualified name and the span of its tag in the stream.
83  * Header line 85 was absent from this listing; restored from its index entry.
84  */
85 struct parser_element
86 {
87  pstring ns; // element namespace (optional)
88  pstring name; // element name
89  std::ptrdiff_t begin_pos; // position of the opening brace '<'.
90  std::ptrdiff_t end_pos; // position of the char after the closing brace '>'.
91 };
92 
96 /**
97  * One XML attribute: its namespace, name and value.
98  * Header line 100 was absent from this listing; restored from its index entry.
99  */
100 struct parser_attribute
101 {
102  pstring ns; // attribute namespace (optional)
103  pstring name; // attribute name
104  pstring value; // attribute value
105  bool transient; // whether or not the attribute value is on a temporary buffer.
106 };
107 
108 class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base // Base for SAX-style XML parsers: adds nesting-depth tracking, checked character access and XML token helpers on top of ::orcus::parser_base.
109 {
110  struct impl; // only forward-declared in this header
111  std::unique_ptr<impl> mp_impl; // owning pointer to the hidden implementation object
112 
113  parser_base() = delete; // must be constructed from a content buffer
114  parser_base(const parser_base&) = delete; // not copy-constructible
115  parser_base& operator=(const parser_base&) = delete; // not copy-assignable
116 protected:
117  size_t m_nest_level; // current nesting depth; changed by nest_up() / nest_down()
118  size_t m_buffer_pos; // set to 0 by reset_buffer_pos(); NOTE(review): presumably advanced by inc_buffer_pos(), whose body is not in this header
119  bool m_root_elem_open:1; // one-bit flag; NOTE(review): presumably true while the root element is open - it is not written anywhere in this header
120 
121 protected:
122  parser_base(const char* content, size_t size); // NOTE(review): presumably content/size delimit the XML stream to parse - confirm against the definition
123  ~parser_base(); // protected destructor, defined out of line
124 
125  void next_check() // advance with next(), then throw malformed_xml_error if has_char() reports nothing left
126  {
127  next();
128  if (!has_char())
129  throw malformed_xml_error("xml stream ended prematurely.", offset());
130  }
131 
132  void nest_up() { ++m_nest_level; } // go one nesting level deeper
133  void nest_down() // go one nesting level up; throws malformed_xml_error if the level is already 0
134  {
135  if (m_nest_level == 0) // guard: decrementing an unsigned 0 would wrap around
136  throw malformed_xml_error("incorrect nesting in xml stream", offset());
137 
138  --m_nest_level;
139  }
140 
141  void inc_buffer_pos(); // declared only; definition is not in this header
142  void reset_buffer_pos() { m_buffer_pos = 0; } // rewind the buffer position to the start
143 
144  void has_char_throw(const char* msg) const // throw malformed_xml_error carrying msg and the current offset() when has_char() is false; otherwise do nothing
145  {
146  if (!has_char())
147  throw malformed_xml_error(msg, offset());
148  }
149 
157  inline size_t remains() const // distance from mp_char to mp_end; the bounds check below exists only when ORCUS_DEBUG_SAX_PARSER is non-zero, so callers must otherwise ensure mp_char has not passed mp_end
158  {
159 #if ORCUS_DEBUG_SAX_PARSER
160  if (mp_char >= mp_end)
161  throw malformed_xml_error("xml stream ended prematurely.", offset());
162 #endif
163  return mp_end - mp_char; // converted to size_t on return: a negative difference would become a very large value
164  }
165 
166  char cur_char_checked() const // return the current character, or throw malformed_xml_error when has_char() is false
167  {
168  if (!has_char())
169  throw malformed_xml_error("xml stream ended prematurely.", offset());
170 
171  return *mp_char;
172  }
173 
174  char next_and_char() // advance, then return the new current character; end-of-stream is checked only when ORCUS_DEBUG_SAX_PARSER is non-zero
175  {
176  next();
177 #if ORCUS_DEBUG_SAX_PARSER
178  if (mp_char >= mp_end)
179  throw malformed_xml_error("xml stream ended prematurely.", offset());
180 #endif
181  return *mp_char; // unchecked dereference in non-debug builds: the caller must know a character remains
182  }
183 
184  char next_char_checked() // advance, then return the new current character; always throws malformed_xml_error if the stream has ended
185  {
186  next();
187  if (!has_char())
188  throw malformed_xml_error("xml stream ended prematurely.", offset());
189 
190  return *mp_char;
191  }
192 
193  cell_buffer& get_cell_buffer(); // declared only; this and every member below are defined outside this header
194 
195  void blank(); // NOTE(review): presumably skips blank characters - confirm against the definition
196  void comment(); // NOTE(review): presumably consumes an XML comment - confirm against the definition
197 
201  void skip_bom(); // NOTE(review): presumably skips a leading byte-order mark - confirm against the definition
202 
203  void expects_next(const char* p, size_t n); // NOTE(review): presumably requires the next n characters to equal p - confirm against the definition
204 
205  void parse_encoded_char(cell_buffer& buf); // NOTE(review): presumably decodes one encoded character into buf - confirm
206  void value_with_encoded_char(cell_buffer& buf, pstring& str); // NOTE(review): presumably reads a value containing encoded characters via buf into str - confirm
207 
216  bool value(pstring& str, bool decode); // NOTE(review): meaning of the bool result and of 'decode' is not visible in this header - confirm against the definition
217 
218  void name(pstring& str); // NOTE(review): presumably reads a name token into str - confirm
219  void element_name(parser_element& elem, std::ptrdiff_t begin_pos); // NOTE(review): presumably fills elem's name fields and begin_pos - confirm
220  void attribute_name(pstring& attr_ns, pstring& attr_name); // NOTE(review): presumably reads an attribute's namespace and name - confirm
221  void characters_with_encoded_char(cell_buffer& buf); // NOTE(review): presumably reads character content containing encoded characters into buf - confirm
222 };
223 
224 }}
225 
226 #endif
227 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
orcus::sax::parser_element
Definition: sax_parser_base.hpp:85
orcus::sax::doctype_declaration
Definition: sax_parser_base.hpp:45
orcus::sax::malformed_xml_error
Definition: sax_parser_base.hpp:33
orcus::sax::parser_base
Definition: sax_parser_base.hpp:108
orcus::pstring
Definition: pstring.hpp:24
orcus::sax::parser_attribute
Definition: sax_parser_base.hpp:100
orcus::sax::parser_base::remains
size_t remains() const
Definition: sax_parser_base.hpp:157
orcus::parser_base
Definition: parser_base.hpp:35
orcus::parse_error
Definition: parser_base.hpp:21