8 #ifndef ORCUS_SAX_PARSER_HPP
9 #define ORCUS_SAX_PARSER_HPP
11 #include "sax_parser_base.hpp"
/**
 * SAX-style XML parser template.
 *
 * _Handler is the type of the callback object that receives parse events
 * (kept by reference in m_handler).  _Config supplies compile-time settings
 * such as baseline_version and defaults to sax_parser_default_config.
 */
29 template<
typename _Handler,
typename _Config = sax_parser_default_config>
// Aliases for the two template parameters.
33 typedef _Handler handler_type;
34 typedef _Config config_type;
// Construct from a character buffer, its size, and the handler to notify.
36 sax_parser(
const char* content,
const size_t size, handler_type& handler);
// Same as above, with an additional transient_stream flag.
37 sax_parser(
const char* content,
const size_t size,
bool transient_stream, handler_type& handler);
// Element tag parsers; begin_pos is an offset value supplied by the caller.
51 void element_open(std::ptrdiff_t begin_pos);
52 void element_close(std::ptrdiff_t begin_pos);
// Parse a declaration.  When name_check is non-null the declaration name
// must equal it; a null pointer disables that check.
54 void declaration(
const char* name_check);
// Handler that receives all parse callbacks; held by reference.
61 handler_type& m_handler;
/**
 * Constructor taking the content buffer, its size, and the handler.
 */
64 template<
typename _Handler,
typename _Config>
66 const char* content,
const size_t size, handler_type& handler) :
/**
 * Constructor taking the content buffer, its size, a transient_stream flag,
 * and the handler.
 */
72 template<
typename _Handler,
typename _Config>
73 sax_parser<_Handler,_Config>::sax_parser(
74 const char* content,
const size_t size,
bool transient_stream, handler_type& handler) :
// The buffer, its size and the transient flag are forwarded to the base class.
75 sax::parser_base(content, size, transient_stream),
/**
 * Destructor.
 */
80 template<
typename _Handler,
typename _Config>
81 sax_parser<_Handler,_Config>::~sax_parser()
/**
 * Entry point that runs the parser over the content.
 */
85 template<
typename _Handler,
typename _Config>
86 void sax_parser<_Handler,_Config>::parse()
// Presumably skips whitespace and control characters (judging by the
// helper's name) -- confirm against parser_base.
91 skip_space_and_control();
// Sanity check: m_buffer_pos is expected to be zero at this point.
94 assert(m_buffer_pos == 0);
/**
 * Validate the very beginning of the XML stream.
 *
 * @throws sax::malformed_xml_error if the stream does not begin with '<',
 *         or if '<' is not followed by '?' where that check applies.
 */
97 template<
typename _Handler,
typename _Config>
98 void sax_parser<_Handler,_Config>::header()
102 skip_space_and_control();
// The first significant character must be '<'; an exhausted stream is
// rejected as well.
103 if (!has_char() || cur_char() !=
'<')
104 throw sax::malformed_xml_error(
"xml file must begin with '<'.", offset());
// Configuration-dependent branch keyed on the baseline version
// (NOTE(review): 11 presumably stands for XML 1.1 -- confirm).
106 if (config_type::baseline_version >= 11)
// The character following '<' must be '?'.
110 if (next_char_checked() !=
'?')
111 throw sax::malformed_xml_error(
"xml file must begin with '<?'.", offset());
/**
 * Process the content that follows the header.
 */
117 template<
typename _Handler,
typename _Config>
118 void sax_parser<_Handler,_Config>::body()
// A '<' marks the start of a tag.
122 if (cur_char() ==
'<')
// Branch taken when no root element is currently open.
125 if (!m_root_elem_open)
// Branch taken while the nesting level is non-zero.
129 else if (m_nest_level)
/**
 * Dispatch on the character that follows '<'.
 *
 * @throws sax::malformed_xml_error if an element name does not start with
 *         an alphabetic character or '_'.
 */
137 template<
typename _Handler,
typename _Config>
138 void sax_parser<_Handler,_Config>::element()
// Precondition: positioned on the opening '<'.
140 assert(cur_char() ==
'<');
// Remember the offset of '<' before advancing past it.
141 std::ptrdiff_t pos = offset();
142 char c = next_char_checked();
// Declaration case: a null name_check means any declaration name is accepted.
152 declaration(
nullptr);
// An element name must begin with an alphabetic character or an underscore.
155 if (!is_alpha(c) && c !=
'_')
156 throw sax::malformed_xml_error(
"expected an alphabet.", offset());
/**
 * Parse an opening (or self-closing) element tag and notify the handler.
 *
 * @param begin_pos offset passed through to element_name().
 * @throws sax::malformed_xml_error if a self-closing tag is not terminated
 *         with "/>".
 */
161 template<
typename _Handler,
typename _Config>
162 void sax_parser<_Handler,_Config>::element_open(std::ptrdiff_t begin_pos)
// Precondition: positioned on the first character of the element name.
164 assert(is_alpha(cur_char()) || cur_char() ==
'_');
// element_name() is handed both elem and begin_pos; presumably it fills in
// the namespace and name fields of elem -- confirm against parser_base.
166 sax::parser_element elem;
167 element_name(elem, begin_pos);
171 skip_space_and_control();
// Self-closing case: the next character must be '>'.
176 if (next_and_char() !=
'>')
177 throw sax::malformed_xml_error(
"expected '/>' to self-close the element.", offset());
// Record the end offset, then report both the start and the end of the
// element, since a self-closing element has no separate closing tag.
179 elem.end_pos = offset();
180 m_handler.start_element(elem);
182 m_handler.end_element(elem);
184 m_root_elem_open =
false;
185 #if ORCUS_DEBUG_SAX_PARSER
186 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
// Regular opening tag: record the end offset and report the start only.
194 elem.end_pos = offset();
196 m_handler.start_element(elem);
198 #if ORCUS_DEBUG_SAX_PARSER
199 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
/**
 * Parse a closing element tag and notify the handler.
 *
 * @param begin_pos offset passed through to element_name().
 * @throws sax::malformed_xml_error if the tag is not terminated with '>'.
 */
208 template<
typename _Handler,
typename _Config>
209 void sax_parser<_Handler,_Config>::element_close(std::ptrdiff_t begin_pos)
// Precondition: positioned on the '/' of the closing tag.
211 assert(cur_char() ==
'/');
214 sax::parser_element elem;
215 element_name(elem, begin_pos);
// The closing tag must end with '>' at the current position.
217 if (cur_char() !=
'>')
218 throw sax::malformed_xml_error(
"expected '>' to close the element.", offset());
// Record the end offset before reporting the end of the element.
220 elem.end_pos = offset();
222 m_handler.end_element(elem);
223 #if ORCUS_DEBUG_SAX_PARSER
224 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
227 m_root_elem_open =
false;
/**
 * Parse a tag that begins with "<!": a comment, a CDATA section or a
 * DOCTYPE declaration, judging by the checks below.
 *
 * @throws sax::malformed_xml_error when the tag is too short, a comment is
 *         malformed, or the tag cannot be parsed as a special tag.
 */
230 template<
typename _Handler,
typename _Config>
231 void sax_parser<_Handler,_Config>::special_tag()
// Precondition: positioned on the '!' that follows '<'.
233 assert(cur_char() ==
'!');
// The remaining length is captured up front.
235 size_t len = remains();
237 throw sax::malformed_xml_error(
"special tag too short.", offset());
// The character after '!' selects the kind of special tag.
239 switch (next_and_char())
// Comment: a '-' is required at the next position.
244 if (next_and_char() !=
'-')
245 throw sax::malformed_xml_error(
"comment expected.", offset());
249 throw sax::malformed_xml_error(
"malformed comment.", offset());
// CDATA: the following six characters must read "CDATA[".
258 expects_next(
"CDATA[", 6);
// DOCTYPE: the following six characters must read "OCTYPE" (the leading
// 'D' is presumably the character the switch matched on).
266 expects_next(
"OCTYPE", 6);
267 skip_space_and_control();
273 throw sax::malformed_xml_error(
"failed to parse special tag.", offset());
/**
 * Parse a "<?name ... ?>" declaration and notify the handler of its start
 * and end.
 *
 * @param name_check expected declaration name, or null to accept any name.
 * @throws sax::malformed_xml_error if the name differs from a non-null
 *         name_check, or if the declaration does not end with "?>".
 */
277 template<
typename _Handler,
typename _Config>
278 void sax_parser<_Handler,_Config>::declaration(
const char* name_check)
// Precondition: positioned on the '?' that follows '<'.
280 assert(cur_char() ==
'?');
286 #if ORCUS_DEBUG_SAX_PARSER
287 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
// The name is verified only when the caller supplied one.
290 if (name_check && decl_name != name_check)
292 std::ostringstream os;
293 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
294 throw sax::malformed_xml_error(os.str(), offset());
297 m_handler.start_declaration(decl_name);
298 skip_space_and_control();
// Keep going until the '?' that begins the closing "?>" is reached.
301 while (cur_char_checked() !=
'?')
304 skip_space_and_control();
// The '?' must be followed immediately by '>'.
306 if (next_char_checked() !=
'>')
307 throw sax::malformed_xml_error(
"declaration must end with '?>'.", offset());
309 m_handler.end_declaration(decl_name);
312 #if ORCUS_DEBUG_SAX_PARSER
313 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
/**
 * Scan a CDATA section and pass its content to the handler as characters.
 *
 * @throws sax::malformed_xml_error with "malformed CDATA section."
 */
317 template<
typename _Handler,
typename _Config>
318 void sax_parser<_Handler,_Config>::cdata()
320 size_t len = remains();
// p0 marks where the CDATA content begins.
324 const char* p0 = mp_char;
// i counts the characters scanned so far; match tracks progress toward the
// terminator (presumably the number of consecutive ']' seen -- confirm).
325 size_t i = 0, match = 0;
326 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
// A '>' seen while match is 2 ends the section.
340 else if (c ==
'>' && match == 2)
// Leave the two characters that precede '>' out of the reported content.
343 size_t cdata_len = i - 2;
344 m_handler.characters(pstring(p0, cdata_len), transient_stream());
351 throw sax::malformed_xml_error(
"malformed CDATA section.", offset());
/**
 * Parse a DOCTYPE declaration into a sax::doctype_declaration and pass it
 * to the handler.
 *
 * The code below fills in the root element name, a PUBLIC or private
 * keyword, the FPI value and, when present, a URI value.
 *
 * @throws sax::malformed_xml_error if the section is too short, the keyword
 *         is misspelled, or the closing '>' is missing.
 */
354 template<
typename _Handler,
typename _Config>
355 void sax_parser<_Handler,_Config>::doctype()
// The root element name comes first.
358 sax::doctype_declaration param;
359 name(param.root_element);
360 skip_space_and_control();
363 size_t len = remains();
365 throw sax::malformed_xml_error(
"DOCTYPE section too short.", offset());
367 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
// The keyword is matched one character at a time; the letters checked
// here are "UBLIC", so the leading 'P' is presumably matched beforehand.
371 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
372 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
374 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
// Likewise for "YSTEM", with the leading 'S' presumably matched beforehand.
378 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
379 throw sax::malformed_xml_error(
"malformed DOCTYPE section.", offset());
383 skip_space_and_control();
384 has_char_throw(
"DOCTYPE section too short.");
// Read the FPI value.
387 value(param.fpi,
false);
389 has_char_throw(
"DOCTYPE section too short.");
390 skip_space_and_control();
391 has_char_throw(
"DOCTYPE section too short.");
// A '>' here means the declaration ends without a URI.
393 if (cur_char() ==
'>')
396 #if ORCUS_DEBUG_SAX_PARSER
397 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
399 m_handler.doctype(param);
// Otherwise read the URI value that follows the FPI.
405 value(param.uri,
false);
407 has_char_throw(
"DOCTYPE section too short.");
408 skip_space_and_control();
409 has_char_throw(
"DOCTYPE section too short.");
// After the URI the declaration must be closed.
411 if (cur_char() !=
'>')
412 throw sax::malformed_xml_error(
"malformed DOCTYPE section - closing '>' expected but not found.", offset());
414 #if ORCUS_DEBUG_SAX_PARSER
415 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
417 m_handler.doctype(param);
/**
 * Scan character content and pass it to the handler.  Text containing '&'
 * goes through the cell buffer; plain text is passed as a direct view of
 * the stream.
 */
421 template<
typename _Handler,
typename _Config>
422 void sax_parser<_Handler,_Config>::characters()
// p0 marks the start of the text run.
424 const char* p0 = mp_char;
425 for (; has_char(); next())
// A '<' marks the end of the text run.
427 if (cur_char() ==
'<')
// An '&' begins an encoded character, so the text goes through the buffer.
430 if (cur_char() ==
'&')
433 cell_buffer& buf = get_cell_buffer();
// Put the text seen so far into the buffer, then let
// characters_with_encoded_char() process the rest into the same buffer.
435 buf.append(p0, mp_char-p0);
436 characters_with_encoded_char(buf);
// Empty result: report an empty string.
438 m_handler.characters(pstring(), transient_stream());
// Non-empty result: report the buffer content.  The second argument is
// true here, presumably because the buffer is temporary storage -- confirm.
440 m_handler.characters(pstring(buf.get(), buf.size()),
true);
// No encoded character involved: report the range [p0, mp_char) directly.
447 pstring val(p0, mp_char-p0);
448 m_handler.characters(val, transient_stream());
/**
 * Parse a single attribute and pass it to the handler.
 *
 * @throws sax::malformed_xml_error if the attribute does not have the
 *         "name=.." form.
 */
452 template<
typename _Handler,
typename _Config>
453 void sax_parser<_Handler,_Config>::attribute()
455 sax::parser_attribute attr;
456 pstring attr_ns_name, attr_name, attr_value;
// Read the attribute's namespace and name into attr.
457 attribute_name(attr.ns, attr.name);
459 #if ORCUS_DEBUG_SAX_PARSER
460 cout <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'" << endl;
463 skip_space_and_control();
// Error path: the message includes the parsed namespace and name.
468 std::ostringstream os;
469 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
470 throw sax::malformed_xml_error(os.str(), offset());
474 skip_space_and_control();
// value() fills attr.value; its return value is stored as the attribute's
// transient flag.
476 attr.transient = value(attr.value,
true);
481 #if ORCUS_DEBUG_SAX_PARSER
482 cout <<
"sax_parser::attribute: value='" << attr.value <<
"'" << endl;
485 m_handler.attribute(attr);