8 #ifndef ORCUS_SAX_PARSER_HPP 9 #define ORCUS_SAX_PARSER_HPP 11 #include "sax_parser_base.hpp" 29 template<
typename _Handler,
typename _Config = sax_parser_default_config>
33 typedef _Handler handler_type;
34 typedef _Config config_type;
36 sax_parser(
const char* content,
const size_t size, handler_type& handler);
50 void element_open(
const char* begin_pos);
51 void element_close(
const char* begin_pos);
53 void declaration(
const char* name_check);
60 handler_type& m_handler;
63 template<
typename _Handler,
typename _Config>
65 const char* content,
const size_t size, handler_type& handler) :
71 template<
typename _Handler,
typename _Config>
76 template<
typename _Handler,
typename _Config>
85 assert(m_buffer_pos == 0);
88 template<
typename _Handler,
typename _Config>
94 if (!has_char() || cur_char() !=
'<')
97 if (config_type::baseline_version >= 11)
101 if (next_char_checked() !=
'?')
108 template<
typename _Handler,
typename _Config>
113 if (cur_char() ==
'<')
116 if (!m_root_elem_open)
120 else if (m_nest_level)
128 template<
typename _Handler,
typename _Config>
131 assert(cur_char() ==
'<');
132 const char* pos = mp_char;
133 char c = next_char_checked();
143 declaration(
nullptr);
146 if (!is_alpha(c) && c !=
'_')
152 template<
typename _Handler,
typename _Config>
155 assert(is_alpha(cur_char()) || cur_char() ==
'_');
158 element_name(elem, begin_pos);
167 if (next_and_char() !=
'>')
170 elem.end_pos = mp_char;
171 m_handler.start_element(elem);
173 m_handler.end_element(elem);
175 m_root_elem_open =
false;
176 #if ORCUS_DEBUG_SAX_PARSER 177 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"' (self-closing)" << endl;
185 elem.end_pos = mp_char;
187 m_handler.start_element(elem);
189 #if ORCUS_DEBUG_SAX_PARSER 190 cout <<
"element_open: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
199 template<
typename _Handler,
typename _Config>
202 assert(cur_char() ==
'/');
206 element_name(elem, begin_pos);
208 if (cur_char() !=
'>')
211 elem.end_pos = mp_char;
213 m_handler.end_element(elem);
214 #if ORCUS_DEBUG_SAX_PARSER 215 cout <<
"element_close: ns='" << elem.ns <<
"', name='" << elem.name <<
"'" << endl;
218 m_root_elem_open =
false;
221 template<
typename _Handler,
typename _Config>
224 assert(cur_char() ==
'!');
226 size_t len = remains();
230 switch (next_and_char())
235 if (next_and_char() !=
'-')
249 expects_next(
"CDATA[", 6);
257 expects_next(
"OCTYPE", 6);
268 template<
typename _Handler,
typename _Config>
271 assert(cur_char() ==
'?');
277 #if ORCUS_DEBUG_SAX_PARSER 278 cout <<
"sax_parser::declaration: start name='" << decl_name <<
"'" << endl;
281 if (name_check && decl_name != name_check)
283 std::ostringstream os;
284 os <<
"declaration name of '" << name_check <<
"' was expected, but '" << decl_name <<
"' was found instead.";
288 m_handler.start_declaration(decl_name);
292 while (cur_char_checked() !=
'?')
297 if (next_char_checked() !=
'>')
300 m_handler.end_declaration(decl_name);
303 #if ORCUS_DEBUG_SAX_PARSER 304 cout <<
"sax_parser::declaration: end name='" << decl_name <<
"'" << endl;
308 template<
typename _Handler,
typename _Config>
311 size_t len = remains();
315 const char* p0 = mp_char;
316 size_t i = 0, match = 0;
317 for (
char c = cur_char(); i < len; ++i, c = next_and_char())
331 else if (c ==
'>' && match == 2)
334 size_t cdata_len = i - 2;
335 m_handler.characters(
pstring(p0, cdata_len),
false);
345 template<
typename _Handler,
typename _Config>
350 name(param.root_element);
354 size_t len = remains();
358 param.keyword = sax::doctype_declaration::keyword_type::dtd_private;
362 if (next_and_char() !=
'U' || next_and_char() !=
'B' || next_and_char() !=
'L' || next_and_char() !=
'I' || next_and_char() !=
'C')
365 param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
369 if (next_and_char() !=
'Y' || next_and_char() !=
'S' || next_and_char() !=
'T' || next_and_char() !=
'E' || next_and_char() !=
'M')
375 has_char_throw(
"DOCTYPE section too short.");
378 value(param.fpi,
false);
380 has_char_throw(
"DOCTYPE section too short.");
382 has_char_throw(
"DOCTYPE section too short.");
384 if (cur_char() ==
'>')
387 #if ORCUS_DEBUG_SAX_PARSER 388 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"'" << endl;
390 m_handler.doctype(param);
396 value(param.uri,
false);
398 has_char_throw(
"DOCTYPE section too short.");
400 has_char_throw(
"DOCTYPE section too short.");
402 if (cur_char() !=
'>')
405 #if ORCUS_DEBUG_SAX_PARSER 406 cout <<
"sax_parser::doctype: root='" << param.root_element <<
"', fpi='" << param.fpi <<
"' uri='" << param.uri <<
"'" << endl;
408 m_handler.doctype(param);
412 template<
typename _Handler,
typename _Config>
415 const char* p0 = mp_char;
416 for (; has_char(); next())
418 if (cur_char() ==
'<')
421 if (cur_char() ==
'&')
426 buf.append(p0, mp_char-p0);
427 characters_with_encoded_char(buf);
429 m_handler.characters(
pstring(),
false);
431 m_handler.characters(
pstring(buf.get(), buf.size()),
true);
439 m_handler.characters(val,
false);
443 template<
typename _Handler,
typename _Config>
447 pstring attr_ns_name, attr_name, attr_value;
448 attribute_name(attr.ns, attr.name);
450 #if ORCUS_DEBUG_SAX_PARSER 451 std::ostringstream os;
452 os <<
"sax_parser::attribute: ns='" << attr.ns <<
"', name='" << attr.name <<
"'";
458 std::ostringstream os;
459 os <<
"Attribute must begin with 'name=..'. (ns='" << attr.ns <<
"', name='" << attr.name <<
"')";
464 attr.transient = value(attr.value,
true);
469 #if ORCUS_DEBUG_SAX_PARSER 470 os <<
" value='" << attr.value <<
"'" << endl;
474 m_handler.attribute(attr);
Definition: pstring.hpp:24
Definition: cell_buffer.hpp:21
Definition: sax_parser.hpp:15
static const uint8_t baseline_version
Definition: sax_parser.hpp:22
Definition: sax_parser_base.hpp:100
Definition: sax_parser_base.hpp:85
Definition: sax_parser_base.hpp:45
Definition: base64.hpp:15
Definition: sax_parser.hpp:30
Definition: sax_parser_base.hpp:108