Orcus
Loading...
Searching...
No Matches
css_parser.hpp
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
6 */
7
8#ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
9#define INCLUDED_ORCUS_CSS_PARSER_HPP
10
11#define ORCUS_DEBUG_CSS 0
12
13#include "parser_global.hpp"
14#include "css_parser_base.hpp"
15#include "global.hpp"
16
17#include <cassert>
18#include <algorithm>
19
20#if ORCUS_DEBUG_CSS
21#include <iostream>
22using std::cout;
23using std::endl;
24#endif
25
26namespace orcus {
27
33{
34public:
/**
 * No-op callback for the name of an at-rule.
 *
 * The css_parser template in this file calls a handler method with this
 * name, passing the identifier it read right after '@'.  This
 * implementation ignores both arguments.
 *
 * @param p character buffer holding the name.
 * @param n length of the name.
 */
void at_rule_name([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
39
/**
 * No-op callback for the type part of a simple selector.
 *
 * The css_parser template in this file calls a handler method with this
 * name when a simple selector does not start with '.' or '#'.  This
 * implementation ignores both arguments.
 *
 * @param p character buffer holding the type name.
 * @param n length of the type name.
 */
void simple_selector_type([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
44
/**
 * No-op callback for a class component of a simple selector.
 *
 * The css_parser template in this file calls a handler method with this
 * name, passing the identifier that follows a '.'.  This implementation
 * ignores both arguments.
 *
 * @param p character buffer holding the class name.
 * @param n length of the class name.
 */
void simple_selector_class([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
49
50 void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
51 {
52 (void)pe;
53 }
54
55 void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
56 {
57 (void)pc;
58 }
59
/**
 * No-op callback for an ID component of a simple selector.
 *
 * The css_parser template in this file calls a handler method with this
 * name, passing the identifier that follows a '#'.  This implementation
 * ignores both arguments.
 *
 * @param p character buffer holding the ID.
 * @param n length of the ID.
 */
void simple_selector_id([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
64
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name once it has processed all components of one
 * simple selector.
 */
void end_simple_selector() {}
66
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name after a ',' selector separator and right before
 * begin_block().
 */
void end_selector() {}
68
69 void combinator(orcus::css::combinator_t combinator)
70 {
71 (void)combinator;
72 }
73
/**
 * No-op callback for the name of a property.
 *
 * The css_parser template in this file calls a handler method with this
 * name, passing the identifier read at the start of a property.  This
 * implementation ignores both arguments.
 *
 * @param p character buffer holding the property name.
 * @param n length of the property name.
 */
void property_name([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
84
/**
 * No-op callback for a plain (non-function) property value.
 *
 * The css_parser template in this file calls a handler method with this
 * name for both quoted and unquoted values.  This implementation ignores
 * both arguments.
 *
 * @param p character buffer holding the value.
 * @param n length of the value.
 */
void value([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
95
/**
 * No-op callback for an rgb(...) function value.  This implementation
 * ignores all arguments.
 *
 * @param red   red component.
 * @param green green component.
 * @param blue  blue component.
 */
void rgb(
    [[maybe_unused]] uint8_t red,
    [[maybe_unused]] uint8_t green,
    [[maybe_unused]] uint8_t blue)
{
}
107
/**
 * No-op callback for an rgba(...) function value.  This implementation
 * ignores all arguments.
 *
 * @param red   red component.
 * @param green green component.
 * @param blue  blue component.
 * @param alpha alpha component; the css_parser template in this file
 *              clamps it to [0, 1] before making the call.
 */
void rgba(
    [[maybe_unused]] uint8_t red,
    [[maybe_unused]] uint8_t green,
    [[maybe_unused]] uint8_t blue,
    [[maybe_unused]] double alpha)
{
}
121
/**
 * No-op callback for an hsl(...) function value.  This implementation
 * ignores all arguments.
 *
 * @param hue   hue component.
 * @param sat   saturation component.
 * @param light lightness component.
 */
void hsl(
    [[maybe_unused]] uint8_t hue,
    [[maybe_unused]] uint8_t sat,
    [[maybe_unused]] uint8_t light)
{
}
133
/**
 * No-op callback for an hsla(...) function value.  This implementation
 * ignores all arguments.
 *
 * @param hue   hue component.
 * @param sat   saturation component.
 * @param light lightness component.
 * @param alpha alpha component; the css_parser template in this file
 *              clamps it to [0, 1] before making the call.
 */
void hsla(
    [[maybe_unused]] uint8_t hue,
    [[maybe_unused]] uint8_t sat,
    [[maybe_unused]] uint8_t light,
    [[maybe_unused]] double alpha)
{
}
147
/**
 * No-op callback for a url(...) function value.
 *
 * The css_parser template in this file calls a handler method with this
 * name for both quoted and unquoted URL arguments.  This implementation
 * ignores both arguments.
 *
 * @param p character buffer holding the URL.
 * @param n length of the URL.
 */
void url([[maybe_unused]] const char* p, [[maybe_unused]] size_t n)
{
}
158
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name once, before it parses the first rule.
 */
void begin_parse() {}
163
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name once, after the whole stream has been consumed.
 */
void end_parse() {}
168
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name when it reaches the '{' that opens a block.
 */
void begin_block() {}
174
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name when it reaches the '}' that closes a block.
 */
void end_block() {}
180
185
/**
 * No-op callback.  The css_parser template in this file calls a handler
 * method with this name after it has processed all values of a property.
 */
void end_property() {}
190};
191
192template<typename _Handler>
194{
195public:
196 typedef _Handler handler_type;
197
198 css_parser(const char* p, size_t n, handler_type& hdl);
199 void parse();
200
201private:
202 // Handlers - at the time a handler is called the current position is
203 // expected to point to the first unprocessed non-blank character, and
204 // each handler must set the current position to the next unprocessed
205 // non-blank character when it finishes.
206 void rule();
207 void at_rule_name();
208 void simple_selector_name();
209 void property_name();
210 void property();
211 void quoted_value(char c);
212 void value();
213 void function_value(std::string_view v);
214 void function_rgb(bool alpha);
215 void function_hsl(bool alpha);
216 void function_url();
217 void name_sep();
218 void property_sep();
219 void block();
220
221 handler_type& m_handler;
222};
223
224template<typename _Handler>
225css_parser<_Handler>::css_parser(const char* p, size_t n, handler_type& hdl) :
226 css::parser_base(p, n), m_handler(hdl) {}
227
228template<typename _Handler>
229void css_parser<_Handler>::parse()
230{
231 shrink_stream();
232
233#if ORCUS_DEBUG_CSS
234 std::cout << "compressed: '";
235 const char* p = mp_char;
236 for (; p != mp_end; ++p)
237 std::cout << *p;
238 std::cout << "'" << std::endl;
239#endif
240 m_handler.begin_parse();
241 while (has_char())
242 rule();
243 m_handler.end_parse();
244}
245
246template<typename _Handler>
247void css_parser<_Handler>::rule()
248{
249 // <selector name> , ... , <selector name> <block>
250 while (has_char())
251 {
252 if (skip_comment())
253 continue;
254
255 char c = cur_char();
256 if (is_alpha(c))
257 {
258 simple_selector_name();
259 continue;
260 }
261
262 switch (c)
263 {
264 case '>':
265 set_combinator(c, css::combinator_t::direct_child);
266 break;
267 case '+':
268 set_combinator(c, css::combinator_t::next_sibling);
269 break;
270 case '.':
271 case '#':
272 case '@':
273 simple_selector_name();
274 break;
275 case ',':
276 name_sep();
277 break;
278 case '{':
279 reset_before_block();
280 block();
281 break;
282 default:
283 css::parse_error::throw_with("rule: failed to parse '", c, "'");
284 }
285 }
286}
287
288template<typename _Handler>
289void css_parser<_Handler>::at_rule_name()
290{
291 assert(has_char());
292 assert(cur_char() == '@');
293 next();
294 char c = cur_char();
295 if (!is_alpha(c))
296 throw css::parse_error("at_rule_name: first character of an at-rule name must be an alphabet.");
297
298 const char* p;
299 size_t len;
300 identifier(p, len);
301 skip_blanks();
302
303 m_handler.at_rule_name(p, len);
304#if ORCUS_DEBUG_CSS
305 std::string foo(p, len);
306 std::cout << "at-rule name: " << foo.c_str() << std::endl;
307#endif
308}
309
310template<typename _Handler>
311void css_parser<_Handler>::simple_selector_name()
312{
313 assert(has_char());
314 char c = cur_char();
315 if (c == '@')
316 {
317 // This is the name of an at-rule.
318 at_rule_name();
319 return;
320 }
321
322 if (m_simple_selector_count)
323 {
324#if ORCUS_DEBUG_CSS
325 cout << "combinator: " << m_combinator << endl;
326#endif
327 m_handler.combinator(m_combinator);
328 m_combinator = css::combinator_t::descendant;
329 }
330 assert(is_alpha(c) || c == '.' || c == '#');
331
332 const char* p = nullptr;
333 size_t n = 0;
334
335#if ORCUS_DEBUG_CSS
336 cout << "simple_selector_name: (" << m_simple_selector_count << ")";
337#endif
338
339 if (c != '.' && c != '#')
340 {
341 identifier(p, n);
342#if ORCUS_DEBUG_CSS
343 std::string s(p, n);
344 cout << " type=" << s;
345#endif
346 m_handler.simple_selector_type(p, n);
347 }
348
349 bool in_loop = true;
350 while (in_loop && has_char())
351 {
352 switch (cur_char())
353 {
354 case '.':
355 {
356 next();
357 identifier(p, n);
358 m_handler.simple_selector_class(p, n);
359#if ORCUS_DEBUG_CSS
360 std::string s(p, n);
361 std::cout << " class=" << s;
362#endif
363 }
364 break;
365 case '#':
366 {
367 next();
368 identifier(p, n);
369 m_handler.simple_selector_id(p, n);
370#if ORCUS_DEBUG_CSS
371 std::string s(p, n);
372 std::cout << " id=" << s;
373#endif
374 }
375 break;
376 case ':':
377 {
378 // This could be either a pseudo element or pseudo class.
379 next();
380 if (cur_char() == ':')
381 {
382 // pseudo element.
383 next();
384 identifier(p, n);
385 css::pseudo_element_t elem = css::to_pseudo_element({p, n});
386 if (!elem)
387 css::parse_error::throw_with(
388 "selector_name: unknown pseudo element '", p, n, "'");
389
390 m_handler.simple_selector_pseudo_element(elem);
391 }
392 else
393 {
394 // pseudo class (or pseudo element in the older version of CSS).
395 identifier(p, n);
396 css::pseudo_class_t pc = css::to_pseudo_class({p, n});
397 if (!pc)
398 css::parse_error::throw_with(
399 "selector_name: unknown pseudo class '", p, n, "'");
400
401 m_handler.simple_selector_pseudo_class(pc);
402 }
403 }
404 break;
405 default:
406 in_loop = false;
407 }
408 }
409
410 m_handler.end_simple_selector();
411 skip_comments_and_blanks();
412
413 ++m_simple_selector_count;
414
415#if ORCUS_DEBUG_CSS
416 std::cout << std::endl;
417#endif
418}
419
420template<typename _Handler>
421void css_parser<_Handler>::property_name()
422{
423 // <identifier>
424
425 assert(has_char());
426 char c = cur_char();
427 if (!is_alpha(c) && c != '.')
428 css::parse_error::throw_with(
429 "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'");
430
431 const char* p;
432 size_t len;
433 identifier(p, len);
434 skip_comments_and_blanks();
435
436 m_handler.property_name(p, len);
437#if ORCUS_DEBUG_CSS
438 std::string foo(p, len);
439 std::cout << "property name: " << foo.c_str() << std::endl;
440#endif
441}
442
443template<typename _Handler>
444void css_parser<_Handler>::property()
445{
446 // <property name> : <value> , ... , <value>
447
448 m_handler.begin_property();
449 property_name();
450 if (cur_char() != ':')
451 throw css::parse_error("property: ':' expected.");
452 next();
453 skip_comments_and_blanks();
454
455 bool in_loop = true;
456 while (in_loop && has_char())
457 {
458 value();
459 char c = cur_char();
460 switch (c)
461 {
462 case ',':
463 {
464 // separated by commas.
465 next();
466 skip_comments_and_blanks();
467 }
468 break;
469 case ';':
470 case '}':
471 in_loop = false;
472 break;
473 default:
474 ;
475 }
476 }
477
478 skip_comments_and_blanks();
479 m_handler.end_property();
480}
481
482template<typename _Handler>
483void css_parser<_Handler>::quoted_value(char c)
484{
485 // Parse until the the end quote is reached.
486 const char* p = nullptr;
487 size_t len = 0;
488 literal(p, len, c);
489 next();
490 skip_blanks();
491
492 m_handler.value(p, len);
493#if ORCUS_DEBUG_CSS
494 std::string foo(p, len);
495 std::cout << "quoted value: " << foo.c_str() << std::endl;
496#endif
497}
498
499template<typename _Handler>
500void css_parser<_Handler>::value()
501{
502 assert(has_char());
503 char c = cur_char();
504 if (c == '"' || c == '\'')
505 {
506 quoted_value(c);
507 return;
508 }
509
510 std::string_view v = parse_value();
511 if (v.empty())
512 return;
513
514 if (cur_char() == '(')
515 {
516 function_value(v);
517 return;
518 }
519
520 m_handler.value(v.data(), v.size());
521
522 skip_comments_and_blanks();
523
524#if ORCUS_DEBUG_CSS
525 std::cout << "value: " << v << std::endl;
526#endif
527}
528
529template<typename _Handler>
530void css_parser<_Handler>::function_value(std::string_view v)
531{
532 assert(cur_char() == '(');
533 css::property_function_t func = css::to_property_function(v);
534 if (func == css::property_function_t::unknown)
535 css::parse_error::throw_with("function_value: unknown function '", v, "'");
536
537 // Move to the first character of the first argument.
538 next();
539 skip_comments_and_blanks();
540
541 switch (func)
542 {
543 case css::property_function_t::rgb:
544 function_rgb(false);
545 break;
546 case css::property_function_t::rgba:
547 function_rgb(true);
548 break;
549 case css::property_function_t::hsl:
550 function_hsl(false);
551 break;
552 case css::property_function_t::hsla:
553 function_hsl(true);
554 break;
555 case css::property_function_t::url:
556 function_url();
557 break;
558 default:
559 css::parse_error::throw_with("function_value: unhandled function '", v, "'");
560 }
561
562 char c = cur_char();
563 if (c != ')')
564 css::parse_error::throw_with("function_value: ')' expected but '", c, "' found.");
565
566 next();
567 skip_comments_and_blanks();
568}
569
570template<typename _Handler>
571void css_parser<_Handler>::function_rgb(bool alpha)
572{
573 // rgb(num, num, num) rgba(num, num, num, float)
574
575 uint8_t vals[3];
576 uint8_t* p = vals;
577 const uint8_t* plast = p + 2;
578 char c = 0;
579
580 for (; ; ++p)
581 {
582 *p = parse_uint8();
583
584 skip_comments_and_blanks();
585
586 if (p == plast)
587 break;
588
589 c = cur_char();
590
591 if (c != ',')
592 css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
593
594 next();
595 skip_comments_and_blanks();
596 }
597
598 if (alpha)
599 {
600 c = cur_char();
601 if (c != ',')
602 css::parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.");
603
604 next();
605 skip_comments_and_blanks();
606
607 double alpha_val = parse_double_or_throw();
608
609 alpha_val = std::clamp(alpha_val, 0.0, 1.0);
610 m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
611 }
612 else
613 m_handler.rgb(vals[0], vals[1], vals[2]);
614
615#if ORCUS_DEBUG_CSS
616 std::cout << "rgb";
617 if (alpha)
618 std::cout << 'a';
619 std::cout << '(';
620 p = vals;
621 const uint8_t* pend = plast + 1;
622 for (; p != pend; ++p)
623 std::cout << ' ' << (int)*p;
624 std::cout << " )" << std::endl;
625#endif
626}
627
628template<typename _Handler>
629void css_parser<_Handler>::function_hsl(bool alpha)
630{
631 // hsl(num, percent, percent) hsla(num, percent, percent, float)
632
633 double hue = parse_double_or_throw(); // casted to uint8_t eventually.
634 hue = std::clamp(hue, 0.0, 360.0);
635 skip_comments_and_blanks();
636
637 char c = cur_char();
638 if (c != ',')
639 css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
640
641 next();
642 skip_comments_and_blanks();
643
644 double sat = parse_percent();
645 sat = std::clamp(sat, 0.0, 100.0);
646 skip_comments_and_blanks();
647
648 c = cur_char();
649 if (c != ',')
650 css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
651
652 next();
653 skip_comments_and_blanks();
654
655 double light = parse_percent();
656 light = std::clamp(light, 0.0, 100.0);
657 skip_comments_and_blanks();
658
659 if (!alpha)
660 {
661 m_handler.hsl(hue, sat, light);
662 return;
663 }
664
665 c = cur_char();
666 if (c != ',')
667 css::parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.");
668
669 next();
670 skip_comments_and_blanks();
671
672 double alpha_val = parse_double_or_throw();
673 alpha_val = std::clamp(alpha_val, 0.0, 1.0);
674 skip_comments_and_blanks();
675 m_handler.hsla(hue, sat, light, alpha_val);
676}
677
678template<typename _Handler>
679void css_parser<_Handler>::function_url()
680{
681 char c = cur_char();
682
683 if (c == '"' || c == '\'')
684 {
685 // Quoted URL value.
686 const char* p;
687 size_t len;
688 literal(p, len, c);
689 next();
690 skip_comments_and_blanks();
691 m_handler.url(p, len);
692#if ORCUS_DEBUG_CSS
693 std::cout << "url(" << std::string(p, len) << ")" << std::endl;
694#endif
695 return;
696 }
697
698 // Unquoted URL value.
699 const char* p;
700 size_t len;
701 skip_to_or_blank(p, len, ORCUS_ASCII(")"));
702 skip_comments_and_blanks();
703 m_handler.url(p, len);
704#if ORCUS_DEBUG_CSS
705 std::cout << "url(" << std::string(p, len) << ")" << std::endl;
706#endif
707}
708
709template<typename _Handler>
710void css_parser<_Handler>::name_sep()
711{
712 assert(cur_char() == ',');
713#if ORCUS_DEBUG_CSS
714 std::cout << "," << std::endl;
715#endif
716 next();
717 skip_blanks();
718 m_handler.end_selector();
719}
720
721template<typename _Handler>
722void css_parser<_Handler>::property_sep()
723{
724#if ORCUS_DEBUG_CSS
725 std::cout << ";" << std::endl;
726#endif
727 next();
728 skip_comments_and_blanks();
729}
730
731template<typename _Handler>
732void css_parser<_Handler>::block()
733{
734 // '{' <property> ';' ... ';' <property> ';'(optional) '}'
735
736 assert(cur_char() == '{');
737#if ORCUS_DEBUG_CSS
738 std::cout << "{" << std::endl;
739#endif
740 m_handler.end_selector();
741 m_handler.begin_block();
742
743 next();
744 skip_comments_and_blanks();
745
746 // parse properties.
747 while (has_char())
748 {
749 property();
750 if (cur_char() != ';')
751 break;
752 property_sep();
753 if (cur_char() == '}')
754 // ';' after the last property. This is optional but allowed.
755 break;
756 }
757
758 if (cur_char() != '}')
759 throw css::parse_error("block: '}' expected.");
760
761 m_handler.end_block();
762
763 next();
764 skip_comments_and_blanks();
765
766#if ORCUS_DEBUG_CSS
767 std::cout << "}" << std::endl;
768#endif
769}
770
771}
772
773#endif
774
775/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Definition: css_parser_base.hpp:32
Definition: css_parser.hpp:33
void end_parse()
Definition: css_parser.hpp:167
void end_block()
Definition: css_parser.hpp:179
void hsl(uint8_t hue, uint8_t sat, uint8_t light)
Definition: css_parser.hpp:129
void value(const char *p, size_t n)
Definition: css_parser.hpp:91
void end_property()
Definition: css_parser.hpp:189
void begin_parse()
Definition: css_parser.hpp:162
void begin_block()
Definition: css_parser.hpp:173
void url(const char *p, size_t n)
Definition: css_parser.hpp:154
void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
Definition: css_parser.hpp:117
void property_name(const char *p, size_t n)
Definition: css_parser.hpp:80
void rgb(uint8_t red, uint8_t green, uint8_t blue)
Definition: css_parser.hpp:103
void begin_property()
Definition: css_parser.hpp:184
void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
Definition: css_parser.hpp:143
Definition: css_parser.hpp:194
Definition: parser_base.hpp:41