5#ifndef MHTML_PARSER_TAGS_INIT_SZ
6# define MHTML_PARSER_TAGS_INIT_SZ 10
13#ifndef MHTML_DUMP_LINE_SZ
14# define MHTML_DUMP_LINE_SZ 255
17#ifndef MHTML_SRC_HREF_SZ_MAX
18# define MHTML_SRC_HREF_SZ_MAX 128
21#ifndef MHTML_TRACE_LVL
22# define MHTML_TRACE_LVL 0
/**
 * \brief Tag flag bit marking STYLE content.
 *
 * The parser ORs this into its tag_flags when the element token is "STYLE";
 * when a text tag is pushed, a tag whose base.flags has this bit set is typed
 * as MHTML_TAG_TYPE_STYLE instead of MHTML_TAG_TYPE_TEXT.
 */
#define MHTML_TAG_FLAG_STYLE 0x02
/**
 * \brief Value stored in the INPUT tag's input_type field by the TYPE
 *        attribute handler for button inputs.
 */
#define MHTML_INPUT_TYPE_BUTTON 0x01
33#define MHTML_ATTRIB_TABLE( f ) \
/**
 * \brief X-macro table of HTML tag types.
 *
 * Invokes f( tag_id, tag_name, fields, disp ) once per tag type:
 *
 * - tag_id:   numeric type ID (becomes MHTML_TAG_TYPE_<tag_name>).
 * - tag_name: bare name token, also used for struct MHTML_TAG_<tag_name> and
 *             the matching member of the tag union.
 * - fields:   type-specific struct member declarations, each terminated with
 *             a semicolon and containing no top-level commas.
 * - disp:     NONE, BLOCK or INLINE (presumably the default display mode of
 *             the tag; confirm against the consumers of this column).
 *
 * IDs are consecutive from 0 so they can index the generated name table.
 */
#define MHTML_TAG_TABLE( f ) \
   f( 0, NONE, void* none;, NONE ) \
   f( 1, BODY, void* none;, BLOCK ) \
   f( 2, DIV, void* none;, BLOCK ) \
   f( 3, HEAD, void* none;, NONE ) \
   f( 4, HTML, void* none;, BLOCK ) \
   f( 5, TEXT, mdata_strpool_idx_t content_idx; size_t content_sz;, INLINE ) \
   f( 6, TITLE, mdata_strpool_idx_t content_idx; size_t content_sz;, NONE ) \
   f( 7, SPAN, void* none;, INLINE ) \
   f( 8, BR, void* none;, BLOCK ) \
   f( 9, STYLE, void* none;, NONE ) \
   f( 10, IMG, char src[MHTML_SRC_HREF_SZ_MAX + 1]; size_t src_sz;, BLOCK ) \
   f( 11, INPUT, uint8_t input_type; char name[MCSS_ID_SZ_MAX + 1]; size_t name_sz; char value[MCSS_ID_SZ_MAX + 1]; size_t value_sz;, INLINE )
/**
 * \brief X-macro table of HTML parser states.
 *
 * Invokes f( name, idx ) once per state, where name is the full
 * MHTML_PSTATE_* identifier and idx is its numeric value (consecutive from
 * 0). The table is expanded elsewhere in this file to define the state
 * constants and the state name list.
 */
#define MHTML_PARSER_PSTATE_TABLE( f ) \
   f( MHTML_PSTATE_NONE, 0 ) \
   f( MHTML_PSTATE_ELEMENT, 1 ) \
   f( MHTML_PSTATE_ATTRIB_KEY, 2 ) \
   f( MHTML_PSTATE_ATTRIB_VAL, 3 ) \
   f( MHTML_PSTATE_END_ELEMENT, 4 ) \
   f( MHTML_PSTATE_STRING, 5 ) \
   f( MHTML_PSTATE_STYLE, 6 )
/**
 * \brief Address of the tag at index idx in the parser's tags storage.
 *
 * \param parser Pointer to the parser.
 * \param idx Index of the tag; not range-checked.
 *
 * NOTE(review): this indexes (parser)->tags directly, while the parser
 * functions in this file fetch tags with mdata_vector_get(); confirm this
 * macro still matches the declared type of tags before relying on it.
 */
#define mhtml_tag( parser, idx ) (&((parser)->tags[(idx)]))
/**
 * \brief Address of the parent of the tag at index idx, or NULL if that tag's
 *        parent index is negative.
 *
 * \param parser Pointer to the parser.
 * \param idx Index of the tag whose parent is wanted; evaluated more than
 *            once, so it must be free of side effects.
 *
 * NOTE(review): this reads tags[idx].parent directly, while the parser
 * functions in this file go through mdata_vector_get() and base.parent;
 * confirm this macro still matches the tag layout before relying on it.
 */
#define mhtml_tag_parent( parser, idx ) \
   (0 <= (parser)->tags[idx].parent ? \
      (&((parser)->tags[(parser)->tags[idx].parent])) : NULL)
/**
 * \brief Address of the first child of the tag at index idx, or NULL if that
 *        tag's first_child index is negative.
 *
 * \param parser Pointer to the parser.
 * \param idx Index of the tag whose first child is wanted; evaluated more
 *            than once, so it must be free of side effects.
 *
 * NOTE(review): this reads tags[idx].first_child directly, while the parser
 * functions in this file go through mdata_vector_get() and base.first_child;
 * confirm this macro still matches the tag layout before relying on it.
 */
#define mhtml_tag_child( parser, idx ) \
   (0 <= (parser)->tags[idx].first_child ? \
      (&((parser)->tags[(parser)->tags[idx].first_child])) : NULL)
/**
 * \brief Address of the next sibling of the tag at index idx, or NULL if that
 *        tag's next_sibling index is negative.
 *
 * \param parser Pointer to the parser.
 * \param idx Index of the tag whose next sibling is wanted; evaluated more
 *            than once, so it must be free of side effects.
 *
 * NOTE(review): this reads tags[idx].next_sibling directly, while the parser
 * functions in this file go through mdata_vector_get() and
 * base.next_sibling; confirm this macro still matches the tag layout before
 * relying on it.
 */
#define mhtml_tag_sibling( parser, idx ) \
   (0 <= (parser)->tags[idx].next_sibling ? \
      (&((parser)->tags[(parser)->tags[idx].next_sibling])) : NULL)
/**
 * \brief Parse state of the HTML parser, as reported by mparser_pstate() for
 *        the parser's embedded base member.
 *
 * The result is compared against the MHTML_PSTATE_* constants throughout
 * this file.
 *
 * \param parser Pointer to the HTML parser.
 */
#define mhtml_parser_pstate( parser ) \
   mparser_pstate( &((parser)->base) )
87#ifdef MPARSER_TRACE_NAMES
88# define mhtml_parser_pstate_push( parser, new_pstate ) \
89 mparser_pstate_push( \
90 "mhtml", &((parser)->base), new_pstate, gc_mhtml_pstate_names )
92# define mhtml_parser_pstate_pop( parser ) \
94 "mhtml", &((parser)->base), gc_mhtml_pstate_names )
96# define mhtml_parser_pstate_push( parser, new_pstate ) \
97 mparser_pstate_push( "mhtml", &((parser)->base), new_pstate )
99# define mhtml_parser_pstate_pop( parser ) \
100 mparser_pstate_pop( "mhtml", &((parser)->base) )
/**
 * \brief Report character c as invalid in the current parse state by
 *        forwarding to mparser_invalid_c() on the parser's base member.
 *
 * \param parser Pointer to the HTML parser.
 * \param c Offending character.
 * \param retval Variable handed through to mparser_invalid_c().
 *
 * NOTE(review): the parser kind is passed as the bare token mhtml here, not
 * the string "mhtml" used by the sibling wrappers; presumably
 * mparser_invalid_c() stringifies or pastes it. Confirm against mparser.h.
 */
#define mhtml_parser_invalid_c( parser, c, retval ) \
   mparser_invalid_c( mhtml, &((parser)->base), c, retval )
/**
 * \brief Reset the parser's token by forwarding to mparser_reset_token() on
 *        the parser's base member, tagged with the parser kind "mhtml".
 *
 * \param parser Pointer to the HTML parser.
 */
#define mhtml_parser_reset_token( parser ) \
   mparser_reset_token( "mhtml", &((parser)->base) )
/**
 * \brief Append character c to the parser's token by forwarding to
 *        mparser_append_token() on the parser's base member, tagged with the
 *        parser kind "mhtml".
 *
 * Call sites in this file assign the result to retval and check it.
 *
 * \param parser Pointer to the HTML parser.
 * \param c Character to append.
 */
#define mhtml_parser_append_token( parser, c ) \
   mparser_append_token( "mhtml", &((parser)->base), c )
/**
 * \brief Log the change, then set the parser's current tag index (tag_iter).
 *
 * Wrapped in do { } while( 0 ) so the two statements act as one: the macro
 * is safe as the sole body of an unbraced if/else and takes the usual
 * trailing semicolon.
 *
 * \param parser Pointer to the HTML parser.
 * \param iter New tag index; may be evaluated more than once, so it must be
 *             free of side effects.
 */
#define mhtml_parser_set_tag_iter( parser, iter ) \
   do { \
      debug_printf( MHTML_TRACE_LVL, "setting tag_iter to: " SSIZE_T_FMT \
         " (previously: " SSIZE_T_FMT ")", \
         (ssize_t)(iter), (parser)->tag_iter ); \
      (parser)->tag_iter = (iter); \
   } while( 0 )
/**
 * \brief Whether the parser's tags vector is currently locked.
 *
 * Forwards to mdata_vector_is_locked(), the same check the parser functions
 * in this file apply to &(parser->tags) before unlocking. The previous form
 * compared tags against NULL, which does not fit the way tags is handled
 * everywhere else in this file (always by address, through mdata_vector_*).
 *
 * \param parser Pointer to the HTML parser.
 */
#define mhtml_parser_is_locked( parser ) \
   (mdata_vector_is_locked( &((parser)->tags) ))
124 ssize_t next_sibling;
127 char classes[MCSS_CLASS_SZ_MAX + 1];
130 char id[MCSS_ID_SZ_MAX + 1];
134#define MHTML_TAG_TABLE_STRUCT( tag_id, tag_name, fields, disp ) \
135 struct MHTML_TAG_ ## tag_name { \
136 struct MHTML_TAG_BASE base; \
140MHTML_TAG_TABLE( MHTML_TAG_TABLE_STRUCT )
/**
 * \brief MHTML_TAG_TABLE callback that emits one member declaration per tag
 *        type: a struct MHTML_TAG_<tag_name> member named <tag_name>.
 *
 * This is what makes accesses such as tag->TEXT.content_idx or tag->IMG.src
 * possible on a tag. Only tag_name is used; the other columns are ignored.
 */
#define MHTML_TAG_TABLE_UNION_FIELD( tag_id, tag_name, fields, disp ) \
   struct MHTML_TAG_ ## tag_name tag_name;
147 MHTML_TAG_TABLE( MHTML_TAG_TABLE_UNION_FIELD )
159 struct MCSS_PARSER styler;
/**
 * \brief MHTML_PARSER_PSTATE_TABLE callback that defines one uint8_t
 *        constant per parser state, named name and initialized to idx.
 */
#define MHTML_PSTATE_TABLE_CONST( name, idx ) \
   MAUG_CONST uint8_t SEG_MCONST name = idx;
181MHTML_PARSER_PSTATE_TABLE( MHTML_PSTATE_TABLE_CONST )
183MPARSER_PSTATE_NAMES( MHTML_PARSER_PSTATE_TABLE, mhtml )
/**
 * \brief MHTML_TAG_TABLE callback that defines one uint16_t constant per tag
 *        type, named MHTML_TAG_TYPE_<tag_name> and initialized to tag_id.
 *
 * Only tag_id and tag_name are used; the other columns are ignored.
 */
#define MHTML_TAG_TABLE_CONST( tag_id, tag_name, fields, disp ) \
   MAUG_CONST uint16_t SEG_MCONST MHTML_TAG_TYPE_ ## tag_name = tag_id;
188MHTML_TAG_TABLE( MHTML_TAG_TABLE_CONST )
190#define MHTML_TAG_TABLE_NAMES( tag_id, tag_name, fields, disp ) \
193MAUG_CONST
char* SEG_MCONST gc_mhtml_tag_names[] = {
194 MHTML_TAG_TABLE( MHTML_TAG_TABLE_NAMES )
198#define MHTML_ATTRIB_TABLE_NAME( name, idx ) \
201static MAUG_CONST
char* SEG_MCONST gc_mhtml_attrib_names[] = {
202 MHTML_ATTRIB_TABLE( MHTML_ATTRIB_TABLE_NAME )
/**
 * \brief MHTML_ATTRIB_TABLE callback that defines one uint16_t constant per
 *        attribute, named MHTML_ATTRIB_KEY_<attrib_name> and initialized to
 *        attrib_id.
 *
 * These constants are what the attribute value handler compares
 * parser->attrib_key against.
 */
#define MHTML_ATTRIB_TABLE_NAME_CONST( attrib_name, attrib_id ) \
   MAUG_CONST uint16_t SEG_MCONST MHTML_ATTRIB_KEY_ ## attrib_name = attrib_id;
209MHTML_ATTRIB_TABLE( MHTML_ATTRIB_TABLE_NAME_CONST )
215 debug_printf( MHTML_TRACE_LVL,
"freeing HTML parser..." );
217 mdata_strpool_free( &(parser->strpool) );
219 mdata_vector_lock( &(parser->tags) );
221 while( 0 < mdata_vector_ct( &(parser->tags) ) ) {
222 tag_iter = mdata_vector_get( &(parser->tags), 0,
union MHTML_TAG );
223 assert( NULL != tag_iter );
225 mdata_vector_unlock( &(parser->tags) );
226 mdata_vector_remove( &(parser->tags), 0 );
227 mdata_vector_lock( &(parser->tags) );
232 mcss_parser_free( &(parser->styler) );
234 if( mdata_vector_is_locked( &(parser->tags) ) ) {
235 mdata_vector_unlock( &(parser->tags) );
238 mdata_vector_free( &(parser->tags) );
248 assert( parser->tag_iter >= 0 );
249 mdata_vector_lock( &(parser->tags) );
250 tag_iter = mdata_vector_get(
251 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
252 assert( NULL != tag_iter );
254 mhtml_parser_set_tag_iter( parser, tag_iter->base.parent );
256 if( 0 <= parser->tag_iter ) {
257 debug_printf( MHTML_TRACE_LVL,
258 "moved iter back to tag %s (" SIZE_T_FMT
")",
259 gc_mhtml_tag_names[tag_iter->base.type], parser->tag_iter );
261 debug_printf( MHTML_TRACE_LVL,
"moved iter back to root (-1)" );
266 mdata_vector_unlock( &(parser->tags) );
273 ssize_t new_tag_idx = -1;
274 ssize_t next_sibling_idx = -1;
279 maug_mzero( &tag_new,
sizeof(
union MHTML_TAG ) );
280 tag_new.base.parent = -1;
281 tag_new.base.first_child = -1;
282 tag_new.base.next_sibling = -1;
283 tag_new.base.style = -1;
287 new_tag_idx = mdata_vector_append(
288 &(parser->tags), &tag_new,
sizeof(
union MHTML_TAG ) );
289 if( 0 > new_tag_idx ) {
290 retval = mdata_retval( new_tag_idx );
294 mdata_vector_lock( &(parser->tags) );
295 p_tag_new = mdata_vector_get(
296 &(parser->tags), new_tag_idx,
union MHTML_TAG );
297 assert( NULL != p_tag_new );
299 if( 0 > parser->tag_iter ) {
300 mhtml_parser_set_tag_iter( parser, new_tag_idx );
305 p_tag_iter = mdata_vector_get(
306 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
307 assert( NULL != p_tag_iter );
310 p_tag_new->base.parent = parser->tag_iter;
313 if( 0 > p_tag_iter->base.first_child ) {
314 debug_printf( MHTML_TRACE_LVL,
315 "zxzx attached " SSIZE_T_FMT
" as first child to "
316 SSIZE_T_FMT, new_tag_idx, parser->tag_iter );
317 p_tag_iter->base.first_child = new_tag_idx;
320 next_sibling_idx = p_tag_iter->base.first_child;
321 p_tag_iter = mdata_vector_get(
322 &(parser->tags), next_sibling_idx,
union MHTML_TAG );
323 while( NULL != p_tag_iter && 0 <= p_tag_iter->base.next_sibling ) {
324 next_sibling_idx = p_tag_iter->base.next_sibling;
325 p_tag_iter = mdata_vector_get(
326 &(parser->tags), next_sibling_idx,
union MHTML_TAG );
328 assert( NULL != p_tag_iter );
329 p_tag_iter->base.next_sibling = new_tag_idx;
330 debug_printf( MHTML_TRACE_LVL,
331 "attached " SSIZE_T_FMT
" as next sibling to "
332 SSIZE_T_FMT, new_tag_idx, next_sibling_idx );
335 debug_printf( MHTML_TRACE_LVL,
336 "pushed new tag " SSIZE_T_FMT
" under " SSIZE_T_FMT,
337 new_tag_idx, p_tag_new->base.parent );
339 mhtml_parser_set_tag_iter( parser, new_tag_idx );
343 mdata_vector_unlock( &(parser->tags) );
353 mparser_token_upper( &((parser)->base), i );
355 if( 0 == strncmp(
"STYLE", parser->base.token, 6 ) ) {
360 parser->
tag_flags |= MHTML_TAG_FLAG_STYLE;
364 retval = mhtml_push_tag( parser );
365 maug_cleanup_if_not_ok();
367 mdata_vector_lock( &(parser->tags) );
369 p_tag_iter = mdata_vector_get(
370 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
371 assert( NULL != p_tag_iter );
375 while(
'\0' != gc_mhtml_tag_names[i][0] ) {
377 parser->base.token_sz == maug_strlen( gc_mhtml_tag_names[i] ) &&
379 gc_mhtml_tag_names[i], parser->base.token, parser->base.token_sz )
381 debug_printf( MHTML_TRACE_LVL,
382 "new tag (" SSIZE_T_FMT
") type: %s",
383 parser->tag_iter, gc_mhtml_tag_names[i] );
384 p_tag_iter->base.type = i;
386 if( MHTML_TAG_TYPE_BODY == i ) {
390 assert( -1 == parser->body_idx );
391 parser->body_idx = parser->tag_iter;
392 debug_printf( MHTML_TRACE_LVL,
393 "set body index to: " SSIZE_T_FMT,
402 error_printf(
"could not find type for new tag (" SSIZE_T_FMT
")",
407 if( mdata_vector_is_locked( &(parser->tags) ) ) {
408 mdata_vector_unlock( &(parser->tags) );
419 retval = mhtml_push_tag( parser );
420 maug_cleanup_if_not_ok();
422 mdata_vector_lock( &(parser->tags) );
424 p_tag_iter = mdata_vector_get(
425 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
426 assert( NULL != p_tag_iter );
429 MHTML_TAG_FLAG_STYLE == (MHTML_TAG_FLAG_STYLE &
430 p_tag_iter->base.flags)
432 p_tag_iter->base.type = MHTML_TAG_TYPE_STYLE;
434 p_tag_iter->base.type = MHTML_TAG_TYPE_TEXT;
437 if( MHTML_TAG_TYPE_STYLE == p_tag_iter->base.type ) {
439 debug_printf( MHTML_TRACE_LVL,
"parsing STYLE tag..." );
440 for( ; parser->base.token_sz > i ; i++ ) {
441 retval = mcss_parse_c( &(parser->styler), parser->base.token[i] );
442 maug_cleanup_if_not_ok();
444 debug_printf( 1,
"out of style characters..." );
445 mcss_parser_flush( &(parser->styler) );
446 mcss_parser_reset( &(parser->styler) );
449 while(
' ' == parser->base.token[parser->base.token_sz - 1] ) {
450 parser->base.token_sz--;
454 p_tag_iter->TEXT.content_idx = mdata_strpool_append(
455 &(parser->strpool), parser->base.token, parser->base.token_sz,
456 MDATA_STRPOOL_FLAG_DEDUPE );
458 p_tag_iter->TEXT.content_idx, 0, SIZE_T_FMT, MERROR_ALLOC );
459 p_tag_iter->TEXT.content_sz = parser->base.token_sz;
462 debug_printf( 1,
"done processing tag contents..." );
466 if( mdata_vector_is_locked( &(parser->tags) ) ) {
467 mdata_vector_unlock( &(parser->tags) );
477 debug_printf( MHTML_TRACE_LVL,
"attrib: %s", parser->base.token );
479 mparser_token_upper( &((parser)->base), i );
483 while(
'\0' != gc_mhtml_attrib_names[i][0] ) {
485 parser->base.token_sz == maug_strlen( gc_mhtml_attrib_names[i] ) &&
487 gc_mhtml_attrib_names[i], parser->base.token, parser->base.token_sz )
490 MHTML_TRACE_LVL,
"new attrib type: %s", gc_mhtml_attrib_names[i] );
491 parser->attrib_key = i;
497 error_printf(
"unknown attrib: %s", parser->base.token );
509 mdata_vector_lock( &(parser->tags) );
511 p_tag_iter = mdata_vector_get(
512 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
513 assert( NULL != p_tag_iter );
515 if( MHTML_ATTRIB_KEY_STYLE == parser->attrib_key ) {
516 debug_printf( MHTML_TRACE_LVL,
"style: %s", parser->base.token );
520 mdata_vector_unlock( &(parser->tags) );
521 retval = mcss_push_style( &(parser->styler), MCSS_SELECT_NONE, NULL, 0 );
522 maug_cleanup_if_not_ok();
523 mdata_vector_lock( &(parser->tags) );
526 p_tag_iter->base.style =
527 mdata_vector_ct( &(parser->styler.styles) ) - 1;
529 for( ; parser->base.token_sz > i ; i++ ) {
530 retval = mcss_parse_c( &(parser->styler), parser->base.token[i] );
531 maug_cleanup_if_not_ok();
534 debug_printf( 1,
"out of style characters..." );
535 mcss_parser_flush( &(parser->styler) );
539 }
else if( MHTML_ATTRIB_KEY_CLASS == parser->attrib_key ) {
541 p_tag_iter->base.classes,
544 p_tag_iter->base.classes_sz = parser->base.token_sz;
546 }
else if( MHTML_ATTRIB_KEY_ID == parser->attrib_key ) {
551 p_tag_iter->base.id_sz = parser->base.token_sz;
553 }
else if( MHTML_ATTRIB_KEY_SRC == parser->attrib_key ) {
558 MHTML_SRC_HREF_SZ_MAX );
559 p_tag_iter->IMG.src_sz = parser->base.token_sz;
561 }
else if( MHTML_ATTRIB_KEY_TYPE == parser->attrib_key ) {
564 maug_strncpy( parser->base.token,
"button", 7 );
566 p_tag_iter->INPUT.input_type =
567 MHTML_INPUT_TYPE_BUTTON;
569 }
else if( MHTML_ATTRIB_KEY_NAME == parser->attrib_key ) {
572 p_tag_iter->INPUT.name,
575 p_tag_iter->INPUT.name_sz = parser->base.token_sz;
577 }
else if( MHTML_ATTRIB_KEY_VALUE == parser->attrib_key ) {
580 p_tag_iter->INPUT.value,
583 p_tag_iter->INPUT.value_sz = parser->base.token_sz;
588 if( mdata_vector_is_locked( &(parser->tags) ) ) {
589 mdata_vector_unlock( &(parser->tags) );
598 size_t tag_iter_type = 0;
602 if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
603 if( 0 < parser->base.token_sz ) {
604 retval = mhtml_push_text_tag( parser );
605 maug_cleanup_if_not_ok();
608 mdata_vector_lock( &(parser->tags) );
609 p_tag_iter = mdata_vector_get(
610 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
611 assert( NULL != p_tag_iter );
612 tag_iter_type = p_tag_iter->base.type;
613 mdata_vector_unlock( &(parser->tags) );
620 MHTML_TAG_TYPE_STYLE != tag_iter_type
623 retval = mhtml_pop_tag( parser );
624 maug_cleanup_if_not_ok();
627 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ELEMENT );
628 maug_cleanup_if_not_ok();
629 mhtml_parser_reset_token( parser );
632 mhtml_parser_invalid_c( parser, c, retval );
637 if( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) ) {
638 retval = mhtml_push_element_tag( parser );
639 maug_cleanup_if_not_ok();
640 mhtml_parser_pstate_pop( parser );
641 mhtml_parser_reset_token( parser );
643 }
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
644 mhtml_parser_pstate_pop( parser );
645 assert( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) );
646 mhtml_parser_pstate_pop( parser );
647 mhtml_parser_reset_token( parser );
649 }
else if( MHTML_PSTATE_END_ELEMENT == mhtml_parser_pstate( parser ) ) {
651 retval = mhtml_pop_tag( parser );
652 maug_cleanup_if_not_ok();
654 mhtml_parser_pstate_pop( parser );
655 if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
656 mhtml_parser_pstate_pop( parser );
658 assert( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) );
659 mhtml_parser_pstate_pop( parser );
660 mhtml_parser_reset_token( parser );
662 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
663 retval = mhtml_parser_append_token( parser, c );
664 maug_cleanup_if_not_ok();
666 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
667 retval = mhtml_parser_append_token( parser, c );
668 maug_cleanup_if_not_ok();
671 mhtml_parser_invalid_c( parser, c, retval );
677 MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) &&
678 0 == parser->base.token_sz
681 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_END_ELEMENT );
682 maug_cleanup_if_not_ok();
684 }
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
686 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_END_ELEMENT );
687 maug_cleanup_if_not_ok();
689 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
690 retval = mhtml_parser_append_token( parser, c );
691 maug_cleanup_if_not_ok();
693 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
694 retval = mhtml_parser_append_token( parser, c );
695 maug_cleanup_if_not_ok();
698 mhtml_parser_invalid_c( parser, c, retval );
703 if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
704 retval = mhtml_push_attrib_key( parser );
705 maug_cleanup_if_not_ok();
706 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ATTRIB_VAL );
707 maug_cleanup_if_not_ok();
708 mhtml_parser_reset_token( parser );
710 }
else if( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) ) {
711 retval = mhtml_parser_append_token( parser, c );
712 maug_cleanup_if_not_ok();
714 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
715 retval = mhtml_parser_append_token( parser, c );
716 maug_cleanup_if_not_ok();
719 mhtml_parser_invalid_c( parser,
'_', retval );
724 if( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) ) {
725 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_STRING );
726 maug_cleanup_if_not_ok();
727 mhtml_parser_reset_token( parser );
729 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
730 retval = _mhtml_set_attrib_val( parser );
731 maug_cleanup_if_not_ok();
732 mhtml_parser_pstate_pop( parser );
733 assert( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) );
734 mhtml_parser_pstate_pop( parser );
735 mhtml_parser_reset_token( parser );
737 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
738 retval = mhtml_parser_append_token( parser, c );
739 maug_cleanup_if_not_ok();
742 mhtml_parser_invalid_c( parser,
'_', retval );
752 if( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) ) {
753 retval = mhtml_push_element_tag( parser );
754 maug_cleanup_if_not_ok();
755 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ATTRIB_KEY );
756 maug_cleanup_if_not_ok();
757 mhtml_parser_reset_token( parser );
759 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
760 retval = mhtml_parser_append_token( parser, c );
761 maug_cleanup_if_not_ok();
763 }
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
766 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
769 0 < parser->base.token_sz &&
770 ' ' != parser->base.token[parser->base.token_sz - 1]
772 retval = mhtml_parser_append_token( parser,
' ' );
773 maug_cleanup_if_not_ok();
777 mhtml_parser_invalid_c( parser,
'_', retval );
782 retval = mhtml_parser_append_token( parser, c );
783 maug_cleanup_if_not_ok();
789 mparser_wait( &((parser)->base) );
793 parser->base.last_c = c;
795 if( mdata_vector_is_locked( &(parser->tags) ) ) {
796 mdata_vector_unlock( &(parser->tags) );
806 mhtml_parser_set_tag_iter( parser, -1 );
807 parser->body_idx = -1;
809 retval = mcss_parser_init( &(parser->styler) );
810 maug_cleanup_if_not_ok();
821 char dump_line[MHTML_DUMP_LINE_SZ + 1];
823 ssize_t first_child = -1;
824 ssize_t next_sibling = -1;
826 char* tag_contents = NULL;
832 mdata_vector_lock( &(parser->tags) );
834 p_tag_iter = mdata_vector_get( &(parser->tags), iter,
union MHTML_TAG );
835 assert( NULL != p_tag_iter );
837 maug_mzero( dump_line, MHTML_DUMP_LINE_SZ + 1 );
839 for( i = 0 ; d > i ; i++ ) {
840 assert( i < MHTML_DUMP_LINE_SZ );
841 strcat( dump_line,
" " );
843 if( MHTML_TAG_TYPE_TEXT == p_tag_iter->base.type ) {
844 if( -1 == p_tag_iter->TEXT.content_idx ) {
845 error_printf(
"no tag content present!" );
849 mdata_strpool_lock( &(parser->strpool) );
852 maug_strlen( dump_line ) + 7
853 + p_tag_iter->TEXT.content_sz < MHTML_DUMP_LINE_SZ
855 strcat( dump_line,
"TEXT: " );
856 tag_contents = mdata_strpool_get(
857 &(parser->strpool), p_tag_iter->TEXT.content_idx );
858 if( NULL == tag_contents ) {
859 error_printf(
"could not retrieve tag contents!" );
860 retval = MERROR_ALLOC;
863 strcat( dump_line, tag_contents );
864 strcat( dump_line,
"\n" );
867 mdata_strpool_unlock( &(parser->strpool) );
871 maug_strlen( dump_line ) +
872 maug_strlen( gc_mhtml_tag_names[p_tag_iter->base.type] ) <
876 gc_mhtml_tag_names[p_tag_iter->base.type] );
880 0 <= p_tag_iter->base.style &&
881 maug_strlen( dump_line ) + 9 < MHTML_DUMP_LINE_SZ
883 strcat( dump_line,
" (styled)" );
887 0 < p_tag_iter->base.id_sz &&
888 maug_strlen( dump_line ) + 7
889 + maug_strlen( p_tag_iter->base.id ) < MHTML_DUMP_LINE_SZ
891 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
892 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
893 " (id: %s)", p_tag_iter->base.id );
897 0 < p_tag_iter->base.classes_sz &&
898 maug_strlen( dump_line ) + 12
899 + maug_strlen( p_tag_iter->base.id ) < MHTML_DUMP_LINE_SZ
901 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
902 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
903 " (classes: %s)", p_tag_iter->base.classes );
907 MHTML_TAG_TYPE_IMG == p_tag_iter->base.type &&
908 0 < p_tag_iter->IMG.src_sz &&
909 maug_strlen( dump_line ) + 8
910 + maug_strlen( p_tag_iter->IMG.src ) < MHTML_DUMP_LINE_SZ
912 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
913 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
914 " (src: %s)", p_tag_iter->IMG.src );
918 MHTML_TAG_TYPE_INPUT == p_tag_iter->base.type &&
919 0 < p_tag_iter->INPUT.value_sz &&
920 maug_strlen( dump_line ) + 10
921 + maug_strlen( p_tag_iter->INPUT.value ) < MHTML_DUMP_LINE_SZ
923 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
924 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
925 " (value: %s)", p_tag_iter->INPUT.value );
930 debug_printf( 1,
"%s", dump_line );
932 first_child = p_tag_iter->base.first_child;
933 next_sibling = p_tag_iter->base.next_sibling;
935 mdata_vector_unlock( &(parser->tags) );
937 retval = mhtml_dump_tree( parser, first_child, d + 1 );
938 maug_cleanup_if_not_ok();
940 retval = mhtml_dump_tree( parser, next_sibling, d );
941 maug_cleanup_if_not_ok();
945 if( mdata_vector_is_locked( &(parser->tags) ) ) {
946 mdata_vector_unlock( &(parser->tags) );
/**
 * \brief MHTML_TAG_TABLE callback that emits an extern declaration (no
 *        initializer) of the uint16_t constant MHTML_TAG_TYPE_<tag_name> for
 *        each tag type.
 *
 * Only tag_name is used; the other columns are ignored.
 */
#define MHTML_TAG_TABLE_CONST( tag_id, tag_name, fields, disp ) \
   extern MAUG_CONST uint16_t SEG_MCONST MHTML_TAG_TYPE_ ## tag_name;
957MHTML_TAG_TABLE( MHTML_TAG_TABLE_CONST )
959extern MAUG_CONST
char* SEG_MCONST gc_mhtml_tag_names[];
uint16_t MERROR_RETVAL
Return type indicating function returns a value from this list.
Definition merror.h:28
A pool of immutable text strings. Deduplicates strings to save memory.
Definition mdata.h:68
A vector of uniformly-sized objects, stored contiguously.
Definition mdata.h:93
uint8_t tag_flags
Flags to be pushed to MHTML_TAG_BASE::flags on next mhtml_push_tag().
Definition mhtml.h:158