5#ifndef MHTML_PARSER_TAGS_INIT_SZ
6# define MHTML_PARSER_TAGS_INIT_SZ 10
13#ifndef MHTML_DUMP_LINE_SZ
14# define MHTML_DUMP_LINE_SZ 255
17#ifndef MHTML_SRC_HREF_SZ_MAX
18# define MHTML_SRC_HREF_SZ_MAX 128
21#ifndef MHTML_TRACE_LVL
22# define MHTML_TRACE_LVL 0
/* Tag flag bit. In this file it is OR'ed into the parser's tag_flags next to
 * the "STYLE" token comparison, and tested against a tag's base.flags when
 * deciding whether a text tag is typed MHTML_TAG_TYPE_STYLE or
 * MHTML_TAG_TYPE_TEXT. */
26#define MHTML_TAG_FLAG_STYLE 0x02
/* Value stored in INPUT.input_type by the TYPE attribute handling for
 * "button". */
28#define MHTML_INPUT_TYPE_BUTTON 0x01
33#define MHTML_ATTRIB_TABLE( f ) \
/*
 * X-macro table of the tag types known to the parser. Each row is
 * f( tag_id, tag_name, fields, disp ):
 *  - tag_id:   numeric type value (becomes MHTML_TAG_TYPE_<tag_name>).
 *  - tag_name: upper-case tag name, token-pasted into identifiers such as
 *              struct MHTML_TAG_<tag_name> and MHTML_TAG_TYPE_<tag_name>.
 *  - fields:   member declarations specific to this tag type
 *              (NOTE(review): presumably spliced into
 *              struct MHTML_TAG_<tag_name> after the base member; the rest
 *              of that struct macro is not visible here).
 *  - disp:     NONE/BLOCK/INLINE keyword (NOTE(review): presumably the
 *              default display mode; no expansion visible here uses it).
 */
43#define MHTML_TAG_TABLE( f ) \
44 f( 0, NONE, void* none;, NONE ) \
45 f( 1, BODY, void* none;, BLOCK ) \
46 f( 2, DIV, void* none;, BLOCK ) \
47 f( 3, HEAD, void* none;, NONE ) \
48 f( 4, HTML, void* none;, BLOCK ) \
49 f( 5, TEXT, ssize_t content_idx; size_t content_sz;, INLINE ) \
50 f( 6, TITLE, ssize_t content_idx; size_t content_sz;, NONE ) \
51 f( 7, SPAN, void* none;, INLINE ) \
52 f( 8, BR, void* none;, BLOCK ) \
53 f( 9, STYLE, void* none;, NONE ) \
54 f( 10, IMG, char src[MHTML_SRC_HREF_SZ_MAX + 1]; size_t src_sz;, BLOCK ) \
55 f( 11, INPUT, uint8_t input_type; char name[MCSS_ID_SZ_MAX + 1]; size_t name_sz; char value[MCSS_ID_SZ_MAX + 1]; size_t value_sz;, INLINE )
/*
 * X-macro table of parser states. Each row is f( name, idx ): name is the
 * identifier of the state constant and idx its numeric value. The states
 * are the values compared against mhtml_parser_pstate() while parsing.
 */
57#define MHTML_PARSER_PSTATE_TABLE( f ) \
58 f( MHTML_PSTATE_NONE, 0 ) \
59 f( MHTML_PSTATE_ELEMENT, 1 ) \
60 f( MHTML_PSTATE_ATTRIB_KEY, 2 ) \
61 f( MHTML_PSTATE_ATTRIB_VAL, 3 ) \
62 f( MHTML_PSTATE_END_ELEMENT, 4 ) \
63 f( MHTML_PSTATE_STRING, 5 ) \
64 f( MHTML_PSTATE_STYLE, 6 )
/* Address of the tag at index idx in the parser's tags.
 * NOTE(review): this indexes (parser)->tags with [], but the tags member is
 * declared as struct MDATA_VECTOR and the functions in this file go through
 * mdata_vector_get() instead -- this macro looks stale; confirm before use. */
69#define mhtml_tag( parser, idx ) (&((parser)->tags[idx]))
/**
 * \brief Get a pointer to the parent of the tag at index idx.
 * \param parser Pointer to the parser holding the tags.
 * \param idx Index of the tag whose parent is wanted (evaluated twice).
 * \return Pointer to the parent tag, or NULL if the parent index is negative.
 *
 * NOTE(review): like mhtml_tag(), this indexes (parser)->tags with []; the
 * tags member is declared as struct MDATA_VECTOR elsewhere in this file, so
 * confirm the macro still matches the data structure before relying on it.
 */
#define mhtml_tag_parent( parser, idx ) \
   (0 <= (parser)->tags[idx].parent ? \
      (&((parser)->tags[(parser)->tags[idx].parent])) : NULL)
/**
 * \brief Get a pointer to the first child of the tag at index idx.
 * \param parser Pointer to the parser holding the tags.
 * \param idx Index of the tag whose first child is wanted (evaluated twice).
 * \return Pointer to the first child tag, or NULL if first_child is negative.
 *
 * NOTE(review): this indexes (parser)->tags with []; the tags member is
 * declared as struct MDATA_VECTOR elsewhere in this file, so confirm the
 * macro still matches the data structure before relying on it.
 */
#define mhtml_tag_child( parser, idx ) \
   (0 <= (parser)->tags[idx].first_child ? \
      (&((parser)->tags[(parser)->tags[idx].first_child])) : NULL)
/**
 * \brief Get a pointer to the next sibling of the tag at index idx.
 * \param parser Pointer to the parser holding the tags.
 * \param idx Index of the tag whose next sibling is wanted (evaluated twice).
 * \return Pointer to the sibling tag, or NULL if next_sibling is negative.
 *
 * NOTE(review): this indexes (parser)->tags with []; the tags member is
 * declared as struct MDATA_VECTOR elsewhere in this file, so confirm the
 * macro still matches the data structure before relying on it.
 */
#define mhtml_tag_sibling( parser, idx ) \
   (0 <= (parser)->tags[idx].next_sibling ? \
      (&((parser)->tags[(parser)->tags[idx].next_sibling])) : NULL)
/* Current parse state of the HTML parser: forwards to mparser_pstate() on
 * the parser's embedded base member. Callers compare the result against the
 * MHTML_PSTATE_* constants. */
84#define mhtml_parser_pstate( parser ) \
85 mparser_pstate( &((parser)->base) )
87#ifdef MPARSER_TRACE_NAMES
88# define mhtml_parser_pstate_push( parser, new_pstate ) \
89 mparser_pstate_push( \
90 "mhtml", &((parser)->base), new_pstate, gc_mhtml_pstate_names )
92# define mhtml_parser_pstate_pop( parser ) \
94 "mhtml", &((parser)->base), gc_mhtml_pstate_names )
96# define mhtml_parser_pstate_push( parser, new_pstate ) \
97 mparser_pstate_push( "mhtml", &((parser)->base), new_pstate )
99# define mhtml_parser_pstate_pop( parser ) \
100 mparser_pstate_pop( "mhtml", &((parser)->base) )
/* Report character c as invalid in the current state by forwarding to
 * mparser_invalid_c() on the parser's embedded base member; retval is passed
 * through for that macro to use.
 * NOTE(review): the first argument is the bare identifier mhtml, unlike the
 * quoted "mhtml" passed by the other wrappers here -- presumably
 * mparser_invalid_c stringifies it; confirm against mparser.h. */
103#define mhtml_parser_invalid_c( parser, c, retval ) \
104 mparser_invalid_c( mhtml, &((parser)->base), c, retval )
/* Forward to mparser_reset_token() on the parser's embedded base member,
 * labelled "mhtml". Called after a token has been consumed by the parser. */
106#define mhtml_parser_reset_token( parser ) \
107 mparser_reset_token( "mhtml", &((parser)->base) )
/* Forward character c to mparser_append_token() on the parser's embedded
 * base member, labelled "mhtml". Callers assign the result to retval and
 * check it with maug_cleanup_if_not_ok(). */
109#define mhtml_parser_append_token( parser, c ) \
110 mparser_append_token( "mhtml", &((parser)->base), c )
/**
 * \brief Set the parser's current tag index (tag_iter), tracing the change.
 * \param parser Pointer to the parser whose tag_iter is updated.
 * \param iter New tag index. Evaluated more than once, so it must not have
 *             side effects.
 *
 * The trace call and the assignment are wrapped in do/while( 0 ) so the
 * macro expands to a single statement: without it, using the macro as the
 * body of an unbraced if/else would run the assignment unconditionally.
 * Callers terminate the invocation with their own semicolon.
 */
#define mhtml_parser_set_tag_iter( parser, iter ) \
   do { \
      debug_printf( MHTML_TRACE_LVL, "setting tag_iter to: " SSIZE_T_FMT \
         " (previously: " SSIZE_T_FMT ")", \
         (ssize_t)(iter), (parser)->tag_iter ); \
      (parser)->tag_iter = (iter); \
   } while( 0 )
/* Evaluates to true when the parser's tags member is not NULL.
 * NOTE(review): tags is declared as struct MDATA_VECTOR in this file and the
 * functions here test locking with mdata_vector_is_locked( &(parser->tags) )
 * instead; comparing the struct member to NULL looks stale -- confirm before
 * use. */
117#define mhtml_parser_is_locked( parser ) (NULL != (parser)->tags)
124 ssize_t next_sibling;
127 char classes[MCSS_CLASS_SZ_MAX + 1];
130 char id[MCSS_ID_SZ_MAX + 1];
134#define MHTML_TAG_TABLE_STRUCT( tag_id, tag_name, fields, disp ) \
135 struct MHTML_TAG_ ## tag_name { \
136 struct MHTML_TAG_BASE base; \
140MHTML_TAG_TABLE( MHTML_TAG_TABLE_STRUCT )
/* Row expander for MHTML_TAG_TABLE: emits one member of type
 * struct MHTML_TAG_<tag_name> named <tag_name>. These members are what code
 * in this file reaches as p_tag_iter->TEXT, ->IMG and ->INPUT. */
142#define MHTML_TAG_TABLE_UNION_FIELD( tag_id, tag_name, fields, disp ) \
143 struct MHTML_TAG_ ## tag_name tag_name;
147 MHTML_TAG_TABLE( MHTML_TAG_TABLE_UNION_FIELD )
159 struct MCSS_PARSER styler;
160 struct MDATA_STRPOOL strpool;
161 struct MDATA_VECTOR tags;
/* Row expander for MHTML_PARSER_PSTATE_TABLE: defines one uint8_t constant
 * called name with the value idx. */
178#define MHTML_PSTATE_TABLE_CONST( name, idx ) \
179 MAUG_CONST uint8_t SEG_MCONST name = idx;
/* Define every MHTML_PSTATE_* constant listed in the table. */
181MHTML_PARSER_PSTATE_TABLE( MHTML_PSTATE_TABLE_CONST )
/* NOTE(review): presumably generates gc_mhtml_pstate_names, the name list
 * the trace variants of pstate push/pop reference -- confirm in mparser.h. */
183MPARSER_PSTATE_NAMES( MHTML_PARSER_PSTATE_TABLE, mhtml )
/* Row expander for MHTML_TAG_TABLE: defines the uint16_t constant
 * MHTML_TAG_TYPE_<tag_name> with the value tag_id. These are the values
 * stored in and compared against a tag's base.type. */
185#define MHTML_TAG_TABLE_CONST( tag_id, tag_name, fields, disp ) \
186 MAUG_CONST uint16_t SEG_MCONST MHTML_TAG_TYPE_ ## tag_name = tag_id;
/* Define every MHTML_TAG_TYPE_* constant listed in the table. */
188MHTML_TAG_TABLE( MHTML_TAG_TABLE_CONST )
190#define MHTML_TAG_TABLE_NAMES( tag_id, tag_name, fields, disp ) \
193MAUG_CONST
char* SEG_MCONST gc_mhtml_tag_names[] = {
194 MHTML_TAG_TABLE( MHTML_TAG_TABLE_NAMES )
198#define MHTML_ATTRIB_TABLE_NAME( name, idx ) \
201static MAUG_CONST
char* SEG_MCONST gc_mhtml_attrib_names[] = {
202 MHTML_ATTRIB_TABLE( MHTML_ATTRIB_TABLE_NAME )
/* Row expander for MHTML_ATTRIB_TABLE: defines the uint16_t constant
 * MHTML_ATTRIB_KEY_<attrib_name> with the value attrib_id. These constants
 * are compared against parser->attrib_key when an attribute value is set. */
206#define MHTML_ATTRIB_TABLE_NAME_CONST( attrib_name, attrib_id ) \
207 MAUG_CONST uint16_t SEG_MCONST MHTML_ATTRIB_KEY_ ## attrib_name = attrib_id;
/* Define every MHTML_ATTRIB_KEY_* constant listed in the table. */
209MHTML_ATTRIB_TABLE( MHTML_ATTRIB_TABLE_NAME_CONST )
215 debug_printf( MHTML_TRACE_LVL,
"freeing HTML parser..." );
217 mdata_strpool_free( &(parser->strpool) );
219 mdata_vector_lock( &(parser->tags) );
221 while( 0 < mdata_vector_ct( &(parser->tags) ) ) {
222 tag_iter = mdata_vector_get( &(parser->tags), 0,
union MHTML_TAG );
223 assert( NULL != tag_iter );
225 mdata_vector_unlock( &(parser->tags) );
226 mdata_vector_remove( &(parser->tags), 0 );
227 mdata_vector_lock( &(parser->tags) );
232 mcss_parser_free( &(parser->styler) );
234 if( mdata_vector_is_locked( &(parser->tags) ) ) {
235 mdata_vector_unlock( &(parser->tags) );
238 mdata_vector_free( &(parser->tags) );
248 assert( parser->tag_iter >= 0 );
249 mdata_vector_lock( &(parser->tags) );
250 tag_iter = mdata_vector_get(
251 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
252 assert( NULL != tag_iter );
254 mhtml_parser_set_tag_iter( parser, tag_iter->base.parent );
256 if( 0 <= parser->tag_iter ) {
257 debug_printf( MHTML_TRACE_LVL,
258 "moved iter back to tag %s (" SIZE_T_FMT
")",
259 gc_mhtml_tag_names[tag_iter->base.type], parser->tag_iter );
261 debug_printf( MHTML_TRACE_LVL,
"moved iter back to root (-1)" );
266 mdata_vector_unlock( &(parser->tags) );
273 ssize_t new_tag_idx = -1;
274 ssize_t next_sibling_idx = -1;
280 tag_new.base.parent = -1;
281 tag_new.base.first_child = -1;
282 tag_new.base.next_sibling = -1;
283 tag_new.base.style = -1;
287 new_tag_idx = mdata_vector_append(
288 &(parser->tags), &tag_new,
sizeof(
union MHTML_TAG ) );
289 if( 0 > new_tag_idx ) {
290 retval = mdata_retval( new_tag_idx );
294 mdata_vector_lock( &(parser->tags) );
295 p_tag_new = mdata_vector_get(
296 &(parser->tags), new_tag_idx,
union MHTML_TAG );
297 assert( NULL != p_tag_new );
299 if( 0 > parser->tag_iter ) {
300 mhtml_parser_set_tag_iter( parser, new_tag_idx );
305 p_tag_iter = mdata_vector_get(
306 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
307 assert( NULL != p_tag_iter );
310 p_tag_new->base.parent = parser->tag_iter;
313 if( 0 > p_tag_iter->base.first_child ) {
314 debug_printf( MHTML_TRACE_LVL,
315 "zxzx attached " SSIZE_T_FMT
" as first child to "
316 SSIZE_T_FMT, new_tag_idx, parser->tag_iter );
317 p_tag_iter->base.first_child = new_tag_idx;
320 next_sibling_idx = p_tag_iter->base.first_child;
321 p_tag_iter = mdata_vector_get(
322 &(parser->tags), next_sibling_idx,
union MHTML_TAG );
323 while( NULL != p_tag_iter && 0 <= p_tag_iter->base.next_sibling ) {
324 next_sibling_idx = p_tag_iter->base.next_sibling;
325 p_tag_iter = mdata_vector_get(
326 &(parser->tags), next_sibling_idx,
union MHTML_TAG );
328 assert( NULL != p_tag_iter );
329 p_tag_iter->base.next_sibling = new_tag_idx;
330 debug_printf( MHTML_TRACE_LVL,
331 "attached " SSIZE_T_FMT
" as next sibling to "
332 SSIZE_T_FMT, new_tag_idx, next_sibling_idx );
335 debug_printf( MHTML_TRACE_LVL,
336 "pushed new tag " SSIZE_T_FMT
" under " SSIZE_T_FMT,
337 new_tag_idx, p_tag_new->base.parent );
339 mhtml_parser_set_tag_iter( parser, new_tag_idx );
343 mdata_vector_unlock( &(parser->tags) );
353 mparser_token_upper( &((parser)->base), i );
355 if( 0 == strncmp(
"STYLE", parser->base.token, 6 ) ) {
360 parser->
tag_flags |= MHTML_TAG_FLAG_STYLE;
364 retval = mhtml_push_tag( parser );
365 maug_cleanup_if_not_ok();
367 mdata_vector_lock( &(parser->tags) );
369 p_tag_iter = mdata_vector_get(
370 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
371 assert( NULL != p_tag_iter );
375 while(
'\0' != gc_mhtml_tag_names[i][0] ) {
377 parser->base.token_sz == maug_strlen( gc_mhtml_tag_names[i] ) &&
379 gc_mhtml_tag_names[i], parser->base.token, parser->base.token_sz )
381 debug_printf( MHTML_TRACE_LVL,
382 "new tag (" SSIZE_T_FMT
") type: %s",
383 parser->tag_iter, gc_mhtml_tag_names[i] );
384 p_tag_iter->base.type = i;
386 if( MHTML_TAG_TYPE_BODY == i ) {
390 assert( -1 == parser->body_idx );
391 parser->body_idx = parser->tag_iter;
392 debug_printf( MHTML_TRACE_LVL,
393 "set body index to: " SSIZE_T_FMT,
402 error_printf(
"could not find type for new tag (" SSIZE_T_FMT
")",
407 if( mdata_vector_is_locked( &(parser->tags) ) ) {
408 mdata_vector_unlock( &(parser->tags) );
419 retval = mhtml_push_tag( parser );
420 maug_cleanup_if_not_ok();
422 mdata_vector_lock( &(parser->tags) );
424 p_tag_iter = mdata_vector_get(
425 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
426 assert( NULL != p_tag_iter );
429 MHTML_TAG_FLAG_STYLE == (MHTML_TAG_FLAG_STYLE &
430 p_tag_iter->base.flags)
432 p_tag_iter->base.type = MHTML_TAG_TYPE_STYLE;
434 p_tag_iter->base.type = MHTML_TAG_TYPE_TEXT;
437 if( MHTML_TAG_TYPE_STYLE == p_tag_iter->base.type ) {
439 debug_printf( MHTML_TRACE_LVL,
"parsing STYLE tag..." );
440 for( ; parser->base.token_sz > i ; i++ ) {
441 retval = mcss_parse_c( &(parser->styler), parser->base.token[i] );
442 maug_cleanup_if_not_ok();
444 debug_printf( 1,
"out of style characters..." );
445 mcss_parser_flush( &(parser->styler) );
446 mcss_parser_reset( &(parser->styler) );
449 while(
' ' == parser->base.token[parser->base.token_sz - 1] ) {
450 parser->base.token_sz--;
454 p_tag_iter->TEXT.content_idx = mdata_strpool_append(
455 &(parser->strpool), parser->base.token, parser->base.token_sz );
456 p_tag_iter->TEXT.content_sz = parser->base.token_sz;
459 debug_printf( 1,
"done processing tag contents..." );
463 if( mdata_vector_is_locked( &(parser->tags) ) ) {
464 mdata_vector_unlock( &(parser->tags) );
474 debug_printf( MHTML_TRACE_LVL,
"attrib: %s", parser->base.token );
476 mparser_token_upper( &((parser)->base), i );
480 while(
'\0' != gc_mhtml_attrib_names[i][0] ) {
482 parser->base.token_sz == maug_strlen( gc_mhtml_attrib_names[i] ) &&
484 gc_mhtml_attrib_names[i], parser->base.token, parser->base.token_sz )
487 MHTML_TRACE_LVL,
"new attrib type: %s", gc_mhtml_attrib_names[i] );
488 parser->attrib_key = i;
494 error_printf(
"unknown attrib: %s", parser->base.token );
506 mdata_vector_lock( &(parser->tags) );
508 p_tag_iter = mdata_vector_get(
509 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
510 assert( NULL != p_tag_iter );
512 if( MHTML_ATTRIB_KEY_STYLE == parser->attrib_key ) {
513 debug_printf( MHTML_TRACE_LVL,
"style: %s", parser->base.token );
517 mdata_vector_unlock( &(parser->tags) );
518 retval = mcss_push_style( &(parser->styler), MCSS_SELECT_NONE, NULL, 0 );
519 maug_cleanup_if_not_ok();
520 mdata_vector_lock( &(parser->tags) );
523 p_tag_iter->base.style =
524 mdata_vector_ct( &(parser->styler.styles) ) - 1;
526 for( ; parser->base.token_sz > i ; i++ ) {
527 retval = mcss_parse_c( &(parser->styler), parser->base.token[i] );
528 maug_cleanup_if_not_ok();
531 debug_printf( 1,
"out of style characters..." );
532 mcss_parser_flush( &(parser->styler) );
536 }
else if( MHTML_ATTRIB_KEY_CLASS == parser->attrib_key ) {
538 p_tag_iter->base.classes,
541 p_tag_iter->base.classes_sz = parser->base.token_sz;
543 }
else if( MHTML_ATTRIB_KEY_ID == parser->attrib_key ) {
548 p_tag_iter->base.id_sz = parser->base.token_sz;
550 }
else if( MHTML_ATTRIB_KEY_SRC == parser->attrib_key ) {
555 MHTML_SRC_HREF_SZ_MAX );
556 p_tag_iter->IMG.src_sz = parser->base.token_sz;
558 }
else if( MHTML_ATTRIB_KEY_TYPE == parser->attrib_key ) {
561 if( 0 == maug_strncpy( parser->base.token,
"button", 7 ) ) {
562 p_tag_iter->INPUT.input_type =
563 MHTML_INPUT_TYPE_BUTTON;
566 }
else if( MHTML_ATTRIB_KEY_NAME == parser->attrib_key ) {
569 p_tag_iter->INPUT.name,
572 p_tag_iter->INPUT.name_sz = parser->base.token_sz;
574 }
else if( MHTML_ATTRIB_KEY_VALUE == parser->attrib_key ) {
577 p_tag_iter->INPUT.value,
580 p_tag_iter->INPUT.value_sz = parser->base.token_sz;
585 if( mdata_vector_is_locked( &(parser->tags) ) ) {
586 mdata_vector_unlock( &(parser->tags) );
595 size_t tag_iter_type = 0;
599 if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
600 if( 0 < parser->base.token_sz ) {
601 retval = mhtml_push_text_tag( parser );
602 maug_cleanup_if_not_ok();
605 mdata_vector_lock( &(parser->tags) );
606 p_tag_iter = mdata_vector_get(
607 &(parser->tags), parser->tag_iter,
union MHTML_TAG );
608 assert( NULL != p_tag_iter );
609 tag_iter_type = p_tag_iter->base.type;
610 mdata_vector_unlock( &(parser->tags) );
617 MHTML_TAG_TYPE_STYLE != tag_iter_type
620 retval = mhtml_pop_tag( parser );
621 maug_cleanup_if_not_ok();
624 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ELEMENT );
625 maug_cleanup_if_not_ok();
626 mhtml_parser_reset_token( parser );
629 mhtml_parser_invalid_c( parser, c, retval );
634 if( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) ) {
635 retval = mhtml_push_element_tag( parser );
636 maug_cleanup_if_not_ok();
637 mhtml_parser_pstate_pop( parser );
638 mhtml_parser_reset_token( parser );
640 }
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
641 mhtml_parser_pstate_pop( parser );
642 assert( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) );
643 mhtml_parser_pstate_pop( parser );
644 mhtml_parser_reset_token( parser );
646 }
else if( MHTML_PSTATE_END_ELEMENT == mhtml_parser_pstate( parser ) ) {
648 retval = mhtml_pop_tag( parser );
649 maug_cleanup_if_not_ok();
651 mhtml_parser_pstate_pop( parser );
652 if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
653 mhtml_parser_pstate_pop( parser );
655 assert( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) );
656 mhtml_parser_pstate_pop( parser );
657 mhtml_parser_reset_token( parser );
659 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
660 retval = mhtml_parser_append_token( parser, c );
661 maug_cleanup_if_not_ok();
663 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
664 retval = mhtml_parser_append_token( parser, c );
665 maug_cleanup_if_not_ok();
668 mhtml_parser_invalid_c( parser, c, retval );
674 MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) &&
675 0 == parser->base.token_sz
678 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_END_ELEMENT );
679 maug_cleanup_if_not_ok();
681 }
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
683 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_END_ELEMENT );
684 maug_cleanup_if_not_ok();
686 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
687 retval = mhtml_parser_append_token( parser, c );
688 maug_cleanup_if_not_ok();
690 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
691 retval = mhtml_parser_append_token( parser, c );
692 maug_cleanup_if_not_ok();
695 mhtml_parser_invalid_c( parser, c, retval );
700 if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
701 retval = mhtml_push_attrib_key( parser );
702 maug_cleanup_if_not_ok();
703 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ATTRIB_VAL );
704 maug_cleanup_if_not_ok();
705 mhtml_parser_reset_token( parser );
707 }
else if( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) ) {
708 retval = mhtml_parser_append_token( parser, c );
709 maug_cleanup_if_not_ok();
711 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
712 retval = mhtml_parser_append_token( parser, c );
713 maug_cleanup_if_not_ok();
716 mhtml_parser_invalid_c( parser,
'_', retval );
721 if( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) ) {
722 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_STRING );
723 maug_cleanup_if_not_ok();
724 mhtml_parser_reset_token( parser );
726 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
727 retval = _mhtml_set_attrib_val( parser );
728 maug_cleanup_if_not_ok();
729 mhtml_parser_pstate_pop( parser );
730 assert( MHTML_PSTATE_ATTRIB_VAL == mhtml_parser_pstate( parser ) );
731 mhtml_parser_pstate_pop( parser );
732 mhtml_parser_reset_token( parser );
734 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
735 retval = mhtml_parser_append_token( parser, c );
736 maug_cleanup_if_not_ok();
739 mhtml_parser_invalid_c( parser,
'_', retval );
749 if( MHTML_PSTATE_ELEMENT == mhtml_parser_pstate( parser ) ) {
750 retval = mhtml_push_element_tag( parser );
751 maug_cleanup_if_not_ok();
752 retval = mhtml_parser_pstate_push( parser, MHTML_PSTATE_ATTRIB_KEY );
753 maug_cleanup_if_not_ok();
754 mhtml_parser_reset_token( parser );
756 }
else if( MHTML_PSTATE_STRING == mhtml_parser_pstate( parser ) ) {
757 retval = mhtml_parser_append_token( parser, c );
758 maug_cleanup_if_not_ok();
760 }
else if( MHTML_PSTATE_ATTRIB_KEY == mhtml_parser_pstate( parser ) ) {
763 }
else if( MHTML_PSTATE_NONE == mhtml_parser_pstate( parser ) ) {
766 0 < parser->base.token_sz &&
767 ' ' != parser->base.token[parser->base.token_sz - 1]
769 retval = mhtml_parser_append_token( parser,
' ' );
770 maug_cleanup_if_not_ok();
774 mhtml_parser_invalid_c( parser,
'_', retval );
779 retval = mhtml_parser_append_token( parser, c );
780 maug_cleanup_if_not_ok();
786 mparser_wait( &((parser)->base) );
790 parser->base.last_c = c;
792 if( mdata_vector_is_locked( &(parser->tags) ) ) {
793 mdata_vector_unlock( &(parser->tags) );
803 mhtml_parser_set_tag_iter( parser, -1 );
804 parser->body_idx = -1;
806 retval = mcss_parser_init( &(parser->styler) );
807 maug_cleanup_if_not_ok();
818 char* strpool = NULL;
819 char dump_line[MHTML_DUMP_LINE_SZ + 1];
821 ssize_t first_child = -1;
822 ssize_t next_sibling = -1;
829 mdata_vector_lock( &(parser->tags) );
831 p_tag_iter = mdata_vector_get( &(parser->tags), iter,
union MHTML_TAG );
832 assert( NULL != p_tag_iter );
834 maug_mzero( dump_line, MHTML_DUMP_LINE_SZ + 1 );
836 for( i = 0 ; d > i ; i++ ) {
837 assert( i < MHTML_DUMP_LINE_SZ );
838 strcat( dump_line,
" " );
840 if( MHTML_TAG_TYPE_TEXT == p_tag_iter->base.type ) {
841 if( -1 == p_tag_iter->TEXT.content_idx ) {
842 error_printf(
"no tag content present!" );
846 mdata_strpool_lock( &(parser->strpool), strpool );
849 maug_strlen( dump_line ) + 7
850 + p_tag_iter->TEXT.content_sz < MHTML_DUMP_LINE_SZ
852 strcat( dump_line,
"TEXT: " );
853 strcat( dump_line, &(strpool[p_tag_iter->TEXT.content_idx]) );
854 strcat( dump_line,
"\n" );
857 mdata_strpool_unlock( &(parser->strpool), strpool );
861 maug_strlen( dump_line ) +
862 maug_strlen( gc_mhtml_tag_names[p_tag_iter->base.type] ) <
866 gc_mhtml_tag_names[p_tag_iter->base.type] );
870 0 <= p_tag_iter->base.style &&
871 maug_strlen( dump_line ) + 9 < MHTML_DUMP_LINE_SZ
873 strcat( dump_line,
" (styled)" );
877 0 < p_tag_iter->base.id_sz &&
878 maug_strlen( dump_line ) + 7
879 + maug_strlen( p_tag_iter->base.id ) < MHTML_DUMP_LINE_SZ
881 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
882 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
883 " (id: %s)", p_tag_iter->base.id );
887 0 < p_tag_iter->base.classes_sz &&
888 maug_strlen( dump_line ) + 12
889 + maug_strlen( p_tag_iter->base.id ) < MHTML_DUMP_LINE_SZ
891 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
892 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
893 " (classes: %s)", p_tag_iter->base.classes );
897 MHTML_TAG_TYPE_IMG == p_tag_iter->base.type &&
898 0 < p_tag_iter->IMG.src_sz &&
899 maug_strlen( dump_line ) + 8
900 + maug_strlen( p_tag_iter->IMG.src ) < MHTML_DUMP_LINE_SZ
902 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
903 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
904 " (src: %s)", p_tag_iter->IMG.src );
908 MHTML_TAG_TYPE_INPUT == p_tag_iter->base.type &&
909 0 < p_tag_iter->INPUT.value_sz &&
910 maug_strlen( dump_line ) + 10
911 + maug_strlen( p_tag_iter->INPUT.value ) < MHTML_DUMP_LINE_SZ
913 maug_snprintf( &(dump_line[maug_strlen( dump_line )]),
914 MHTML_DUMP_LINE_SZ - maug_strlen( dump_line ),
915 " (value: %s)", p_tag_iter->INPUT.value );
920 debug_printf( 1,
"%s", dump_line );
922 first_child = p_tag_iter->base.first_child;
923 next_sibling = p_tag_iter->base.next_sibling;
925 mdata_vector_unlock( &(parser->tags) );
927 retval = mhtml_dump_tree( parser, first_child, d + 1 );
928 maug_cleanup_if_not_ok();
930 retval = mhtml_dump_tree( parser, next_sibling, d );
931 maug_cleanup_if_not_ok();
935 if( mdata_vector_is_locked( &(parser->tags) ) ) {
936 mdata_vector_unlock( &(parser->tags) );
/* Declaration-only form of the tag type constants: each table row becomes an
 * extern declaration of MHTML_TAG_TYPE_<tag_name>.
 * NOTE(review): presumably the non-defining branch of a conditional whose
 * directives are not visible here, paired with the defining form of
 * MHTML_TAG_TABLE_CONST earlier in the file -- confirm. */
944#define MHTML_TAG_TABLE_CONST( tag_id, tag_name, fields, disp ) \
945 extern MAUG_CONST uint16_t SEG_MCONST MHTML_TAG_TYPE_ ## tag_name;
947MHTML_TAG_TABLE( MHTML_TAG_TABLE_CONST )
/* Declaration of the tag name table, indexed by a tag's base.type. */
949extern MAUG_CONST
char* SEG_MCONST gc_mhtml_tag_names[];
int MERROR_RETVAL
Return type indicating function returns a value from this list.
Definition merror.h:19
#define maug_mzero(ptr, sz)
Zero the block of memory pointed to by ptr.
Definition mmem.h:62
uint8_t tag_flags
Flags to be pushed to MHTML_TAG_BASE::flags on next mhtml_push_tag().
Definition mhtml.h:158