maug
Quick and dirty C mini-augmentation library.
Loading...
Searching...
No Matches
mlispp.h
Go to the documentation of this file.
1
2#ifndef MLISPP_H
3#define MLISPP_H
4
5#include <mlisps.h>
6
15#ifndef MLISP_PARSE_TRACE_LVL
16# define MLISP_PARSE_TRACE_LVL 0
17#endif /* !MLISP_PARSE_TRACE_LVL */
18
19#define MLISP_AST_FLAG_LAMBDA 0x02
20
21#define MLISP_AST_FLAG_IF 0x04
22
23#define MLISP_AST_FLAG_DEFINE 0x08
24
25#define MLISP_AST_FLAG_BEGIN 0x20
26
27#define MLISP_PARSER_PSTATE_TABLE( f ) \
28 f( MLISP_PSTATE_NONE, 0 ) \
29 f( MLISP_PSTATE_SYMBOL_OP, 1 ) \
30 f( MLISP_PSTATE_SYMBOL, 2 ) \
31 f( MLISP_PSTATE_STRING, 3 ) \
32 f( MLISP_PSTATE_LAMBDA_ARGS, 4 ) \
33 f( MLISP_PSTATE_COMMENT, 5 )
34
40#define mlisp_parser_pstate( parser ) \
41 ((parser)->base.pstate_sz > 0 ? \
42 (parser)->base.pstate[(parser)->base.pstate_sz - 1] : MLISP_PSTATE_NONE)
43
44#ifdef MPARSER_TRACE_NAMES
45# define mlisp_parser_pstate_push( parser, new_pstate ) \
46 mparser_pstate_push( \
47 "mlisp", &((parser)->base), new_pstate, gc_mlisp_pstate_names );
48
49# define mlisp_parser_pstate_pop( parser ) \
50 mparser_pstate_pop( \
51 "mlisp", &((parser)->base), gc_mlisp_pstate_names );
52#else
53# define mlisp_parser_pstate_push( parser, new_pstate ) \
54 mparser_pstate_push( "mlisp", &((parser)->base), new_pstate )
55
56# define mlisp_parser_pstate_pop( parser ) \
57 mparser_pstate_pop( "mlisp", &((parser)->base) )
58#endif /* MPARSER_TRACE_NAMES */
59
60#define mlisp_parser_invalid_c( parser, c, retval ) \
61 mparser_invalid_c( mlisp, &((parser)->base), c, retval )
62
63#define mlisp_parser_reset_token( parser ) \
64 mparser_reset_token( "mlisp", &((parser)->base) )
65
66#define mlisp_parser_append_token( parser, c ) \
67 mparser_append_token( "mlisp", &((parser)->base), c )
68
69#define mlisp_parser_parse_token( parser ) \
70 parser->token_parser( \
71 (parser)->token, (parser)->token_sz, (parser)->token_parser_arg )
72
73MERROR_RETVAL mlisp_ast_dump(
74 struct MLISP_PARSER* parser, size_t ast_node_idx, size_t depth, char ab );
75
76 /* mlisp_parser */
77
78MERROR_RETVAL mlisp_parse_c( struct MLISP_PARSER* parser, char c );
79
80MERROR_RETVAL mlisp_parser_init( struct MLISP_PARSER* parser );
81
82MERROR_RETVAL mlisp_exec_init(
83 struct MLISP_PARSER* parser, struct MLISP_EXEC_STATE* exec );
84
85void mlisp_parser_free( struct MLISP_PARSER* parser );
86
87 /* mlisp */
88
89#ifdef MLISPP_C
90
91#define _MLISP_TYPE_TABLE_CONSTS( idx, ctype, name, const_name, fmt ) \
92 MAUG_CONST uint8_t SEG_MCONST MLISP_TYPE_ ## const_name = idx;
93
94MLISP_TYPE_TABLE( _MLISP_TYPE_TABLE_CONSTS );
95
96MLISP_PARSER_PSTATE_TABLE( MPARSER_PSTATE_TABLE_CONST )
97
98MPARSER_PSTATE_NAMES( MLISP_PARSER_PSTATE_TABLE, mlisp )
99
100/* === */
101
102/* AST Functions */
103
104/* === */
105
106static MERROR_RETVAL
107_mlisp_ast_add_child( struct MLISP_PARSER* parser, uint8_t flags ) {
108 MERROR_RETVAL retval = MERROR_OK;
109 struct MLISP_AST_NODE* n_parent = NULL;
110 struct MLISP_AST_NODE ast_node;
111 ssize_t parent_child_idx = -1;
112 ssize_t new_idx_out = 0;
113 size_t i = 0;
114
115 /* Setup the new node to copy. */
116 maug_mzero( &ast_node, sizeof( struct MLISP_AST_NODE ) );
117 ast_node.ast_idx_parent = parser->ast_node_iter;
118 for( i = 0 ; MLISP_AST_IDX_CHILDREN_MAX > i ; i++ ) {
119 ast_node.ast_idx_children[i] = -1;
120 }
121 ast_node.token_idx = -1;
122 ast_node.ast_idx_children_sz = 0;
123 ast_node.flags = flags;
124
125 debug_printf( MLISP_PARSE_TRACE_LVL, "adding node under " SSIZE_T_FMT "...",
126 ast_node.ast_idx_parent );
127
128 /* Add the node to the AST and set it as the current node. */
129 new_idx_out = mdata_vector_append(
130 &(parser->ast), &ast_node, sizeof( struct MLISP_AST_NODE ) );
131 if( 0 > new_idx_out ) {
132 retval = mdata_retval( new_idx_out );
133 }
134
135 /* Find an available child slot on the parent, if there is one. */
136 if( 0 <= ast_node.ast_idx_parent ) {
137 mdata_vector_lock( &(parser->ast) );
138
139 n_parent = mdata_vector_get(
140 &(parser->ast), ast_node.ast_idx_parent, struct MLISP_AST_NODE );
141
142 /* Find the first free child slot. */
143 parent_child_idx = 0;
144 while( -1 != n_parent->ast_idx_children[parent_child_idx] ) {
145 parent_child_idx++;
146 }
147
148 n_parent->ast_idx_children[parent_child_idx] = new_idx_out;
149 n_parent->ast_idx_children_sz++;
150
151 n_parent = NULL;
152 mdata_vector_unlock( &(parser->ast) );
153 } else {
154 /* Find the first free child slot *on the parser*. */
155 /*
156 parent_child_idx = 0;
157 while( -1 != parser->ast_idx_children[parent_child_idx] ) {
158 parent_child_idx++;
159 }
160
161 parser->ast_idx_children[parent_child_idx] = new_idx_out;
162 */
163 }
164
165 parser->ast_node_iter = new_idx_out;
166
167 debug_printf( MLISP_PARSE_TRACE_LVL, "added node " SSIZE_T_FMT
168 " under parent: " SSIZE_T_FMT " as child " SSIZE_T_FMT,
169 new_idx_out, ast_node.ast_idx_parent, parent_child_idx );
170
171cleanup:
172
173 return retval;
174}
175
176/* === */
177
178static MERROR_RETVAL _mlisp_ast_set_child_token(
179 struct MLISP_PARSER* parser, mdata_strpool_idx_t token_idx, size_t token_sz
180) {
181 MERROR_RETVAL retval = MERROR_OK;
182 char* strpool = NULL;
183 struct MLISP_AST_NODE* n = NULL;
184
185 mdata_vector_lock( &(parser->ast) );
186
187 n = mdata_vector_get(
188 &(parser->ast), parser->ast_node_iter, struct MLISP_AST_NODE );
189 assert( NULL != n );
190
191 mdata_strpool_lock( &(parser->strpool), strpool );
192 if( 0 == token_sz ) {
193 token_sz = maug_strlen( &(strpool[token_idx]) );
194 }
195 assert( 0 < token_sz );
196
197 /* Setup flags based on token name. */
198 if( 0 == strncmp( &(strpool[token_idx]), "lambda", token_sz + 1 ) ) {
199 /* Special node: lambda. */
200 debug_printf( MLISP_PARSE_TRACE_LVL,
201 "setting node \"%s\" (" SIZE_T_FMT ") flag: LAMBDA",
202 &(strpool[token_idx]), token_sz );
203 n->flags |= MLISP_AST_FLAG_LAMBDA;
204
205 } else if( 0 == strncmp( &(strpool[token_idx]), "if", token_sz + 1 ) ) {
206 /* Special node: if. */
207 debug_printf( MLISP_PARSE_TRACE_LVL,
208 "setting node \"%s\" (" SIZE_T_FMT ") flag: IF",
209 &(strpool[token_idx]), token_sz );
210 n->flags |= MLISP_AST_FLAG_IF;
211
212 } else if( 0 == strncmp( &(strpool[token_idx]), "begin", token_sz + 1 ) ) {
213 /* Special node: begin. */
214 debug_printf( MLISP_PARSE_TRACE_LVL,
215 "setting node \"%s\" (" SIZE_T_FMT ") flag: BEGIN",
216 &(strpool[token_idx]), token_sz );
217 n->flags |= MLISP_AST_FLAG_BEGIN;
218
219 } else if( 0 == strncmp( &(strpool[token_idx]), "define", token_sz + 1 ) ) {
220 /* Special node: define. */
221 debug_printf( MLISP_PARSE_TRACE_LVL,
222 "setting node \"%s\" (" SIZE_T_FMT ") flag: DEFINE",
223 &(strpool[token_idx]), token_sz );
224 n->flags |= MLISP_AST_FLAG_DEFINE;
225 }
226
227 /* Debug report. */
228 debug_printf( MLISP_PARSE_TRACE_LVL, "setting node " SSIZE_T_FMT
229 " token: \"%s\" (" SIZE_T_FMT ")",
230 parser->ast_node_iter, &(strpool[token_idx]), token_sz );
231 mdata_strpool_unlock( &(parser->strpool), strpool );
232
233 /* Set the token from the strpool. */
234 n->token_idx = token_idx;
235 n->token_sz = token_sz;
236
237cleanup:
238
239 if( NULL != strpool ) {
240 mdata_strpool_unlock( &(parser->strpool), strpool );
241 }
242
243 mdata_vector_unlock( &(parser->ast) );
244
245 return retval;
246}
247
248/* === */
249
250static
251MERROR_RETVAL _mlisp_ast_traverse_parent( struct MLISP_PARSER* parser ) {
252 MERROR_RETVAL retval = MERROR_OK;
253 struct MLISP_AST_NODE* n = NULL;
254
255 mdata_vector_lock( &(parser->ast) );
256
257 assert( 0 <= parser->ast_node_iter );
258
259 n = mdata_vector_get(
260 &(parser->ast), parser->ast_node_iter, struct MLISP_AST_NODE );
261
262 parser->ast_node_iter = n->ast_idx_parent;
263
264 debug_printf( MLISP_PARSE_TRACE_LVL, "moved up to node: " SSIZE_T_FMT,
265 parser->ast_node_iter );
266
267cleanup:
268
269 mdata_vector_unlock( &(parser->ast) );
270
271 return retval;
272}
273
274/* === */
275
276static
277MERROR_RETVAL _mlisp_ast_add_raw_token( struct MLISP_PARSER* parser ) {
278 MERROR_RETVAL retval = MERROR_OK;
279 mdata_strpool_idx_t str_idx = -1;
280
281 str_idx = mdata_strpool_append( &(parser->strpool),
282 parser->base.token, parser->base.token_sz );
283 if( 0 > str_idx ) {
284 error_printf( "invalid str_idx: " SSIZE_T_FMT, str_idx );
285 retval = MERROR_ALLOC;
286 goto cleanup;
287 }
288
289 _mlisp_ast_add_child( parser, 0 );
290 _mlisp_ast_set_child_token( parser, str_idx, parser->base.token_sz );
291 mlisp_parser_reset_token( parser );
292 retval = _mlisp_ast_traverse_parent( parser );
293
294cleanup:
295 return retval;
296}
297
298/* === */
299
300MERROR_RETVAL mlisp_ast_dump(
301 struct MLISP_PARSER* parser, size_t ast_node_idx, size_t depth, char ab
302) {
303 MERROR_RETVAL retval = MERROR_OK;
304 uint8_t autolock = 0;
305 struct MLISP_AST_NODE* n = NULL;
306 char indent[101];
307 size_t i = 0;
308 char* strpool = NULL;
309
310 if( NULL == parser->ast.data_bytes ) {
311 autolock = 1;
312 mdata_vector_lock( &(parser->ast) );
313 debug_printf( MLISP_TRACE_LVL,
314 MLISP_TRACE_SIGIL " --- BEGIN AST DUMP ---" );
315 }
316
317 /* Make indent. */
318 maug_mzero( indent, 101 );
319 assert( depth < 100 );
320 for( i = 0 ; depth > i ; i++ ) {
321 indent[i] = ' ';
322 }
323
324 if( 0 == ab ) {
325 ab = 'X';
326 }
327
328 /* Iterate node and children .*/
329 n = mdata_vector_get( &(parser->ast), ast_node_idx, struct MLISP_AST_NODE );
330 mdata_strpool_lock( &(parser->strpool), strpool );
331 debug_printf( MLISP_TRACE_LVL,
332 MLISP_TRACE_SIGIL " %s%c: \"%s\" (i: " SIZE_T_FMT ", t: " SSIZE_T_FMT
333 ", c: " SSIZE_T_FMT ", f: 0x%02x)",
334 indent, ab, 0 <= n->token_idx ? &(strpool[n->token_idx]) : "",
335 ast_node_idx, n->token_idx, n->ast_idx_children_sz, n->flags );
336 mdata_strpool_unlock( &(parser->strpool), strpool );
337 for( i = 0 ; MLISP_AST_IDX_CHILDREN_MAX > i ; i++ ) {
338 if( -1 == n->ast_idx_children[i] ) {
339 continue;
340 }
341
342 mlisp_ast_dump( parser, n->ast_idx_children[i], depth + 1, '0' + i );
343 }
344
345cleanup:
346
347 if( NULL != parser->ast.data_bytes && autolock ) {
348 mdata_vector_unlock( &(parser->ast) );
349 debug_printf( MLISP_TRACE_LVL,
350 MLISP_TRACE_SIGIL " --- END AST DUMP ---" );
351 }
352
353 return retval;
354}
355
356/* === */
357
358/* Parse Functions */
359
360/* === */
361
362MERROR_RETVAL mlisp_parse_c( struct MLISP_PARSER* parser, char c ) {
363 MERROR_RETVAL retval = MERROR_OK;
364 mdata_strpool_idx_t str_idx = -1;
365 uint8_t n_flags = 0;
366 size_t n_children = 0;
367 struct MLISP_AST_NODE* n = NULL;
368
369#ifdef MPARSER_TRACE_NAMES
370 debug_printf( MLISP_PARSE_TRACE_LVL,
371 SIZE_T_FMT ": \"%c\" (last: \"%c\") (%s (%d)) (sz: " SIZE_T_FMT ")",
372 parser->base.i, c, parser->base.last_c,
373 gc_mlisp_pstate_names[mlisp_parser_pstate( parser )],
374 mlisp_parser_pstate( parser ),
375 parser->base.pstate_sz );
376#endif /* MPARSER_TRACE_NAMES */
377
378 mdata_vector_lock( &(parser->ast) );
379 n = mdata_vector_get(
380 &(parser->ast), parser->ast_node_iter, struct MLISP_AST_NODE );
381 if( NULL != n ) {
382 n_flags = n->flags;
383 n_children = n->ast_idx_children_sz;
384 }
385 mdata_vector_unlock( &(parser->ast) );
386
387 switch( c ) {
388 case '\r':
389 case '\n':
390 if( MLISP_PSTATE_COMMENT == mlisp_parser_pstate( parser ) ) {
391 /* End comment on newline. */
392 mlisp_parser_pstate_pop( parser );
393 break;
394 }
395
396 case '\t':
397 case ' ':
398 if(
399 MLISP_PSTATE_SYMBOL_OP == mlisp_parser_pstate( parser )
400 /* Don't terminate the current symbol if the last_c was *any* of the
401 * other terminating characters.
402 */
403 && '\r' != parser->base.last_c
404 && '\n' != parser->base.last_c
405 && '\t' != parser->base.last_c
406 && ' ' != parser->base.last_c
407 && ')' != parser->base.last_c
408 && '(' != parser->base.last_c
409 ) {
410 assert( 0 < parser->base.token_sz );
411 debug_printf( MLISP_PARSE_TRACE_LVL,
412 "found symbol: %s (" SIZE_T_FMT ")",
413 parser->base.token, parser->base.token_sz );
414
415 /* Grab the symbol to use for the op of the child created by the last
416 * open paren.
417 */
418 str_idx = mdata_strpool_append( &(parser->strpool),
419 parser->base.token, parser->base.token_sz );
420 mlisp_parser_reset_token( parser );
421 _mlisp_ast_set_child_token( parser, str_idx, parser->base.token_sz );
422
423 /* Switch from OP to SYMBOL for subsequent tokens. */
424 mlisp_parser_pstate_pop( parser );
425 retval = mlisp_parser_pstate_push( parser, MLISP_PSTATE_SYMBOL );
426 maug_cleanup_if_not_ok();
427
428 /* mlisp_ast_dump( parser, 0, 0, 0 ); */
429
430 } else if(
431 (
432 MLISP_PSTATE_SYMBOL == mlisp_parser_pstate( parser ) ||
433 MLISP_PSTATE_LAMBDA_ARGS == mlisp_parser_pstate( parser )
434 )
435 /* Don't terminate the current symbol if the last_c was *any* of the
436 * other terminating characters.
437 */
438 && '\r' != parser->base.last_c
439 && '\n' != parser->base.last_c
440 && '\t' != parser->base.last_c
441 && ' ' != parser->base.last_c
442 && ')' != parser->base.last_c
443 && '(' != parser->base.last_c
444 ) {
445 assert( 0 < parser->base.token_sz );
446 debug_printf( MLISP_PARSE_TRACE_LVL,
447 "found symbol: %s (" SIZE_T_FMT ")",
448 parser->base.token, parser->base.token_sz );
449
450 /* A raw token without parens terminated by whitespace can't have
451 * children, so just create a one-off.
452 */
453 _mlisp_ast_add_raw_token( parser );
454
455 } else if( MLISP_PSTATE_STRING == mlisp_parser_pstate( parser ) ) {
456 retval = mlisp_parser_append_token( parser, c );
457 maug_cleanup_if_not_ok();
458
459 }
460 break;
461
462 case '(':
463 if(
464 MLISP_PSTATE_NONE == mlisp_parser_pstate( parser ) ||
465 MLISP_PSTATE_SYMBOL == mlisp_parser_pstate( parser )
466 ) {
467 if(
468 MLISP_AST_FLAG_LAMBDA == (MLISP_AST_FLAG_LAMBDA & n_flags) &&
469 0 == n_children
470 ) {
471 /* Special case: all tokens in this parent are lambda args. */
472 retval =
473 mlisp_parser_pstate_push( parser, MLISP_PSTATE_LAMBDA_ARGS );
474 } else {
475 /* Otherwise, first symbol after an open paren is an op. */
476 retval = mlisp_parser_pstate_push( parser, MLISP_PSTATE_SYMBOL_OP );
477 }
478 maug_cleanup_if_not_ok();
479 mlisp_parser_reset_token( parser );
480
481 /* Add a new empty child to be filled out when tokens are parsed. */
482 _mlisp_ast_add_child( parser, 0 );
483
484 } else if( MLISP_PSTATE_STRING == mlisp_parser_pstate( parser ) ) {
485 retval = mlisp_parser_append_token( parser, c );
486 maug_cleanup_if_not_ok();
487
488 } else {
489 mlisp_parser_invalid_c( parser, c, retval );
490 }
491 break;
492
493 case ')':
494 if(
495 MLISP_PSTATE_SYMBOL_OP == mlisp_parser_pstate( parser ) ||
496 MLISP_PSTATE_SYMBOL == mlisp_parser_pstate( parser ) ||
497 MLISP_PSTATE_LAMBDA_ARGS == mlisp_parser_pstate( parser )
498 ) {
499 if( 0 < parser->base.token_sz ) {
500 /* A raw token without parens terminated by whitespace can't have
501 * children, so just create a one-off.
502 */
503 _mlisp_ast_add_raw_token( parser );
504 }
505
506 /* Reset the parser and AST cursor up one level. */
507 mlisp_parser_pstate_pop( parser );
508 _mlisp_ast_traverse_parent( parser );
509
510 /* mlisp_ast_dump( parser, 0, 0, 0 ); */
511
512 } else if( MLISP_PSTATE_STRING == mlisp_parser_pstate( parser ) ) {
513 retval = mlisp_parser_append_token( parser, c );
514 maug_cleanup_if_not_ok();
515
516 } else {
517 mlisp_parser_invalid_c( parser, c, retval );
518 }
519 break;
520
521 case ';':
522 if( MLISP_PSTATE_COMMENT != mlisp_parser_pstate( parser ) ) {
523 mlisp_parser_pstate_push( parser, MLISP_PSTATE_COMMENT );
524 break;
525 }
526
527 default:
528 if( MLISP_PSTATE_COMMENT == mlisp_parser_pstate( parser ) ) {
529 break;
530 }
531 retval = mlisp_parser_append_token( parser, c );
532 maug_cleanup_if_not_ok();
533 break;
534 }
535
536 mparser_wait( &(parser->base) );
537
538 parser->base.i++;
539
540cleanup:
541
542 parser->base.last_c = c;
543
544 return retval;
545}
546
547/* === */
548
549MERROR_RETVAL mlisp_parser_init( struct MLISP_PARSER* parser ) {
550 MERROR_RETVAL retval = MERROR_OK;
551 ssize_t append_retval = 0;
552
553 debug_printf( MLISP_TRACE_LVL,
554 "initializing mlisp parser (" SIZE_T_FMT " bytes)...",
555 sizeof( struct MLISP_PARSER ) );
556
557 maug_mzero( parser, sizeof( struct MLISP_PARSER ) );
558
559 parser->ast_node_iter = -1;
560 /*
561 for( i = 0 ; MLISP_AST_IDX_CHILDREN_MAX > i ; i++ ) {
562 parser->ast_idx_children[i] = -1;
563 }
564 */
565
566 /* Allocate the vectors for AST and ENV. */
567
568 append_retval = mdata_vector_alloc(
569 &(parser->ast), sizeof( struct MLISP_AST_NODE ), MDATA_VECTOR_INIT_SZ );
570 if( 0 > append_retval ) {
571 retval = mdata_retval( append_retval );
572 }
573 maug_cleanup_if_not_ok();
574
575cleanup:
576
577 if( MERROR_OK != retval ) {
578 error_printf( "mlisp parser initialization failed: %d", retval );
579 }
580
581 return retval;
582}
583
584/* === */
585
586void mlisp_parser_free( struct MLISP_PARSER* parser ) {
587 mdata_strpool_free( &(parser->strpool) );
588 mdata_vector_free( &(parser->ast) );
589}
590
591#else
592
593# define _MLISP_TYPE_TABLE_CONSTS( idx, ctype, name, const_name, fmt ) \
594 extern MAUG_CONST uint8_t SEG_MCONST MLISP_TYPE_ ## const_name;
595
596MLISP_TYPE_TABLE( _MLISP_TYPE_TABLE_CONSTS );
597
598#endif /* MLISPP_C */
599
600#endif /* !MLISPP_H */
601
int MERROR_RETVAL
Return type indicating function returns a value from this list.
Definition merror.h:19
#define maug_mzero(ptr, sz)
Zero the block of memory pointed to by ptr.
Definition mmem.h:62
#define MLISP_TYPE_TABLE(f)
Table of other types.
Definition mlisps.h:60
MLISP Interpreter/Parser Structs.
Definition mlisps.h:98
size_t ast_idx_children_sz
Number of children in MLISP_AST_NODE::ast_idx_children.
Definition mlisps.h:106
Definition mlisps.h:109
Definition mlisps.h:135