/*
 Copyright Dave Bone 1998 - 2014
 All Rights Reserved.
 No part of this document may be reproduced without written consent from the author.

 FILE: pass3.lex
 Dates: 7 June 2003
 Purpose: Lexical Phase
 Changed: added keyword recognition by symbol table lookup.
          Eliminated the parallelism between identifier and yacco2_keyword.
 Note: The use_cnt_ deals with the problem of recursion using this grammar
       and ensures the eog tokens that close off the producer container are written only once.
*/
/@
@i "/usr/local/yacco2/copyright.w"
@** pass3 stand alone Grammar.\fbreak
Lexical / syntactic dispatcher for the grammar being parsed.\fbreak
Why the name |pass3|? The short of it: when i was developing a long time ago there were 3 passes.
Through attrition it became one. I was too lazy to rename it to |pass1|.
So that's the blah blah blog of it all.\fbreak
\fbreak
It calls |PROCESS_KEYWORD_FOR_SYNTAX_CODE| when keywords are discovered.
This is a top/down approach to parsing: the ``called procedure'' dispatches on the keyword's content
to its parsing procedure, which of course contains a monolithic bottom-up grammar to digest the grammar construct.
Neat stuff to see how the 2 approaches to parsing can blend together:
a monolithic grammar using threaded grammars to parse its stuff.\fbreak
\fbreak
Let's look at this from above:\fbreak
Here we have a standalone grammar calling a procedure that runs a standalone grammar that calls more threads.
Well i think this is neat... no ice sir and hold the applause. thk u.\fbreak
\fbreak
Changed: added keyword recognition by symbol table lookup.
Eliminated the competing parallelism between identifier and |yacco2_keyword|, which now no longer exists.\fbreak
\fbreak
Note:\fbreak
The use\_cnt\_ deals with the problem of recursion using this grammar
and ensures the eog tokens that close off the producer container are written only once.\fbreak
\fbreak
Please see the |Rprefile_inc_dispatcher| rule as an example of a chained procedure call.
The chaining is on the ``\ATsign'', which is also the start character of grammar
|NS_prefile_include::PROC_TH_prefile_include|, called as a procedure.
This is an example of an optimization: thread activation is toooooo slow compared to a procedure call.
Now what about inlining, ugh?
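\fbreak
\fbreak
A minimal sketch of the recursion guard, using only the names declared further down in this file
(the counter is |Cpass3::nested_use_cnt_|):\fbreak
\ptindent{1) the fsm op does |++Cpass3::nested_use_cnt_| on every (re)entry, including nested include-file parses}
\ptindent{2) |Rpass3|'s lhs op emits the two closing eog tokens to the producer queue only when the count is 1, ie only at the outermost parse}
\ptindent{3) the lhs op then does |--Cpass3::nested_use_cnt_| to unwind the nesting level}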
\fbreak
\fbreak
An improvement: grammar file has no contents\fbreak
21-Apr-2014
@/
fsm
(fsm-id "pass3.lex",fsm-filename pass3,fsm-namespace NS_pass3
,fsm-class Cpass3{
  user-declaration
    public:
    static int nested_use_cnt_;
  ***
  user-implementation
    int Cpass3::nested_use_cnt_(0);
  ***
  op
    ++Cpass3::nested_use_cnt_;
  ***
  user-prefix-declaration
    #include "ws.h"
    #include "cweb_or_c_k.h"
    #include "bad_char_set.h"
    #include "eol.h"
    #include "prefile_include.h"
    #include "identifier.h"
    #include "o2_externs.h"
  ***
}
,fsm-version "1.0",fsm-date "17 Apr 2001",fsm-debug "false"
,fsm-comments "\\O2's lexer constructing tokens for syntax parser stage.")
@"/usr/local/yacco2/compiler/grammars/yacco2_T_includes.T"

rules{
 Rpass3
 (
   lhs{
    op
      using namespace NS_yacco2_k_symbols;
      if(Cpass3::nested_use_cnt_ == 1){
        ADD_TOKEN_TO_PRODUCER_QUEUE(*yacco2::PTR_LR1_eog__);
        ADD_TOKEN_TO_PRODUCER_QUEUE(*yacco2::PTR_LR1_eog__);
      }
      --Cpass3::nested_use_cnt_;
    ***
   }
 ){
  -> eog {
    op
      CAbs_lr1_sym* sym = new Err_empty_file;
      sym->set_rc(*rule_info__.parser__->current_token(),__FILE__,__LINE__);
      ADD_TOKEN_TO_ERROR_QUEUE(*sym);
      rule_info__.parser__->set_abort_parse(true);
    ***
  }
  -> Relems eog {
    op
      if (GRAMMAR_TREE == 0){
        CAbs_lr1_sym* sym = new Err_empty_file;
        sym->set_rc(*rule_info__.parser__->current_token(),__FILE__,__LINE__);
        ADD_TOKEN_TO_ERROR_QUEUE(*sym);
        rule_info__.parser__->set_abort_parse(true);
      }
    ***
  }
 }

 Relems
 /@
 @*2 Relems --- left recursion diagram.\fbreak
 Note: the left recursion is drawn as a Pascal railroad diagram.
 @/
 (){
  -> Rtoken
  -> Relems Rtoken
 }

 Rtoken
 (){
  -> ||| ws NS_ws::TH_ws
  -> ||| eol NS_eol::TH_eol
  -> ||| "identifier" NS_identifier::TH_identifier {
    /@
    @*2 Identifier slip thru.\fbreak
    As i use a top / down process to consume constructs, an Identifier somehow slips thru due to either
    a premature ending of the Rules top / down parse process,
    an out-of-alignment token appearing before where it belongs within the Rules vocabulary,
    or a misplaced or misspelt T.
    @/
    op
      sf->p2__->set_auto_delete(true);
      CAbs_lr1_sym* sym = new Err_misplaced_or_misspelt_Rule_or_T;
      sym->set_rc(*sf->p2__,__FILE__,__LINE__);
      ADD_TOKEN_TO_ERROR_QUEUE(*sym);
      rule_info__.parser__->set_abort_parse(true);
    ***
  }
  -> ||| |+| NULL {// keywords emitted from identifier
    /@
    @*2 Dispatch keyword to process its construct phrase.\fbreak
    Again neat stuff with its co-operation of top/down and bottom-up parsing paradigms.
    Notice that i use the catch-all ``|+|'' to showcase it, whereas i could have referenced ``keyword''.
    This is how the parser works:\fbreak
    \ptindent{1) check the state's table for a specifically returned T from a thread call}
    \ptindent{2) check for a ``catch all'' returned presence from a thread call}
    \ptindent{3) try the current token T to shift if the thread call did not work or is not present in the state}
    \ptindent{4) check for ``catch all'' presence in the state to shift}
    Note: there are 2 types of ``catch all'': one for returned T from thread calls and the other for regular parsing.
    @/
    op
      CAbs_lr1_sym* key = sf->p2__;// extract specific keyword
      yacco2::INT cont_pos = rule_info__.parser__->current_token_pos__;
      CAbs_lr1_sym* cont_tok = rule_info__.parser__->current_token();
      bool result = PROCESS_KEYWORD_FOR_SYNTAX_CODE(*rule_info__.parser__,key,&cont_tok,&cont_pos);
      if(result == false){
        rule_info__.parser__->set_abort_parse(true);
        return;
      }
      ADD_TOKEN_TO_PRODUCER_QUEUE(*key);
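      // Hand-off recap (descriptive comments only -- the real work is the calls above and below):
      //  the identifier thread returned a keyword T, caught here by the catch-all ``|+|'';
      //  PROCESS_KEYWORD_FOR_SYNTAX_CODE dispatched on it to the keyword's bottom-up construct
      //  grammar, which consumed the whole phrase and reported where it stopped via cont_tok / cont_pos;
      //  the keyword has just been queued for the syntax stage, and the next statement re-aligns
      //  this dispatcher's token stream to the phrase's lookahead position.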
      // adv. to phrase's LA pos
      rule_info__.parser__->override_current_token(*cont_tok,cont_pos);
    ***
  }
  -> ||| "rule-in-stbl" NULL {// emitted from identifier thread
    /@
    @*3 ``rule-in-stbl'' slip thru.\fbreak
    The top / down process lexes and consumes this token, so normally it never hits this grammar.
    Probable cause is a premature stoppage of the ``rules'' construct.
    Why is ``rule-in-stbl'' returned? The token has been defined or referenced by one or more defining rules.
    The first occurrence in the rules construct enters it into the symbol table as defined or referenced.
    Depending on its next usage context, defined or referenced updates its symbol table attributes.
    @/
    op
      sf->p2__->set_auto_delete(true);
      CAbs_lr1_sym* sym = new Err_use_of_N_outside_Rules_construct;
      sym->set_rc(*sf->p2__,__FILE__,__LINE__);
      ADD_TOKEN_TO_ERROR_QUEUE(*sym);
      rule_info__.parser__->set_abort_parse(true);
    ***
  }
  -> ||| "T-in-stbl" NULL {// emitted from identifier thread
    /@
    @*3 ``T-in-stbl'' slip thru.\fbreak
    Could be an out-of-construct token that has been defined in the Terminal vocabulary.
    For example, the token came before the Rules construct. Probably a typo by the grammar writer.
    @/
    op
      sf->p2__->set_auto_delete(true);
      CAbs_lr1_sym* sym = new Err_use_of_T_outside_Rules_construct;
      sym->set_rc(*sf->p2__,__FILE__,__LINE__);
      ADD_TOKEN_TO_ERROR_QUEUE(*sym);
      rule_info__.parser__->set_abort_parse(true);
    ***
  }
  ->
    /@
    @*2 \Yacco2's pre-processor include directive.\fbreak
    \fbreak
    This demonstrates a nested environment where the grammar uses recursion by calling a function
    which contains the |pass3| grammar sequence.
    In this example, grammar |pass3| manually calls a thread via |start_manually_parallel_parsing|
    to get the file name to process.
    With the returned ``file-inclusion'' terminal, |PROCESS_INCLUDE_FILE| is called to parse the
    include file: a bom-de-bom-bom bump-and-grind sequence.
    The |use_cnt_| is a global variable that protects against the file-include recursion of calling
    self until a stack overflow occurs.
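    \fbreak
    \fbreak
    A sketch of the flow, as the |Rprefile_inc_dispatcher| subrules below show (all names are the ones used in this file):\fbreak
    \ptindent{1) the ``\ATsign'' shifts and chains to |Rprefile_inc_dispatcher|, which calls |NS_prefile_include::PROC_TH_prefile_include| as a procedure}
    \ptindent{2) a returned ``file-inclusion'' T carries the file name; |PROCESS_INCLUDE_FILE| re-runs the |pass3| sequence on that file, bumping |Cpass3::nested_use_cnt_| so the closing eog tokens are written only once, at the outermost level}
    \ptindent{3) a false result means the nested-file limit was exceeded and the parse aborts; otherwise the file-name token is sent to the recycle bin}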
@/ "@" Rprefile_inc_dispatcher -> ||| "comment" NS_cweb_or_c_k::TH_cweb_or_c_k { op T_comment* k = sf->p2__; k->set_auto_delete(true); *** } -> ||| "cweb-comment" NULL { op T_cweb_comment* k = sf->p2__; AST* cwebk_t_ = new AST(*k); AST* cweb_t_ = new AST(); T_cweb_marker* cw = new T_cweb_marker(cweb_t_); cw->set_rc(*k,__FILE__,__LINE__); AST::set_content(*cweb_t_,*cw); AST::join_pts(*cweb_t_,*cwebk_t_); BUILD_GRAMMAR_TREE(*cweb_t_); CWEB_MARKER = 0; *** } -> /@ @* Error subrules.\fbreak @/ |?| { op CAbs_lr1_sym* sym = new Err_not_kw_defining_grammar_construct; sym->set_rc(*sf->p1__,__FILE__,__LINE__); ADD_TOKEN_TO_ERROR_QUEUE(*sym); rule_info__.parser__->set_abort_parse(true); *** } -> ||| "bad eos" NULL { // op ADD_TOKEN_TO_ERROR_QUEUE(*sf->p2__); rule_info__.parser__->set_abort_parse(true); *** } -> ||| "comment-overrun" NULL{ op ADD_TOKEN_TO_ERROR_QUEUE(*sf->p2__); rule_info__.parser__->set_abort_parse(true); *** } -> ||| "bad esc" NULL{ op ADD_TOKEN_TO_ERROR_QUEUE(*sf->p2__); rule_info__.parser__->set_abort_parse(true); *** } -> ||| "bad char" NS_bad_char_set::TH_bad_char_set{ op Err_bad_char* k = sf->p2__; ADD_TOKEN_TO_ERROR_QUEUE(*k); rule_info__.parser__->set_abort_parse(true); *** } } Rprefile_inc_dispatcher (){ -> |t| "file-inclusion" NS_prefile_include::PROC_TH_prefile_include { op CAbs_lr1_sym* err = sf->p2__->error_sym(); if(err != 0) { rule_info__.parser__->set_abort_parse(true); ADD_TOKEN_TO_ERROR_QUEUE(*sf->p2__); ADD_TOKEN_TO_ERROR_QUEUE(*sf->p2__->error_sym()); return; } bool result = PROCESS_INCLUDE_FILE(*rule_info__.parser__ ,*sf->p2__,*rule_info__.parser__->token_producer__); if(result == false){ // exceeded nested file limit rule_info__.parser__->set_abort_parse(true); return; } ADD_TOKEN_TO_RECYCLE_BIN(*sf->p2__);//file name inside return; *** } -> |t| |?| NULL { op sf->p2__->set_auto_delete(true); CAbs_lr1_sym* sym = new Err_bad_directive; sym->set_rc(*sf->p2__,__FILE__,__LINE__); RSVP(sym); rule_info__.parser__->set_stop_parse(true); *** } -> |?| { op CAbs_lr1_sym* sym = new Err_no_directive_present; sym->set_rc(*rule_info__.parser__->current_token(),__FILE__,__LINE__); RSVP(sym); rule_info__.parser__->set_stop_parse(true); *** } } }// end of rules