@q file: parser.w@> @q% Copyright Dave Bone 1998 - 2015@> @q% /*@> @q% This Source Code Form is subject to the terms of the Mozilla Public@> @q% License, v. 2.0. If a copy of the MPL was not distributed with this@> @q% file, You can obtain one at http://mozilla.org/MPL/2.0/.@> @q% */@> @** Parser Definitions --- Pushdown Automaton. Just what you've been taught at university with its associated components:\fbreak \ptindent{parse stack} \ptindent{finite automaton tables} It supports 2 parsing paradigms: hohum and parallel. The extras added to the pushdown automaton are the abort and stop parsing instructions, and the turning on and off of the wild shift facility. All 3 of these activities are controlled by the grammar writer's syntax directed code. They all get reset back to their initial settings when the thread completes parsing. The abort parse is an abrupt way of killing the parse. It justs stops it. No result returned to the calling grammar. The stop parse is more refined in that one normally adds a terminal to the accept queue of the calling grammar before shutting down. If used, the all shift facilty needs to be turned off within some running context or else the terminal stream being parsed will overrun. This is protected against in the PDA but... @*2 The parser structure. @+= struct Parser{ enum parse_result{erred,accepted,reduced,paralleled,no_thds_to_run}; @ Parser(yacco2::CAbs_fsm& Fsm_tbl@/ ,yacco2::token_container_type* Token_supplier@/ ,yacco2::token_container_type* Token_producer@/ ,yacco2::UINT Token_supplier_key_pos = Token_start_pos@/ ,yacco2::token_container_type* Error_queue = 0@/ ,yacco2::token_container_type* Recycle_bin = 0@/ ,yacco2::tble_lkup_type* Sym_lookup_functor=0@/ ,bool Use_all_shift=ON);@/ Parser(yacco2::CAbs_fsm& Fsm_tbl ,yacco2::Thread_entry& Thread_entry ,yacco2::Parser* Calling_parser);// parallel parser Parser(yacco2::CAbs_fsm& Fsm_tbl ,yacco2::Parser* Calling_parser);// parallel parser: procedure called ~Parser(); @ @ @ @ @ yacco2::CAbs_fsm* fsm_tbl(); void fsm_tbl(yacco2::CAbs_fsm* Fsm_tbl); yacco2::tble_lkup_type* sym_lookup_functor(); Parser::parse_result parallel_parse_successful(); Parser::parse_result parallel_parse_unsuccessful(); Parser::parse_result proc_call_parse_successful(); Parser::parse_result proc_call_parse_unsuccessful(); bool spawn_thread_manually(yacco2::USINT Thread_id); @ }; @*2 Parser's internal variables. @= yacco2::CAbs_fsm* fsm_tbl__; yacco2::KCHARP thread_name__; yacco2::Thread_entry* thread_entry__; yacco2::token_container_type* token_supplier__; yacco2::token_container_type* token_producer__; yacco2::token_container_type* recycle_bin__; yacco2::token_container_type* error_queue__; yacco2::lr_stk parse_stack__; yacco2::CAbs_lr1_sym* current_token__; yacco2::UINT current_token_pos__; yacco2::CAbs_lr1_sym* start_token__; yacco2::UINT start_token_pos__; yacco2::tble_lkup_type* sym_lookup_functor__; bool abort_parse__; bool stop_parse__; bool use_all_shift__; bool has_questionable_shift_occured__; yacco2::Parser* from_thread__; yacco2::THREAD_NO thread_no__; yacco2::COND_VAR cv__; yacco2::MUTEX mu__; int cv_cond__; yacco2::worker_thread_blk th_blk__; yacco2::pp_accept_queue_type pp_accept_queue__; int pp_accept_queue_idx__; yacco2::INT th_active_cnt__; yacco2::INT th_accepting_cnt__; yacco2::Parser* pp_requesting_parallelism__; yacco2::INT msg_id__; yacco2::Caccept_parse* arbitrated_token__; yacco2::Caccept_parse pp_rsvp__; int no_competing_pp_ths__; int no_requested_ths_to_run__; yacco2::yacco2_threads_to_run_type th_lst__; bool launched_as_procedure__; USINT supplier_r_w_cnt__; @*2 Parallel parsing support definitions. @= yacco2::Parser* from_thread(); yacco2::KCHARP thread_name(); yacco2::Thread_entry* thread_entry(); void post_event_to_requesting_grammar@/ (yacco2::Parser& To_thread@/ ,yacco2::INT Message_id@/ ,yacco2::Parser& From_thread);@/ void wait_for_event(); bool start_threads();// how thread or procedure THR_result start_procedure_call(yacco2::State& S); void put_T_into_accept_queue(yacco2::Caccept_parse& Parm); void clean_up(); void call_arbitrator(yacco2::Type_pp_fnct_ptr The_judge); bool have_all_threads_reported_back(); void abort_accept_queue_irregularites(yacco2::Caccept_parse& Calling_parm); void abort_no_selected_accept_parse_in_arbitrator(); @*2 Parse's all shift, stop, and abort defs. @= void set_use_all_shift_on(); void set_use_all_shift_off(); bool use_all_shift(); bool abort_parse(); void set_abort_parse(bool Abort); bool stop_parse(); void set_stop_parse(bool Stop); @*2 PDA's defs. @= parse_result parse(); void shift(yacco2::Shift_entry& SE); void invisible_shift(yacco2::Shift_entry& SE); void questionable_shift(yacco2::Shift_entry& SE); void all_shift(yacco2::Shift_entry& SE); void parallel_shift(yacco2::CAbs_lr1_sym& Accept_terminal); void proc_call_shift(yacco2::CAbs_lr1_sym& Accept_terminal); parse_result reduce(yacco2::Reduce_entry& RE); parse_result parallel_parse();@/ parse_result proc_call_parse();@/ parse_result start_parallel_parsing(yacco2::State& S); THR_result chained_proc_call_parsing(yacco2::State& S); parse_result start_manually_parallel_parsing(yacco2::USINT Thread_id);@/ yacco2::Shift_entry* find_cur_T_shift_entry(); yacco2::Shift_entry* find_R_or_paralleled_T_shift_entry(yacco2::USINT Enum_id); yacco2::Reduce_entry* find_questionable_sym_in_reduce_lookahead(); yacco2::Reduce_entry* find_reduce_entry(); yacco2::Reduce_entry* find_parallel_reduce_entry(); yacco2::Reduce_entry* find_proc_call_reduce_entry(); @*2 Parser's containers defs. @= yacco2::token_container_type* token_supplier(); void set_token_supplier(yacco2::token_container_type& Token_supplier); yacco2::token_container_type* token_producer(); void set_token_producer(yacco2::token_container_type& Token_producer); yacco2::token_container_type* recycle_bin(); void set_recycle_bin(yacco2::token_container_type& Recycle_bin); void set_error_queue(yacco2::token_container_type& Error_queue); yacco2::token_container_type* error_queue(); void add_token_to_supplier(yacco2::CAbs_lr1_sym& Token); void add_token_to_producer(yacco2::CAbs_lr1_sym& Token); void add_token_to_recycle_bin(yacco2::CAbs_lr1_sym& Token); void add_token_to_error_queue(yacco2::CAbs_lr1_sym& Token); @*2 Parse's stack defs. @= void cleanup_stack_due_to_abort(); yacco2::lr_stk* parse_stack(); yacco2::INT no_items_on_stack(); yacco2::Cparse_record* get_stack_record(yacco2::INT Pos);//rel 0 yacco2::Cparse_record* top_stack_record(); void remove_from_stack(yacco2::INT No_to_remove); void add_to_stack(yacco2::State& State_no); yacco2::INT current_stack_pos(); void clear_parse_stack(); yacco2::CAbs_lr1_sym* get_spec_stack_token(yacco2::UINT Pos);//rel 0 @*2 Parser's token defs. @= void get_shift_s_next_token(); yacco2::CAbs_lr1_sym* get_next_token(); yacco2::CAbs_lr1_sym* get_spec_token(yacco2::UINT Pos); yacco2::CAbs_lr1_sym* current_token(); yacco2::CAbs_lr1_sym* start_token(); void set_start_token(yacco2::CAbs_lr1_sym& Start_tok); yacco2::UINT start_token_pos(); void set_start_token_pos(yacco2::UINT Pos); void reset_current_token(yacco2::UINT Pos); void override_current_token(yacco2::CAbs_lr1_sym& Current_token,yacco2::UINT Pos); void override_current_token_pos(yacco2::UINT Pos); yacco2::UINT current_token_pos(); @*2 |Parser| Regular parser.\fbreak Runs a monolithic grammar: not a threaded grammar. i/o token containers are required whereas the threaded parser receives this information via a parameter at first thread startup or as a message within the calling parser. Not much is required in start up but to establish the runtime parse stack and fetch the first terminal for processing if it is available. How can it not be available? Well I support the empty language: moot but hugging theory. Notice that the items imported are references instead of pointers. I'm trying it again. I hope that it works within the threaded environment. It didn't with cica Microsoft Visual studio 6 \CPLUSPLUS/ compiler. Pointers were consistent. |cv__(0)| and |mu__(0)| are removed from the initializer list due to linux honking. @^ To think out - rel. 1 terminal not zero!@> @= yacco2:: Parser:: Parser@/ (yacco2::CAbs_fsm& Fsm_tbl@/ ,yacco2::token_container_type* Token_supplier@/ ,yacco2::token_container_type* Token_producer@/ ,yacco2::UINT Token_supplier_key_pos@/ ,yacco2::token_container_type* Error_queue@/ ,yacco2::token_container_type* Recycle_bin@/ ,yacco2::tble_lkup_type* Sym_lookup_functor@/ ,bool Use_all_shift)@/ :fsm_tbl__(&Fsm_tbl)@/ ,thread_name__(Fsm_tbl.id__)@/ ,thread_entry__(0)@/ ,token_supplier__(Token_supplier)@/ ,token_producer__(Token_producer)@/ ,error_queue__(Error_queue)@/ ,recycle_bin__(Recycle_bin)@/ ,current_token__(0)@/ ,current_token_pos__(Token_supplier_key_pos)@/ ,start_token__(0)@/ ,start_token_pos__(Token_supplier_key_pos)@/ ,sym_lookup_functor__(Sym_lookup_functor)@/ ,abort_parse__(OFF)@/ ,stop_parse__(OFF)@/ ,use_all_shift__(Use_all_shift)@/ ,has_questionable_shift_occured__(OFF)@/ ,from_thread__(0)@/ ,thread_no__(THREAD_SELF())@/ ,cv_cond__(WAIT_FOR_EVENT)@/ ,th_blk__()@/ ,pp_accept_queue_idx__(0)@/ ,pp_accept_queue__()@/ ,th_active_cnt__(0)@/ ,th_accepting_cnt__(0)@/ ,pp_requesting_parallelism__(0)@/ ,msg_id__(0)@/ ,arbitrated_token__(0)@/ ,no_competing_pp_ths__(0)@/ ,no_requested_ths_to_run__(0)@/ ,th_lst__()@/ ,launched_as_procedure__(false)@/ ,supplier_r_w_cnt__(1)@/ {@/ CREATE_COND_VAR(cv__); CREATE_MUTEX(mu__); LOCK_MUTEX_OF_CALLED_PARSER(mu__,*this," of self"); parse_stack__.lr_stk_init(*Fsm_tbl.start_state__); for(int x=0;xr_w_cnt__; } fsm_tbl__->parser(*this); Fsm_tbl.parser(*this); if(Token_supplier != 0){ current_token__ = get_spec_token(current_token_pos__); }else{ current_token__ = yacco2::PTR_LR1_eog__; } start_token__ = current_token__; @; parse_stack__.lr_stk_init(*fsm_tbl__->start_state__); if(YACCO2_T__ != 0){ if (current_token__ == 0) return;// no tokens @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << "::" << " enum: " << current_token__->enumerated_id__ << ' ' << '"'<< current_token__->id__ << '"' << " pos: " << current_token_pos__ << FILE_LINE << std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(current_token__) yacco2::lrclog << " GPS LINE: " << current_token__->tok_co_ords__.line_no__ << " GPS CHR POS: " << current_token__->tok_co_ords__.pos_in_line__ << FILE_LINE << std::endl; @; } } @ Check for empty language. @= if (current_token__ == 0) return; @*2 |Parser| Parallel parser.\fbreak The parse containers are all global. One can set up some of these containers for local requirements within the threaded grammar. Threaded grammar use this constructor. Elsewhere the threaded code is developed exposing its deployment. The calling grammar's parse object provides all the gory details to parse with its current token, token position, and token dispensor. At initial startup, the token co-ordinates --- dispensor, token, and position set --- will be set within the |Parser|. The parse thread awakened by a message will have in its critical region the requestor's parallel parser address. Within the request for work loop, the messaged parser will extract from the calling parser its token assemble --- dispensor, token, and position set The error, recycle containers are optional. All these containers are taken from the monolithic parser that started the rave. Use of recursion to create a new i/o token containers is permissible. It's up to the designer. Lets hear it for openness! Don't be too cheery boy due to the following: |cv__(0)| and |mu__(0)| are removed from the initializer list due to linux honking. @= yacco2:: Parser:: Parser(yacco2::CAbs_fsm& Fsm_tbl@/ ,yacco2::Thread_entry& Thread_entry ,yacco2::Parser* Calling_parser)@/ :fsm_tbl__(&Fsm_tbl)@/ ,thread_name__(Thread_entry.thread_fnct_name__)@/ ,thread_entry__(&Thread_entry)@/ ,token_supplier__(0)@/ ,token_producer__(0)@/ ,current_token__(0)@/ ,current_token_pos__(0)@/ ,start_token__(0)@/ ,start_token_pos__(0)@/ ,recycle_bin__(0)@/ ,sym_lookup_functor__(0)@/ ,abort_parse__(OFF)@/ ,stop_parse__(OFF)@/ ,use_all_shift__(YES)@/ ,has_questionable_shift_occured__(OFF)@/ ,from_thread__(0)@/ ,thread_no__(THREAD_SELF())@/ ,cv_cond__(EVENT_RECEIVED)@/ ,th_blk__(this,Calling_parser)@/ ,pp_accept_queue__()@/ ,pp_accept_queue_idx__(0)@/ ,th_active_cnt__(0)@/ ,th_accepting_cnt__(0)@/ ,pp_requesting_parallelism__(0)@/ ,msg_id__(0)@/ ,arbitrated_token__(0)@/ ,no_competing_pp_ths__(0)@/ ,no_requested_ths_to_run__(0)@/ ,th_lst__()@/ ,launched_as_procedure__(false)@/ ,supplier_r_w_cnt__(0)@/ {@/ CREATE_COND_VAR(cv__); CREATE_MUTEX(mu__); LOCK_MUTEX_OF_CALLED_PARSER(mu__,*this," of self"); fsm_tbl__->parser(*this); Fsm_tbl.parser(*this); parse_stack__.lr_stk_init(*fsm_tbl__->start_state__);// no token yet for(int x=0;x= yacco2:: Parser:: Parser(yacco2::CAbs_fsm& Fsm_tbl@/ ,yacco2::Parser* Calling_parser)@/ :fsm_tbl__(&Fsm_tbl)@/ ,thread_name__(Fsm_tbl.id__)@/ ,thread_entry__(0)@/ ,token_supplier__(0)@/ ,token_producer__(0)@/ ,current_token__(0)@/ ,current_token_pos__(0)@/ ,start_token__(0)@/ ,start_token_pos__(0)@/ ,recycle_bin__(0)@/ ,sym_lookup_functor__(0)@/ ,abort_parse__(OFF)@/ ,stop_parse__(OFF)@/ ,use_all_shift__(YES)@/ ,has_questionable_shift_occured__(OFF)@/ ,from_thread__(0)@/ ,thread_no__(THREAD_SELF())@/ ,cv_cond__(EVENT_RECEIVED)@/ ,th_blk__()@/ ,pp_accept_queue__()@/ ,pp_accept_queue_idx__(0)@/ ,th_active_cnt__(0)@/ ,th_accepting_cnt__(0)@/ ,pp_requesting_parallelism__(0)@/ ,msg_id__(0)@/ ,arbitrated_token__(0)@/ ,no_competing_pp_ths__(0)@/ ,no_requested_ths_to_run__(0)@/ ,th_lst__()@/ ,launched_as_procedure__(true)@/ ,supplier_r_w_cnt__(0)@/ {@/ CREATE_COND_VAR(cv__); CREATE_MUTEX(mu__); LOCK_MUTEX_OF_CALLED_PARSER(mu__,*this," of self"); fsm_tbl__->parser(*this); Fsm_tbl.parser(*this); parse_stack__.lr_stk_init(*fsm_tbl__->start_state__);// no token yet for(int x=0;x= yacco2:: Parser:: ~Parser(){ clear_parse_stack(); DESTROY_COND_VAR(cv__); DESTROY_MUTEX(mu__); } @** Parser --- PDA's implementation. @*2 Shift. @= void yacco2:: Parser:: shift(yacco2::Shift_entry& SE){@/ @; @; yacco2::State* Goto_state = SE.goto__; @<|add_to_stack|@>; @; get_next_token(); } @*2 Find shift entry. @= yacco2::Shift_entry* se(0); if(pr->state__->shift_tbl_ptr__ != 0) se = find_cur_T_shift_entry(); @*2 Invisible shift. Its symbol \INVshift. @= void yacco2:: Parser:: invisible_shift(yacco2::Shift_entry& SE){@/ @; @; yacco2::State* Goto_state = SE.goto__; @<|add_to_stack|@>; @; } @*3 Set parse stack symbol to invisible shift operator. @= pr->symbol__ = NS_yacco2_k_symbols::PTR_LR1_invisible_shift_operator__; @*2 Questionable shift. Its symbol is \QUEshift. Note, as it is used for error situations though it acts like a wild token as in \ALLshift, it does not advance to the next token in the parse stream! It must be explicitly done by the grammar writer. I haven't head wrestled ``error processing / correction'' yet. @= void yacco2:: Parser:: questionable_shift(yacco2::Shift_entry& SE){@/ has_questionable_shift_occured__ = ON; @; @; yacco2::State* Goto_state = SE.goto__; @<|add_to_stack|@>; @; } @*2 All shift.\fbreak The current terminal and not \ALLshift{} is placed onto the parse stack. The fsm's `go to' state is the vectored \ALLshift{} symbol. @= void yacco2:: Parser:: all_shift(yacco2::Shift_entry& SE){@/ @; @; yacco2::State* Goto_state = SE.goto__; @<|add_to_stack|@>; @; get_next_token(); } @ Set parse stack symbol to current token. @= pr->symbol__ = current_token__;// state's shift symbol @*2 Reduce. The reduce. @= yacco2:: Parser::parse_result yacco2:: Parser:: reduce(yacco2::Reduce_entry& RE){@/ @; @; @; @; @; @; } @*2 Execute the subrule, its directives, and create the rule.\fbreak Inside the rule's constructor is the |lhs-constructor| directive code. The top of the stack address is passed to |reduce_rhs_of_rule| to efficiently calculate the subrule's parameters as its just an array of |Cparse_record|. This is a tricky-dicky, now no politics, cuz I'm really fetching the first component of the stack record which is its grammatical symbol. See notes on the real story. Added a rule recycling program to speed up parser due to new hit on birth-run-delete cycle. See |Recycled_rule_struct| discussion. @= Rule_s_reuse_entry* rule_rec1(0); Rule_s_reuse_entry** rule_rec = &rule_rec1; fsm_tbl__->reduce_rhs_of_rule(RE.rhs_id__,rule_rec); @ @= yacco2::State* Goto_state = se->goto__; @<|add_to_stack|@>; @; if(se->goto__->state_no__ == 1)@/ return Parser::accepted; return Parser::reduced; @ @= remove_from_stack((*rule_rec)->rule_->rule_info__.rhs_no_of_parms__); @ @= parse_stack__.top__->set_symbol((*rule_rec)->rule_);// stack state's rule shift symbol parse_stack__.top__->set_rule_s_reuse_entry(*rule_rec); @ @= Shift_entry* se(0); if(parse_stack__.top__->state__->shift_tbl_ptr__ != 0) se = find_R_or_paralleled_T_shift_entry((*rule_rec)->rule_->enumerated_id__); @*2 Regular parse.\fbreak This parse comes from a non-threaded grammar executed from a process. One can use recursion to start many parse streams. In fact, processing of include files is done this way with an appropriate nested file count limit to prevent overruns. Added |failed| call to monolithic grammar as it becomes a global way to handle an aborted parse. For example, a general error message could be put into the error queue by the monolithic grammar. This becomes a cheap way to deal with invalid token sequences. At least it pin points where it occured by a general error message. The proper refinement is to go to each grammar and program the catching of the error by use of the \INVshift{} terminal or the \ALLshift{} terminal within the subrule. How refined do u want to go or be or not to go? that is the ? @= yacco2:: Parser::parse_result yacco2:: Parser:: parse(){@/ @; @; parse_result result; read_token_stream:@/ { @; } parse_successful:@/ return Parser::accepted; parse_unsuccessful:@/ fsm_tbl__->failed(); // ?sdc from grammar writer for the error queue @; cleanup_stack_due_to_abort(); return Parser::erred; } @ Check for empty language. @= if (current_token__ == 0) return Parser::accepted; @ Process tokens. @= @; if(stop_parse__ == ON){ cleanup_stack_due_to_abort();// quasi controlled abort goto parse_successful; } if(abort_parse__ == ON) goto parse_unsuccessful; State* cur_state = pr->state__; @; parallel_parsing:@/ @; @; @< parallel parsing unsuccessful. So, set up + go to straight parsing@>; proc_call_parsing:@/ { @; @; @; } straight_parsing:@/ @;@/ @; @;@/ @; goto parse_unsuccessful;@/ @ Fire off fsm's op directive.\fbreak This is the fsm's directive that gets run when the parser starts up. As a parallel parser is within a run loop, each time it starts running this directive gets called. It is a directive that allows the grammar writer to preset or pre-evaluate approprite events. For example, it is used in the Pascal translator to pre-evaluate by symbol table lookup the passed identifier token. If it is morphed, the new token is then used in the parse. Good stuff. @= fsm_tbl__->op(); @ Try various shift types.\fbreak The parser favours a shift before a reduce operation. There are 4 types of shifts. The regular shift found in the token stream and 3 meta terminal shifts --- \QUEshift{} questionable, \INVshift{} invisible, and \ALLshift{} all of which are not found in the token stream. The rank of shifts is conditionally checked for their presence within the current parse state with their test order being regular, followed by questionable, invisible, and all shift. The all shift is controlled by the parser's `all shift' facility. If this facility was not present, the parse would always overrun the token stream. The turning on and off is controlled by the syntax directed code of the parsing grammar.\fbreak Comment:\fbreak See bug's comment. @= if(se != 0){ shift(*se); goto read_token_stream; } if(cur_state->questionable_shift__ != 0){ // guard against perpetual machine using \QUEshift and last token ``eog'' if(has_questionable_shift_occured__ == ON){// previous state action @; } questionable_shift(*cur_state->questionable_shift__); goto read_token_stream; } if(cur_state->inv_shift__){ invisible_shift(*cur_state->inv_shift__); goto read_token_stream; } if(use_all_shift__ == ON){ if(cur_state->all_shift__ == 0){ }else{ // guard against overrun of token dispensor using \ALLshift if(current_token__->enumerated_id__ == LR1_Eog)@/ { use_all_shift__ = OFF;// turn off the all shift operator all_shift(*cur_state->all_shift__); }else{ all_shift(*cur_state->all_shift__); goto read_token_stream; } } } @ Dispatch to parallel, proc call, or straight parsing. @= @; if(cur_state->parallel_shift__ != 0) goto parallel_parsing; if(cur_state->proc_call_shift__ != 0) goto proc_call_parsing; else goto straight_parsing; @ Try parallel parse.\fbreak It checks whether there are threads to be run by their first set. If not, the |no_thds_to_run| result is returned so go do some straight parsing. @= result = start_parallel_parsing(*cur_state); if(result == no_thds_to_run) goto straight_parsing; @ Is parallel parsing successful?. If so reduce the \PARshift phrase. The wrinkle is whether a chained procedure call is present. This extends the subrule expression until after the chained procedure call and then it is reduced. @= if (result == paralleled){ if(parse_stack__.top__->state__->proc_call_shift__ != 0){ cur_state=parse_stack__.top__->state__; goto proc_call_parsing;// chained proc call so reduce later } @; @; @; @; } @ find parallel reduce entry. @= Reduce_entry* re(0); if(parse_stack__.top__->state__->reduce_tbl_ptr__ != 0) re = find_parallel_reduce_entry(); @ Parallel parsing unsuccessful.\fbreak So, set up + go to straight parsing. @< parallel parsing unsuccessful. So, set up + go to straight parsing@>= @; @; goto straight_parsing; @ Try proc call parse.\fbreak It checks whether there is a proc call entry in state. If not, the |no_thds_to_run| result is returned so go do some straight parsing. @= THR_result rslt = chained_proc_call_parsing(*cur_state); //|result = rslt;| switch(rslt){ case erred: goto straight_parsing; case no_thds_to_run: goto straight_parsing; default: { result = paralleled; break; } } @ Is proc call parsing successful?. If so reduce the \TRAshift phrase. @= if (result == paralleled){ @; @; @; @; } @ find proc call reduce entry. @= Reduce_entry* re(0); if(parse_stack__.top__->state__->reduce_tbl_ptr__ != 0) re = find_proc_call_reduce_entry(); @ Proc call parsing unsuccessful.\fbreak So, set up + go to straight parsing. @= @; @; goto straight_parsing; @ find reduce entry. @= Reduce_entry* re(0); if(parse_stack__.top__->state__->reduce_tbl_ptr__ != 0) re = find_reduce_entry(); @ Try reduce.\fbreak The stop parse is checked after the reduce syntax directed code has been run. Provides a little more flexibility to the grammar writer's actions. @= if(re != 0){ result = reduce(*re); if(stop_parse__ == ON){ cleanup_stack_due_to_abort();// quasi controlled abort goto parse_successful; } if(abort_parse__ == ON) goto parse_unsuccessful; if(result == Parser::reduced) goto read_token_stream; if( result == Parser::accepted) goto parse_successful; } @*2 Parallel shift.\fbreak A parallel shift has the following stack configuration:\fbreak \ptindent{ \PARshift, followed by \ALLshift, \QUEshift, or newly minted terminal} It places the parallel terminal onto the parse stack even though it is not part of the input token stream. I felt that it should faithfully follow the grammatical expression. This is the tailend of the parallel parse that shifts the arbitrated symbol onto the parse stack. Please note the conditional 2nd attempt on the \ALLshift. If it is present in the current state configuration, then the shift is successful. The only subtlety is in the arbitration code. What happens if there are many returned terminals? There has to be a choice made or the first item in the accept queue gets returned. Should this be a run-time-error if the arbitration code does not select the many to one situation? As parallelism is quasi-random in execution order so are the terminal placements in the accept queue. Where a single processor seems to work, a multi-processor can lead to different results per execution. The grammar should honk with a mildly acidic warning. It does now --- see note. Note: Support for \QUEshift --- questionable shift operator.\fbreak This is like the meta \ALLshift terminal but it allows the grammar write to state that the returned T is an error. In the pecking order of shift presence, the returned T is tested first for its presence within the state. If it is not found then the meta shift terminals are tested in the following order: \QUEshift, \ALLshift. @= void yacco2:: Parser:: parallel_shift(yacco2::CAbs_lr1_sym& Accept_terminal){ @; Shift_entry* se(0); if(pr->state__->shift_tbl_ptr__ != 0) se = find_R_or_paralleled_T_shift_entry(Accept_terminal.enumerated_id__); if(se != 0) goto set_stack_to_symbol_being_shifted; se = pr->state__->questionable_shift__; if(se != 0) goto set_stack_to_symbol_being_shifted; se = pr->state__->all_shift__; if(se != 0) goto set_stack_to_symbol_being_shifted; @; set_stack_to_symbol_being_shifted:@/ @; } @ Shift parallel's returned symbol and goto state. @= pr->symbol__ = &Accept_terminal; // state's \PARshift shift symbol yacco2::State* Goto_state = se->goto__; @<|add_to_stack|@>; //; @*2 Proc call shift.\fbreak A proc call shift has the following stack configuration:\fbreak \ptindent{ \TRAshift, \ALLshift or \QUEshift or newly minted terminal} It places the proc call terminal onto the parse stack even though it is not part of the input token stream. I felt that it should faithfully follow the grammatical expression. This is the tailend of the proc call parse that shifts the arbitrated symbol onto the parse stack. Please note the conditional 2nd attempt on the \ALLshift or \QUEshift to catch the eye as an error. If it is present in the current state configuration, then the shift is successful. The only subtlety is in the arbitration code. What happens if there are many returned terminals? There has to be a choice made or the first item in the accept queue gets returned. Should this be a run-time-error if the arbitration code does not select the many to one situation? As parallelism is quasi-random in execution order so are the terminal placements in the accept queue. Where a single processor seems to work, a multi-processor can lead to different results per execution. The grammar should honk with a mildly acidic warning. It does now --- see note. @= void yacco2:: Parser:: proc_call_shift(yacco2::CAbs_lr1_sym& Accept_terminal){ @; Shift_entry* se(0); if(pr->state__->shift_tbl_ptr__ != 0) se = find_R_or_paralleled_T_shift_entry(Accept_terminal.enumerated_id__); if(se != 0) goto set_stack_to_symbol_being_shifted; se = pr->state__->all_shift__; if(se != 0) goto set_stack_to_symbol_being_shifted; se = pr->state__->questionable_shift__; if(se != 0) goto set_stack_to_symbol_being_shifted; @; set_stack_to_symbol_being_shifted:@/ @; } @ Shift proc call's returned symbol and goto state. @= pr->symbol__ = &Accept_terminal; // state's \TRAshift shift symbol yacco2::State* Goto_state = se->goto__; @<|add_to_stack|@>; //; @*2 Parallel parse.\fbreak The control loop consuming the parallel tokens. @= yacco2:: Parser::parse_result yacco2:: Parser:: parallel_parse(){ @; parse_result result; @; read_token_stream:{@/ @; } parse_successful:@/ return parallel_parse_successful(); parse_unsuccessful:@/ return parallel_parse_unsuccessful(); } @ Check for empty language. yes unsuccessful parallel parse. @= if (current_token__ == 0) goto parse_unsuccessful; goto read_token_stream; @ Process parallel tokens. @= @; if(stop_parse__ == ON){ cleanup_stack_due_to_abort();// quasi controlled abort goto parse_successful; } if(abort_parse__ == ON) goto parse_unsuccessful; State* cur_state = pr->state__; @; parallel_parsing:@/ @; @; @; proc_call_parsing:@/ { @; @; @; } straight_parsing:@/ @; @; @; @; goto parse_unsuccessful; @*2 Parallel parse successful.\fbreak Put the accept message into the requesting grammar's accept queue. It checks whether it is the last active thread stopping. If so, it wakes up the requesting grammar by an event. Notice the |@| is placed in the following parallel parse procedures: |parallel_parse_successful| and |parallel_parse_unsuccessful|. This is done to optimize the number of threads run instead of after the thread has cleanised itself from parsing in the thread loop. See |Parallel thread code| loop. |@| was just after the |@|. Here's the take, when a event is sent to the requesting grammar, the thread library can restart executing the calling grammar while in a single cpu environment the parallel thread is put on hold to complete its duties some time later. Now the grammar requesting parallelism can continue its parse that can again request parallelism that can contain the thread that is winding down. Due to the winding down thread's status being busy, another copy of the thread is created and run. A little softshoe please... @= yacco2:: Parser::parse_result yacco2:: Parser:: parallel_parse_successful(){ @; if(launched_as_procedure__ == true){ @; @; clean_up(); return Parser::accepted; } else{ @; @; @; @; clean_up(); @; @; return Parser::accepted; } } @ Set thread status if launched as a thread. @= @; th_blk__.set_waiting_for_work(); @; @ Notify requesting grammar if launched as a thread. @= @; @ Acquire parallelism requesting grammar's mutex if required.\fbreak If there is only 1 thread running, the critical region is down graded to just a local context. This is an optimization to minimize ``acquire-release'' of mutexes. @= LOCK_MUTEX_OF_CALLED_PARSER(pp_requesting_parallelism__->mu__,*this," of calling grammar"); @ Release parallelism requesting grammar's mutex if required. This is an optimization to minimize ``acquire-release'' of mutexes. |no_competing_pp_ths__| is a read-only variable that gets set when the thread is called. It eliminates the called thread having to acquire the mutex of the calling grammar to determine whether only 1 thread launched. @= UNLOCK_MUTEX_OF_CALLED_PARSER(pp_requesting_parallelism__->mu__,*this," of calling grammar"); @ Notify parallelism requesting grammar if last thread to complete. @= if(have_all_threads_reported_back() == YES){ @; post_event_to_requesting_grammar(*pp_requesting_parallelism__,Accept_parallel_parse,*this); }else{ @; } @ Insert token into requesting grammar's accept queue. @= pp_requesting_parallelism__->put_T_into_accept_queue(pp_rsvp__); @*2 Parallel parse unsuccessful.\fbreak If it is the last active thread, it wakes up the requesting grammar via a message. Otherwise, it just winds down without any message: a bit of an optimization to lowering messages between friends. @= yacco2:: Parser::parse_result yacco2:: Parser:: parallel_parse_unsuccessful(){@/ @; @; if(launched_as_procedure__ == true){ @; goto fire_off_error_functor; } else{ @; @; @; @; @; } fire_off_error_functor:@/ cleanup_stack_due_to_abort(); clean_up(); return Parser::erred; } @ Reduce requesting grammar's active threads count. @= @; --pp_requesting_parallelism__->th_active_cnt__; if(supplier_r_w_cnt__ > 1){ --pp_requesting_parallelism__->supplier_r_w_cnt__; if(token_supplier__->r_w_cnt__ > 1){ @; --token_supplier__->r_w_cnt__; @; } } @; @ Check failed directive for possible acceptance.\fbreak A fsm |failed| directive was added to allow for a last chance attempt at an aborted thread parse. One can return an error token to the calling grammar making its look like a successful parse via syntax directed code of the |failed| directive. It's not a panacea but hey it helps. @= if(fsm_tbl__->failed() == true){ return parallel_parse_successful(); } @*2 Proc call parse successful.\fbreak Put the accept message into the requesting grammar's accept queue. Just return back to callr. @= yacco2:: Parser::parse_result yacco2:: Parser:: proc_call_parse_successful(){ @; @; clean_up(); return Parser::accepted; } @*2 Proc call parse unsuccessful.\fbreak If it is the last active thread, it wakes up the requesting grammar via a message. Otherwise, it just winds down without any message: a bit of an optimization to lowering messages between friends. @= yacco2:: Parser::parse_result yacco2:: Parser:: proc_call_parse_unsuccessful(){@/ @; @; goto fire_off_error_functor; fire_off_error_functor:@/ cleanup_stack_due_to_abort(); clean_up(); return Parser::erred; } @*2 Find current T shift entry.\fbreak Algo. binary search 6.2.1 from Knuth Vol. 3. A little speed to eliminate the passing of the enumerate value. A quick test showed approximately the sequential search is faster than the binary search when the table population is less than 72. @= yacco2::Shift_entry* yacco2:: Parser:: find_cur_T_shift_entry(){@/ @< Reserve and get current stack record@>; yacco2::USINT Enum_id = current_token__->enumerated_id__; State* State_ptr = pr->state__; Shift_tbl* st = State_ptr->shift_tbl_ptr__; yacco2::USINT cnt = st->no_entries__; Shift_entry_array_type* shft_entry_array = (Shift_entry_array_type*)&st->first_entry__; yacco2::Shift_entry* k_entry; if(cnt > SEQ_SRCH_VS_BIN_SRCH_LIMIT) goto bin_srch; for(int x=0;xid__) return k_entry; if(Enum_id < k_entry->id__) break; } eolr_seq: for(int x=0;xid__) return k_entry; if(LR1_Eolr < k_entry->id__) return 0; } return 0; bin_srch: int lower = 1; int upper = cnt; int seg_ln; int mid_pt; int mid_pt_rel0; B2: // calc mid pt if(upper < lower) goto eolr_srch; seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*shft_entry_array)[mid_pt_rel0]; B3: // compare if(Enum_id == k_entry->id__)return k_entry; if(Enum_id > k_entry->id__) goto B5; B4: // adjust upper upper = mid_pt - 1; goto B2; B5: // adjust lower lower = mid_pt + 1; goto B2; eolr_srch: // see if all T in set lower = 1; upper = st->no_entries__; B2_eolr: // calc mid pt if(upper < lower) return 0; seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*shft_entry_array)[mid_pt_rel0]; if(LR1_Eolr == k_entry->id__) return k_entry; if(LR1_Eolr > k_entry->id__) goto B5_eolr; B4_eolr: // adjust upper upper= mid_pt - 1; goto B2_eolr; B5_eolr: // adjust lower lower = mid_pt + 1; goto B2_eolr; return 0; } @*2 Find Rule or paralleled returned T shift entry.\fbreak Algo. binary search 6.2.1 from Knuth Vol. 3. @= yacco2::Shift_entry* yacco2:: Parser:: find_R_or_paralleled_T_shift_entry(yacco2::USINT Enum_id){@/ @< Reserve and get current stack record@>; State* State_ptr = pr->state__; Shift_tbl* st = State_ptr->shift_tbl_ptr__; yacco2::USINT cnt = st->no_entries__; Shift_entry_array_type* shft_entry_array = (Shift_entry_array_type*)&st->first_entry__; yacco2::Shift_entry* k_entry; if(cnt > SEQ_SRCH_VS_BIN_SRCH_LIMIT) goto bin_srch; for(int x=0;x=cnt) break; k_entry = &(*shft_entry_array)[x]; if(Enum_id == k_entry->id__) return k_entry; if(Enum_id < k_entry->id__) break; } eolr_seq: for(int x=0;x=cnt) break; k_entry = &(*shft_entry_array)[x]; if(LR1_Eolr == k_entry->id__) return k_entry; if(LR1_Eolr < k_entry->id__) return 0; } return 0; bin_srch: int lower = 1; int upper = cnt; int seg_ln; int mid_pt; int mid_pt_rel0; B2: // calc mid pt if(upper < lower) goto eolr_srch; seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*shft_entry_array)[mid_pt_rel0]; B3: // compare if(Enum_id == k_entry->id__) return k_entry; if(Enum_id > k_entry->id__) goto B5; B4: // adjust upper upper = mid_pt - 1; goto B2; B5: // adjust lower lower = mid_pt + 1; goto B2; eolr_srch: // see if all T in set lower = 1; upper = st->no_entries__; B2_eolr: // calc mid pt if(upper < lower) return 0; seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*shft_entry_array)[mid_pt_rel0]; if(LR1_Eolr == k_entry->id__) return k_entry; if(LR1_Eolr > k_entry->id__) goto B5_eolr; B4_eolr: // adjust upper upper= mid_pt - 1; goto B2_eolr; B5_eolr: // adjust lower lower = mid_pt + 1; goto B2_eolr; return 0; } @*2 |add_set_to_map|. @= void add_set_to_map(yacco2::yacco2_set_type& Map,int Partition,int Element){@/ yacco2::yacco2_set_iter_type e = Map.find(Partition); if(e == Map.end()){ Map[Partition] = Element; }else{ int se = e->second; int v = se + Element; e->second = v; } } @*2 Reduce Attempts.\fbreak The following points detail the order of reduce attempts. Apart from point 1 which is the regular reduce attempt, points 2 and 3 use various meta terminals attempts for different parsing contexts.\fbreak \INDENT{1.5cm}{1) current token --- standard lr(1) reduce} \INDENT{1.5cm}{2) meta Tes except \QUEshift, eog, and \PARshift} \INDENT{2cm}{in set --- eolr, \REDshift, \INVshift, \ALLshift, and \PROCshift} \INDENT{1.5cm}{3) Only \QUEshift for forced lr(0) reduction} Point 2 is sensitive to the next state's shift attempts --- be it wild or \emptyrule. Point 3 is a specific attempt at drawing the reader's eye to errors within the grammar. It is used in 2 situations:\fbreak \INDENT{1.5cm}{a) shift with its syntax directed code to deal with the error} \INDENT{1.5cm}{b) when in another rule's follow set enforce a reduction} Point b covers the situation whereby the subrule to be reduced will reduce and shift the rule into its next parse state which contains the \QUEshift where the error will be dealt with by its syntax directed code. It is a forcefull reduce instead of considering it an error which it is due to the bad lookahead T by prolonging the error situation to be dealt with by the next parse state environment. This allows the parsing to continue (shift favoured) and to catch the error in the \QUEshift ``shift operation'' of the new current parse state. @*3 Find \QUEshift in reduce lookahead to force a LR(0) reduction.\fbreak Algo. binary search 6.2.1 from Knuth Vol. 3. What do u do when the lookahead is faulty (current token) and u want the state's subrule to reduce so as to force the parser into the rule's shift state which deals with the \QUEshift error? Remember the \QUEshift sym has been properly calculated in the lookahead set for the reduce to take place as it is part of the follow set symbol string in the grammar! This is my experiment. @= yacco2::Reduce_entry* yacco2:: Parser:: find_questionable_sym_in_reduce_lookahead(){@/ @< Reserve and get current stack record@>; State* State_ptr = pr->state__; UCHAR partition; UCHAR element; int lower; int upper; int seg_ln; int mid_pt; int mid_pt_rel0; yacco2::Set_entry* k_entry; Reduce_tbl* rt = State_ptr->reduce_tbl_ptr__; yacco2::USINT cnt_of_reducing_subrules = rt->no_entries__; Reduce_entry* re = (Reduce_entry*)&rt->first_entry__; yacco2::Set_tbl* pla_set; yacco2::INT no_set_pairs; for(yacco2::UINT x = 1;x <= cnt_of_reducing_subrules;++x,++re){ pla_set = re->la_set__; no_set_pairs = pla_set->no_entries__; Set_entry_array_type* set_entry_array = (Set_entry_array_type*)&(pla_set->first_entry__); if(no_set_pairs > SEQ_SRCH_VS_BIN_SRCH_LIMIT) goto QUE_srch; for(int x=0;xpartition__){ if(LRK_LA_QUE_SET.elements__ & k_entry->elements__){ return re; }else{ break; // next reducing rule; not in set } } if(LRK_LA_QUE_SET.partition__ < k_entry->partition__) break; } continue;// next re QUE_srch: // see if meta \QUEshift in set lower = 1; upper = no_set_pairs; B2_que: // calc mid pt if(upper < lower) return 0; seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*set_entry_array)[mid_pt_rel0]; if(LRK_LA_QUE_SET.partition__ == k_entry->partition__){ if(LRK_LA_QUE_SET.elements__ & k_entry->elements__){ return re; }else{ continue; // this reducing rule not it so next reducing subrule } } if(LRK_LA_QUE_SET.partition__ > k_entry->partition__) goto B5_que; B4_que: // adjust upper upper= mid_pt - 1; goto B2_que; B5_que: // adjust lower lower = mid_pt + 1; goto B2_que; } return 0; } @*3 |find_reduce_entry|.\fbreak Use own bsearch to speed things up --- too much overhead in generic bsearch. See Knuth algo. --- variant used shift entry lookup. The reduce table contains a sequential list of potential reducing subrules. Each lookahead set is composed of pairs of set partition with its elements. Each entry is a 2 byte of compressed format. The number of pairs in the table is the 1st byte in the reducing set structure. The algorithm is potentially a 2 pass over the number of potential reducing subrules in the state. The pecking order is find the current token within the reducing state followed by other attempts of meta symbols, and last the \QUEshift{} symbol.\fbreak Pass 1: Is current token in one of the subrule lookahead sets.\fbreak If yes then exit with the appropriate reduce entry for that found reducing subrule.\fbreak \fbreak Pass 2: Is the Meta set elements found within one of the reducing subrules?\fbreak The Meta symbol LA set elements are Eolr, \INVshift, \ALLshift, \PROCshift, and \INVshift. If yes then exit with the appropriate subrule's reduce entry having found a meta symbol.\fbreak \fbreak Last gasp: Is \QUEshift{} in the LA sets?. \fbreak \fbreak As an optimization i implicitly use the current token who already has with it the compressed set key to be searched against the lookahead set. A wrinkle is support of the \QUEshift --- questionable situations. |has_questionable_shift_occured__| flags its use and so returns the 1st entry as it is a lr(0) context. It is not dependent on the lookahead symbol with its context search. @= yacco2::Reduce_entry* yacco2:: Parser:: find_reduce_entry(){@/ @< Reserve and get current stack record@>; State* State_ptr = pr->state__; UCHAR partition = current_token__->tok_co_ords__.set_entry__.partition__; UCHAR element = current_token__->tok_co_ords__.set_entry__.elements__; int cp = partition; int ce = element; Reduce_tbl* rt = State_ptr->reduce_tbl_ptr__; yacco2::USINT cnt_of_reducing_subrules = rt->no_entries__; Reduce_entry* re = (Reduce_entry*)&rt->first_entry__; yacco2::Set_tbl* pla_set; yacco2::INT no_set_pairs; int lower; int upper; int seg_ln; int mid_pt; int mid_pt_rel0; yacco2::Set_entry* k_entry; if(has_questionable_shift_occured__ == ON){ return re; } @; @; return find_questionable_sym_in_reduce_lookahead(); } @ Create element's key set. @= Set_entry la_set; @<|create_set_entry|@>; @ Pass1: find current tok in potential reducing subrules.\fbreak Rip thru the potential subrules list looking for mister current token. If found return its subrule's reduce entry. If not found against the subrules reducing LAs then it drops out of the loop and gives controll to Pass2. @= { Pass1_reduce:@/ re = (Reduce_entry*)&rt->first_entry__; for(yacco2::UINT x = 1;x <= cnt_of_reducing_subrules;++x,++re){ pla_set = re->la_set__; no_set_pairs = pla_set->no_entries__; Set_entry_array_type* set_entry_array = (Set_entry_array_type*)&(pla_set->first_entry__); if(no_set_pairs > SEQ_SRCH_VS_BIN_SRCH_LIMIT){ @; }else{ @; } } } @ Sequential search for token in current subrule la. @= for(int xx=0;xxpartition__){ if(element & k_entry->elements__){ return re; }else{ break; // next reducing rule; not in set } } if(partition < k_entry->partition__) break; } @ Binary search for token in current subrule la. @= { bin_srch_cur_tok:@/ lower = 1; upper = no_set_pairs; B2: // calc mid pt if(upper < lower) goto srch_end_cur_tok; seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*set_entry_array)[mid_pt_rel0]; B3: // compare if(partition == k_entry->partition__){ if(element & k_entry->elements__){ return re; }else{ goto srch_end_cur_tok; // T not in LA } } if(partition > k_entry->partition__) goto B5; B4: // adjust upper upper = mid_pt - 1; goto B2; B5: // adjust lower lower = mid_pt + 1; goto B2; srch_end_cur_tok:; } @ Pass2: find meta symbols in potential reducing subrules.\fbreak Rip thru the potential subrules list looking for meta symbols. If found return its subrule's reduce entry. If not found against the subrules reducing LAs then it drops out of the loop and gives controll to the last Gasp. @= { re = (Reduce_entry*)&rt->first_entry__; for(yacco2::UINT x = 1;x <= cnt_of_reducing_subrules;++x,++re){ pla_set = re->la_set__; no_set_pairs = pla_set->no_entries__; Set_entry_array_type* set_entry_array = (Set_entry_array_type*)&(pla_set->first_entry__); if(no_set_pairs > SEQ_SRCH_VS_BIN_SRCH_LIMIT){ @; }else{ @; } } } @ Sequential search for meta symbol in current subrule la. @= { seq_meta:@/ for(int x=0;xpartition__){ if(LRK_LA_EOLR_SET.elements__ & k_entry->elements__){ return re; }else{ break; // next reducing rule; not in set } } if(LRK_LA_EOLR_SET.partition__ < k_entry->partition__) break; } } @ Binary search for meta symbol in current subrule la. @= { bin_srch_meta: lower = 1; upper = no_set_pairs; Meta_srch: // see if meta Ts in set lower = 1; upper = no_set_pairs; B2_meta: // calc mid pt if(upper < lower){ continue; // next subrule return 0; } seg_ln = upper + lower; mid_pt = seg_ln >> 1; mid_pt_rel0 = mid_pt - 1; k_entry = &(*set_entry_array)[mid_pt_rel0]; if(LRK_LA_EOLR_SET.partition__ == k_entry->partition__){ if(LRK_LA_EOLR_SET.elements__ & k_entry->elements__){ return re; }else{ continue; // this reducing rule no meta so next reducing subrule } } if(LRK_LA_EOLR_SET.partition__ > k_entry->partition__) goto B5_meta; B4_meta: // adjust upper upper= mid_pt - 1; goto B2_meta; B5_meta: // adjust lower lower = mid_pt + 1; goto B2_meta; } @*2 |find_parallel_reduce_entry|.\fbreak See ``Notes to myself''. This is a lr(0) reduction. So pick up the first entry in the table. This forces a reduction to take place regardless of the ``lookahead'' token. It allows the calling parser to complete the reduction and then use the ``shift'' mechanism of \INVshift, \ALLshift to catch errors. @= yacco2::Reduce_entry* yacco2:: Parser:: find_parallel_reduce_entry(){@/ @< Reserve and get current stack record@>; State* State_ptr = pr->state__; Reduce_tbl* rt = State_ptr->reduce_tbl_ptr__; Reduce_entry* re = (Reduce_entry*)&rt->first_entry__; return re; } @*2 |find_proc_call_reduce_entry|.\fbreak See ``Notes to myself''. This is a lr(0) reduction. So pick up the first entry in the table. This forces a reduction to take place regardless of the ``lookahead'' token. It allows the calling parser to complete the reduction and then use the ``shift'' mechanism of \INVshift, \ALLshift to catch errors. @= yacco2::Reduce_entry* yacco2:: Parser:: find_proc_call_reduce_entry(){@/ @< Reserve and get current stack record@>; State* State_ptr = pr->state__; Reduce_tbl* rt = State_ptr->reduce_tbl_ptr__; Reduce_entry* re = (Reduce_entry*)&rt->first_entry__; return re; } @** Start token routines. @*2 |start_token|. @= yacco2:: CAbs_lr1_sym* yacco2:: Parser:: start_token(){ return start_token__; } @*2 |set_start_token|. @= void yacco2:: Parser:: set_start_token(CAbs_lr1_sym& Token){ start_token__ = &Token; } @*2 |start_token_pos|. @= yacco2::UINT yacco2:: Parser:: start_token_pos(){ return start_token_pos__; } @*2 |set_start_token_pos|. @= void yacco2:: Parser:: set_start_token_pos(yacco2::UINT Pos){ start_token_pos__ = Pos; } @*2 All shift routines.\fbreak These routines control how the parser reacts to the \ALLshift all shift terminal. As this terminal is never in the token stream, it is a condition that the parser checks within the current state's configuration. If the facility is on and the `all shift' terminal is present in the current PDA's state, then the parser shifts the terminal. Not on or present, the parser tries the next inline operation which is a reduce. The parser favors shifting over reducing. It is turned on both at initialization time and reset time after a parallel parse. It is up to the grammar writer to turn off this facility. To shutoff this facility, usually the syntax directed code tests for a specific terminal by its enumeration id during the shift operation. Shuting off of the facility allows the grammar to complete instead of sitting in an open loop of consuming terminals until an overrun occurs against the token stream. @*2 |set_use_all_shift_on|. @= void yacco2:: Parser:: set_use_all_shift_on(){ use_all_shift__ = ON; } @*2 |set_use_all_shift_off|. @= void yacco2:: Parser:: set_use_all_shift_off(){ use_all_shift__ = OFF; } @*2 |use_all_shift|. @= bool yacco2:: Parser:: use_all_shift(){ return use_all_shift__; } @** Parser symbol table functor and abort, stop routines. @*2 |sym_lookup_functor|.\fbreak This is your imported functor used to do token remapping: another term for symbol table handling. The functor is specific to the language being parsed. It has been tested against the Pascal language and Yacco2's grammar. Of course |cweb| was used to develop these symbol tables. @= yacco2::tble_lkup_type* yacco2:: Parser:: sym_lookup_functor(){ return sym_lookup_functor__; } @*2 |abort_parse|. @= bool yacco2:: Parser:: abort_parse(){ return abort_parse__; } @*2 |set_abort_parse|.\fbreak Used to abort abruptly a parse. Not too subtle. Directs the parser to do its abort-winddown thing. @= void yacco2:: Parser:: set_abort_parse(bool Abort){ abort_parse__ = Abort; } @*2 |stop_parse|. @= bool yacco2:: Parser:: stop_parse(){ return stop_parse__; } @*2 |set_stop_parse|.\fbreak Used to stop a parse. This is much more refined as one can place an error token into the accept queue for grammatical error processing and come to a gentle stop. This is a refinement to an abort. It does the same thing as abort in its cleanup except that it is considered a successful parse. This process is a grammar writer's statement within syntax directed code whereas the abort comes from 2 sources: the grammar writer's syntax directed code or an invalid token stream causing the parse thread to abort. Cavate: You still must use the |RSVP| macro to place the token into the accept queue. If you don't, you'll get a runtime check due to the accepted token (current lookahead token) having the same lookahead token boundary. @= void yacco2:: Parser:: set_stop_parse(bool Stop){ stop_parse__ = Stop; } @** Parser's FSM support routines. @*2 |fsm_tbl|.\fbreak Just the fsm automaton ensemble. @= yacco2::CAbs_fsm* yacco2:: Parser:: fsm_tbl(){ return fsm_tbl__; } @** Parse containers. The four containers are:\fbreak \ptindent{Token supplier --- input token stream to parser} \ptindent{Token producer --- receives output from the parser for next stage processing} \ptindent{Error --- container of error terminals} \ptindent{Recycle --- ecological bio-degradable } As containers are template driven due to the diversity of inputs, there are 2 typedefs describing containers. |token_container_type| is a |tok_can| based template that other containers inherit from. Used by the |error| queue is the |TOKEN_GAGGLE| container based on a vector template. The 2 variants of an input queue are the source file to raw character conversion, and the regular supplier queue that feeds the lexical and syntatic parse stages. These are specialized templates. Another container handles tree related structures with their associated walkers and terminal filter functors. This allows one to process a tree as a stream of tokens that get digested by a grammar. The filter has a complement indicator as to include or exclude the terminal types in the filter set. This eases the declaration task of the compiler writer. Given a large population of terminal types, the set of exclusion terminal enumerates minimizes the effort of unwanted terminals in the parse stream. The same holds for a small number of terminals for processing using inclusion. See |tree containers|. @+= struct Caccept_parse; #define pp_accept_queue_size 8 typedef yacco2::Caccept_parse pp_accept_queue_type[pp_accept_queue_size];@/ @*2 Supplier container.\fbreak This is your standard token dispensor that feeds a parser. Due to parallelism, there is only 1 supplier of tokens. Somewhere in the call chain of the threads there is a token dispensor. It is always supplied by the calling grammar to its threads. The container is a ``many reader'' to the called threads of parallelism. @*3 |token_supplier|. @= yacco2::token_container_type* yacco2:: Parser:: token_supplier(){ return token_supplier__; } @*3 |set_token_supplier|. @= void yacco2:: Parser:: set_token_supplier(yacco2::token_container_type& Token_supplier){ token_supplier__ = &Token_supplier; } @*3 |add_token_to_supplier|. @= void yacco2:: Parser:: add_token_to_supplier(yacco2::CAbs_lr1_sym& Token){ if(token_supplier__->r_w_cnt__ > 1) @; token_supplier__->push_back(Token); if(token_supplier__->r_w_cnt__ > 1) @; } @*2 Producer container.\fbreak Receiver of outputted terminals from a parse stage. It normally becomes a supplier queue to a down stream parse stage. @*3 |token_producer|. @= yacco2::token_container_type* yacco2:: Parser:: token_producer(){ return token_producer__; } @*3 |set_token_producer|. @= void yacco2:: Parser:: set_token_producer(yacco2::token_container_type& Token_producer){ token_producer__ = &Token_producer; } @*3 |add_token_to_producer|. @= void yacco2:: Parser:: add_token_to_producer(yacco2::CAbs_lr1_sym& Token){ @; token_producer__->push_back(Token); @; } @*2 Recycle container.\fbreak A holder of tokens that need to be postprocessed. Typical use is to remove tokens out of a token stream but will be re-integrated later back into some other token stream. For example, a translator that retargets one language into another and the comments need re-integrating back into the targetted output. @*3 |recycle_bin|. @= yacco2::token_container_type* yacco2:: Parser:: recycle_bin(){ return recycle_bin__; } @*3 |set_recycle_bin|. @= void yacco2:: Parser:: set_recycle_bin(yacco2::token_container_type& Recycle_bin){ recycle_bin__ = &Recycle_bin; } @*3 |add_token_to_recycle_bin|. @= void yacco2:: Parser:: add_token_to_recycle_bin(yacco2::CAbs_lr1_sym& Token){ @; recycle_bin__->push_back(Token); @; } @*2 Error queue.\fbreak Just a holding container for error terminals. I use this container to express warnings and errors within Yacco2. If one is creative, error sentences can be outputted that will be later parsed by an error grammar. This is how Yacco2 handles its errors outputted to the grammar writer by matching the errors to the source file co-ordinates. The error queue is just another input queue to be parsed. Error sentences can be expressed be it of a single token to a complete language of various structures. To process the errors, it can be as simple as iterating through the container, to use a grammar having only the `all shift' facility, to grammars describing the error language. @*3 |set_error_queue|. @= void yacco2:: Parser:: set_error_queue(yacco2::token_container_type& Error_queue){ error_queue__ = &Error_queue; } @*3 |error_queue|. @= yacco2::token_container_type* yacco2:: Parser:: error_queue(){return error_queue__;} @*3 |add_token_to_error_queue|. @= void yacco2:: Parser:: add_token_to_error_queue(yacco2::CAbs_lr1_sym& Token){ @; @; error_queue__->push_back(Token); @; } @*2 Accept queue |RSVP|, |RSVP_FSM|, |RSVP_WLA| macro use comments.\fbreak This is an array where the arbitrator's syntax directed code tests against it for the specific presence of an accepted token. For example, the terminals `identifier' and `keyword' are parallel competitors. The arbitrator needs to test if the keyword is present to throw away the identifier. The |RSVP| macro is used to added to the parser's accept queue from within the grammar's rule context. The |RSVP_WLA| macro is used to added to the parser's accept queue and to use its lookahead parameters instead of the defaults. The |RSVP_FSM| macro is used to added to the parser's accept queue from within the fsm's context. |put_T_into_accept_queue| is another way to do it. @*3 Put potential |Caccept_parse| into accept queue.\fbreak |Caccept_parse| is just a carrier of the real terminal contained inside it. The parallel thread submitting its result to the accept queue already has ownership of |pp_requesting_parallelism__|'s mutex. |pp_accept_queue__| is an array where the 0 subscript does nothing. The parameter is needed as this is the context of the called thread who is placing its contents into the calling thread's accept queue. @+= void yacco2:: Parser:: put_T_into_accept_queue(yacco2::Caccept_parse& Calling_parm){ ++th_accepting_cnt__; if(th_accepting_cnt__ < pp_accept_queue_size){ pp_accept_queue__[th_accepting_cnt__].fill_it(Calling_parm);// copy its contents }else{// throw error abort_accept_queue_irregularites(Calling_parm); } } @** Token Get routines: specific stack token, next token in stream. A word on the subscript used to access a container's content. I'm not a fan of relative-to-zero situations. I count by 1 and a 2 and a... Lawrence Welk anyone? Just because its more efficient to access an array by relative-to-zero subscripts doesn't mean that I must adhold to this. So what are the options. Sit quite and be efficient... ugh. Hear my teeth grinding? Subtract 1 from the subscript every time the container is being accessed: a bit too expensive --- what, can't u count this way? Put a boggus record at container creation time into the zero position of the container. Humm --- consider it a bs record: before start. Now what are the merits: no calculation required, Dave can count, and no off-by-one situations. Now the demerits: extra space, must watch to skip over the first item in the container if iterators are used. Oh well. Come on u old dog or is it Humpty Dumpty had a great... No, one is one and that's it. For now the relative-to-zero works. To integrate symbol table facilities into the Yacco2, a functor was created. Appropriate |cweb| macros were written to easy the pain. |Remap_token| retargets the token read from the input stream. It clones off the token having the same source co-ordinates. Its logic est tres simple:\fbreak \ptindent{1) is there a symbol table functor present: no return token fetched} \ptindent{2) is symbol table lookup turned off: yes return token fetched} \ptindent{3) try look up: if returned token is nil return the fetched token} \ptindent{4) return the looked up token} There are 2 companion |cweb| macros: |Remap_set_result_and_return| and |Remap_return_result|. The first macro takes the symbol table's returned token and sets it as the parser's current token and returns the new token. |Remap_return_result| just returns the retargeted token used by |get_spec_token| which is a random query of a token stream. Remapped tokens eventually get put into other token containers for down stream processing. @d Remap_token(Token) if (sym_lookup_functor__ == 0) return Token; if (sym_lookup_functor__->lkup__ == OFF){ return Token; } CAbs_lr1_sym* x = sym_lookup_functor__->operator()(Token); if(x == 0) return Token; @d Remap_set_result_and_return(Token) Token = x; return Token; @d Remap_return_result return x; @*3 |get_spec_stack_token|. @= yacco2::CAbs_lr1_sym* yacco2::Parser:: get_spec_stack_token(yacco2::UINT Pos){ if(Pos > MAX_LR_STK_ITEMS) return 0;// |is_pos_within_bnds| Cparse_record* pr = parse_stack__.sf_by_sub(Pos); return pr->symbol__; } @*3 |get_next_token|. @^ To do RETHINK parse stack macros rel 1 + 0@> Due to the ``jit'' accessing the mutex guarding the container read is NEEDED. Tests between not ``jit'' versus ``jit'' with mutex yielded just 3 seconds difference across 80 compiles. SO KEEP IT. \fbreak \fbreak Some subtle comments on overflow per token container.\fbreak The container template implements the access [] operator which guards against overflow. It returns the ``eog'' token to indicate end-of-token stream reached. In this context the end-of-token stream depends on the specific container. From a tree container's perspective, the container's size is open-ended and its internal tree walking stack determines whether it has been reached. It returns the maximum unsigned integer value within its size method which forces a call using the access operator []. So the size method is not quite accurate though the other containers are. But what is your problem Dave? When porting to VMS/Alpha, the implemented virtual method of the container template did not execute the |TOKEN_GAGGLE| container's virtual operator [] which tests its internal state before accessing its own internal stl array container's access operator. |TOKEN_GAGGLE| container is specificly declared for the ``Error queue'' while all the other containers used in parsing like Supplier and Producer are abstract |tok_base| type which forces the compiler to call the implemented virtual table of the container to deal with size, [] and other methods. |tok_base| enforces regularity. When parsing the ``Error queue'' aka |TOKEN_GAGGLE| using a grammar/Parser approach, the native container's [] operator and not the virtual method was called and so aborted on ``array bounds exceeded'' error. This is why the pre and post overflow evaluation before calling the container's access [] operator. The first check is ``has overflow already happened'' and so don't increment |current_token_pos__|, just reset the |current_token__| to ``eog'' and exit. This keeps the internal token subscript accurate. The post overflow evaluation is after the |current_token_pos__| increment to see if it just reached the end-of-token stream condition and so set |current_token__| to ``eog'' and exit. \fbreak \fbreak Extracting the token from the container:\fbreak So now the Parser's token container needs to be called to get its next token with the incremented subscript. It is up to the token container's implementation to determine whether the token is within its internal stl's container's bounds. The subscript is checked against the stl container's size method for the overflow condition and to take appropriate action which is return the ``eog'' token back. Finally the internal stl's container is accessed by its [] operation to extract the called for token. @= yacco2::CAbs_lr1_sym* yacco2:: Parser:: get_next_token(){ if (token_supplier__==0) return 0;//|is_there_a_token_supplier:| if (token_supplier__->empty() == true){// out-of-bnds: protect current pos current_token__ = yacco2::PTR_LR1_eog__; return current_token__; } if (current_token_pos__ >= token_supplier__->size()){// out-of-bnds: protect current pos current_token__ = yacco2::PTR_LR1_eog__; return current_token__; } ++current_token_pos__; if (current_token_pos__ >= token_supplier__->size()){// out-of-bnds: protect current pos current_token__ = yacco2::PTR_LR1_eog__; return current_token__; } if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << " get_next_token:: pos to fetch: " << current_token_pos__ << FILE_LINE << std::endl; @; } current_token__ = (*token_supplier__)[current_token_pos__]; if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << " get_next_token:: pos: " << current_token_pos__ << " enum: " << current_token__->enumerated_id__ << ' ' << '"' << current_token__->id__ << '"' << " token fetched*: " << current_token__ << FILE_LINE << std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(current_token__) yacco2::lrclog <<" GPS LINE: " <tok_co_ords__.line_no__ <<" GPS CHR POS: " <tok_co_ords__.pos_in_line__ << FILE_LINE <tok_co_ords__.line_no__ << " GPS CHR POS: " << current_token__->tok_co_ords__.pos_in_line__ << FILE_LINE << std::endl; @; } Remap_token(current_token__)@/ if((YACCO2_T__ != 0) && (sym_lookup_functor__ != 0)){ if(sym_lookup_functor__->lkup__ == ON != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << " get_next_token:: pos: " << current_token_pos__ << " enum: " << current_token__->enumerated_id__ << ' ' << " after remap " << '"' << current_token__->id__ << '"' << " token fetched*: " << current_token__ << FILE_LINE << std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(current_token__) yacco2::lrclog << " GPS LINE: " << current_token__->tok_co_ords__.line_no__ << " GPS CHR POS: " << current_token__->tok_co_ords__.pos_in_line__ << FILE_LINE << std::endl; @; } } Remap_set_result_and_return(current_token__)@/ } @*3 |get_spec_token|. @= yacco2::CAbs_lr1_sym* yacco2:: Parser:: get_spec_token(yacco2::UINT Pos){@/ @; @; @; if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << "::" << " get_spec_token pos: " << Pos << FILE_LINE << std::endl; @; } CAbs_lr1_sym* token = (*token_supplier__)[Pos]; if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << "::" << " get_spec_token: returned token " << token->id__ << " pos: " << Pos << " enum: " << token->enumerated_id__ << '"' << token->id__ << '"' << FILE_LINE << std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(token) yacco2::lrclog <<" GPS LINE: " <tok_co_ords__.line_no__ <<" GPS CHR POS: " <tok_co_ords__.pos_in_line__ << FILE_LINE <; } Remap_token(token)@/ if((YACCO2_T__ != 0) && (sym_lookup_functor__ != 0)){ if(sym_lookup_functor__->lkup__ == ON != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << "::" << " get_spec_token: returned token " << token->id__ << " pos: " << Pos << " enum: " << token->enumerated_id__ << " after remap " << '"' << token->id__ << '"' << FILE_LINE<< std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(token) yacco2::lrclog <<" GPS LINE: " <tok_co_ords__.line_no__ <<" GPS CHR POS: " <tok_co_ords__.pos_in_line__ << FILE_LINE <; } } Remap_return_result@/ } @ Any tokens in container?. no return nil ptr. @= if(token_supplier__->empty() == YES) return 0; @** Parse stack routines. Currently the subscript to access the stack is relative to ONE. @^ To revisit - rel 0, 1 attitude on parse stack. currently rel. ZERO@> @ |cleanup_stack_due_to_abort|. The last item on the stack is left so that the thread can be re-used. This is why its one less for the popping. The thread sits idle, twirling its whatever until a requesting grammar asks to be serviced. @*2 |cleanup_stack_due_to_abort|. @= void yacco2:: Parser:: cleanup_stack_due_to_abort(){ yacco2::INT stack_items_to_process = parse_stack__.top_sub__-1; if(stack_items_to_process > 0){ remove_from_stack(stack_items_to_process); } set_abort_parse(OFF); set_stop_parse(OFF); } @*2 |current_stack_pos|. @= yacco2::INT yacco2:: Parser:: current_stack_pos(){ return parse_stack__.top_sub__; } @*2 |parse_stack|. @= yacco2::lr_stk* yacco2:: Parser:: parse_stack(){ return &parse_stack__; } @*2 |top_stack_record|. @= yacco2::Cparse_record* yacco2:: Parser:: top_stack_record(){ if(parse_stack__.top_sub__ < 1) return 0; //if(parse\_stack\_\_.empty() == YES) return 0; @< Reserve and get current stack...@>; return pr;@/ } @*2 |get_stack_record|.\fbreak The subscript of stack is rel 1 not 0 while the request is rel to 0. In between counting strategies: Ugh! @= yacco2::Cparse_record* yacco2:: Parser:: get_stack_record(yacco2::INT Pos){ @; if(Pos >= (parse_stack__.top_sub__)) return 0; return parse_stack__.sf_by_sub(Pos+1); } @*2 |no_items_on_stack|. \fbreak Twist no oliver, it returns one less than whats on the stack. The reason is the first stack record, which is the |start state| of the finite automaton, is always maintained for optimization reasons. This allows the parser to begin just start when its re-commissioned to work. Normally calling |no_items_on_stack| is a general way to winddown the parse be it successful or aborted. @= yacco2::INT yacco2:: Parser:: no_items_on_stack(){ return parse_stack__.top_sub__; } @*2 Add state to parse stack |add_to_stack|. @= void yacco2:: Parser:: add_to_stack(yacco2::State& State){@/ parse_stack__.push_state(State); @; } @ Add to parse stack --- Speed Demon. @<|add_to_stack|@>= @<|lr_stk::push_state|@>; //; @*2 Remove items from the parse stack |remove_from_stack|.\fbreak Parse stack is a LIFO order of ${}$ configuration pairs. The parse stack configuration for S1 shifting `a' into S2 has 2 records. The first record contains as an example without the pointer ${1:`a'}$. Symbol `a' is the shift item that takes the finite state from state 1 into state 2. The second record contains the entered state ${2:nil}$. There is no symbol as the next parse action has not happened. This routine also cleans up aborted parses. It always leaves the first parse record on the stack as an optimization as the thread is snapping its fingers for the next message request to parse. @= void yacco2:: Parser:: remove_from_stack(yacco2::INT No_to_remove){ @; @< Validate parse stack removal for underflow@>; @< Check parse stack for epsilon removal...@>; @< Remove items from the parse stack@>; } @ Check parse stack for epsilon removal |remove_from_stack|. @< Check parse stack for epsilon removal. yes exit@>= if (No_to_remove == 0){ @; return; } @ Reserve and get current stack record. @< Reserve and get current stack record@>= Cparse_record* pr = parse_stack__.top__; @ Get current stack record. @< Get current stack record@>= pr = parse_stack__.top__; @ Initialize stack record. @< Initialize stack record@>= pr->symbol__ = 0; pr->aborted__ = 0; pr->rule_s_reuse_entry_ptr__ = 0; @ Pop parse stack. @< Pop parse stack@>= --parse_stack__.top_sub__; --parse_stack__.top__; //parse\_stack\_\_.pop(); @ Clean up parse stack record and pop from stack.\fbreak When the state is popped, the exposed record is the state:symbol pair used by the finite automaton to map into the state just popped. @< Clean up parse stack record and pop state from stack exposing symbol record@>= @< Initialize stack record@>; @< Pop parse stack@>; @< Get current stack record@>;// symbol record @ Check for zeroed out symbol on parse stack.\fbreak This situation can happen if the grammar user plays with the stack's symbols. Once apon a time, meta symbols were zeroed out to protect from deletion due to their re-cycled nature: for example the parallel and invisible shift symbols are created once and recycled many times throughout the parse history. Now these symbols are protected by having their |auto_delete| attribute turned off. @< Check for zeroed out symbol on parse stack. If so goto next element to remove@>= if(pr->symbol__ == 0){ @; goto next_stack_element_to_remove; } @ Is popping symbol auto deleted?.\fbreak This deals with the grammar symbol's `AD' attribute. Due to MSN and their bug brigade, |@^MSN heap delete bug...@>|, the delete arttribute is commented out. So the memory heap just grows but with no occasional aborts. When the parser stops, it's left to the operating system to reset the heap allocated to the program. @< Is popping symbol auto deleted? then deal with it and goto next element to remove @>= if(pr->rule_s_reuse_entry_ptr__ != 0){ fsm_tbl__->recycle_rule(pr->rule_s_reuse_entry_ptr__); pr->rule_s_reuse_entry_ptr__ = 0;// wipe off the rule from the ``in use'' slate } else{ if (pr->symbol__->auto_delete__ == ON){ @; if(pr->symbol__->dtor__ != 0) (*pr->symbol__->dtor__)(pr->symbol__,this); delete pr->symbol__; pr->symbol__ = 0;// keep that stack clean goto next_stack_element_to_remove; } } @ Check for aborted parse situation. \fbreak If the parse record is clean, then goto next element to remove. @= if(pr->aborted__ == 0) goto next_stack_element_to_remove; @ Deal with auto abort.\fbreak This is the grammar symbol's `AB' attribute. It checks to see if there is a destructor function to run. @= if(pr->rule_s_reuse_entry_ptr__ != 0){ fsm_tbl__->recycle_rule(pr->rule_s_reuse_entry_ptr__); pr->rule_s_reuse_entry_ptr__ = 0;// wipe off the rule from the ``in use'' slate }else{ if(pr->symbol__->affected_by_abort__ == OFF) goto next_stack_element_to_remove; if(pr->symbol__->dtor__ != 0)@/ (*pr->symbol__->dtor__)(pr->symbol__,this); delete pr->symbol__; } @ Remove items from the parse stack. \fbreak The remove routine is a straddler. The number of records to pop is the appropriate grammar's subrule: all the king's men... The straddler part is how the PDA works: the top record is the state just entered. The symbol that vectored into it is one back. This is the straggler. So one is popping the vectored into state leaving the exposed symbol record. This holds for accepted and aborted parse situations. The Start state record is always on the stack: even at parse shutdown as there is nothing to clean up. @< Remove items from the parse stack@>= Cparse_record* pr; @< Get current stack record@>; @; while (No_to_remove > 0){ @; @< Clean up parse stack record and pop...@>; @< Check for zeroed out symbol...@>; @< Trace TH exposed symbol on parse stack@>; @< Is popping symbol auto deleted? ...@>; @; @; @; @< Initialize stack record@>; next_stack_element_to_remove:@/ --No_to_remove; } @; @*2 |clear_parse_stack|. @= void yacco2:: Parser:: clear_parse_stack(){ yacco2::INT s = parse_stack__.top_sub__-1;// always leave 1st parse record if (s > 0) remove_from_stack(s); if (s == 0){// cleanse possible acceptance start rule @; if(pr->rule_s_reuse_entry_ptr__ != 0){//don't need hanging around like a dirty smell pr->rule_s_reuse_entry_ptr__ = 0;// already recycled } } } @** Token Get, Reset, Override Flavours: |current_token|, |reset_current_token|, etc.\fbreak @*2 |current_token|.\fbreak It checks whether it has a symbol table lookup functor. If it does not exist or the facility is turned off, the current terminal is returned. The table lookup will try to remap a generic terminal. The terminal remapped can be anything. This is dependent on the functor written for the language being compiled. @= yacco2::CAbs_lr1_sym* yacco2:: Parser:: current_token(){ Remap_token(current_token__)@/ Remap_set_result_and_return(current_token__)@/ } @*2 Reset current token.\fbreak |reset_current_token| 15 micro seconds of fame by re-aligning the calling parser's current token's co-ordinate within the token stream using the |Pos| parameter. @= void yacco2:: Parser:: reset_current_token(yacco2::UINT Pos){@/ @; @; if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << "::" << " reset_current_token pos: " << Pos << FILE_LINE << std::endl; @; } current_token_pos__ = Pos; current_token__ = (*token_supplier__)[Pos]; if(YACCO2_T__ != 0){ @; yacco2::lrclog << "YACCO2_T__::" << thread_no__ << "::" << thread_name() << "::" << " reset_current_token: token to: " << current_token__->id__ << " pos: " << current_token_pos__ << " enum: " << current_token__->enumerated_id__ << '"' << current_token__->id__ << '"' << FILE_LINE << std::endl; yacco2::lrclog << "\t\t::GPS FILE: "; EXTERNAL_GPSing(current_token__) yacco2::lrclog <<" GPS LINE: " <tok_co_ords__.line_no__ <<" GPS CHR POS: " <tok_co_ords__.pos_in_line__ << FILE_LINE <; } } @*2 |override_current_token|. @= void yacco2:: Parser:: override_current_token (yacco2::CAbs_lr1_sym& Token ,yacco2::UINT Pos){ current_token_pos__ = Pos; current_token__ = &Token; } @*2 |override_current_token_pos|. @= void yacco2:: Parser:: override_current_token_pos(yacco2::UINT Pos){ current_token_pos__ = Pos; } @*2 |current_token_pos|. @= yacco2::UINT yacco2:: Parser:: current_token_pos(){ return current_token_pos__; } @*2 Get shift's next token |get_shift_s_next_token|. @= void yacco2:: Parser:: get_shift_s_next_token(){ get_next_token(); } @** Thread name of grammar that is a thread. Monolithic grammars use their ``fsm'' name. @*2 |thread_name|. @= yacco2::KCHARP yacco2:: Parser:: thread_name(){ return thread_name__; } @*2 Thread entry.\fbreak Contains all the dirt about the thread. This entry is |nil| if its a monolithic grammar. This entry's thread id is used as the key into the parallel thread global table. @= yacco2::Thread_entry* yacco2:: Parser:: thread_entry(){ return thread_entry__; } @** Thread ``hows and whys'' on thread activation.\fbreak There are just 2 critical region classifications:\fbreak \ptindent{1) launched threads' table} \ptindent{2) each grammar's threading region} Each grammar's threading region supports the framework for inter-thread communications: messaging (re: events) and acceptance token queue --- tokens passed back as results from a thread's execution.\fbreak Messaging components:\fbreak The |th_active_cnt__| and |th_accepting_cnt__| are variables that are dynamicly set at each thread launch invocation within the launching grammar. The number of attempted parallel parses is indicated by the |th_active_cnt__| which is the launched number of threads. As each thread stops processing, it decrements the counter of the launching grammar. When the counter reaches 0, it is that thread's responsibility to notify the sleeping |pp| parser by event to wake up and assess the parallel parse results. |th_accepting_cnt__| is the number of accept messages placed into the message queue by successful parallel parses. This number can be 0 indicating that all the attempted parallel parses have failed. Originally the control monitor was the go between for the grammar requesting parallelism and the threads controlled by it. Now the requesting grammar launches the threads given by the its fa's configuration state. A little optimization is done by the requesting grammar: only launch threads whose first set contains the current token. The launching first checks if the thread is in the global thread table and that it is available for work. To further the pursuit of speed, variables |no_competing_pp_ths__| and |no_requested_ths_to_run__| determine how the threads should be executed within the local context of the launched grammar. If there is only 1 thread to launch, it is executed as a procedure call without the thread baggage and its critical region entourage (not any more: pure thrrreading in the scotish roll of ``r''). Why the 2 variables? |no_competing_pp_ths__| tells the current thread how many others are competing and have been launched by the requesting grammar. Without it being local the threaded grammar needs to acquire the mutex of its caller to determine the number of launched threads. It is a read-only variable that receives its value from the requesting grammar's |no_requested_ths_to_run__| variable at start up time. If this grammar requests parallelism, it sets its own |no_requested_ths_to_run__| variable and calls the appropriate threads who in term set their |no_competing_pp_ths__| variable at their invocation time. The nesting of threads requires this 2 variable approach: read-only, and read/write along with the optimization requirement. The last part to the flow of messages between threads and the launching grammar is the waking up of the calling grammar. The launching grammar waits on ``the wakeup'' event posted by the last completed execution of the launched threads Originally there were many posted messages due to the above middlemen but this was streamlined to just wake up the grammar requesting parallelism. It then checks the critical region variable |th_accepting_cnt__| as to whether any of the launched threads were successful. Why are there variants on ``Wait for an event with or no loop''? Cuz of ``pthread'' implementations. It depends on how the library deals with messages for an intended thread that has not gone into the waiting stupor. Some ``pthread'' implementation will queue up the potential message while others just drop it. It's a question of how to sync the wait. If the ``pthread'' supports a future thread eventually getting to wait on the message and the called thread has already fired off the message, this pooled ``to be awakened'' message will be be forwarded to the thread asking to be put on hold. Your choice. @*1 How to call a thread. @*2 Procedure call: |start_procedure_call|. @= yacco2::THR_result Parser::start_procedure_call(yacco2::State& S){ th_active_cnt__ = 1; no_requested_ths_to_run__ = 1; @; THR_result rslt = (*S.proc_call_addr__)(this); @; return rslt; } @*2 Manually: |spawn_thread_manually|.\fbreak There is no checking on the first set of the thread. It just runs it. Allows the grammar writer to explicitly run a thread. @= bool yacco2:: Parser:: spawn_thread_manually(yacco2::USINT Thread_id){ yacco2::thread_array_record* thd_stable = (yacco2::thread_array_record*)THDS_STABLE__; Thread_entry** thd_tbl = (Thread_entry**)&thd_stable->first_entry__; int no_thds = thd_stable->no_entries__ - 1;// rel to 0 if((Thread_id > no_thds)){ char a[BUFFER_SIZE]; @.Err spawn\_thread\_manually thre...@> yacco2::KCHARP msg = "spawn_thread_manually thread id: %i out of bounds 0 to %i: no thread available"; sprintf(a,msg,Thread_id,no_thds); Yacco2_faulty_precondition(a,__FILE__,__LINE__); exit(1); } th_lst__.clear(); Thread_entry* pe = thd_tbl[Thread_id]; th_lst__.push_back(pe); return start_threads(); } @*2 Start threads: |start_threads|.\fbreak The grammar has already determined what threads to launch before calling this routine. See |@| for details. It supplies this threads thru its own private list. It searches through the global table for a thread tapping its toes to some ipod beat. If the thread is not in the table, the thread is created and passed back. If the thread is found and it's snapping its fingers for service--- gar\c con, then it is taken, marked in the table as working, and passed back. The last condition is the thread is found but not available to work as it already is working. This situation is nested parallelism which is equivalent to recursion used by top down parses. So, create the thread and enter it in the global table list of same thread, run it, and return. Question. Why do you use a global mutex to protect the global thread table? As I do not know how a template runtime library controls multi-access, this is an assurance that there is no destruction or strange behaviours caused by multiple cpu systems or hyper thread systems. This might be overkill but it can be fine tuned when ported to a specific platform having standard template library thread safety. Just comment out the contents of |@| and |@|.\fbreak \fbreak Dance of the thread / procedure samba.\fbreak Sirens of speed are calling. The procedure call happenns when there is only 1 thread to call so its sidekick doubles for him. What happens when this sidekick is called recursively? For speed reasons, the called procedure's fsm table is static and global. Rephrased having the fsm table locally defined in the procedure takes on the ctor / use / dtor overhead. So? Well recursion becomes a destructive action on the singular fsm table. 2 or more chefs adding salt to the same pot without their knowledge of the other. Now detect whether the procedure is in use so that its thread partner does the strutting. @^Porting - cr on global thread table@> @*4 Determine if there are threads to run by current token. @= th_lst__.clear(); find_threads_by_first_set(id_of_T,th_lst__,*S.state_s_thread_tbl__); @*4 Are there threads to run?. no exit with no-thds-to-run result. @= if(th_lst__.empty() == YES) return Parser::no_thds_to_run; @*4 Acquire global thread table critical region. @= if(yacco2::YACCO2_MU_TH_TBL__){ @; yacco2::lrclog << " --> Attempting to acquire thread table Mutex" << FILE_LINE << std::endl; @; } LOCK_MUTEX(yacco2::TH_TBL_MU); if(yacco2::YACCO2_MU_TH_TBL__){ @; yacco2::lrclog << " --> Acquired thread table Mutex" << FILE_LINE << std::endl; @; } @*4 Release global thread table critical region. @= if(yacco2::YACCO2_MU_TH_TBL__){ @; yacco2::lrclog << " --> Attempting to release thread table Mutex" << FILE_LINE << std::endl; @; } UNLOCK_MUTEX(yacco2::TH_TBL_MU); if(yacco2::YACCO2_MU_TH_TBL__){ @; yacco2::lrclog << " --> Released thread table Mutex" << FILE_LINE << std::endl; @; } @*4 Determine disposition of thread in global thread table.\fbreak There are 3 possibilities:\fbreak \ptindent{1) thread not in global table so needs to be created} \ptindent{2) all threads of same name busy so need to create another copy - nested situation} \ptindent{3) thread loitering around so put it to work} @= int thread_disposition(0); Parallel_thread_list_type& i = Parallel_thread_table[pe->thd_id__]; Parallel_thread_list_iterator_type j; Parallel_thread_list_iterator_type je; worker_thread_blk* tb ; if(i.empty() == true){ thread_disposition = NO_THREAD_AT_ALL; goto dispatch_disposition; } j = i.begin(); je = i.end(); for(;j != je;++j){ tb = *j; @; if(tb->status__ == THREAD_WAITING_FOR_WORK){ thread_disposition = THREAD_WAITING_FOR_WORK; goto dispatch_disposition; } } thread_disposition = ALL_THREADS_BUSY; goto dispatch_disposition; @*4 Dispatch on thread availability.\fbreak Note at the time of thread creation, it will fill in its operating system's ``thread no'' returned from |THREAD_SELF| procedure. Also the thread's |pp_requesting_parallelism__|, |from_thread__|, and |no_competing_pp_ths__| gets filled in by the canned |wpp_core.h| code. So this is why u do not see these variables set in the code parts of |NO_THREAD_AT_ALL| , and |ALL_THREADS_BUSY|. @= switch (thread_disposition){ case THREAD_WAITING_FOR_WORK:{ LOCK_MUTEX_OF_CALLED_PARSER(tb->grammar_s_parser__->mu__ ,*tb->grammar_s_parser__," of self"); tb->status__ = THREAD_WORKING; ++tb->run_cnt__; tb->grammar_s_parser__->pp_requesting_parallelism__ = this; tb->grammar_s_parser__->no_competing_pp_ths__ = this->no_requested_ths_to_run__; tb->grammar_s_parser__->from_thread__ = this; @; UNLOCK_MUTEX_OF_CALLED_PARSER(tb->grammar_s_parser__->mu__ ,*tb->grammar_s_parser__," of self"); SIGNAL_COND_VAR(*tb->grammar_s_parser__,*this); break; } case NO_THREAD_AT_ALL:{ @; THR_result result = CREATE_THREAD(pe->thread_fnct_ptr__,*this); break; } case ALL_THREADS_BUSY:{ @; yacco2::THR_result result = CREATE_THREAD(pe->thread_fnct_ptr__,*this); break; } } @*4 Request threads to work.\fbreak It goes thru the thread list of the current fa's state configuration. If there is only 1 thread to be run, it calls it as a procedure rather than as a thread. The crowd is going mad... A little Fraggle Roc. I got to keep that white cane from removing me off the stage. Why the ``|VMS__|'' macro variable? Don't ask, HP fumbled the pthread library implementation and the procedure call interfers with their pananoia. Blow ups on what they think is recursion to same mutex whereby a called procedure can then down the grammar call chain call itself again but the thread is launched as a thread. There is no interference on mutex recursion: each instantiation of a thread / procedure call contains its own mutex / conditional variable. Oh well enough of the core dump reguritation. Also see their stutter on the |pthread_attr_t| variable that does not default properly on stack size. It really blows its brains out even with their debugger as the firing up of the threads can't even get the registers created and so nada on the debugger scene with bad exception thrown. @= th_active_cnt__ = th_lst__.size(); no_requested_ths_to_run__ = th_active_cnt__; yacco2_threads_to_run_iter_type i = th_lst__.begin(); yacco2_threads_to_run_iter_type ie = th_lst__.end(); USINT new_r_w_cnt = supplier_r_w_cnt__ + no_requested_ths_to_run__ - 1; if(new_r_w_cnt > 1){ if(supplier_r_w_cnt__ == 1){ if(token_supplier__ != 0){ token_supplier__->r_w_cnt__ = new_r_w_cnt;} }else{ if(token_supplier__ != 0){ @; token_supplier__->r_w_cnt__ = new_r_w_cnt; @; } } } Thread_entry* pe = *i; @; #ifndef VMS111__ if(no_requested_ths_to_run__ > 1) goto thread_call; procedure_call:{ if(Parallel_thread_proc_call_table[pe->thd_id__].proc_call_in_use__ == true){ @; goto thread_call; } Parallel_thread_proc_call_table[pe->thd_id__].proc_call_in_use__ = true; @; @; THR_result rslt = (*pe->proc_thread_fnct_ptr__)(this); @; Parallel_thread_proc_call_table[pe->thd_id__].proc_call_in_use__ = false; @; @; return CALLED_AS_PROC; } #endif thread_call:{ for(;i != ie;++i){ pe = *i; @; @; dispatch_disposition:@/ @; @; } } @; return CALLED_AS_THREAD; @*3 |start_threads|. @= bool yacco2:: Parser::start_threads(){ @; @; } @*1 Call arbitrator: |call_arbitrator|.\fbreak No distinction made between automatically launched thread and its manual breathern. A pre-canned arbitrator |AR_for_manual_thread_spawning| is used that just returns the first item in the queue cuz there is no specialized selective code. There is a check as to more than one accept message within the queue that produces a thrown error. Note the optimization code: If there is only 1 parallel thread within the configuration and there is no arbritration code present, then no arbitrator code for that grammar's state configuration is emitted by Yacco2. Also if only 1 T accepting then don't call the arbitrator function. @+= void yacco2:: Parser:: call_arbitrator(yacco2::Type_pp_fnct_ptr The_judge){ if(th_accepting_cnt__ == 1){// optimize no arbitration needed arbitrated_token__ = &pp_accept_queue__[1]; pp_accept_queue_idx__ = 1; return; } (*The_judge)(this); } @ @= if(The_judge == 0){// arbitrator not present in grammar arbitrated_token__ = &pp_accept_queue__[1]; pp_accept_queue_idx__ = 1; } if(The_judge != 0){// arbitrator present due to code in grammar if(th_accepting_cnt__ == 1){// optimize no arbitration needed arbitrated_token__ = &pp_accept_queue__[1]; pp_accept_queue_idx__ = 1; return; } (*The_judge)(this); return; } arbitrated_token__ = &pp_accept_queue__[1]; pp_accept_queue_idx__ = 1; @*1 Pedestrian routines for threading. @*2 Acquire trace mu.\fbreak Used to serialize trace output. Sometimes the traced output is skewed due to the threading. The output to a global container is not thread safe, so make it by use of a mutex. @= LOCK_MUTEX(yacco2::TRACE_MU); if(yacco2::YACCO2_MU_TRACING__){ yacco2::lrclog << "YACCO2_MU_TRACING__::Acquired trace mu" << FILE_LINE << std::endl; } @*2 Release trace mu. @= if(yacco2::YACCO2_MU_TRACING__){ yacco2::lrclog << "YACCO2_MU_TRACING__::Releasing trace mu" << FILE_LINE << std::endl; } UNLOCK_MUTEX(yacco2::TRACE_MU); @*2 Acquire token mu.\fbreak Used to serialize token reading. @= LOCK_MUTEX(yacco2::TOKEN_MU); @*2 Release token mu. @= UNLOCK_MUTEX(yacco2::TOKEN_MU); @*2 Wait for event: |wait_for_event|. @= void yacco2:: Parser:: wait_for_event(){ @; #if THREAD_LIBRARY_TO_USE__ == 1 @; #else @; #endif @; } @*2 Wait for an event to arrive with no loop.\fbreak This is a free-for-all loop, in my case only 1:1. The conditional variable and its associated data value is protected by the mutex. The calling thread has possession of the called thread's mutex. It does its thing in the critical region of the called thread by depositing the message and setting the conditional variable's data indicator to |EVENT_RECEIVED|. It releases the called thread's critical region and signals the thread library to wake up the called thread thru a conditional variable. |SIGNAL_COND_VAR| is the wrapper function to do this with the passed in variable being the selected thread to wakeup. The wakened thread has now in its possession its critical region protecting the conditional variable and associated message indicator. @= COND_WAIT(cv__,mu__,*this); cv_cond__ = WAIT_FOR_EVENT; @*2 Wait for an event to arrive with loop.\fbreak This is a free-for-all loop, in my case only 1:1. The conditional variable and its associated data value is protected by the mutex. The calling thread has possession of the called thread's mutex. It does its thing in the critical region of the called thread by depositing the message and setting the conditional variable's data indicator to |EVENT_RECEIVED|. It releases the called thread's critical region and signals the thread library to wake up the called thread thru a conditional variable. |SIGNAL_COND_VAR| is the wrapper function to do this with the passed in variable being the selected thread to wakeup. The wakened thread has now in its possession its critical region protecting the conditional variable and associated message indicator. But to be in good keeping, I used Pthread's recommendation to protect against spurious interrupts. This is why the wait loop tests the message indicator. If it was a spurious event, it quitely goes back to sleep waiting for that prince charming to... To protect against false messages received, the condition is set right after the loop. THIS DOES NOT WORK IN HP's Alpha. That is why |wait_for_event()| uses |@| in its macro conditional. @= while (cv_cond__ == WAIT_FOR_EVENT){ COND_WAIT(cv__,mu__,*this); } cv_cond__ = WAIT_FOR_EVENT; @*2 |post_event_to_requesting_grammar|.\fbreak The calling thread already has the write access to the called thread's critical region. Note: All messages are synchronous in nature\fbreak \ptindent{1) A thread waits for an event. There is only one thread that will reply.} \ptindent{2) The replying thread already has the caller's mutex in its posession.} Therefore, the called grammar's mutex only needs releasing before it gets wakened by the |SIGNAL_COND_VAR| routine. It interrupts the thread runtime library with the thread's conditional variable. @= void yacco2:: Parser:: post_event_to_requesting_grammar@/ (yacco2::Parser& To_thread@/ ,yacco2::INT Message_id@/ ,yacco2::Parser& From_thread){ @; @; @; } @*2 Signal thread to wake up and work.\fbreak This is the wake up event for the thread library to activate the thread from slumber. @= @; SIGNAL_COND_VAR(To_thread,*this); @; @*2 Deposit sender's co-ordinates and event in called thread's critical region. @= To_thread.from_thread__ = &From_thread; To_thread.msg_id__ = Message_id; @*2 |have_all_threads_reported_back|.\fbreak Each thread has the responsibility to check whether it is the last thread to finish processing launched by the requesting grammar. There is no distinction on success or failure. If it is the last thread to complete, it must report back via an event to the grammar requesting parallelism. If this is not done, well you've heard of Rip Van Winkle? The requestor grammar and its dwarfs will sleep forever but not the grammar writer. Trust me, `after you circles' of politness, or in computer terms the `5 dining philosophers' is down right hard to solve. @= bool yacco2:: Parser:: have_all_threads_reported_back(){ if (pp_requesting_parallelism__->th_active_cnt__ == 0) return YES; return NO; } @*1 Paranoid routines --- Aborts. @*2 |abort_accept_queue_irregularites|.\fbreak Provide logic clues to grammar writer. At least give the writer the grammar's state, list of threads launched, and accept tokens to figure out logic bug. @= void yacco2:: Parser:: abort_accept_queue_irregularites (yacco2::Caccept_parse& Calling_parm){ @; char a[BUFFER_SIZE]; int i = 1; int ie = th_accepting_cnt__; KCHARP grammar_having_logic_bug = "abort_accept_queue_irregularites " "- Overflow on accept queue Grammar name: %s in parse state: %i"; sprintf(a,grammar_having_logic_bug,fsm_tbl__->id__,top_stack_record()->state__->state_no__); yacco2::lrclog << a << FILE_LINE << std::endl; yacco2::lrclog << " List of launched threads" << __FILE__ << __LINE__<< std::endl; KCHARP thread_in_launched_list = " - %s"; yacco2_threads_to_run_iter_type ii = th_lst__.begin(); yacco2_threads_to_run_iter_type iie = th_lst__.end(); for(;ii != iie;++ii){ Thread_entry* pe = *ii; sprintf(a,thread_in_launched_list,pe->thread_fnct_name__); yacco2::lrclog << a << FILE_LINE << std::endl; } yacco2::lrclog << " List of potential accept parse Tes" << __FILE__ << __LINE__<< std::endl; KCHARP no_of_accept_tokens_in_queue = " no of accept tokens in queue: %i"; sprintf(a,no_of_accept_tokens_in_queue,th_accepting_cnt__); yacco2::lrclog << a << FILE_LINE << std::endl; KCHARP accept_queue_tokens = " - id: %s, token position: %i"; for(;i<=ie;++i){ sprintf(a,accept_queue_tokens ,pp_accept_queue__[i].accept_token__->id__ ,pp_accept_queue__[i].accept_token_pos__); yacco2::lrclog << a << FILE_LINE << std::endl; } @; @.Err Overflow on Accept queue no ...@> KCHARP msg = "Overflow on Accept queue no of items: %i not eq to thread accepting cnt: %i\n" "This means more than 1 thread adding same accept token into queue?"; sprintf(a,msg,th_accepting_cnt__+1,th_accepting_cnt__); Yacco2_faulty_precondition(a,__FILE__,__LINE__); exit(1); } @*2 |abort_no_selected_accept_parse_in_arbitrator|.\fbreak Provide logic clues to grammar writer. At least give the writer the grammar's state, list of threads launched, and accept tokens to figure out logic bug. @= void yacco2:: Parser:: abort_no_selected_accept_parse_in_arbitrator(){ @; char a[BUFFER_SIZE]; int i = 1; int ie = th_accepting_cnt__; KCHARP grammar_having_logic_bug = "abort_no_selected_accept_parse_in_arbitrator " "- No selected accept T Grammar name: %s in parse state: %i"; sprintf(a,grammar_having_logic_bug,fsm_tbl__->id__,top_stack_record()->state__->state_no__); yacco2::lrclog << a << FILE_LINE << std::endl; yacco2::lrclog << " List of launched threads" << __FILE__ << __LINE__<< std::endl; KCHARP thread_in_launched_list = " - %s"; yacco2_threads_to_run_iter_type ii = th_lst__.begin(); yacco2_threads_to_run_iter_type iie = th_lst__.end(); for(;ii != iie;++ii){ Thread_entry* pe = *ii; sprintf(a,thread_in_launched_list,pe->thread_fnct_name__); yacco2::lrclog << a << FILE_LINE << std::endl; } yacco2::lrclog << " List of potential accept parse Tes" << __FILE__ << __LINE__<< std::endl; KCHARP no_of_accept_tokens_in_queue = " no of accept tokens in queue: %i"; sprintf(a,no_of_accept_tokens_in_queue,th_accepting_cnt__); yacco2::lrclog << a << FILE_LINE << std::endl; KCHARP accept_queue_tokens = " - id: %s, token position: %i"; for(;i<=ie;++i){ sprintf(a,accept_queue_tokens ,pp_accept_queue__[i].accept_token__->id__ ,pp_accept_queue__[i].accept_token_pos__); yacco2::lrclog << a << FILE_LINE << std::endl; } @; @.Err No selected accept parse T n...@> KCHARP msg = "No selected accept parse T no of items: %i \n"; sprintf(a,msg,th_accepting_cnt__); Yacco2_faulty_precondition(a,__FILE__,__LINE__); exit(1); } @*1 Lets parse do u?. @*2 Common parsing code. @*3 Clean up aborted parallel parse and exit erred. @= clean_up(); return Parser::erred; @*3 Exit as paralleled.\fbreak The passed back token co-ordinates are the token, position in the token stream, and the lookahead token and its position in the token stream. This is lodged in |arbitrated_token__| taken from the |accept_queue__|. The accepted token is determined by the arbitrator. Why the 2 token co-ordinates? The returned terminal is a digested statement of one or more consumed tokens in the token stream. Its token position is usually the first terminal passed for the parallel parsing: The position used the stamp the returned token can be anywhere within the position bounds of the just consummed tokens. The lookahead co-ordinates is the current token for future use. It has the same meaning as the lookahead set used by a reduce operation. @= clean_up(); return Parser::paralleled; @*3 Wait for parallelism response if required. @= if(how_thread_called == CALLED_AS_THREAD){ wait_for_event(); } @*3 Extract accept parse's token |Caccept_parse|.\fbreak It extracts the arbitrated accept parse's token, and zeroes out its presence from the accept queue. This protects against the accept parse cleanup process deleting it as it dutifully erases all potential accept tokens in its queue. @= arbitrated_token__->accept_token__ = 0; @*3 Dispatch on parallel result. @= if(th_accepting_cnt__ != 0) goto parallelism_successful; else goto parallelism_unsuccessful; @*3 Re-align token stream to la boundry. @= override_current_token(*arbitrated_token__->la_token__,arbitrated_token__->la_token_pos__); @*3 Re-align current token stream to accept token co-ordinates. @= override_current_token(*arbitrated_token__->accept_token__ ,arbitrated_token__->accept_token_pos__); @*3 Allocate T id to search with. @= yacco2::USINT id_of_T = current_token__->enumerated_id__; @*3 Startup those threads. On your mark, get set, ... @= bool how_thread_called = start_threads(); @*3 Clean up parallelism scribbles: |clean_up|.\fbreak Sanitize for another round of parallel parses. Its variables are re-initialized, and potential accept messages deleted from the queue. It is rare that there is many accept messages in the queue. But when it happens, arbitration zeroed out the winner from the list leaving the balance of messages to be flushed out. The winning message is handed off to the requesting grammar to digest. |no_competing_pp_ths__| is not cleared as it's a read-only variable set by the grammar requesting parallelism. @+= void yacco2:: Parser:: clean_up(){ if(th_accepting_cnt__ > 1){ // delete losers for(int x=1;x<=th_accepting_cnt__;++x){ if(x == pp_accept_queue_idx__) continue; if(pp_accept_queue__[x].accept_token__->auto_delete() == YES){ delete pp_accept_queue__[x].accept_token__; } pp_accept_queue__[x].initialize_it(); } } th_active_cnt__ = 0; th_accepting_cnt__ = 0; pp_accept_queue_idx__ = 0; } @*2 Chained procedure call parsing: |chained_proc_call_parsing|.\fbreak Procedure call parsing's logic:\fbreak \ptindent{1) if \TRAshift is present in the state.} This is a subrule expression that links the prefix symbol to an explicit procedure call. Its a top-down attitude to parsing with the efficiency of a procedure call. Though thread calls are neat they have their runtime inefficiences caused by their launching requirements: registers setup, address paging domains etc. Until thread calls become hardwire-support equivalent in procedure call speed this allows one to fiddle. See |pass3.lex| grammar dealing with \o2's include file expression. @*4 Dispatch on proc call result. @= if(result == th_accepting_cnt__ != 0) goto proc_call_successful; else goto proc_call_unsuccessful; @*4 Shift \TRAshift onto parse stack. @= top_stack_record()->set_symbol(NS_yacco2_k_symbols::PTR_LR1_fset_transience_operator__); State* Goto_state = S.proc_call_shift__->goto__; @<|add_to_stack|@>; //; @*3 |chained_proc_call_parsing|. @= yacco2:: THR_result yacco2:: Parser:: chained_proc_call_parsing(yacco2::State& S){ THR_result result = start_procedure_call(S); @; @; proc_call_successful:@/ { @; @; @; proc_call_shift(*arbitrated_token__->accept_token__); @; @; @; clean_up(); return Parser::paralleled; } proc_call_unsuccessful:@/ @; } @*2 Start parallel parsing: |start_parallel_parsing|.\fbreak start parallel parsing's logic:\fbreak \ptindent{1) determine by first set evalution if there are threads. exit if none.} \ptindent{2) parser spawns the parallel parser threads and waits for results} \ptindent{3) dispatching of the Arbitrator. Arbitration is local per state} @*4 Shift $(\vert\vert\vert)$ onto parse stack. @= top_stack_record()->set_symbol(NS_yacco2_k_symbols::PTR_LR1_parallel_operator__); Goto_state = S.parallel_shift__->goto__; @<|add_to_stack|@>; //; @*3 |start_parallel_parsing|. @= yacco2:: Parser::parse_result yacco2:: Parser:: start_parallel_parsing(yacco2::State& S){ yacco2::State* Goto_state; @; @; @; @; wait_for_response:@/ @; @; @; parallelism_successful:@/ @; if(S.state_s_thread_tbl__->ar_fnct_ptr__ == 0){ arbitrated_token__ = &pp_accept_queue__[1]; pp_accept_queue_idx__ = 1; }else{ call_arbitrator(S.state_s_thread_tbl__->ar_fnct_ptr__); } //Validate accept message; @; @; parallel_shift(*arbitrated_token__->accept_token__); @; @; @; clean_up(); return Parser::paralleled; parallelism_unsuccessful:@/ @; } @*2 |start_manually_parallel_parsing|.\fbreak This facility allows one to do parallel parsing from syntax directed code within a grammar. For example, one might test a returned terminal whose lookahead expressions need parsing. This is a context sensitive way to process text dynamically. The Yacco2 compiler uses this approach to process its directives' syntax directed code. Here is a code sample using it.\fbreak \fbreak \let\setuplistinghook = \linenumberedlisting \listing{"/usr/local/yacco2/diagrams/threadmanualcall.txt"} \fbreak \let\setuplistinghook = \relax @= Parser::parse_result yacco2:: Parser:: start_manually_parallel_parsing@/ (yacco2::USINT Thread_id){@/ bool how_thread_called = spawn_thread_manually(Thread_id); @; @; @; parallelism_successful:{@/ if(yacco2::PTR_AR_for_manual_thread_spawning == 0){ arbitrated_token__ = &pp_accept_queue__[1]; pp_accept_queue_idx__ = 1; }else{ call_arbitrator(yacco2::PTR_AR_for_manual_thread_spawning); } //Validate accept message; @; @; } parallelism_unsuccessful:@/ @; }