@q file: CAbs_lr1_sym.w@> @q% Copyright Dave Bone 1998 - 2015@> @q% /*@> @q% This Source Code Form is subject to the terms of the Mozilla Public@> @q% License, v. 2.0. If a copy of the MPL was not distributed with this@> @q% file, You can obtain one at http://mozilla.org/MPL/2.0/.@> @q% */@> @** Abstract symbol class for all alphabets.\fbreak |CAbs_lr1_sym| is your base structure from which all grammar symbols of terminal and rule alphabets are derived. Two symbol identities are maintained: description and enumeration. The descriptive form is its name used in the grammar while the enumeration id depends on how Yacco2 has iterated across the Terminal alphabet. This iteration is described elsewhere. To save space, a union structure is used between the co-ordinate of a terminal and the rule's associated number of right-hand-side elements (subrule) and parser context. At one time there was a distinction of generated symbols for the rule and its subrules. Now a subrule is a method within the rule's class. The utility for separate symbols for rules and their subrules was evaluated. The cost of the extra subrule symbols was too heavy for the little utility that they gave, which was rarely exercised! A rule and the lrk constants terminals have no association with the token source stream, only terminals do in their various forms --- error, raw characters, and user defined. The source file co-ordinates are expressed in terms of a line number and a character position within the line. A file number index is kept as a key into the global table of copied files that holds their file names. The balance of the variables are grammatical attributes: `auto delete', `auto abort', and its destructor function if present. Why is there a dtor function instead of a class destructor? Efficiency! Virtual tables can be expensive in space and time. In this case, it is not needed very often and it is controlled by Yacco2's output code. Remember there are hordes of symbols: at least one per character. 
I've added the terminal's compressed set key to speed things up for the lookahead set operations.
Some parsing operations use the raw enumerate value as it is a 1:1 in content.
Lookahead sets are composed of sorted dupples where each dupple is composed of
a partition no and its elements members derived from the terminal's enumerated value.
This eliminates the calculation of a terminal's enumerate value to its set equivalent
every time it is checked for membership within a set.
@+=
struct CAbs_lr1_sym {
  // terminal created without a source co-ordinate
  CAbs_lr1_sym(
     yacco2::KCHARP Id
    ,yacco2::FN_DTOR Dtor
    ,yacco2::USINT Enum_id
    ,bool Auto_delete
    ,bool Affected_by_abort);
  // terminal created with its external file number and position
  CAbs_lr1_sym(
     yacco2::KCHARP Id
    ,yacco2::FN_DTOR Dtor
    ,yacco2::USINT Enum_id
    ,bool Auto_delete
    ,bool Affected_by_abort
    ,yacco2::USINT Ext_file_no
    ,yacco2::UINT Rc_pos);
  // rule: remembers the parser it was built for
  CAbs_lr1_sym(
     yacco2::KCHARP Id
    ,yacco2::FN_DTOR Dtor
    ,yacco2::USINT Enum_id
    ,yacco2::Parser* P
    ,bool Auto_delete=false
    ,bool Affected_by_abort=false);
  // identity and grammatical attributes
  yacco2::KCHARP id()const;
  yacco2::USINT enumerated_id()const;
  void set_enumerated_id(yacco2::USINT Id);
  void set_auto_delete(bool X);
  bool auto_delete()const;
  void set_affected_by_abort(bool X);
  bool affected_by_abort()const;
  // source co-ordinate of a terminal
  yacco2::UINT rc_pos();
  void set_rc_pos(yacco2::UINT Pos);
  yacco2::UINT external_file_id();
  void set_external_file_id(yacco2::UINT File);
  void set_rc(yacco2::CAbs_lr1_sym& Rc
    ,yacco2::KCHARP GPS_FILE=__FILE__,yacco2::UINT GPS_LINE=__LINE__);
  yacco2::UINT line_no();
  void set_line_no(yacco2::UINT Line_no);
  yacco2::UINT pos_in_line();
  void set_pos_in_line(yacco2::UINT Pos_in_line);
  void set_line_no_and_pos_in_line(yacco2::CAbs_lr1_sym& Rc);
  void set_line_no_and_pos_in_line(yacco2::UINT Line_no,yacco2::UINT Pos_in_line);
  // who created the symbol
  void set_who_created(yacco2::KCHARP File,yacco2::UINT Line_no);
  yacco2::UINT who_line_no();
  yacco2::KCHARP who_file();
  // rule information and destructor function
  yacco2::Parser* parser();
  yacco2::FN_DTOR dtor();
  yacco2::USINT rhs_no_of_parms();
  yacco2::KCHARP id__;
  yacco2::FN_DTOR dtor__;
  yacco2::USINT enumerated_id__;
  bool auto_delete__;
  bool affected_by_abort__;
  UCHAR enum_id_set_partition_no()const;
  UCHAR enum_id_set_member()const;
  struct tok_co_ordinates{
    yacco2::KCHARP who_file__;
    yacco2::UINT who_line_no__;
    yacco2::UINT rc_pos__;
    yacco2::UINT line_no__;
    yacco2::USINT external_file_id__;
    yacco2::USINT pos_in_line__;
    Set_entry set_entry__;
  };
  struct rule_info{
    yacco2::Parser* parser__;
    yacco2::USINT rhs_no_of_parms__;
  };
  // a terminal's co-ordinates and a rule's information share the same storage
  union {
    tok_co_ordinates tok_co_ords__;
    rule_info rule_info__;
  };
};

@*2 Grammar abstract symbol implementation.\fbreak
Why the 3 |CAbs_lr1_sym| constructors?
The 1st |CAbs_lr1_sym| defines rules, the 2nd defines the terminals without the GPS,
while the 3rd can be used by the grammar writer
in the syntax directed code to create terminals having a GPS to its source file.

@ |CAbs_lr1_sym| constructor.
The rule constructor also zeroes |rule_info__.rhs_no_of_parms__| so that
|rhs_no_of_parms| returns a defined value even when nothing else has assigned it.
@=
yacco2::CAbs_lr1_sym::CAbs_lr1_sym(
   yacco2::KCHARP Id
  ,yacco2::FN_DTOR Dtor
  ,yacco2::USINT Enum_id
  ,yacco2::Parser* P
  ,bool Auto_delete
  ,bool Affected_by_abort)
  :id__(Id)
  ,dtor__(Dtor)
  ,enumerated_id__(Enum_id)
  ,auto_delete__(Auto_delete)
  ,affected_by_abort__(Affected_by_abort)
{
  rule_info__.parser__ = P;
  // previously left unset: reading it gave an indeterminate value
  rule_info__.rhs_no_of_parms__ = 0;
  @<|create_set_entry for CAbs_lr1_sym|@>;
}

yacco2::CAbs_lr1_sym::CAbs_lr1_sym(
   yacco2::KCHARP Id
  ,yacco2::FN_DTOR Dtor
  ,yacco2::USINT Enum_id
  ,bool Auto_delete
  ,bool Affected_by_abort)
  :id__(Id)
  ,dtor__(Dtor)
  ,enumerated_id__(Enum_id)
  ,auto_delete__(Auto_delete)
  ,affected_by_abort__(Affected_by_abort)
{
  // no co-ordinate and no creator known yet
  tok_co_ords__.who_file__ = 0;
  tok_co_ords__.who_line_no__ = 0;
  tok_co_ords__.rc_pos__ = 0;
  tok_co_ords__.line_no__ = 0;
  tok_co_ords__.external_file_id__ = 0;
  tok_co_ords__.pos_in_line__ = 0;
  @<|create_set_entry for CAbs_lr1_sym|@>;
}

yacco2::CAbs_lr1_sym::CAbs_lr1_sym(
   yacco2::KCHARP Id
  ,yacco2::FN_DTOR Dtor
  ,yacco2::USINT Enum_id
  ,bool Auto_delete
  ,bool Affected_by_abort
  ,yacco2::USINT Ext_file_no
  ,yacco2::UINT Rc_pos)
  :id__(Id)
  ,dtor__(Dtor)
  ,enumerated_id__(Enum_id)
  ,auto_delete__(Auto_delete)
  ,affected_by_abort__(Affected_by_abort)
{
  // file number and position come from the caller, the rest starts at zero
  tok_co_ords__.who_file__ = 0;
  tok_co_ords__.who_line_no__ = 0;
  tok_co_ords__.rc_pos__ = Rc_pos;
  tok_co_ords__.line_no__ = 0;
  tok_co_ords__.external_file_id__ = Ext_file_no;
  tok_co_ords__.pos_in_line__ = 0;
  @<|create_set_entry for CAbs_lr1_sym|@>;
}

@*2 |enum_id_set_partition_no| and |enum_id_set_member|.\fbreak
A compressed set key.
@=
yacco2::UCHAR yacco2::CAbs_lr1_sym::enum_id_set_partition_no()const
{
  return tok_co_ords__.set_entry__.partition__;
}

yacco2::UCHAR yacco2::CAbs_lr1_sym::enum_id_set_member()const
{
  return tok_co_ords__.set_entry__.elements__;
}

@*2 |rhs_no_of_parms|.
Number of elements contained in a rule's right hand side subrule.\fbreak
@=
yacco2::USINT yacco2::CAbs_lr1_sym::rhs_no_of_parms()
{
  return rule_info__.rhs_no_of_parms__;
}

@*2 |parser|. Associated parser with the grammar being used.\fbreak
A terminal symbol has no association with a parser apart from where it was constructed,
whereas a rule does require this reference that gets assigned at construction time.
So beware as the parser variable is unionized!
@=
yacco2::Parser* yacco2::CAbs_lr1_sym::parser()
{
  return rule_info__.parser__;
}

@*2 |id|. Descriptive form of the symbol for tracing purposes.\fbreak
For rules, this is optimized out when the grammar's debug switch is set to off.
You must regenerate the grammar when you want to turn on the grammar's debug facility.
Just setting the \CPLUSPLUS/ code for debug is not sufficient. Trust me.
@=
yacco2::KCHARP yacco2::CAbs_lr1_sym::id()const
{
  return id__;
}

@*2 |enumerated_id|.\fbreak
The iteration scheme for the terminal alphabet starts at 0 followed by the grammar's rules.
Subrule enumeration starts from 1.
Their enumerates are mutually exclusive and are defined in the generated fsm class of the grammar.
@=
yacco2::USINT yacco2::CAbs_lr1_sym::enumerated_id()const
{
  return enumerated_id__;
}

@*2 |set_enumerated_id|.
@=
void yacco2::CAbs_lr1_sym::set_enumerated_id(yacco2::USINT Id)
{
  enumerated_id__ = Id;
}

@*2 |set_affected_by_abort| and |affected_by_abort|.
\fbreak
These are the writer and reader of the grammar's auto abort attribute `AB' for the symbol.
@=
void yacco2::CAbs_lr1_sym::set_affected_by_abort(bool X)
{
  this->affected_by_abort__ = X;
}

bool yacco2::CAbs_lr1_sym::affected_by_abort()const
{
  return this->affected_by_abort__;
}

@*2 |set_auto_delete| and |CAbs_lr1_sym::auto_delete|.\fbreak
These are the writer and reader of the grammar's auto delete attribute `AD' for the symbol.
@=
void yacco2::CAbs_lr1_sym::set_auto_delete(bool X)
{
  this->auto_delete__ = X;
}

bool yacco2::CAbs_lr1_sym::auto_delete()const
{
  return this->auto_delete__;
}

@*2 |dtor|.\fbreak
Destructor function defined by the grammar writer for the symbol.
Why not use the class genetics? A class is too expensive in its implementation.
Your basic structure is sufficient with no virtual table overhead.
Within this context, the dtor is rarely needed and it's up to Yacco2 to create when needed.
See the |destructor| directive of the grammar.
@=
yacco2::FN_DTOR yacco2::CAbs_lr1_sym::dtor()
{
  // hands back the stored function as is
  return this->dtor__;
}

@*2 |set_rc|, |set_rc_pos|, and |rc_pos|.\fbreak
These are the writers and reader of the terminal's co-ordinate.
The only symbol that directly sets these values are the raw character symbols.
All other symbols are composites built from raw character terminals.
The co-ordinate parts can be individually set, or all parts of the co-ordinate
can be copied from a previous symbol's co-ordinate.
Normally their use comes from a parsing environment producing tokens built from a grammar
but this is not a hard-and-fast rule.
The reason why the parser address is passed to |CAbs_lr1_sym::set_rc| is due to |eog|.
It is shared across all token containers and all copied source files.
This sharing behavior was taken to lower the new-delete overhead of creating the terminal.
Consequently there is no definite co-ordinate associated with this terminal and
one must go to the previous token of the supplier to tack on the real co-ordinates
+ the number of previous terminals tried for a co-ordinate.
The supplier context comes from the |parser__|.
The 2 GPS parameters allows parental histrionics:
Don't know if this is received well by the user of \O2 but it certainly helps to debug.
This was added down the road and so the reason for the defaults in the prototype
as to not disturb existing grammars.
If the default is taken then the GPS is not set as it could be done elsewhere.
|set_who_created| allows one to initially set or override previous settings.
Some marginal additives: parse stack co-ordinates for error tokens and
``eog'' association with the current token supplier.
Added the situation if no token symbol to find for the ``eog'' token
(no data entered at the command line),
I force the command line co-ordinates instead of throwing up.
@=
void yacco2::CAbs_lr1_sym::set_rc(yacco2::CAbs_lr1_sym& Rc
  ,yacco2::KCHARP GPS_FILE
  ,yacco2::UINT GPS_LINE)
{@/
  // remember the creator only when a file name is supplied
  if(GPS_FILE != 0){
    tok_co_ords__.who_line_no__ = GPS_LINE;
    tok_co_ords__.who_file__ = GPS_FILE;
  }
  // a donor without an external file number leaves the co-ordinate untouched
  if(!(Rc.tok_co_ords__.external_file_id__ > 0)) return;
  tok_co_ords__.pos_in_line__ = Rc.tok_co_ords__.pos_in_line__;
  tok_co_ords__.line_no__ = Rc.tok_co_ords__.line_no__;
  tok_co_ords__.rc_pos__ = Rc.tok_co_ords__.rc_pos__;
  tok_co_ords__.external_file_id__ = Rc.tok_co_ords__.external_file_id__;
}

@*2 Does terminal have a legitimate co-ordinate?.\fbreak
Do you see the moonwalk? This goes backwards through the supplier tokens
looking for a source address.
Inside the supplier routine is the validation on the requested subscript.
@=
if(pt->tok_co_ords__.rc_pos__ != 0) goto set_co_ordinates;
++bk_cnt;
--prev_pos;
goto find_legitimate_terminal;

@ @=
void yacco2::CAbs_lr1_sym::set_rc_pos(yacco2::UINT Pos)
{
  @;
  this->tok_co_ords__.rc_pos__ = Pos;
}

yacco2::UINT yacco2::CAbs_lr1_sym::rc_pos()
{
  return this->tok_co_ords__.rc_pos__;
}

@*2 |set_external_file_id| and |external_file_id|. \fbreak
These are the writer and reader of the grammar's external file index
used to reference the copied files descriptive name.
@=
void yacco2::CAbs_lr1_sym::set_external_file_id(yacco2::UINT File_no)
{
  @;
  this->tok_co_ords__.external_file_id__ = File_no;
}

yacco2::UINT yacco2::CAbs_lr1_sym::external_file_id()
{
  return this->tok_co_ords__.external_file_id__;
}

@*2 Set line no, and character position routines.\fbreak
These are the writer and reader to parts of the co-ordinate.
@=
void yacco2::CAbs_lr1_sym::set_line_no(yacco2::UINT Line_no)
{
  @;
  this->tok_co_ords__.line_no__ = Line_no;
}

yacco2::UINT yacco2::CAbs_lr1_sym::line_no()
{
  return this->tok_co_ords__.line_no__;
}

yacco2::UINT yacco2::CAbs_lr1_sym::pos_in_line()
{
  return this->tok_co_ords__.pos_in_line__;
}

void yacco2::CAbs_lr1_sym::set_pos_in_line(yacco2::UINT Pos_in_line)
{
  @;
  this->tok_co_ords__.pos_in_line__ = Pos_in_line;
}

void yacco2::CAbs_lr1_sym::set_line_no_and_pos_in_line(yacco2::UINT Line_no
  ,yacco2::UINT Pos_in_line)
{
  @;
  @;
  this->tok_co_ords__.pos_in_line__ = Pos_in_line;
  this->tok_co_ords__.line_no__ = Line_no;
}

void yacco2::CAbs_lr1_sym::set_line_no_and_pos_in_line(yacco2::CAbs_lr1_sym& Rc)
{
  // take both parts from the other symbol
  this->tok_co_ords__.pos_in_line__ = Rc.tok_co_ords__.pos_in_line__;
  this->tok_co_ords__.line_no__ = Rc.tok_co_ords__.line_no__;
}

@*2 |set_who_created|, |who_line_no|, |who_file|.\fbreak
These are the writer and reader to parts of the co-ordinate giving the source
that created the symbol.
@=
void yacco2::CAbs_lr1_sym::set_who_created(yacco2::KCHARP File,yacco2::UINT Line_no)
{
  // file name and line number of whoever created the symbol
  this->tok_co_ords__.who_line_no__ = Line_no;
  this->tok_co_ords__.who_file__ = File;
}

yacco2::UINT yacco2::CAbs_lr1_sym::who_line_no()
{
  return this->tok_co_ords__.who_line_no__;
}

yacco2::KCHARP yacco2::CAbs_lr1_sym::who_file()
{
  return this->tok_co_ords__.who_file__;
}