diff --git a/.gitattributes b/.gitattributes index 076368f..39ee708 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ -* text=auto - -lalrpop/src/parser/lrgrammar.lalrpop text eol=lf +* text=auto + +lalrpop/src/parser/lrgrammar.lalrpop text eol=lf diff --git a/doc/pascal/lalrpop/pascal.y b/doc/pascal/lalrpop/pascal.y index 386ca3a..1dd3d5c 100644 --- a/doc/pascal/lalrpop/pascal.y +++ b/doc/pascal/lalrpop/pascal.y @@ -1,523 +1,523 @@ -%{ -%} - -%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ -%token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION -%token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT -%token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC -%token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN -%token TO TYPE UNTIL UPARROW VAR WHILE WITH - -%% -file : program - | module - ; - -program : program_heading semicolon block DOT - ; - -program_heading : PROGRAM identifier - | PROGRAM identifier LPAREN identifier_list RPAREN - ; - -identifier_list : identifier_list comma identifier - | identifier - ; - -block : label_declaration_part - constant_definition_part - type_definition_part - variable_declaration_part - procedure_and_function_declaration_part - statement_part - ; - -module : constant_definition_part - type_definition_part - variable_declaration_part - procedure_and_function_declaration_part - ; - -label_declaration_part : LABEL label_list semicolon - | - ; - -label_list : label_list comma label - | label - ; - -label : DIGSEQ - ; - -constant_definition_part : CONST constant_list - | - ; - -constant_list : constant_list constant_definition - | constant_definition - ; - -constant_definition : identifier EQUAL cexpression semicolon - ; - -cexpression : csimple_expression - | csimple_expression relop csimple_expression - ; - -csimple_expression : cterm - | csimple_expression addop cterm - ; - -cterm : cfactor - | 
cterm mulop cfactor - ; - -cfactor : sign cfactor - | cexponentiation - ; - -cexponentiation : cprimary - | cprimary STARSTAR cexponentiation - ; - -cprimary : identifier - | LPAREN cexpression RPAREN - | unsigned_constant - | NOT cprimary - ; - -constant : non_string - | sign non_string - | CHARACTER_STRING - ; - -sign : PLUS - | MINUS - ; - -non_string : DIGSEQ - | identifier - | REALNUMBER - ; - -type_definition_part : TYPE type_definition_list - | - ; - -type_definition_list : type_definition_list type_definition - | type_definition - ; - -type_definition : identifier EQUAL type_denoter semicolon - ; - -type_denoter : identifier - | new_type - ; - -new_type : new_ordinal_type - | new_structured_type - | new_pointer_type - ; - -new_ordinal_type : enumerated_type - | subrange_type - ; - -enumerated_type : LPAREN identifier_list RPAREN - ; - -subrange_type : constant DOTDOT constant - ; - -new_structured_type : structured_type - | PACKED structured_type - ; - -structured_type : array_type - | record_type - | set_type - | file_type - ; - -array_type : ARRAY LBRAC index_list RBRAC OF component_type - ; - -index_list : index_list comma index_type - | index_type - ; - -index_type : ordinal_type ; - -ordinal_type : new_ordinal_type - | identifier - ; - -component_type : type_denoter ; - -record_type : RECORD record_section_list END - | RECORD record_section_list semicolon variant_part END - | RECORD variant_part END - ; - -record_section_list : record_section_list semicolon record_section - | record_section - ; - -record_section : identifier_list COLON type_denoter - ; - -variant_part : CASE variant_selector OF variant_list semicolon - | CASE variant_selector OF variant_list - | - ; - -variant_selector : tag_field COLON tag_type - | tag_type - ; - -variant_list : variant_list semicolon variant - | variant - ; - -variant : case_constant_list COLON LPAREN record_section_list RPAREN - | case_constant_list COLON LPAREN record_section_list semicolon - variant_part RPAREN - 
| case_constant_list COLON LPAREN variant_part RPAREN - ; - -case_constant_list : case_constant_list comma case_constant - | case_constant - ; - -case_constant : constant - | constant DOTDOT constant - ; - -tag_field : identifier ; - -tag_type : identifier ; - -set_type : SET OF base_type - ; - -base_type : ordinal_type ; - -file_type : PFILE OF component_type - ; - -new_pointer_type : UPARROW domain_type - ; - -domain_type : identifier ; - -variable_declaration_part : VAR variable_declaration_list semicolon - | - ; - -variable_declaration_list : - variable_declaration_list semicolon variable_declaration - | variable_declaration - ; - -variable_declaration : identifier_list COLON type_denoter - ; - -procedure_and_function_declaration_part : - proc_or_func_declaration_list semicolon - | - ; - -proc_or_func_declaration_list : - proc_or_func_declaration_list semicolon proc_or_func_declaration - | proc_or_func_declaration - ; - -proc_or_func_declaration : procedure_declaration - | function_declaration - ; - -procedure_declaration : procedure_heading semicolon directive - | procedure_heading semicolon procedure_block - ; - -procedure_heading : procedure_identification - | procedure_identification formal_parameter_list - ; - -directive : FORWARD - | EXTERNAL - ; - -formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ; - -formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section - | formal_parameter_section - ; - -formal_parameter_section : value_parameter_specification - | variable_parameter_specification - | procedural_parameter_specification - | functional_parameter_specification - ; - -value_parameter_specification : identifier_list COLON identifier - ; - -variable_parameter_specification : VAR identifier_list COLON identifier - ; - -procedural_parameter_specification : procedure_heading ; - -functional_parameter_specification : function_heading ; - -procedure_identification : PROCEDURE identifier ; - 
-procedure_block : block ; - -function_declaration : function_heading semicolon directive - | function_identification semicolon function_block - | function_heading semicolon function_block - ; - -function_heading : FUNCTION identifier COLON result_type - | FUNCTION identifier formal_parameter_list COLON result_type - ; - -result_type : identifier ; - -function_identification : FUNCTION identifier ; - -function_block : block ; - -statement_part : compound_statement ; - -compound_statement : PBEGIN statement_sequence END ; - -statement_sequence : statement_sequence semicolon statement - | statement - ; - -statement : open_statement - | closed_statement - ; - -open_statement : label COLON non_labeled_open_statement - | non_labeled_open_statement - ; - -closed_statement : label COLON non_labeled_closed_statement - | non_labeled_closed_statement - ; - -non_labeled_closed_statement : assignment_statement - | procedure_statement - | goto_statement - | compound_statement - | case_statement - | repeat_statement - | closed_with_statement - | closed_if_statement - | closed_while_statement - | closed_for_statement - | - ; - -non_labeled_open_statement : open_with_statement - | open_if_statement - | open_while_statement - | open_for_statement - ; - -repeat_statement : REPEAT statement_sequence UNTIL boolean_expression - ; - -open_while_statement : WHILE boolean_expression DO open_statement - ; - -closed_while_statement : WHILE boolean_expression DO closed_statement - ; - -open_for_statement : FOR control_variable ASSIGNMENT initial_value direction - final_value DO open_statement - ; - -closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction - final_value DO closed_statement - ; - -open_with_statement : WITH record_variable_list DO open_statement - ; - -closed_with_statement : WITH record_variable_list DO closed_statement - ; - -open_if_statement : IF boolean_expression THEN statement - | IF boolean_expression THEN closed_statement ELSE open_statement - ; 
- -closed_if_statement : IF boolean_expression THEN closed_statement - ELSE closed_statement - ; - -assignment_statement : variable_access ASSIGNMENT expression - ; - -variable_access : identifier - | indexed_variable - | field_designator - | variable_access UPARROW - ; - -indexed_variable : variable_access LBRAC index_expression_list RBRAC - ; - -index_expression_list : index_expression_list comma index_expression - | index_expression - ; - -index_expression : expression ; - -field_designator : variable_access DOT identifier - ; - -procedure_statement : identifier params - | identifier - ; - -params : LPAREN actual_parameter_list RPAREN ; - -actual_parameter_list : actual_parameter_list comma actual_parameter - | actual_parameter - ; - -actual_parameter : expression - | expression COLON expression - | expression COLON expression COLON expression - ; - -goto_statement : GOTO label - ; - -case_statement : CASE case_index OF case_list_element_list END - | CASE case_index OF case_list_element_list SEMICOLON END - | CASE case_index OF case_list_element_list semicolon - otherwisepart statement END - | CASE case_index OF case_list_element_list semicolon - otherwisepart statement SEMICOLON END - ; - -case_index : expression ; - -case_list_element_list : case_list_element_list semicolon case_list_element - | case_list_element - ; - -case_list_element : case_constant_list COLON statement - ; - -otherwisepart : OTHERWISE - | OTHERWISE COLON - ; - -control_variable : identifier ; - -initial_value : expression ; - -direction : TO - | DOWNTO - ; - -final_value : expression ; - -record_variable_list : record_variable_list comma variable_access - | variable_access - ; - -boolean_expression : expression ; - -expression : simple_expression - | simple_expression relop simple_expression - ; - -simple_expression : term - | simple_expression addop term - ; - -term : factor - | term mulop factor - ; - -factor : sign factor - | exponentiation - ; - -exponentiation : primary - | primary 
STARSTAR exponentiation - ; - -primary : variable_access - | unsigned_constant - | function_designator - | set_constructor - | LPAREN expression RPAREN - | NOT primary - ; - -unsigned_constant : unsigned_number - | CHARACTER_STRING - | NIL - ; - -unsigned_number : unsigned_integer | unsigned_real ; - -unsigned_integer : DIGSEQ - ; - -unsigned_real : REALNUMBER - ; - -function_designator : identifier params - ; - -set_constructor : LBRAC member_designator_list RBRAC - | LBRAC RBRAC - ; - -member_designator_list : member_designator_list comma member_designator - | member_designator - ; - -member_designator : member_designator DOTDOT expression - | expression - ; - -addop: PLUS - | MINUS - | OR - ; - -mulop : STAR - | SLASH - | DIV - | MOD - | AND - ; - -relop : EQUAL - | NOTEQUAL - | LT - | GT - | LE - | GE - | IN - ; - -identifier : IDENTIFIER - ; - -semicolon : SEMICOLON - ; - -comma : COMMA - ; +%{ +%} + +%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ +%token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION +%token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT +%token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC +%token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN +%token TO TYPE UNTIL UPARROW VAR WHILE WITH + +%% +file : program + | module + ; + +program : program_heading semicolon block DOT + ; + +program_heading : PROGRAM identifier + | PROGRAM identifier LPAREN identifier_list RPAREN + ; + +identifier_list : identifier_list comma identifier + | identifier + ; + +block : label_declaration_part + constant_definition_part + type_definition_part + variable_declaration_part + procedure_and_function_declaration_part + statement_part + ; + +module : constant_definition_part + type_definition_part + variable_declaration_part + procedure_and_function_declaration_part + ; + +label_declaration_part : LABEL label_list semicolon + | + ; + 
+label_list : label_list comma label + | label + ; + +label : DIGSEQ + ; + +constant_definition_part : CONST constant_list + | + ; + +constant_list : constant_list constant_definition + | constant_definition + ; + +constant_definition : identifier EQUAL cexpression semicolon + ; + +cexpression : csimple_expression + | csimple_expression relop csimple_expression + ; + +csimple_expression : cterm + | csimple_expression addop cterm + ; + +cterm : cfactor + | cterm mulop cfactor + ; + +cfactor : sign cfactor + | cexponentiation + ; + +cexponentiation : cprimary + | cprimary STARSTAR cexponentiation + ; + +cprimary : identifier + | LPAREN cexpression RPAREN + | unsigned_constant + | NOT cprimary + ; + +constant : non_string + | sign non_string + | CHARACTER_STRING + ; + +sign : PLUS + | MINUS + ; + +non_string : DIGSEQ + | identifier + | REALNUMBER + ; + +type_definition_part : TYPE type_definition_list + | + ; + +type_definition_list : type_definition_list type_definition + | type_definition + ; + +type_definition : identifier EQUAL type_denoter semicolon + ; + +type_denoter : identifier + | new_type + ; + +new_type : new_ordinal_type + | new_structured_type + | new_pointer_type + ; + +new_ordinal_type : enumerated_type + | subrange_type + ; + +enumerated_type : LPAREN identifier_list RPAREN + ; + +subrange_type : constant DOTDOT constant + ; + +new_structured_type : structured_type + | PACKED structured_type + ; + +structured_type : array_type + | record_type + | set_type + | file_type + ; + +array_type : ARRAY LBRAC index_list RBRAC OF component_type + ; + +index_list : index_list comma index_type + | index_type + ; + +index_type : ordinal_type ; + +ordinal_type : new_ordinal_type + | identifier + ; + +component_type : type_denoter ; + +record_type : RECORD record_section_list END + | RECORD record_section_list semicolon variant_part END + | RECORD variant_part END + ; + +record_section_list : record_section_list semicolon record_section + | record_section + ; + 
+record_section : identifier_list COLON type_denoter + ; + +variant_part : CASE variant_selector OF variant_list semicolon + | CASE variant_selector OF variant_list + | + ; + +variant_selector : tag_field COLON tag_type + | tag_type + ; + +variant_list : variant_list semicolon variant + | variant + ; + +variant : case_constant_list COLON LPAREN record_section_list RPAREN + | case_constant_list COLON LPAREN record_section_list semicolon + variant_part RPAREN + | case_constant_list COLON LPAREN variant_part RPAREN + ; + +case_constant_list : case_constant_list comma case_constant + | case_constant + ; + +case_constant : constant + | constant DOTDOT constant + ; + +tag_field : identifier ; + +tag_type : identifier ; + +set_type : SET OF base_type + ; + +base_type : ordinal_type ; + +file_type : PFILE OF component_type + ; + +new_pointer_type : UPARROW domain_type + ; + +domain_type : identifier ; + +variable_declaration_part : VAR variable_declaration_list semicolon + | + ; + +variable_declaration_list : + variable_declaration_list semicolon variable_declaration + | variable_declaration + ; + +variable_declaration : identifier_list COLON type_denoter + ; + +procedure_and_function_declaration_part : + proc_or_func_declaration_list semicolon + | + ; + +proc_or_func_declaration_list : + proc_or_func_declaration_list semicolon proc_or_func_declaration + | proc_or_func_declaration + ; + +proc_or_func_declaration : procedure_declaration + | function_declaration + ; + +procedure_declaration : procedure_heading semicolon directive + | procedure_heading semicolon procedure_block + ; + +procedure_heading : procedure_identification + | procedure_identification formal_parameter_list + ; + +directive : FORWARD + | EXTERNAL + ; + +formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ; + +formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section + | formal_parameter_section + ; + +formal_parameter_section : 
value_parameter_specification + | variable_parameter_specification + | procedural_parameter_specification + | functional_parameter_specification + ; + +value_parameter_specification : identifier_list COLON identifier + ; + +variable_parameter_specification : VAR identifier_list COLON identifier + ; + +procedural_parameter_specification : procedure_heading ; + +functional_parameter_specification : function_heading ; + +procedure_identification : PROCEDURE identifier ; + +procedure_block : block ; + +function_declaration : function_heading semicolon directive + | function_identification semicolon function_block + | function_heading semicolon function_block + ; + +function_heading : FUNCTION identifier COLON result_type + | FUNCTION identifier formal_parameter_list COLON result_type + ; + +result_type : identifier ; + +function_identification : FUNCTION identifier ; + +function_block : block ; + +statement_part : compound_statement ; + +compound_statement : PBEGIN statement_sequence END ; + +statement_sequence : statement_sequence semicolon statement + | statement + ; + +statement : open_statement + | closed_statement + ; + +open_statement : label COLON non_labeled_open_statement + | non_labeled_open_statement + ; + +closed_statement : label COLON non_labeled_closed_statement + | non_labeled_closed_statement + ; + +non_labeled_closed_statement : assignment_statement + | procedure_statement + | goto_statement + | compound_statement + | case_statement + | repeat_statement + | closed_with_statement + | closed_if_statement + | closed_while_statement + | closed_for_statement + | + ; + +non_labeled_open_statement : open_with_statement + | open_if_statement + | open_while_statement + | open_for_statement + ; + +repeat_statement : REPEAT statement_sequence UNTIL boolean_expression + ; + +open_while_statement : WHILE boolean_expression DO open_statement + ; + +closed_while_statement : WHILE boolean_expression DO closed_statement + ; + +open_for_statement : FOR control_variable 
ASSIGNMENT initial_value direction + final_value DO open_statement + ; + +closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction + final_value DO closed_statement + ; + +open_with_statement : WITH record_variable_list DO open_statement + ; + +closed_with_statement : WITH record_variable_list DO closed_statement + ; + +open_if_statement : IF boolean_expression THEN statement + | IF boolean_expression THEN closed_statement ELSE open_statement + ; + +closed_if_statement : IF boolean_expression THEN closed_statement + ELSE closed_statement + ; + +assignment_statement : variable_access ASSIGNMENT expression + ; + +variable_access : identifier + | indexed_variable + | field_designator + | variable_access UPARROW + ; + +indexed_variable : variable_access LBRAC index_expression_list RBRAC + ; + +index_expression_list : index_expression_list comma index_expression + | index_expression + ; + +index_expression : expression ; + +field_designator : variable_access DOT identifier + ; + +procedure_statement : identifier params + | identifier + ; + +params : LPAREN actual_parameter_list RPAREN ; + +actual_parameter_list : actual_parameter_list comma actual_parameter + | actual_parameter + ; + +actual_parameter : expression + | expression COLON expression + | expression COLON expression COLON expression + ; + +goto_statement : GOTO label + ; + +case_statement : CASE case_index OF case_list_element_list END + | CASE case_index OF case_list_element_list SEMICOLON END + | CASE case_index OF case_list_element_list semicolon + otherwisepart statement END + | CASE case_index OF case_list_element_list semicolon + otherwisepart statement SEMICOLON END + ; + +case_index : expression ; + +case_list_element_list : case_list_element_list semicolon case_list_element + | case_list_element + ; + +case_list_element : case_constant_list COLON statement + ; + +otherwisepart : OTHERWISE + | OTHERWISE COLON + ; + +control_variable : identifier ; + +initial_value : expression ; 
+ +direction : TO + | DOWNTO + ; + +final_value : expression ; + +record_variable_list : record_variable_list comma variable_access + | variable_access + ; + +boolean_expression : expression ; + +expression : simple_expression + | simple_expression relop simple_expression + ; + +simple_expression : term + | simple_expression addop term + ; + +term : factor + | term mulop factor + ; + +factor : sign factor + | exponentiation + ; + +exponentiation : primary + | primary STARSTAR exponentiation + ; + +primary : variable_access + | unsigned_constant + | function_designator + | set_constructor + | LPAREN expression RPAREN + | NOT primary + ; + +unsigned_constant : unsigned_number + | CHARACTER_STRING + | NIL + ; + +unsigned_number : unsigned_integer | unsigned_real ; + +unsigned_integer : DIGSEQ + ; + +unsigned_real : REALNUMBER + ; + +function_designator : identifier params + ; + +set_constructor : LBRAC member_designator_list RBRAC + | LBRAC RBRAC + ; + +member_designator_list : member_designator_list comma member_designator + | member_designator + ; + +member_designator : member_designator DOTDOT expression + | expression + ; + +addop: PLUS + | MINUS + | OR + ; + +mulop : STAR + | SLASH + | DIV + | MOD + | AND + ; + +relop : EQUAL + | NOTEQUAL + | LT + | GT + | LE + | GE + | IN + ; + +identifier : IDENTIFIER + ; + +semicolon : SEMICOLON + ; + +comma : COMMA + ; diff --git a/doc/pascal/yacc/pascal.l b/doc/pascal/yacc/pascal.l index 0e6944a..5219028 100644 --- a/doc/pascal/yacc/pascal.l +++ b/doc/pascal/yacc/pascal.l @@ -1,157 +1,157 @@ -%{ -/* - * scan.l - * - * lex input file for pascal scanner - * - * extensions: to ways to spell "external" and "->" ok for "^". 
- */ - -#include -#include "y.tab.h" -int line_no = 1; - -%} - -A [aA] -B [bB] -C [cC] -D [dD] -E [eE] -F [fF] -G [gG] -H [hH] -I [iI] -J [jJ] -K [kK] -L [lL] -M [mM] -N [nN] -O [oO] -P [pP] -Q [qQ] -R [rR] -S [sS] -T [tT] -U [uU] -V [vV] -W [wW] -X [xX] -Y [yY] -Z [zZ] -NQUOTE [^'] - -%% - -{A}{N}{D} return(AND); -{A}{R}{R}{A}{Y} return(ARRAY); -{C}{A}{S}{E} return(CASE); -{C}{O}{N}{S}{T} return(CONST); -{D}{I}{V} return(DIV); -{D}{O} return(DO); -{D}{O}{W}{N}{T}{O} return(DOWNTO); -{E}{L}{S}{E} return(ELSE); -{E}{N}{D} return(END); -{E}{X}{T}{E}{R}{N} | -{E}{X}{T}{E}{R}{N}{A}{L} return(EXTERNAL); -{F}{O}{R} return(FOR); -{F}{O}{R}{W}{A}{R}{D} return(FORWARD); -{F}{U}{N}{C}{T}{I}{O}{N} return(FUNCTION); -{G}{O}{T}{O} return(GOTO); -{I}{F} return(IF); -{I}{N} return(IN); -{L}{A}{B}{E}{L} return(LABEL); -{M}{O}{D} return(MOD); -{N}{I}{L} return(NIL); -{N}{O}{T} return(NOT); -{O}{F} return(OF); -{O}{R} return(OR); -{O}{T}{H}{E}{R}{W}{I}{S}{E} return(OTHERWISE); -{P}{A}{C}{K}{E}{D} return(PACKED); -{B}{E}{G}{I}{N} return(PBEGIN); -{F}{I}{L}{E} return(PFILE); -{P}{R}{O}{C}{E}{D}{U}{R}{E} return(PROCEDURE); -{P}{R}{O}{G}{R}{A}{M} return(PROGRAM); -{R}{E}{C}{O}{R}{D} return(RECORD); -{R}{E}{P}{E}{A}{T} return(REPEAT); -{S}{E}{T} return(SET); -{T}{H}{E}{N} return(THEN); -{T}{O} return(TO); -{T}{Y}{P}{E} return(TYPE); -{U}{N}{T}{I}{L} return(UNTIL); -{V}{A}{R} return(VAR); -{W}{H}{I}{L}{E} return(WHILE); -{W}{I}{T}{H} return(WITH); -[a-zA-Z]([a-zA-Z0-9])+ return(IDENTIFIER); - -":=" return(ASSIGNMENT); -'({NQUOTE}|'')+' return(CHARACTER_STRING); -":" return(COLON); -"," return(COMMA); -[0-9]+ return(DIGSEQ); -"." return(DOT); -".." 
return(DOTDOT); -"=" return(EQUAL); -">=" return(GE); -">" return(GT); -"[" return(LBRAC); -"<=" return(LE); -"(" return(LPAREN); -"<" return(LT); -"-" return(MINUS); -"<>" return(NOTEQUAL); -"+" return(PLUS); -"]" return(RBRAC); -[0-9]+"."[0-9]+ return(REALNUMBER); -")" return(RPAREN); -";" return(SEMICOLON); -"/" return(SLASH); -"*" return(STAR); -"**" return(STARSTAR); -"->" | -"^" return(UPARROW); - -"(*" | -"{" { register int c; - while ((c = input())) - { - if (c == '}') - break; - else if (c == '*') - { - if ((c = input()) == ')') - break; - else - unput (c); - } - else if (c == '\n') - line_no++; - else if (c == 0) - commenteof(); - } - } - -[ \t\f] ; - -\n line_no++; - -. { fprintf (stderr, - "'%c' (0%o): illegal charcter at line %d\n", - yytext[0], yytext[0], line_no); - } - -%% - -commenteof() -{ - fprintf (stderr, "unexpected EOF inside comment at line %d\n", - line_no); - exit (1); -} - -yywrap () -{ - return (1); -} - - +%{ +/* + * scan.l + * + * lex input file for pascal scanner + * + * extensions: to ways to spell "external" and "->" ok for "^". 
+ */ + +#include +#include "y.tab.h" +int line_no = 1; + +%} + +A [aA] +B [bB] +C [cC] +D [dD] +E [eE] +F [fF] +G [gG] +H [hH] +I [iI] +J [jJ] +K [kK] +L [lL] +M [mM] +N [nN] +O [oO] +P [pP] +Q [qQ] +R [rR] +S [sS] +T [tT] +U [uU] +V [vV] +W [wW] +X [xX] +Y [yY] +Z [zZ] +NQUOTE [^'] + +%% + +{A}{N}{D} return(AND); +{A}{R}{R}{A}{Y} return(ARRAY); +{C}{A}{S}{E} return(CASE); +{C}{O}{N}{S}{T} return(CONST); +{D}{I}{V} return(DIV); +{D}{O} return(DO); +{D}{O}{W}{N}{T}{O} return(DOWNTO); +{E}{L}{S}{E} return(ELSE); +{E}{N}{D} return(END); +{E}{X}{T}{E}{R}{N} | +{E}{X}{T}{E}{R}{N}{A}{L} return(EXTERNAL); +{F}{O}{R} return(FOR); +{F}{O}{R}{W}{A}{R}{D} return(FORWARD); +{F}{U}{N}{C}{T}{I}{O}{N} return(FUNCTION); +{G}{O}{T}{O} return(GOTO); +{I}{F} return(IF); +{I}{N} return(IN); +{L}{A}{B}{E}{L} return(LABEL); +{M}{O}{D} return(MOD); +{N}{I}{L} return(NIL); +{N}{O}{T} return(NOT); +{O}{F} return(OF); +{O}{R} return(OR); +{O}{T}{H}{E}{R}{W}{I}{S}{E} return(OTHERWISE); +{P}{A}{C}{K}{E}{D} return(PACKED); +{B}{E}{G}{I}{N} return(PBEGIN); +{F}{I}{L}{E} return(PFILE); +{P}{R}{O}{C}{E}{D}{U}{R}{E} return(PROCEDURE); +{P}{R}{O}{G}{R}{A}{M} return(PROGRAM); +{R}{E}{C}{O}{R}{D} return(RECORD); +{R}{E}{P}{E}{A}{T} return(REPEAT); +{S}{E}{T} return(SET); +{T}{H}{E}{N} return(THEN); +{T}{O} return(TO); +{T}{Y}{P}{E} return(TYPE); +{U}{N}{T}{I}{L} return(UNTIL); +{V}{A}{R} return(VAR); +{W}{H}{I}{L}{E} return(WHILE); +{W}{I}{T}{H} return(WITH); +[a-zA-Z]([a-zA-Z0-9])+ return(IDENTIFIER); + +":=" return(ASSIGNMENT); +'({NQUOTE}|'')+' return(CHARACTER_STRING); +":" return(COLON); +"," return(COMMA); +[0-9]+ return(DIGSEQ); +"." return(DOT); +".." 
return(DOTDOT); +"=" return(EQUAL); +">=" return(GE); +">" return(GT); +"[" return(LBRAC); +"<=" return(LE); +"(" return(LPAREN); +"<" return(LT); +"-" return(MINUS); +"<>" return(NOTEQUAL); +"+" return(PLUS); +"]" return(RBRAC); +[0-9]+"."[0-9]+ return(REALNUMBER); +")" return(RPAREN); +";" return(SEMICOLON); +"/" return(SLASH); +"*" return(STAR); +"**" return(STARSTAR); +"->" | +"^" return(UPARROW); + +"(*" | +"{" { register int c; + while ((c = input())) + { + if (c == '}') + break; + else if (c == '*') + { + if ((c = input()) == ')') + break; + else + unput (c); + } + else if (c == '\n') + line_no++; + else if (c == 0) + commenteof(); + } + } + +[ \t\f] ; + +\n line_no++; + +. { fprintf (stderr, + "'%c' (0%o): illegal charcter at line %d\n", + yytext[0], yytext[0], line_no); + } + +%% + +commenteof() +{ + fprintf (stderr, "unexpected EOF inside comment at line %d\n", + line_no); + exit (1); +} + +yywrap () +{ + return (1); +} + + diff --git a/doc/pascal/yacc/pascal.y b/doc/pascal/yacc/pascal.y index 591b2a9..8e41193 100644 --- a/doc/pascal/yacc/pascal.y +++ b/doc/pascal/yacc/pascal.y @@ -1,547 +1,547 @@ -%{ -/* - * grammar.y - * - * Pascal grammar in Yacc format, based originally on BNF given - * in "Standard Pascal -- User Reference Manual", by Doug Cooper. - * This in turn is the BNF given by the ANSI and ISO Pascal standards, - * and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal. - * The grammar has been massaged somewhat to make it LALR, and added - * the following extensions. 
- * - * constant expressions - * otherwise statement in a case - * productions to correctly match else's with if's - * beginnings of a separate compilation facility - */ - -%} - -%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ -%token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION -%token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT -%token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC -%token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN -%token TO TYPE UNTIL UPARROW VAR WHILE WITH - -%% -file : program - | module - ; - -program : program_heading semicolon block DOT - ; - -program_heading : PROGRAM identifier - | PROGRAM identifier LPAREN identifier_list RPAREN - ; - -identifier_list : identifier_list comma identifier - | identifier - ; - -block : label_declaration_part - constant_definition_part - type_definition_part - variable_declaration_part - procedure_and_function_declaration_part - statement_part - ; - -module : constant_definition_part - type_definition_part - variable_declaration_part - procedure_and_function_declaration_part - ; - -label_declaration_part : LABEL label_list semicolon - | - ; - -label_list : label_list comma label - | label - ; - -label : DIGSEQ - ; - -constant_definition_part : CONST constant_list - | - ; - -constant_list : constant_list constant_definition - | constant_definition - ; - -constant_definition : identifier EQUAL cexpression semicolon - ; - -/*constant : cexpression ; /* good stuff! 
*/ - -cexpression : csimple_expression - | csimple_expression relop csimple_expression - ; - -csimple_expression : cterm - | csimple_expression addop cterm - ; - -cterm : cfactor - | cterm mulop cfactor - ; - -cfactor : sign cfactor - | cexponentiation - ; - -cexponentiation : cprimary - | cprimary STARSTAR cexponentiation - ; - -cprimary : identifier - | LPAREN cexpression RPAREN - | unsigned_constant - | NOT cprimary - ; - -constant : non_string - | sign non_string - | CHARACTER_STRING - ; - -sign : PLUS - | MINUS - ; - -non_string : DIGSEQ - | identifier - | REALNUMBER - ; - -type_definition_part : TYPE type_definition_list - | - ; - -type_definition_list : type_definition_list type_definition - | type_definition - ; - -type_definition : identifier EQUAL type_denoter semicolon - ; - -type_denoter : identifier - | new_type - ; - -new_type : new_ordinal_type - | new_structured_type - | new_pointer_type - ; - -new_ordinal_type : enumerated_type - | subrange_type - ; - -enumerated_type : LPAREN identifier_list RPAREN - ; - -subrange_type : constant DOTDOT constant - ; - -new_structured_type : structured_type - | PACKED structured_type - ; - -structured_type : array_type - | record_type - | set_type - | file_type - ; - -array_type : ARRAY LBRAC index_list RBRAC OF component_type - ; - -index_list : index_list comma index_type - | index_type - ; - -index_type : ordinal_type ; - -ordinal_type : new_ordinal_type - | identifier - ; - -component_type : type_denoter ; - -record_type : RECORD record_section_list END - | RECORD record_section_list semicolon variant_part END - | RECORD variant_part END - ; - -record_section_list : record_section_list semicolon record_section - | record_section - ; - -record_section : identifier_list COLON type_denoter - ; - -variant_part : CASE variant_selector OF variant_list semicolon - | CASE variant_selector OF variant_list - | - ; - -variant_selector : tag_field COLON tag_type - | tag_type - ; - -variant_list : variant_list semicolon 
variant - | variant - ; - -variant : case_constant_list COLON LPAREN record_section_list RPAREN - | case_constant_list COLON LPAREN record_section_list semicolon - variant_part RPAREN - | case_constant_list COLON LPAREN variant_part RPAREN - ; - -case_constant_list : case_constant_list comma case_constant - | case_constant - ; - -case_constant : constant - | constant DOTDOT constant - ; - -tag_field : identifier ; - -tag_type : identifier ; - -set_type : SET OF base_type - ; - -base_type : ordinal_type ; - -file_type : PFILE OF component_type - ; - -new_pointer_type : UPARROW domain_type - ; - -domain_type : identifier ; - -variable_declaration_part : VAR variable_declaration_list semicolon - | - ; - -variable_declaration_list : - variable_declaration_list semicolon variable_declaration - | variable_declaration - ; - -variable_declaration : identifier_list COLON type_denoter - ; - -procedure_and_function_declaration_part : - proc_or_func_declaration_list semicolon - | - ; - -proc_or_func_declaration_list : - proc_or_func_declaration_list semicolon proc_or_func_declaration - | proc_or_func_declaration - ; - -proc_or_func_declaration : procedure_declaration - | function_declaration - ; - -procedure_declaration : procedure_heading semicolon directive - | procedure_heading semicolon procedure_block - ; - -procedure_heading : procedure_identification - | procedure_identification formal_parameter_list - ; - -directive : FORWARD - | EXTERNAL - ; - -formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ; - -formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section - | formal_parameter_section - ; - -formal_parameter_section : value_parameter_specification - | variable_parameter_specification - | procedural_parameter_specification - | functional_parameter_specification - ; - -value_parameter_specification : identifier_list COLON identifier - ; - -variable_parameter_specification : VAR identifier_list COLON identifier - ; 
- -procedural_parameter_specification : procedure_heading ; - -functional_parameter_specification : function_heading ; - -procedure_identification : PROCEDURE identifier ; - -procedure_block : block ; - -function_declaration : function_heading semicolon directive - | function_identification semicolon function_block - | function_heading semicolon function_block - ; - -function_heading : FUNCTION identifier COLON result_type - | FUNCTION identifier formal_parameter_list COLON result_type - ; - -result_type : identifier ; - -function_identification : FUNCTION identifier ; - -function_block : block ; - -statement_part : compound_statement ; - -compound_statement : PBEGIN statement_sequence END ; - -statement_sequence : statement_sequence semicolon statement - | statement - ; - -statement : open_statement - | closed_statement - ; - -open_statement : label COLON non_labeled_open_statement - | non_labeled_open_statement - ; - -closed_statement : label COLON non_labeled_closed_statement - | non_labeled_closed_statement - ; - -non_labeled_closed_statement : assignment_statement - | procedure_statement - | goto_statement - | compound_statement - | case_statement - | repeat_statement - | closed_with_statement - | closed_if_statement - | closed_while_statement - | closed_for_statement - | - ; - -non_labeled_open_statement : open_with_statement - | open_if_statement - | open_while_statement - | open_for_statement - ; - -repeat_statement : REPEAT statement_sequence UNTIL boolean_expression - ; - -open_while_statement : WHILE boolean_expression DO open_statement - ; - -closed_while_statement : WHILE boolean_expression DO closed_statement - ; - -open_for_statement : FOR control_variable ASSIGNMENT initial_value direction - final_value DO open_statement - ; - -closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction - final_value DO closed_statement - ; - -open_with_statement : WITH record_variable_list DO open_statement - ; - -closed_with_statement : WITH 
record_variable_list DO closed_statement - ; - -open_if_statement : IF boolean_expression THEN statement - | IF boolean_expression THEN closed_statement ELSE open_statement - ; - -closed_if_statement : IF boolean_expression THEN closed_statement - ELSE closed_statement - ; - -assignment_statement : variable_access ASSIGNMENT expression - ; - -variable_access : identifier - | indexed_variable - | field_designator - | variable_access UPARROW - ; - -indexed_variable : variable_access LBRAC index_expression_list RBRAC - ; - -index_expression_list : index_expression_list comma index_expression - | index_expression - ; - -index_expression : expression ; - -field_designator : variable_access DOT identifier - ; - -procedure_statement : identifier params - | identifier - ; - -params : LPAREN actual_parameter_list RPAREN ; - -actual_parameter_list : actual_parameter_list comma actual_parameter - | actual_parameter - ; - -/* - * this forces you to check all this to be sure that only write and - * writeln use the 2nd and 3rd forms, you really can't do it easily in - * the grammar, especially since write and writeln aren't reserved - */ -actual_parameter : expression - | expression COLON expression - | expression COLON expression COLON expression - ; - -goto_statement : GOTO label - ; - -case_statement : CASE case_index OF case_list_element_list END - | CASE case_index OF case_list_element_list SEMICOLON END - | CASE case_index OF case_list_element_list semicolon - otherwisepart statement END - | CASE case_index OF case_list_element_list semicolon - otherwisepart statement SEMICOLON END - ; - -case_index : expression ; - -case_list_element_list : case_list_element_list semicolon case_list_element - | case_list_element - ; - -case_list_element : case_constant_list COLON statement - ; - -otherwisepart : OTHERWISE - | OTHERWISE COLON - ; - -control_variable : identifier ; - -initial_value : expression ; - -direction : TO - | DOWNTO - ; - -final_value : expression ; - 
-record_variable_list : record_variable_list comma variable_access - | variable_access - ; - -boolean_expression : expression ; - -expression : simple_expression - | simple_expression relop simple_expression - ; - -simple_expression : term - | simple_expression addop term - ; - -term : factor - | term mulop factor - ; - -factor : sign factor - | exponentiation - ; - -exponentiation : primary - | primary STARSTAR exponentiation - ; - -primary : variable_access - | unsigned_constant - | function_designator - | set_constructor - | LPAREN expression RPAREN - | NOT primary - ; - -unsigned_constant : unsigned_number - | CHARACTER_STRING - | NIL - ; - -unsigned_number : unsigned_integer | unsigned_real ; - -unsigned_integer : DIGSEQ - ; - -unsigned_real : REALNUMBER - ; - -/* functions with no params will be handled by plain identifier */ -function_designator : identifier params - ; - -set_constructor : LBRAC member_designator_list RBRAC - | LBRAC RBRAC - ; - -member_designator_list : member_designator_list comma member_designator - | member_designator - ; - -member_designator : member_designator DOTDOT expression - | expression - ; - -addop: PLUS - | MINUS - | OR - ; - -mulop : STAR - | SLASH - | DIV - | MOD - | AND - ; - -relop : EQUAL - | NOTEQUAL - | LT - | GT - | LE - | GE - | IN - ; - -identifier : IDENTIFIER - ; - -semicolon : SEMICOLON - ; - -comma : COMMA +%{ +/* + * grammar.y + * + * Pascal grammar in Yacc format, based originally on BNF given + * in "Standard Pascal -- User Reference Manual", by Doug Cooper. + * This in turn is the BNF given by the ANSI and ISO Pascal standards, + * and so, is PUBLIC DOMAIN. The grammar is for ISO Level 0 Pascal. + * The grammar has been massaged somewhat to make it LALR, and added + * the following extensions. 
+ * + * constant expressions + * otherwise statement in a case + * productions to correctly match else's with if's + * beginnings of a separate compilation facility + */ + +%} + +%token AND ARRAY ASSIGNMENT CASE CHARACTER_STRING COLON COMMA CONST DIGSEQ +%token DIV DO DOT DOTDOT DOWNTO ELSE END EQUAL EXTERNAL FOR FORWARD FUNCTION +%token GE GOTO GT IDENTIFIER IF IN LABEL LBRAC LE LPAREN LT MINUS MOD NIL NOT +%token NOTEQUAL OF OR OTHERWISE PACKED PBEGIN PFILE PLUS PROCEDURE PROGRAM RBRAC +%token REALNUMBER RECORD REPEAT RPAREN SEMICOLON SET SLASH STAR STARSTAR THEN +%token TO TYPE UNTIL UPARROW VAR WHILE WITH + +%% +file : program + | module + ; + +program : program_heading semicolon block DOT + ; + +program_heading : PROGRAM identifier + | PROGRAM identifier LPAREN identifier_list RPAREN + ; + +identifier_list : identifier_list comma identifier + | identifier + ; + +block : label_declaration_part + constant_definition_part + type_definition_part + variable_declaration_part + procedure_and_function_declaration_part + statement_part + ; + +module : constant_definition_part + type_definition_part + variable_declaration_part + procedure_and_function_declaration_part + ; + +label_declaration_part : LABEL label_list semicolon + | + ; + +label_list : label_list comma label + | label + ; + +label : DIGSEQ + ; + +constant_definition_part : CONST constant_list + | + ; + +constant_list : constant_list constant_definition + | constant_definition + ; + +constant_definition : identifier EQUAL cexpression semicolon + ; + +/*constant : cexpression ; /* good stuff! 
*/ + +cexpression : csimple_expression + | csimple_expression relop csimple_expression + ; + +csimple_expression : cterm + | csimple_expression addop cterm + ; + +cterm : cfactor + | cterm mulop cfactor + ; + +cfactor : sign cfactor + | cexponentiation + ; + +cexponentiation : cprimary + | cprimary STARSTAR cexponentiation + ; + +cprimary : identifier + | LPAREN cexpression RPAREN + | unsigned_constant + | NOT cprimary + ; + +constant : non_string + | sign non_string + | CHARACTER_STRING + ; + +sign : PLUS + | MINUS + ; + +non_string : DIGSEQ + | identifier + | REALNUMBER + ; + +type_definition_part : TYPE type_definition_list + | + ; + +type_definition_list : type_definition_list type_definition + | type_definition + ; + +type_definition : identifier EQUAL type_denoter semicolon + ; + +type_denoter : identifier + | new_type + ; + +new_type : new_ordinal_type + | new_structured_type + | new_pointer_type + ; + +new_ordinal_type : enumerated_type + | subrange_type + ; + +enumerated_type : LPAREN identifier_list RPAREN + ; + +subrange_type : constant DOTDOT constant + ; + +new_structured_type : structured_type + | PACKED structured_type + ; + +structured_type : array_type + | record_type + | set_type + | file_type + ; + +array_type : ARRAY LBRAC index_list RBRAC OF component_type + ; + +index_list : index_list comma index_type + | index_type + ; + +index_type : ordinal_type ; + +ordinal_type : new_ordinal_type + | identifier + ; + +component_type : type_denoter ; + +record_type : RECORD record_section_list END + | RECORD record_section_list semicolon variant_part END + | RECORD variant_part END + ; + +record_section_list : record_section_list semicolon record_section + | record_section + ; + +record_section : identifier_list COLON type_denoter + ; + +variant_part : CASE variant_selector OF variant_list semicolon + | CASE variant_selector OF variant_list + | + ; + +variant_selector : tag_field COLON tag_type + | tag_type + ; + +variant_list : variant_list semicolon 
variant + | variant + ; + +variant : case_constant_list COLON LPAREN record_section_list RPAREN + | case_constant_list COLON LPAREN record_section_list semicolon + variant_part RPAREN + | case_constant_list COLON LPAREN variant_part RPAREN + ; + +case_constant_list : case_constant_list comma case_constant + | case_constant + ; + +case_constant : constant + | constant DOTDOT constant + ; + +tag_field : identifier ; + +tag_type : identifier ; + +set_type : SET OF base_type + ; + +base_type : ordinal_type ; + +file_type : PFILE OF component_type + ; + +new_pointer_type : UPARROW domain_type + ; + +domain_type : identifier ; + +variable_declaration_part : VAR variable_declaration_list semicolon + | + ; + +variable_declaration_list : + variable_declaration_list semicolon variable_declaration + | variable_declaration + ; + +variable_declaration : identifier_list COLON type_denoter + ; + +procedure_and_function_declaration_part : + proc_or_func_declaration_list semicolon + | + ; + +proc_or_func_declaration_list : + proc_or_func_declaration_list semicolon proc_or_func_declaration + | proc_or_func_declaration + ; + +proc_or_func_declaration : procedure_declaration + | function_declaration + ; + +procedure_declaration : procedure_heading semicolon directive + | procedure_heading semicolon procedure_block + ; + +procedure_heading : procedure_identification + | procedure_identification formal_parameter_list + ; + +directive : FORWARD + | EXTERNAL + ; + +formal_parameter_list : LPAREN formal_parameter_section_list RPAREN ; + +formal_parameter_section_list : formal_parameter_section_list semicolon formal_parameter_section + | formal_parameter_section + ; + +formal_parameter_section : value_parameter_specification + | variable_parameter_specification + | procedural_parameter_specification + | functional_parameter_specification + ; + +value_parameter_specification : identifier_list COLON identifier + ; + +variable_parameter_specification : VAR identifier_list COLON identifier + ; 
+ +procedural_parameter_specification : procedure_heading ; + +functional_parameter_specification : function_heading ; + +procedure_identification : PROCEDURE identifier ; + +procedure_block : block ; + +function_declaration : function_heading semicolon directive + | function_identification semicolon function_block + | function_heading semicolon function_block + ; + +function_heading : FUNCTION identifier COLON result_type + | FUNCTION identifier formal_parameter_list COLON result_type + ; + +result_type : identifier ; + +function_identification : FUNCTION identifier ; + +function_block : block ; + +statement_part : compound_statement ; + +compound_statement : PBEGIN statement_sequence END ; + +statement_sequence : statement_sequence semicolon statement + | statement + ; + +statement : open_statement + | closed_statement + ; + +open_statement : label COLON non_labeled_open_statement + | non_labeled_open_statement + ; + +closed_statement : label COLON non_labeled_closed_statement + | non_labeled_closed_statement + ; + +non_labeled_closed_statement : assignment_statement + | procedure_statement + | goto_statement + | compound_statement + | case_statement + | repeat_statement + | closed_with_statement + | closed_if_statement + | closed_while_statement + | closed_for_statement + | + ; + +non_labeled_open_statement : open_with_statement + | open_if_statement + | open_while_statement + | open_for_statement + ; + +repeat_statement : REPEAT statement_sequence UNTIL boolean_expression + ; + +open_while_statement : WHILE boolean_expression DO open_statement + ; + +closed_while_statement : WHILE boolean_expression DO closed_statement + ; + +open_for_statement : FOR control_variable ASSIGNMENT initial_value direction + final_value DO open_statement + ; + +closed_for_statement : FOR control_variable ASSIGNMENT initial_value direction + final_value DO closed_statement + ; + +open_with_statement : WITH record_variable_list DO open_statement + ; + +closed_with_statement : WITH 
record_variable_list DO closed_statement + ; + +open_if_statement : IF boolean_expression THEN statement + | IF boolean_expression THEN closed_statement ELSE open_statement + ; + +closed_if_statement : IF boolean_expression THEN closed_statement + ELSE closed_statement + ; + +assignment_statement : variable_access ASSIGNMENT expression + ; + +variable_access : identifier + | indexed_variable + | field_designator + | variable_access UPARROW + ; + +indexed_variable : variable_access LBRAC index_expression_list RBRAC + ; + +index_expression_list : index_expression_list comma index_expression + | index_expression + ; + +index_expression : expression ; + +field_designator : variable_access DOT identifier + ; + +procedure_statement : identifier params + | identifier + ; + +params : LPAREN actual_parameter_list RPAREN ; + +actual_parameter_list : actual_parameter_list comma actual_parameter + | actual_parameter + ; + +/* + * this forces you to check all this to be sure that only write and + * writeln use the 2nd and 3rd forms, you really can't do it easily in + * the grammar, especially since write and writeln aren't reserved + */ +actual_parameter : expression + | expression COLON expression + | expression COLON expression COLON expression + ; + +goto_statement : GOTO label + ; + +case_statement : CASE case_index OF case_list_element_list END + | CASE case_index OF case_list_element_list SEMICOLON END + | CASE case_index OF case_list_element_list semicolon + otherwisepart statement END + | CASE case_index OF case_list_element_list semicolon + otherwisepart statement SEMICOLON END + ; + +case_index : expression ; + +case_list_element_list : case_list_element_list semicolon case_list_element + | case_list_element + ; + +case_list_element : case_constant_list COLON statement + ; + +otherwisepart : OTHERWISE + | OTHERWISE COLON + ; + +control_variable : identifier ; + +initial_value : expression ; + +direction : TO + | DOWNTO + ; + +final_value : expression ; + 
+record_variable_list : record_variable_list comma variable_access + | variable_access + ; + +boolean_expression : expression ; + +expression : simple_expression + | simple_expression relop simple_expression + ; + +simple_expression : term + | simple_expression addop term + ; + +term : factor + | term mulop factor + ; + +factor : sign factor + | exponentiation + ; + +exponentiation : primary + | primary STARSTAR exponentiation + ; + +primary : variable_access + | unsigned_constant + | function_designator + | set_constructor + | LPAREN expression RPAREN + | NOT primary + ; + +unsigned_constant : unsigned_number + | CHARACTER_STRING + | NIL + ; + +unsigned_number : unsigned_integer | unsigned_real ; + +unsigned_integer : DIGSEQ + ; + +unsigned_real : REALNUMBER + ; + +/* functions with no params will be handled by plain identifier */ +function_designator : identifier params + ; + +set_constructor : LBRAC member_designator_list RBRAC + | LBRAC RBRAC + ; + +member_designator_list : member_designator_list comma member_designator + | member_designator + ; + +member_designator : member_designator DOTDOT expression + | expression + ; + +addop: PLUS + | MINUS + | OR + ; + +mulop : STAR + | SLASH + | DIV + | MOD + | AND + ; + +relop : EQUAL + | NOTEQUAL + | LT + | GT + | LE + | GE + | IN + ; + +identifier : IDENTIFIER + ; + +semicolon : SEMICOLON + ; + +comma : COMMA ; \ No newline at end of file diff --git a/doc/src/conditional-compilation.md b/doc/src/conditional-compilation.md index bc77904..fbffafa 100644 --- a/doc/src/conditional-compilation.md +++ b/doc/src/conditional-compilation.md @@ -1,9 +1,9 @@ -LALRPOP support conditional compilation of public non-terminal declarations via `#[cfg(feature = "FEATUERE")]` attributes. -If run in a build script LALRPOP will automatically pickup the features from `cargo` and use those. Alternatively an explicit set of features can be set using the `Configuration` type. - -```rust -#[cfg(feature = "FEATURE")] -pub MyRule : () = { - ... 
-}; -``` +LALRPOP supports conditional compilation of public non-terminal declarations via `#[cfg(feature = "FEATURE")]` attributes. +If run in a build script, LALRPOP will automatically pick up the features from `cargo` and use those. Alternatively, an explicit set of features can be set using the `Configuration` type. + +```rust +#[cfg(feature = "FEATURE")] +pub MyRule : () = { + ... +}; +``` diff --git a/lalrpop-test/Cargo.toml b/lalrpop-test/Cargo.toml index 8f9c55d..6184644 100644 --- a/lalrpop-test/Cargo.toml +++ b/lalrpop-test/Cargo.toml @@ -8,7 +8,7 @@ workspace = ".." [dependencies] diff = "0.1.9" -regex = "1" +regex = "1" [dependencies.lalrpop-util] path = "../lalrpop-util" diff --git a/lalrpop/src/build/mod.rs b/lalrpop/src/build/mod.rs index 9230030..83cac3e 100644 --- a/lalrpop/src/build/mod.rs +++ b/lalrpop/src/build/mod.rs @@ -1,562 +1,562 @@ -//! Utilies for running in a build script. - -use atty; -use file_text::FileText; -use grammar::parse_tree as pt; -use grammar::repr as r; -use lalrpop_util::ParseError; -use lexer::intern_token; -use lr1; -use message::builder::InlineBuilder; -use message::{Content, Message}; -use normalize; -use parser; -use rust::RustWrite; -use session::{ColorConfig, Session}; -use sha2::{Digest, Sha256}; -use term; -use tls::Tls; -use tok; - -use std::fs; -use std::io::{self, BufRead, Write}; -use std::path::{Path, PathBuf}; -use std::process::exit; -use std::rc::Rc; - -mod action; -mod fake_term; - -use self::fake_term::FakeTerminal; - -const LALRPOP_VERSION_HEADER: &'static str = concat!( - "// auto-generated: \"", - env!("CARGO_PKG_NAME"), - " ", - env!("CARGO_PKG_VERSION"), - "\"" -); - -fn hash_file(file: &Path) -> io::Result { - let mut file = try!(fs::File::open(&file)); - let mut sha_256 = Sha256::new(); - try!(io::copy(&mut file, &mut sha_256)); - - let mut hash_str = "// sha256: ".to_owned(); - for byte in sha_256.result() { - hash_str.push_str(&format!("{:x}", byte)); - } - Ok(hash_str) -} - -pub fn
process_dir>(session: Rc, root_dir: P) -> io::Result<()> { - let lalrpop_files = try!(lalrpop_files(root_dir)); - for lalrpop_file in lalrpop_files { - try!(process_file(session.clone(), lalrpop_file)); - } - Ok(()) -} - -pub fn process_file>(session: Rc, lalrpop_file: P) -> io::Result<()> { - let lalrpop_file = lalrpop_file.as_ref(); - let rs_file = try!(resolve_rs_file(&session, lalrpop_file)); - let report_file = try!(resolve_report_file(&session, lalrpop_file)); - process_file_into(session, lalrpop_file, &rs_file, &report_file) -} - -fn resolve_rs_file(session: &Session, lalrpop_file: &Path) -> io::Result { - gen_resolve_file(session, lalrpop_file, "rs") -} - -fn resolve_report_file(session: &Session, lalrpop_file: &Path) -> io::Result { - gen_resolve_file(session, lalrpop_file, "report") -} - -fn gen_resolve_file(session: &Session, lalrpop_file: &Path, ext: &str) -> io::Result { - let in_dir = if let Some(ref d) = session.in_dir { - d.as_path() - } else { - Path::new(".") - }; - let out_dir = if let Some(ref d) = session.out_dir { - d.as_path() - } else { - in_dir - }; - - // If the lalrpop file is not in in_dir, the result is that the - // .rs file is created in the same directory as the lalrpop file - // for compatibility reasons - Ok(out_dir - .join(lalrpop_file.strip_prefix(&in_dir).unwrap_or(lalrpop_file)) - .with_extension(ext)) -} - -fn process_file_into( - session: Rc, - lalrpop_file: &Path, - rs_file: &Path, - report_file: &Path, -) -> io::Result<()> { - if session.force_build || try!(needs_rebuild(&lalrpop_file, &rs_file)) { - log!( - session, - Informative, - "processing file `{}`", - lalrpop_file.to_string_lossy() - ); - if let Some(parent) = rs_file.parent() { - try!(fs::create_dir_all(parent)); - } - try!(remove_old_file(&rs_file)); - - // Load the LALRPOP source text for this file: - let file_text = Rc::new(try!(FileText::from_path(lalrpop_file.to_path_buf()))); - - // Store the session and file-text in TLS -- this is not - // intended to be 
used in this high-level code, but it gives - // easy access to this information pervasively in the - // low-level LR(1) and grammar normalization code. This is - // particularly useful for error-reporting. - let _tls = Tls::install(session.clone(), file_text.clone()); - - // Do the LALRPOP processing itself and write the resulting - // buffer into a file. We use a buffer so that if LR(1) - // generation fails at some point, we don't leave a partial - // file behind. - { - let grammar = try!(parse_and_normalize_grammar(&session, &file_text)); - let buffer = try!(emit_recursive_ascent(&session, &grammar, &report_file)); - let mut output_file = try!(fs::File::create(&rs_file)); - try!(writeln!(output_file, "{}", LALRPOP_VERSION_HEADER)); - try!(writeln!(output_file, "{}", try!(hash_file(&lalrpop_file)))); - try!(output_file.write_all(&buffer)); - } - } - Ok(()) -} - -fn remove_old_file(rs_file: &Path) -> io::Result<()> { - match fs::remove_file(rs_file) { - Ok(()) => Ok(()), - Err(e) => { - // Unix reports NotFound, Windows PermissionDenied! 
- match e.kind() { - io::ErrorKind::NotFound | io::ErrorKind::PermissionDenied => Ok(()), - _ => Err(e), - } - } - } -} - -fn needs_rebuild(lalrpop_file: &Path, rs_file: &Path) -> io::Result { - match fs::File::open(&rs_file) { - Ok(rs_file) => { - let mut version_str = String::new(); - let mut hash_str = String::new(); - - let mut f = io::BufReader::new(rs_file); - - try!(f.read_line(&mut version_str)); - try!(f.read_line(&mut hash_str)); - - Ok(hash_str.trim() != try!(hash_file(&lalrpop_file)) - || version_str.trim() != LALRPOP_VERSION_HEADER) - } - Err(e) => match e.kind() { - io::ErrorKind::NotFound => Ok(true), - _ => Err(e), - }, - } -} - -fn lalrpop_files>(root_dir: P) -> io::Result> { - let mut result = vec![]; - for entry in try!(fs::read_dir(root_dir)) { - let entry = try!(entry); - let file_type = try!(entry.file_type()); - - let path = entry.path(); - - if file_type.is_dir() { - result.extend(try!(lalrpop_files(&path))); - } - - if file_type.is_file() - && path.extension().is_some() - && path.extension().unwrap() == "lalrpop" - { - result.push(path); - } - } - Ok(result) -} - -fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::Result { - let grammar = match parser::parse_grammar(file_text.text()) { - Ok(grammar) => grammar, - - Err(ParseError::InvalidToken { location }) => { - let ch = file_text.text()[location..].chars().next().unwrap(); - report_error( - &file_text, - pt::Span(location, location), - &format!("invalid character `{}`", ch), - ); - } - - Err(ParseError::UnrecognizedToken { - token: None, - expected: _, - }) => { - let len = file_text.text().len(); - report_error( - &file_text, - pt::Span(len, len), - &format!("unexpected end of file"), - ); - } - - Err(ParseError::UnrecognizedToken { - token: Some((lo, _, hi)), - expected, - }) => { - let _ = expected; // didn't implement this yet :) - let text = &file_text.text()[lo..hi]; - report_error( - &file_text, - pt::Span(lo, hi), - &format!("unexpected token: `{}`", 
text), - ); - } - - Err(ParseError::ExtraToken { token: (lo, _, hi) }) => { - let text = &file_text.text()[lo..hi]; - report_error( - &file_text, - pt::Span(lo, hi), - &format!("extra token at end of input: `{}`", text), - ); - } - - Err(ParseError::User { error }) => { - let string = match error.code { - tok::ErrorCode::UnrecognizedToken => "unrecognized token", - tok::ErrorCode::UnterminatedEscape => "unterminated escape; missing '`'?", - tok::ErrorCode::UnrecognizedEscape => { - "unrecognized escape; only \\n, \\r, \\t, \\\" and \\\\ are recognized" - } - tok::ErrorCode::UnterminatedStringLiteral => { - "unterminated string literal; missing `\"`?" - } - tok::ErrorCode::UnterminatedCharacterLiteral => { - "unterminated character literal; missing `'`?" - } - tok::ErrorCode::UnterminatedAttribute => "unterminated #! attribute; missing `]`?", - tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?", - tok::ErrorCode::UnterminatedCode => { - "unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?" - } - }; - - report_error( - &file_text, - pt::Span(error.location, error.location + 1), - string, - ) - } - }; - - match normalize::normalize(session, grammar) { - Ok(grammar) => Ok(grammar), - Err(error) => report_error(&file_text, error.span, &error.message), - } -} - -fn report_error(file_text: &FileText, span: pt::Span, message: &str) -> ! { - println!("{} error: {}", file_text.span_str(span), message); - - let out = io::stderr(); - let mut out = out.lock(); - file_text.highlight(span, &mut out).unwrap(); - - exit(1); -} - -fn report_messages(messages: Vec) -> term::Result<()> { - let builder = InlineBuilder::new().begin_paragraphs(); - let builder = messages - .into_iter() - .fold(builder, |b, m| b.push(Box::new(m))); - let content = builder.end().end(); - report_content(&*content) -} - -fn report_content(content: &Content) -> term::Result<()> { - // FIXME -- can we query the size of the terminal somehow? 
- let canvas = content.emit_to_canvas(80); - - let try_colors = match Tls::session().color_config { - ColorConfig::Yes => true, - ColorConfig::No => false, - ColorConfig::IfTty => atty::is(atty::Stream::Stdout), - }; - - if try_colors { - if let Some(mut stdout) = term::stdout() { - return canvas.write_to(&mut *stdout); - } - } - - let stdout = io::stdout(); - let mut stdout = FakeTerminal::new(stdout.lock()); - canvas.write_to(&mut stdout) -} - -fn emit_module_attributes( - grammar: &r::Grammar, - rust: &mut RustWrite, -) -> io::Result<()> { - rust.write_module_attributes(grammar) -} - -fn emit_uses(grammar: &r::Grammar, rust: &mut RustWrite) -> io::Result<()> { - rust.write_uses("", grammar) -} - -fn emit_recursive_ascent( - session: &Session, - grammar: &r::Grammar, - report_file: &Path, -) -> io::Result> { - let mut rust = RustWrite::new(vec![]); - - // We generate a module structure like this: - // - // ``` - // mod { - // // For each public symbol: - // pub fn parse_XYZ(); - // mod __XYZ { ... } - // - // // For each bit of action code: - // - // } - // ``` - // - // Note that the action code goes in the outer module. This is - // intentional because it means that the foo.lalrpop file serves - // as a module in the rust hierarchy, so if the action code - // includes things like `super::` it will resolve in the natural - // way. - - try!(emit_module_attributes(grammar, &mut rust)); - try!(emit_uses(grammar, &mut rust)); - - if grammar.start_nonterminals.is_empty() { - println!("Error: no public symbols declared in grammar"); - exit(1); - } - - for (user_nt, start_nt) in &grammar.start_nonterminals { - // We generate these, so there should always be exactly 1 - // production. Otherwise the LR(1) algorithm doesn't know - // where to stop! 
- assert_eq!(grammar.productions_for(start_nt).len(), 1); - - log!( - session, - Verbose, - "Building states for public nonterminal `{}`", - user_nt - ); - - let _lr1_tls = lr1::Lr1Tls::install(grammar.terminals.clone()); - - let lr1result = lr1::build_states(&grammar, start_nt.clone()); - if session.emit_report { - let mut output_report_file = try!(fs::File::create(&report_file)); - try!(lr1::generate_report(&mut output_report_file, &lr1result)); - } - - let states = match lr1result { - Ok(states) => states, - Err(error) => { - let messages = lr1::report_error(&grammar, &error); - let _ = report_messages(messages); - exit(1) // FIXME -- propagate up instead of calling `exit` - } - }; - - match grammar.algorithm.codegen { - r::LrCodeGeneration::RecursiveAscent => try!(lr1::codegen::ascent::compile( - &grammar, - user_nt.clone(), - start_nt.clone(), - &states, - "super", - &mut rust, - )), - r::LrCodeGeneration::TableDriven => try!(lr1::codegen::parse_table::compile( - &grammar, - user_nt.clone(), - start_nt.clone(), - &states, - "super", - &mut rust, - )), - - r::LrCodeGeneration::TestAll => try!(lr1::codegen::test_all::compile( - &grammar, - user_nt.clone(), - start_nt.clone(), - &states, - &mut rust, - )), - } - - rust!( - rust, - "{}use self::{}parse{}::{}Parser;", - grammar.nonterminals[&user_nt].visibility, - grammar.prefix, - start_nt, - user_nt - ); - } - - if let Some(ref intern_token) = grammar.intern_token { - try!(intern_token::compile(&grammar, intern_token, &mut rust)); - rust!(rust, "pub use self::{}intern_token::Token;", grammar.prefix); - } - - try!(action::emit_action_code(grammar, &mut rust)); - - try!(emit_to_triple_trait(grammar, &mut rust)); - - Ok(rust.into_inner()) -} - -fn emit_to_triple_trait(grammar: &r::Grammar, rust: &mut RustWrite) -> io::Result<()> { - #![allow(non_snake_case)] - - let L = grammar.types.terminal_loc_type(); - let T = grammar.types.terminal_token_type(); - let E = grammar.types.error_type(); - - let parse_error = 
format!( - "{p}lalrpop_util::ParseError<{L}, {T}, {E}>", - p = grammar.prefix, - L = L, - T = T, - E = E, - ); - - let mut user_type_parameters = String::new(); - for type_parameter in &grammar.type_parameters { - user_type_parameters.push_str(&format!("{}, ", type_parameter)); - } - - rust!(rust, ""); - rust!( - rust, - "pub trait {}ToTriple<{}> {{", - grammar.prefix, - user_type_parameters, - ); - rust!( - rust, - "fn to_triple(value: Self) -> Result<({L},{T},{L}), {parse_error}>;", - L = L, - T = T, - parse_error = parse_error, - ); - rust!(rust, "}}"); - - rust!(rust, ""); - if grammar.types.opt_terminal_loc_type().is_some() { - rust!( - rust, - "impl<{utp}> {p}ToTriple<{utp}> for ({L}, {T}, {L}) {{", - p = grammar.prefix, - utp = user_type_parameters, - L = L, - T = T, - ); - rust!( - rust, - "fn to_triple(value: Self) -> Result<({L},{T},{L}), {parse_error}> {{", - L = L, - T = T, - parse_error = parse_error, - ); - rust!(rust, "Ok(value)"); - rust!(rust, "}}"); - rust!(rust, "}}"); - - rust!( - rust, - "impl<{utp}> {p}ToTriple<{utp}> for Result<({L}, {T}, {L}), {E}> {{", - utp = user_type_parameters, - p = grammar.prefix, - L = L, - T = T, - E = E, - ); - rust!( - rust, - "fn to_triple(value: Self) -> Result<({L},{T},{L}), {parse_error}> {{", - L = L, - T = T, - parse_error = parse_error, - ); - rust!(rust, "match value {{"); - rust!(rust, "Ok(v) => Ok(v),"); - rust!(rust, "Err(error) => Err({p}lalrpop_util::ParseError::User {{ error }}),", - p = grammar.prefix); - rust!(rust, "}}"); // match - rust!(rust, "}}"); - rust!(rust, "}}"); - } else { - rust!( - rust, - "impl<{utp}> {p}ToTriple<{utp}> for {T} {{", - utp = user_type_parameters, - p = grammar.prefix, - T = T, - ); - rust!( - rust, - "fn to_triple(value: Self) -> Result<((),{T},()), {parse_error}> {{", - T = T, - parse_error = parse_error, - ); - rust!(rust, "Ok(((), value, ()))"); - rust!(rust, "}}"); - rust!(rust, "}}"); - - rust!( - rust, - "impl<{utp}> {p}ToTriple<{utp}> for Result<({T}),{E}> {{", 
- utp = user_type_parameters, - p = grammar.prefix, - T = T, - E = E, - ); - rust!( - rust, - "fn to_triple(value: Self) -> Result<((),{T},()), {parse_error}> {{", - T = T, - parse_error = parse_error, - ); - rust!(rust, "match value {{"); - rust!(rust, "Ok(v) => Ok(((), v, ())),"); - rust!(rust, "Err(error) => Err({p}lalrpop_util::ParseError::User {{ error }}),", - p = grammar.prefix); - rust!(rust, "}}"); // match - rust!(rust, "}}"); // fn - rust!(rust, "}}"); // impl - } - - Ok(()) -} +//! Utilies for running in a build script. + +use atty; +use file_text::FileText; +use grammar::parse_tree as pt; +use grammar::repr as r; +use lalrpop_util::ParseError; +use lexer::intern_token; +use lr1; +use message::builder::InlineBuilder; +use message::{Content, Message}; +use normalize; +use parser; +use rust::RustWrite; +use session::{ColorConfig, Session}; +use sha2::{Digest, Sha256}; +use term; +use tls::Tls; +use tok; + +use std::fs; +use std::io::{self, BufRead, Write}; +use std::path::{Path, PathBuf}; +use std::process::exit; +use std::rc::Rc; + +mod action; +mod fake_term; + +use self::fake_term::FakeTerminal; + +const LALRPOP_VERSION_HEADER: &'static str = concat!( + "// auto-generated: \"", + env!("CARGO_PKG_NAME"), + " ", + env!("CARGO_PKG_VERSION"), + "\"" +); + +fn hash_file(file: &Path) -> io::Result { + let mut file = try!(fs::File::open(&file)); + let mut sha_256 = Sha256::new(); + try!(io::copy(&mut file, &mut sha_256)); + + let mut hash_str = "// sha256: ".to_owned(); + for byte in sha_256.result() { + hash_str.push_str(&format!("{:x}", byte)); + } + Ok(hash_str) +} + +pub fn process_dir>(session: Rc, root_dir: P) -> io::Result<()> { + let lalrpop_files = try!(lalrpop_files(root_dir)); + for lalrpop_file in lalrpop_files { + try!(process_file(session.clone(), lalrpop_file)); + } + Ok(()) +} + +pub fn process_file>(session: Rc, lalrpop_file: P) -> io::Result<()> { + let lalrpop_file = lalrpop_file.as_ref(); + let rs_file = try!(resolve_rs_file(&session, 
lalrpop_file)); + let report_file = try!(resolve_report_file(&session, lalrpop_file)); + process_file_into(session, lalrpop_file, &rs_file, &report_file) +} + +fn resolve_rs_file(session: &Session, lalrpop_file: &Path) -> io::Result { + gen_resolve_file(session, lalrpop_file, "rs") +} + +fn resolve_report_file(session: &Session, lalrpop_file: &Path) -> io::Result { + gen_resolve_file(session, lalrpop_file, "report") +} + +fn gen_resolve_file(session: &Session, lalrpop_file: &Path, ext: &str) -> io::Result { + let in_dir = if let Some(ref d) = session.in_dir { + d.as_path() + } else { + Path::new(".") + }; + let out_dir = if let Some(ref d) = session.out_dir { + d.as_path() + } else { + in_dir + }; + + // If the lalrpop file is not in in_dir, the result is that the + // .rs file is created in the same directory as the lalrpop file + // for compatibility reasons + Ok(out_dir + .join(lalrpop_file.strip_prefix(&in_dir).unwrap_or(lalrpop_file)) + .with_extension(ext)) +} + +fn process_file_into( + session: Rc, + lalrpop_file: &Path, + rs_file: &Path, + report_file: &Path, +) -> io::Result<()> { + if session.force_build || try!(needs_rebuild(&lalrpop_file, &rs_file)) { + log!( + session, + Informative, + "processing file `{}`", + lalrpop_file.to_string_lossy() + ); + if let Some(parent) = rs_file.parent() { + try!(fs::create_dir_all(parent)); + } + try!(remove_old_file(&rs_file)); + + // Load the LALRPOP source text for this file: + let file_text = Rc::new(try!(FileText::from_path(lalrpop_file.to_path_buf()))); + + // Store the session and file-text in TLS -- this is not + // intended to be used in this high-level code, but it gives + // easy access to this information pervasively in the + // low-level LR(1) and grammar normalization code. This is + // particularly useful for error-reporting. + let _tls = Tls::install(session.clone(), file_text.clone()); + + // Do the LALRPOP processing itself and write the resulting + // buffer into a file. 
We use a buffer so that if LR(1) + // generation fails at some point, we don't leave a partial + // file behind. + { + let grammar = try!(parse_and_normalize_grammar(&session, &file_text)); + let buffer = try!(emit_recursive_ascent(&session, &grammar, &report_file)); + let mut output_file = try!(fs::File::create(&rs_file)); + try!(writeln!(output_file, "{}", LALRPOP_VERSION_HEADER)); + try!(writeln!(output_file, "{}", try!(hash_file(&lalrpop_file)))); + try!(output_file.write_all(&buffer)); + } + } + Ok(()) +} + +fn remove_old_file(rs_file: &Path) -> io::Result<()> { + match fs::remove_file(rs_file) { + Ok(()) => Ok(()), + Err(e) => { + // Unix reports NotFound, Windows PermissionDenied! + match e.kind() { + io::ErrorKind::NotFound | io::ErrorKind::PermissionDenied => Ok(()), + _ => Err(e), + } + } + } +} + +fn needs_rebuild(lalrpop_file: &Path, rs_file: &Path) -> io::Result { + match fs::File::open(&rs_file) { + Ok(rs_file) => { + let mut version_str = String::new(); + let mut hash_str = String::new(); + + let mut f = io::BufReader::new(rs_file); + + try!(f.read_line(&mut version_str)); + try!(f.read_line(&mut hash_str)); + + Ok(hash_str.trim() != try!(hash_file(&lalrpop_file)) + || version_str.trim() != LALRPOP_VERSION_HEADER) + } + Err(e) => match e.kind() { + io::ErrorKind::NotFound => Ok(true), + _ => Err(e), + }, + } +} + +fn lalrpop_files>(root_dir: P) -> io::Result> { + let mut result = vec![]; + for entry in try!(fs::read_dir(root_dir)) { + let entry = try!(entry); + let file_type = try!(entry.file_type()); + + let path = entry.path(); + + if file_type.is_dir() { + result.extend(try!(lalrpop_files(&path))); + } + + if file_type.is_file() + && path.extension().is_some() + && path.extension().unwrap() == "lalrpop" + { + result.push(path); + } + } + Ok(result) +} + +fn parse_and_normalize_grammar(session: &Session, file_text: &FileText) -> io::Result { + let grammar = match parser::parse_grammar(file_text.text()) { + Ok(grammar) => grammar, + + 
Err(ParseError::InvalidToken { location }) => { + let ch = file_text.text()[location..].chars().next().unwrap(); + report_error( + &file_text, + pt::Span(location, location), + &format!("invalid character `{}`", ch), + ); + } + + Err(ParseError::UnrecognizedToken { + token: None, + expected: _, + }) => { + let len = file_text.text().len(); + report_error( + &file_text, + pt::Span(len, len), + &format!("unexpected end of file"), + ); + } + + Err(ParseError::UnrecognizedToken { + token: Some((lo, _, hi)), + expected, + }) => { + let _ = expected; // didn't implement this yet :) + let text = &file_text.text()[lo..hi]; + report_error( + &file_text, + pt::Span(lo, hi), + &format!("unexpected token: `{}`", text), + ); + } + + Err(ParseError::ExtraToken { token: (lo, _, hi) }) => { + let text = &file_text.text()[lo..hi]; + report_error( + &file_text, + pt::Span(lo, hi), + &format!("extra token at end of input: `{}`", text), + ); + } + + Err(ParseError::User { error }) => { + let string = match error.code { + tok::ErrorCode::UnrecognizedToken => "unrecognized token", + tok::ErrorCode::UnterminatedEscape => "unterminated escape; missing '`'?", + tok::ErrorCode::UnrecognizedEscape => { + "unrecognized escape; only \\n, \\r, \\t, \\\" and \\\\ are recognized" + } + tok::ErrorCode::UnterminatedStringLiteral => { + "unterminated string literal; missing `\"`?" + } + tok::ErrorCode::UnterminatedCharacterLiteral => { + "unterminated character literal; missing `'`?" + } + tok::ErrorCode::UnterminatedAttribute => "unterminated #! attribute; missing `]`?", + tok::ErrorCode::ExpectedStringLiteral => "expected string literal; missing `\"`?", + tok::ErrorCode::UnterminatedCode => { + "unterminated code block; perhaps a missing `;`, `)`, `]` or `}`?" 
+ } + }; + + report_error( + &file_text, + pt::Span(error.location, error.location + 1), + string, + ) + } + }; + + match normalize::normalize(session, grammar) { + Ok(grammar) => Ok(grammar), + Err(error) => report_error(&file_text, error.span, &error.message), + } +} + +fn report_error(file_text: &FileText, span: pt::Span, message: &str) -> ! { + println!("{} error: {}", file_text.span_str(span), message); + + let out = io::stderr(); + let mut out = out.lock(); + file_text.highlight(span, &mut out).unwrap(); + + exit(1); +} + +fn report_messages(messages: Vec) -> term::Result<()> { + let builder = InlineBuilder::new().begin_paragraphs(); + let builder = messages + .into_iter() + .fold(builder, |b, m| b.push(Box::new(m))); + let content = builder.end().end(); + report_content(&*content) +} + +fn report_content(content: &Content) -> term::Result<()> { + // FIXME -- can we query the size of the terminal somehow? + let canvas = content.emit_to_canvas(80); + + let try_colors = match Tls::session().color_config { + ColorConfig::Yes => true, + ColorConfig::No => false, + ColorConfig::IfTty => atty::is(atty::Stream::Stdout), + }; + + if try_colors { + if let Some(mut stdout) = term::stdout() { + return canvas.write_to(&mut *stdout); + } + } + + let stdout = io::stdout(); + let mut stdout = FakeTerminal::new(stdout.lock()); + canvas.write_to(&mut stdout) +} + +fn emit_module_attributes( + grammar: &r::Grammar, + rust: &mut RustWrite, +) -> io::Result<()> { + rust.write_module_attributes(grammar) +} + +fn emit_uses(grammar: &r::Grammar, rust: &mut RustWrite) -> io::Result<()> { + rust.write_uses("", grammar) +} + +fn emit_recursive_ascent( + session: &Session, + grammar: &r::Grammar, + report_file: &Path, +) -> io::Result> { + let mut rust = RustWrite::new(vec![]); + + // We generate a module structure like this: + // + // ``` + // mod { + // // For each public symbol: + // pub fn parse_XYZ(); + // mod __XYZ { ... 
} + // + // // For each bit of action code: + // + // } + // ``` + // + // Note that the action code goes in the outer module. This is + // intentional because it means that the foo.lalrpop file serves + // as a module in the rust hierarchy, so if the action code + // includes things like `super::` it will resolve in the natural + // way. + + try!(emit_module_attributes(grammar, &mut rust)); + try!(emit_uses(grammar, &mut rust)); + + if grammar.start_nonterminals.is_empty() { + println!("Error: no public symbols declared in grammar"); + exit(1); + } + + for (user_nt, start_nt) in &grammar.start_nonterminals { + // We generate these, so there should always be exactly 1 + // production. Otherwise the LR(1) algorithm doesn't know + // where to stop! + assert_eq!(grammar.productions_for(start_nt).len(), 1); + + log!( + session, + Verbose, + "Building states for public nonterminal `{}`", + user_nt + ); + + let _lr1_tls = lr1::Lr1Tls::install(grammar.terminals.clone()); + + let lr1result = lr1::build_states(&grammar, start_nt.clone()); + if session.emit_report { + let mut output_report_file = try!(fs::File::create(&report_file)); + try!(lr1::generate_report(&mut output_report_file, &lr1result)); + } + + let states = match lr1result { + Ok(states) => states, + Err(error) => { + let messages = lr1::report_error(&grammar, &error); + let _ = report_messages(messages); + exit(1) // FIXME -- propagate up instead of calling `exit` + } + }; + + match grammar.algorithm.codegen { + r::LrCodeGeneration::RecursiveAscent => try!(lr1::codegen::ascent::compile( + &grammar, + user_nt.clone(), + start_nt.clone(), + &states, + "super", + &mut rust, + )), + r::LrCodeGeneration::TableDriven => try!(lr1::codegen::parse_table::compile( + &grammar, + user_nt.clone(), + start_nt.clone(), + &states, + "super", + &mut rust, + )), + + r::LrCodeGeneration::TestAll => try!(lr1::codegen::test_all::compile( + &grammar, + user_nt.clone(), + start_nt.clone(), + &states, + &mut rust, + )), + } + + rust!( 
+ rust, + "{}use self::{}parse{}::{}Parser;", + grammar.nonterminals[&user_nt].visibility, + grammar.prefix, + start_nt, + user_nt + ); + } + + if let Some(ref intern_token) = grammar.intern_token { + try!(intern_token::compile(&grammar, intern_token, &mut rust)); + rust!(rust, "pub use self::{}intern_token::Token;", grammar.prefix); + } + + try!(action::emit_action_code(grammar, &mut rust)); + + try!(emit_to_triple_trait(grammar, &mut rust)); + + Ok(rust.into_inner()) +} + +fn emit_to_triple_trait(grammar: &r::Grammar, rust: &mut RustWrite) -> io::Result<()> { + #![allow(non_snake_case)] + + let L = grammar.types.terminal_loc_type(); + let T = grammar.types.terminal_token_type(); + let E = grammar.types.error_type(); + + let parse_error = format!( + "{p}lalrpop_util::ParseError<{L}, {T}, {E}>", + p = grammar.prefix, + L = L, + T = T, + E = E, + ); + + let mut user_type_parameters = String::new(); + for type_parameter in &grammar.type_parameters { + user_type_parameters.push_str(&format!("{}, ", type_parameter)); + } + + rust!(rust, ""); + rust!( + rust, + "pub trait {}ToTriple<{}> {{", + grammar.prefix, + user_type_parameters, + ); + rust!( + rust, + "fn to_triple(value: Self) -> Result<({L},{T},{L}), {parse_error}>;", + L = L, + T = T, + parse_error = parse_error, + ); + rust!(rust, "}}"); + + rust!(rust, ""); + if grammar.types.opt_terminal_loc_type().is_some() { + rust!( + rust, + "impl<{utp}> {p}ToTriple<{utp}> for ({L}, {T}, {L}) {{", + p = grammar.prefix, + utp = user_type_parameters, + L = L, + T = T, + ); + rust!( + rust, + "fn to_triple(value: Self) -> Result<({L},{T},{L}), {parse_error}> {{", + L = L, + T = T, + parse_error = parse_error, + ); + rust!(rust, "Ok(value)"); + rust!(rust, "}}"); + rust!(rust, "}}"); + + rust!( + rust, + "impl<{utp}> {p}ToTriple<{utp}> for Result<({L}, {T}, {L}), {E}> {{", + utp = user_type_parameters, + p = grammar.prefix, + L = L, + T = T, + E = E, + ); + rust!( + rust, + "fn to_triple(value: Self) -> Result<({L},{T},{L}), 
{parse_error}> {{", + L = L, + T = T, + parse_error = parse_error, + ); + rust!(rust, "match value {{"); + rust!(rust, "Ok(v) => Ok(v),"); + rust!(rust, "Err(error) => Err({p}lalrpop_util::ParseError::User {{ error }}),", + p = grammar.prefix); + rust!(rust, "}}"); // match + rust!(rust, "}}"); + rust!(rust, "}}"); + } else { + rust!( + rust, + "impl<{utp}> {p}ToTriple<{utp}> for {T} {{", + utp = user_type_parameters, + p = grammar.prefix, + T = T, + ); + rust!( + rust, + "fn to_triple(value: Self) -> Result<((),{T},()), {parse_error}> {{", + T = T, + parse_error = parse_error, + ); + rust!(rust, "Ok(((), value, ()))"); + rust!(rust, "}}"); + rust!(rust, "}}"); + + rust!( + rust, + "impl<{utp}> {p}ToTriple<{utp}> for Result<({T}),{E}> {{", + utp = user_type_parameters, + p = grammar.prefix, + T = T, + E = E, + ); + rust!( + rust, + "fn to_triple(value: Self) -> Result<((),{T},()), {parse_error}> {{", + T = T, + parse_error = parse_error, + ); + rust!(rust, "match value {{"); + rust!(rust, "Ok(v) => Ok(((), v, ())),"); + rust!(rust, "Err(error) => Err({p}lalrpop_util::ParseError::User {{ error }}),", + p = grammar.prefix); + rust!(rust, "}}"); // match + rust!(rust, "}}"); // fn + rust!(rust, "}}"); // impl + } + + Ok(()) +} diff --git a/lalrpop/src/lib.rs b/lalrpop/src/lib.rs index 6d783d6..7349ad2 100644 --- a/lalrpop/src/lib.rs +++ b/lalrpop/src/lib.rs @@ -1,58 +1,58 @@ -// Need this for rusty_peg -#![recursion_limit = "256"] -// I hate this lint. -#![allow(unused_parens)] -// The builtin tests don't cover the CLI and so forth, and it's just -// too darn annoying to try and make them do so. 
-#![cfg_attr(test, allow(dead_code))] - -extern crate ascii_canvas; -extern crate atty; -extern crate bit_set; -extern crate diff; -extern crate ena; -extern crate itertools; -#[cfg_attr(any(feature = "test", test), macro_use)] -extern crate lalrpop_util; -extern crate petgraph; -extern crate regex; -extern crate regex_syntax; -extern crate sha2; -extern crate string_cache; -extern crate term; -extern crate unicode_xid; - -#[cfg(test)] -extern crate rand; - -// hoist the modules that define macros up earlier -#[macro_use] -mod rust; -#[macro_use] -mod log; - -mod api; -mod build; -mod collections; -mod file_text; -mod grammar; -mod kernel_set; -mod lexer; -mod lr1; -mod message; -mod normalize; -mod parser; -mod session; -mod tls; -mod tok; -mod util; - -#[cfg(test)] -mod generate; -#[cfg(test)] -mod test_util; - -pub use api::process_root; -pub use api::process_root_unconditionally; -pub use api::Configuration; -use ascii_canvas::style; +// Need this for rusty_peg +#![recursion_limit = "256"] +// I hate this lint. +#![allow(unused_parens)] +// The builtin tests don't cover the CLI and so forth, and it's just +// too darn annoying to try and make them do so. 
+#![cfg_attr(test, allow(dead_code))] + +extern crate ascii_canvas; +extern crate atty; +extern crate bit_set; +extern crate diff; +extern crate ena; +extern crate itertools; +#[cfg_attr(any(feature = "test", test), macro_use)] +extern crate lalrpop_util; +extern crate petgraph; +extern crate regex; +extern crate regex_syntax; +extern crate sha2; +extern crate string_cache; +extern crate term; +extern crate unicode_xid; + +#[cfg(test)] +extern crate rand; + +// hoist the modules that define macros up earlier +#[macro_use] +mod rust; +#[macro_use] +mod log; + +mod api; +mod build; +mod collections; +mod file_text; +mod grammar; +mod kernel_set; +mod lexer; +mod lr1; +mod message; +mod normalize; +mod parser; +mod session; +mod tls; +mod tok; +mod util; + +#[cfg(test)] +mod generate; +#[cfg(test)] +mod test_util; + +pub use api::process_root; +pub use api::process_root_unconditionally; +pub use api::Configuration; +use ascii_canvas::style; diff --git a/lalrpop/src/parser/mod.rs b/lalrpop/src/parser/mod.rs index 6d2a895..9c93d27 100644 --- a/lalrpop/src/parser/mod.rs +++ b/lalrpop/src/parser/mod.rs @@ -1,84 +1,84 @@ -use std::iter; - -use grammar::parse_tree::*; -use grammar::pattern::*; -use lalrpop_util; -use tok; - -#[cfg(not(any(feature = "test", test)))] -#[allow(dead_code)] -mod lrgrammar; - -#[cfg(any(feature = "test", test))] -lalrpop_mod!( - // --------------------------------------------------------------------------------------- - // NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file - // --------------------------------------------------------------------------------------- - #[allow(dead_code)] - lrgrammar, - "/src/parser/lrgrammar.rs" -); - -#[cfg(test)] -mod test; - -pub enum Top { - Grammar(Grammar), - Pattern(Pattern), - MatchMapping(TerminalString), - TypeRef(TypeRef), - GrammarWhereClauses(Vec>), -} - -pub type ParseError<'input> = lalrpop_util::ParseError, tok::Error>; - -macro_rules! 
parser { - ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{ - let input = $input; - let tokenizer = - iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset)); - lrgrammar::TopParser::new() - .parse(input, tokenizer) - .map(|top| match top { - Top::$pat(x) => x, - _ => unreachable!(), - }) - }}; -} - -pub fn parse_grammar<'input>(input: &'input str) -> Result> { - let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar)); - - // find a unique prefix that does not appear anywhere in the input - while input.contains(&grammar.prefix) { - grammar.prefix.push('_'); - } - - Ok(grammar) -} - -fn parse_pattern<'input>( - input: &'input str, - offset: usize, -) -> Result, ParseError<'input>> { - parser!(input, offset, Pattern, StartPattern) -} - -fn parse_match_mapping<'input>( - input: &'input str, - offset: usize, -) -> Result> { - parser!(input, offset, MatchMapping, StartMatchMapping) -} - -#[cfg(test)] -pub fn parse_type_ref<'input>(input: &'input str) -> Result> { - parser!(input, 0, TypeRef, StartTypeRef) -} - -#[cfg(test)] -pub fn parse_where_clauses<'input>( - input: &'input str, -) -> Result>, ParseError<'input>> { - parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses) -} +use std::iter; + +use grammar::parse_tree::*; +use grammar::pattern::*; +use lalrpop_util; +use tok; + +#[cfg(not(any(feature = "test", test)))] +#[allow(dead_code)] +mod lrgrammar; + +#[cfg(any(feature = "test", test))] +lalrpop_mod!( + // --------------------------------------------------------------------------------------- + // NOTE: Run `cargo build -p lalrpop` once before running `cargo test` to create this file + // --------------------------------------------------------------------------------------- + #[allow(dead_code)] + lrgrammar, + "/src/parser/lrgrammar.rs" +); + +#[cfg(test)] +mod test; + +pub enum Top { + Grammar(Grammar), + Pattern(Pattern), + MatchMapping(TerminalString), + TypeRef(TypeRef), + GrammarWhereClauses(Vec>), 
+} + +pub type ParseError<'input> = lalrpop_util::ParseError, tok::Error>; + +macro_rules! parser { + ($input: expr, $offset: expr, $pat: ident, $tok: ident) => {{ + let input = $input; + let tokenizer = + iter::once(Ok((0, tok::Tok::$tok, 0))).chain(tok::Tokenizer::new(input, $offset)); + lrgrammar::TopParser::new() + .parse(input, tokenizer) + .map(|top| match top { + Top::$pat(x) => x, + _ => unreachable!(), + }) + }}; +} + +pub fn parse_grammar<'input>(input: &'input str) -> Result> { + let mut grammar = try!(parser!(input, 0, Grammar, StartGrammar)); + + // find a unique prefix that does not appear anywhere in the input + while input.contains(&grammar.prefix) { + grammar.prefix.push('_'); + } + + Ok(grammar) +} + +fn parse_pattern<'input>( + input: &'input str, + offset: usize, +) -> Result, ParseError<'input>> { + parser!(input, offset, Pattern, StartPattern) +} + +fn parse_match_mapping<'input>( + input: &'input str, + offset: usize, +) -> Result> { + parser!(input, offset, MatchMapping, StartMatchMapping) +} + +#[cfg(test)] +pub fn parse_type_ref<'input>(input: &'input str) -> Result> { + parser!(input, 0, TypeRef, StartTypeRef) +} + +#[cfg(test)] +pub fn parse_where_clauses<'input>( + input: &'input str, +) -> Result>, ParseError<'input>> { + parser!(input, 0, GrammarWhereClauses, StartGrammarWhereClauses) +}