1/*==============================================================================
  2|
  3|  NAME
  4|
  5|     ppParser.hpp
  6|
  7|  DESCRIPTION
  8|
  9|     Header file for polynomial parser classes.
 10|
 11|     User manual and technical documentation are described in detail in my web page at
 12|     http://seanerikoconnor.freeservers.com/Mathematics/AbstractAlgebra/PrimitivePolynomials/overview.html
 13|
 14|  LEGAL
 15|
 16|     Primpoly Version 16.3 - A Program for Computing Primitive Polynomials.
 17|     Copyright (C) 1999-2024 by Sean Erik O'Connor.  All Rights Reserved.
 18|
 19|     This program is free software: you can redistribute it and/or modify
 20|     it under the terms of the GNU General Public License as published by
 21|     the Free Software Foundation, either version 3 of the License, or
 22|     (at your option) any later version.
 23|
 24|     This program is distributed in the hope that it will be useful,
 25|     but WITHOUT ANY WARRANTY; without even the implied warranty of
 26|     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 27|     GNU General Public License for more details.
 28|
 29|     You should have received a copy of the GNU General Public License
 30|     along with this program.  If not, see http://www.gnu.org/licenses/.
 31|     
 32|     The author's address is seanerikoconnor!AT!gmail!DOT!com
 33|     with the !DOT! replaced by . and the !AT! replaced by @
 34|
 35==============================================================================*/
 36
 37// Wrap this header file to prevent duplication if it is included
 38// accidentally more than once.
 39#ifndef __PPPARSER_H__
 40
 41
 42// Some miscellaneous constants.
 43#define MAX_NUM_COMMAND_LINE_ARGS 3
 44const ppuint minModulus{ 2 } ;
 45const ppuint minDegree{ 2 } ;
 46
 47
 48/*=============================================================================
 49 |
 50 | NAME
 51 |
 52 |     Action
 53 |
 54 | DESCRIPTION
 55 |
 56 |     Types of LR parser actions.
 57 |
 58 +============================================================================*/
 59
 60enum class Action
 61{
 62    Shift, Reduce, Accept, Error
 63} ;
 64
 65
 66/*=============================================================================
 67 |
 68 | NAME
 69 |
 70 |     enumToString
 71 |
 72 | DESCRIPTION
 73 |
 74 |     Helper function to translate enum types into human readable format.
 75 |
 76 +============================================================================*/
 77
 78template< typename SymbolType >
 79string enumToString( const SymbolType & st ) ;
 80
 81
 82/*=============================================================================
 83 |
 84 | NAME
 85 |
 86 |     Symbol
 87 |
 88 | DESCRIPTION
 89 |
 90 |     Define the VALUE of a symbol.  Used in the parser's syntax directed
 91 |     translation and also for the final polynomial value returned from the
 92 |     parser.
 93 |
 94 +============================================================================*/
 95
 96// Tokens and other types of symbols used in parsing.
 97template< typename SymbolType, typename ValueType >
 98class Symbol
 99{
100    public:
101        Symbol() ;
102        
103        Symbol( SymbolType type, int state ) ;
104        
105        Symbol( SymbolType type, int state, ValueType value )  ;
106        
107        Symbol( const Symbol< SymbolType, ValueType > & s ) ;
108    
109        // Destructor.  Virtual so derived class destructor
110        // automatically calls its base class destructor.
111        virtual ~Symbol()
112        {
113        }
114    
115        Symbol< SymbolType, ValueType > & operator=( const Symbol<SymbolType,ValueType> & s ) ;
116
117        // Print function for Symbol class.
118        // Define this templated non-member friend function here, otherwise the compiler will generate
119        // a link error.  If the compiler sees only a declaration here, it doesn't know yet it's a template.
120        // See https://isocpp.org/wiki/faq/templates#template-friends
121        friend ostream & operator<<( ostream & out, const Symbol<SymbolType,ValueType> & s )
122        {
123            out << enumToString<SymbolType>( s.type_ ) << "   " ;
124            out << s.value_ << "   " ;
125            out << "state " << s.state_ ;
126
127            return out ;
128        }
129
130    // Allow direct access to this simple data type for convenience.
131    public:
132        SymbolType type_ ;  // Type of terminal or nonterminal.
133        ValueType  value_ ; // Value of the symbol.
134        int        state_ ; // State or production number.
135} ;
136
137/*=============================================================================
138 |
139 | NAME
140 |
141 |     ActionState
142 |
143 | DESCRIPTION
144 |
145 |     A pair consisting of action and state for the LR parser.
146 |     Actions are (shift s, reduce p, accept, error) and the new state
147 |     number to transition to.
148 |
149 +============================================================================*/
150
151class ActionState
152{
153    public:
154        ActionState() ;
155        
156        ActionState( Action type, int state ) ;
157    
158        // Destructor.  Virtual so derived class destructor
159        // automatically calls its base class destructor.
160        virtual ~ActionState()
161        {
162        }
163
164        ActionState( const ActionState & as ) ;
165        
166        ActionState & operator=( const ActionState & as ) ;
167    
168        friend ostream & operator<<( ostream & out, const ActionState & as ) ;
169    
170    // Allow direct access to this simple data type for convenience.
171    public:
172        Action action_ ;
173        int    state_ ;
174} ;
175
176/*=============================================================================
177 |
178 | NAME
179 |
180 |     ParserError
181 |
182 | DESCRIPTION
183 |
184 |     Exceptions for the parser class.
185 |
186 +============================================================================*/
187
188class ParserError : public runtime_error
189{
190    public:
191        // Throw with error message, file name and line number.
192        ParserError( const string & description, const string & file, const int & line )
193        : runtime_error( description + " in file " + file + " at line " + to_string(line) )
194        {
195        } ;
196    
197        // Throw with an error message.
198        ParserError( const string & description )
199            : runtime_error( description )
200        {
201        } ;
202
203        // Throw with default error message.
204        ParserError()
205            : runtime_error( "Parser error:  " )
206        {
207        } ;
208
209} ; 
210
211
212
213/*=============================================================================
214|
215| NAME
216|
217|     Parser
218|
219| DESCRIPTION
220|
221|     Abstract base class for parsing using an LALR(1) parser.
222|
223| EXAMPLE
224|
225|     Example of how to use the PolyParser child class for polynomials,
226|
227|        // Create the parser.
228|        PolyParser<PolySymbol, PolyValue> parser() ;
229|
230|        // Sample polynomial.
231|        string s = "2 x ^ 3 + 3 x + 4, 5" ;
232|
233|        try 
234|        {
235|            PolyValue v = parser.parse( s ) ;
236|            cout << "Parsing the string = " << s << endl ;
237|            cout << "We have a polynomial = " << v << endl ;
238|        }
239|        catch( ParserError & e )
240|        {
241|            cout << "Parser error:  " << e.what() << endl ;
242|        }
243|
244| NOTES
245|
246|     I used a parser generator I wrote in Common LISP to generate the LALR(1) parse tables.  See my
247|     notes,
248|         http://seanerikoconnor.freeservers.com/ComputerScience/Compiler/ParserGeneratorAndParser/LRandLALRParserGeneratorAndParser.html
249|     You can also use Flex and Bison (Yacc) to generate the equivalent tables, but I like my own parser generator.
250|
251|     While the LALR(1) parser machinery stays the same in this base class, the grammar,
252|     i.e. parse tables and error messages, will be redefined in the child class, also the syntax
253|     directed translation.  We try to use the base class lexical analyzer for all child classes, if possible.
254|  
255+============================================================================*/
256
257template< typename SymbolType, typename ValueType >
258class Parser
259{
260    // The basic member functions to create and initialize the parser,
261    // make copies and do a syntax-directed translation of a string.
262    public:
263        // Default construct the parser.
264        Parser() ;
265        
266        // Destructor.  Virtual so derived class destructor
267        // automatically calls its base class destructor.
268        virtual ~Parser() ;
269
270        // Copy constructor.
271        Parser( const Parser & parser ) ;
272
273        // Assignment operator.
274        virtual Parser & operator=( const Parser & parser ) ;
275
276        // Parse an input string, doing a syntax-directed translation, returning the value.
277        ValueType parse( string s ) ;
278
279    // Pure virtual functions to be defined only in the child classes, since the lexical analyzer, parse tables and
280    // syntax-directed translation depend uniquely on the grammar.
281    //
282	// Defined within the scope of the Parser class or its derived classes only,
283    // so not visible from outside the parser.
284    protected:
285        // Parse string into tokens.
286        virtual void tokenize( string sentence ) = 0 ;
287    
288        // NOTE:  call this in the child class constructor.  It fills in the ACTION, GOTO, and ERROR_MESSAGE tables.
289        virtual void initializeTables() = 0 ;
290
291        // Reduce using production A -> beta, computing A's value from values of the tokens in beta.
292        virtual void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) = 0 ;
293    
294    // These member values and functions are called directly in the child classes.
295    protected:
296        // Number of states, productions, etc.
297        int num_states_ ;        // Includes the 0 state.
298        int num_productions__ ;   // Number of productions.
299        int num_terminals_ ;     // Number of terminal symbols.
300        int num_non_terminals_ ;  // Number of nonterminal symbols.
301    
302        void insertAction(       int state,   SymbolType terminal, Action actionType, int actionState ) ;
303        void insertGoto(         int state,   SymbolType nonterm,  int    newState ) ;
304        void insertProduction(   int prodNum, SymbolType nonTerm,  int    rhsLength ) ;
305        void insertErrorMessage( int state,   string errorMessage ) ;
306
307    protected:
308        // Parser stack containing symbols tokenized by lexical analyzer.
309        vector< Symbol<SymbolType, ValueType> > input_stack_ ;
310
311        // Parse stack containing symbols and states.
312        vector< Symbol<SymbolType, ValueType> > parse_stack_ ;
313
314        // Action table.  
315        //     action = ACTION_TABLE[ <state at top of parse stack> ][ <lookahead input token> ]
316        vector< vector<ActionState> > action_table_ ;
317
318        // GOTO table.
319        // For reduce actions,
320        //    new goto state to push = GOTO[ current state ][ nonterminal production symbol ]
321        vector< vector<int> > goto_table_ ;
322
323        // Error messages.
324        vector<string> error_message_ ;
325        
326        // Productions:  Left side non-terminal symbol + production number.
327        vector< Symbol<SymbolType, ValueType> > production_ ;
328} ;
329
330
331
332// --------------- Derived class parsers and their symbols and values ---------
333
334
335
336/*=============================================================================
337 | 
338 | NAME
339 | 
340 |     PolyValue
341 | 
342 | DESCRIPTION
343 | 
344 |     Define a symbol with a polynomial value.  Used in the parser's syntax directed
345 |     translation and also for the final polynomial value returned from the 
346 |     parser.
347 |
348 +============================================================================*/
349
350class PolyValue
351{
352    public:
353        PolyValue() ;
354    
355        PolyValue( const PolyValue & v ) ;
356    
357        PolyValue & operator=( const PolyValue & v ) ;
358    
359        friend ostream & operator<<( ostream & , const PolyValue & v ) ;
360
361    // Allow direct access to this simple data type for convenience.
362    public:
363        ppuint           scalar_ ;  // Number which can be a polynomial coefficient, power of x, or the modulus p.
364                                    // Let it be signed for maximum flexibility.  We'll check if it exceeds the allowed
365                                    // range for internal primitive polynomial computations in the parser. 
366        vector<ppuint> f_ ;       // Polynomial f(x).
367} ;
368
369
370/*=============================================================================
371 |
372 | NAME
373 |
374 |     PolySymbol
375 |
376 | DESCRIPTION
377 |
378 |     Define polynomial symbol (tokens).  Used in the parser's syntax directed
379 |     translation.
380 |
381 +============================================================================*/
382
383enum class PolySymbol
384{
385    // Terminal symbols or tokens.
386    Dollar = 0, Integer, Comma, Ecks, Plus, Exp,
387    NumTerminals,
388    
389    // Nonterminal symbols.
390    S, Mod, Poly, Term, Multiplier, Power,
391    NumSymbols
392} ;
393
394/*=============================================================================
395|
396| NAME
397|
398|     PolyParser
399|
400| DESCRIPTION
401|
402|     Child class for parsing a polynomial grammar inheriting from abstract
403|     base class Parser.
404|
405| EXAMPLE
406|
407|     Here is the LALR(1) grammar.  Note the use of epsilon productions to
408|     make the grammar simpler.
409|
410|     Productions:
411|
412|         S          -> Poly Mod
413|         Mod        -> comma integer | EPSILON
414|         Poly       -> Poly + Term | Term
415|         Term       -> Multiplier Power
416|         Multiplier -> integer | EPSILON
417|         Power      -> x | x ^ integer | EPSILON
418|
419|     Terminal symbols:
420|   
421|         comma + integer ^ x $
422|
423+============================================================================*/
424
425template< typename SymbolType, typename ValueType >
426class PolyParser : public Parser< SymbolType, ValueType >
427{
428    public:
429        // Construct the parser.
430        PolyParser() ;
431        
432        // Destructor.
433        ~PolyParser() ;
434        
435        // Copy constructor.
436        PolyParser( const PolyParser< SymbolType, ValueType > & parser ) ;
437        
438        // Assignment operator.
439        PolyParser< SymbolType, ValueType > & operator=( const PolyParser< SymbolType, ValueType > & parser ) ;
440        
441        // Command line argument parsing.
442        void parseCommandLine( int argc, const char * argv[] ) ;
443        
444        // Allow direct access to these command line parameters for convenience.
445        bool   test_polynomial_for_primitivity_ ;
446        bool   list_all_primitive_polynomials_ ;
447        bool   print_operation_count_ ;
448        bool   print_help_ ;
449        bool   slow_confirm_ ;
450        ppuint p ;
451        int    n ;
452        Polynomial test_polynomial_ ;
453    
454        // Location of the factorization tables.
455        static string current_working_dir_ ;
456    
457    private:
458        // Parse string into tokens.
459        void tokenize( string sentence ) ;
460        
461        // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables.
462        void initializeTables() ;
463
464        // Syntax directed translation.  Reduce using production A -> beta, computing A's value from values of 
465        // the tokens in beta.
466        void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ;
467
468        // Even though these member variables are declared in the base class as protected, they are not visible in this
469        // child class.  Because we have a templated class, we must tell the C++ compiler explicitly that these symbols
470        // are from the base class.
471        using Parser<SymbolType, ValueType>::num_states_ ;
472        using Parser<SymbolType, ValueType>::num_productions__ ;
473        using Parser<SymbolType, ValueType>::num_terminals_ ;
474        using Parser<SymbolType, ValueType>::num_non_terminals_ ;
475
476        using Parser<SymbolType, ValueType>::insertAction ;
477        using Parser<SymbolType, ValueType>::insertGoto ;
478        using Parser<SymbolType, ValueType>::insertProduction ;
479        using Parser<SymbolType, ValueType>::insertErrorMessage ;
480
481        using Parser< SymbolType, ValueType >::input_stack_ ;
482        using Parser< SymbolType, ValueType >::parse_stack_ ;
483        using Parser< SymbolType, ValueType >::action_table_ ;
484        using Parser< SymbolType, ValueType >::goto_table_ ;
485        using Parser< SymbolType, ValueType >::error_message_ ;
486        using Parser< SymbolType, ValueType >::production_ ;
487} ;
488
489// Static member variable needs to be initialized outside of the class declaration.
490template< typename SymbolType, typename ValueType >
491string PolyParser<SymbolType, ValueType>::current_working_dir_ = "" ;
492
493
494// --------------- Derived class parsers and their symbols and values ---------
495
496
497
498/*=============================================================================
499 | 
500 | NAME
501 | 
502 |     FactorizationValue
503 | 
504 | DESCRIPTION
505 | 
506 |     Define a parser symbol which containst the number string in one line of
507 |     the factorization table and also the prime factorization itself.
508 |     This is used in the parser's syntax directed translation and also for
509 |     the final value returned from the parser.
510 |
511 +============================================================================*/
512
513template< typename IntType >
514class FactorizationValue
515{
516    public:
517        FactorizationValue() ;
518    
519        FactorizationValue( const FactorizationValue<IntType> & v ) ;
520    
521        FactorizationValue( const IntType & p, const int & count ) ;
522
523        FactorizationValue<IntType> & operator=( const FactorizationValue<IntType> & v ) ;
524    
525        // Print out the factorization.
526        // Define this templated non-member friend function here, otherwise the compiler will generate
527        // a link error.  If the compiler sees only a declaration here, and not the complete implementation,
528        // it doesn't know yet it's a template.
529        // See https://isocpp.org/wiki/faq/templates#template-friends
530        friend ostream & operator<<( ostream & out, const FactorizationValue<IntType> & factorization )
531        {
532            if (factorization.factor_.size() > 0)
533            {
534                for (auto & f : factorization.factor_)
535                    out << f ;
536            }
537            if (factorization.numberString_.length() > 0)
538                out << factorization.numberString_ ;
539
540            return out ;
541        }
542    
543        static IntType numberStringToInteger( const string & numberString ) ;
544
545    // Allow direct access to data for convenience.
546    public:
547        string                         numberString_ ;  // Sequence of digits representing an integer.
548		vector< PrimeFactor<IntType> > factor_ ;        // Vector of distinct prime factors to powers.
549} ;
550
551
552/*=============================================================================
553 |
554 | NAME
555 |
556 |     FactorizationSymbols
557 |
558 | DESCRIPTION
559 |
560 |     Define factoriziton symbols (tokens).  Used in the parser's syntax directed
561 |     translation.
562 |
563 +============================================================================*/
564
565enum class FactorizationSymbol
566{
567    // Terminal symbols or tokens.
568    Dollar = 0, Integer, Period, Caret, Backslash,
569    NumTerminals,
570    
571    // Nonterminal symbols.
572    S, Factorization, Factor, BigInteger,
573    
574    // Count number of total symbols.
575    NumSymbols
576} ;
577
578
579
580/*=============================================================================
581|
582| NAME
583|
584|     FactorizationParser
585|
586| DESCRIPTION
587|
588|     Child class for parsing a factorization table of Cunningham numbers p ^ n - 1
589|     inherited from abstract base class Parser.
590|
591| EXAMPLE
592|
593|     Each table for p has one line containing the factorization of p ^ n - 1 of the type
594|
595|          n <num factors p1 ^ m1 . p2 ^ m2 ...
596|
597|     Dots separate the factors.  Long numbers are continued on the next line with backslashes.
598|     For example, in the table for p = 3, the line below is for n = 398 and has the 12 factors
599|     2 ^ 4, 3, 3583 ... 11881576435363115110426317067813673631182057431802975257340612015708984828478898969687279497327343
600|       
601|         398    12  2^4.3.3583.4588543.34266607.2146612951394313997.8670122322845042\
602|                    61471.3742361194240057889227626965547117.118815764353631151\
603|                    104263170678136736311820574318029752573406120157089\
604|                    84828478898969687279497327343
605|
606|     Here is the LALR(1) grammar.  
607| 
608|     Productions:
609|
610|         S             -> integer integer Factorization
611|         Factorization -> Factorization . Factor | Factor
612|         Factor        -> BigInteger ^ BigInteger | BigInteger
613|         BigInteger    -> BigInteger \ integer | integer
614|
615|     Terminal symbols:
616|         integer . ^ \ $
617|
618+============================================================================*/
619
620template< typename SymbolType, typename ValueType >
621class FactorizationParser : public Parser< SymbolType, ValueType >
622{
623    public:
624        // Construct the parser.
625        FactorizationParser() ;
626        
627        // Destructor.
628        ~FactorizationParser() ;
629        
630        // Copy constructor.
631        FactorizationParser( const FactorizationParser< SymbolType, ValueType > & parser ) ;
632        
633        // Assignment operator.
634        FactorizationParser< SymbolType, ValueType > & operator=( const FactorizationParser< SymbolType, ValueType > & parser ) ;
635        
636    private:
637        // Parse string into tokens.
638        void tokenize( string sentence ) ;
639        
640        // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables.
641        void initializeTables() ;
642
643        // Syntax directed translation.  Reduce using production A -> beta, computing A's value from values of 
644        // the tokens in beta.
645        void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ;
646
647        // Even though these member variables are declared in the base class as protected, they are not visible in this
648        // child class.  We have to tell the compiler explicitly that these symbols are base class templated types so it
649        // knows what they are.
650        using Parser<SymbolType, ValueType>::num_states_ ;
651        using Parser<SymbolType, ValueType>::num_productions__ ;
652        using Parser<SymbolType, ValueType>::num_terminals_ ;
653        using Parser<SymbolType, ValueType>::num_non_terminals_ ;
654
655        using Parser<SymbolType, ValueType>::insertAction ;
656        using Parser<SymbolType, ValueType>::insertGoto ;
657        using Parser<SymbolType, ValueType>::insertProduction ;
658        using Parser<SymbolType, ValueType>::insertErrorMessage ;
659
660        using Parser< SymbolType, ValueType >::input_stack_ ;
661        using Parser< SymbolType, ValueType >::parse_stack_ ;
662        using Parser< SymbolType, ValueType >::action_table_ ;
663        using Parser< SymbolType, ValueType >::goto_table_ ;
664        using Parser< SymbolType, ValueType >::error_message_ ;
665        using Parser< SymbolType, ValueType >::production_ ;
666} ;
667
668#endif // __PPPARSER_H__