1/*==============================================================================
2|
3| NAME
4|
5| ppParser.hpp
6|
7| DESCRIPTION
8|
9| Header file for polynomial parser classes.
10|
11| User manual and technical documentation are described in detail in my web page at
12| http://seanerikoconnor.freeservers.com/Mathematics/AbstractAlgebra/PrimitivePolynomials/overview.html
13|
14| LEGAL
15|
16| Primpoly Version 16.3 - A Program for Computing Primitive Polynomials.
17| Copyright (C) 1999-2024 by Sean Erik O'Connor. All Rights Reserved.
18|
19| This program is free software: you can redistribute it and/or modify
20| it under the terms of the GNU General Public License as published by
21| the Free Software Foundation, either version 3 of the License, or
22| (at your option) any later version.
23|
24| This program is distributed in the hope that it will be useful,
25| but WITHOUT ANY WARRANTY; without even the implied warranty of
26| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
27| GNU General Public License for more details.
28|
29| You should have received a copy of the GNU General Public License
30| along with this program. If not, see http://www.gnu.org/licenses/.
31|
32| The author's address is seanerikoconnor!AT!gmail!DOT!com
33| with the !DOT! replaced by . and the !AT! replaced by @
34|
35==============================================================================*/
36
37// Wrap this header file to prevent duplication if it is included
38// accidentally more than once.
39#ifndef __PPPARSER_H__
40
41
42// Some miscellaneous constants.
43#define MAX_NUM_COMMAND_LINE_ARGS 3
44const ppuint minModulus{ 2 } ;
45const ppuint minDegree{ 2 } ;
46
47
48/*=============================================================================
49 |
50 | NAME
51 |
52 | Action
53 |
54 | DESCRIPTION
55 |
56 | Types of LR parser actions.
57 |
58 +============================================================================*/
59
60enum class Action
61{
62 Shift, Reduce, Accept, Error
63} ;
64
65
66/*=============================================================================
67 |
68 | NAME
69 |
70 | enumToString
71 |
72 | DESCRIPTION
73 |
74 | Helper function to translate enum types into human readable format.
75 |
76 +============================================================================*/
77
78template< typename SymbolType >
79string enumToString( const SymbolType & st ) ;
80
81
82/*=============================================================================
83 |
84 | NAME
85 |
86 | Symbol
87 |
88 | DESCRIPTION
89 |
90 | Define the VALUE of a symbol. Used in the parser's syntax directed
91 | translation and also for the final polynomial value returned from the
92 | parser.
93 |
94 +============================================================================*/
95
96// Tokens and other types of symbols used in parsing.
97template< typename SymbolType, typename ValueType >
98class Symbol
99{
100 public:
101 Symbol() ;
102
103 Symbol( SymbolType type, int state ) ;
104
105 Symbol( SymbolType type, int state, ValueType value ) ;
106
107 Symbol( const Symbol< SymbolType, ValueType > & s ) ;
108
109 // Destructor. Virtual so derived class destructor
110 // automatically calls its base class destructor.
111 virtual ~Symbol()
112 {
113 }
114
115 Symbol< SymbolType, ValueType > & operator=( const Symbol<SymbolType,ValueType> & s ) ;
116
117 // Print function for Symbol class.
118 // Define this templated non-member friend function here, otherwise the compiler will generate
119 // a link error. If the compiler sees only a declaration here, it doesn't know yet it's a template.
120 // See https://isocpp.org/wiki/faq/templates#template-friends
121 friend ostream & operator<<( ostream & out, const Symbol<SymbolType,ValueType> & s )
122 {
123 out << enumToString<SymbolType>( s.type_ ) << " " ;
124 out << s.value_ << " " ;
125 out << "state " << s.state_ ;
126
127 return out ;
128 }
129
130 // Allow direct access to this simple data type for convenience.
131 public:
132 SymbolType type_ ; // Type of terminal or nonterminal.
133 ValueType value_ ; // Value of the symbol.
134 int state_ ; // State or production number.
135} ;
136
137/*=============================================================================
138 |
139 | NAME
140 |
141 | ActionState
142 |
143 | DESCRIPTION
144 |
145 | A pair consisting of action and state for the LR parser.
146 | Actions are (shift s, reduce p, accept, error) and the new state
147 | number to transition to.
148 |
149 +============================================================================*/
150
151class ActionState
152{
153 public:
154 ActionState() ;
155
156 ActionState( Action type, int state ) ;
157
158 // Destructor. Virtual so derived class destructor
159 // automatically calls its base class destructor.
160 virtual ~ActionState()
161 {
162 }
163
164 ActionState( const ActionState & as ) ;
165
166 ActionState & operator=( const ActionState & as ) ;
167
168 friend ostream & operator<<( ostream & out, const ActionState & as ) ;
169
170 // Allow direct access to this simple data type for convenience.
171 public:
172 Action action_ ;
173 int state_ ;
174} ;
175
176/*=============================================================================
177 |
178 | NAME
179 |
180 | ParserError
181 |
182 | DESCRIPTION
183 |
184 | Exceptions for the parser class.
185 |
186 +============================================================================*/
187
188class ParserError : public runtime_error
189{
190 public:
191 // Throw with error message, file name and line number.
192 ParserError( const string & description, const string & file, const int & line )
193 : runtime_error( description + " in file " + file + " at line " + to_string(line) )
194 {
195 } ;
196
197 // Throw with an error message.
198 ParserError( const string & description )
199 : runtime_error( description )
200 {
201 } ;
202
203 // Throw with default error message.
204 ParserError()
205 : runtime_error( "Parser error: " )
206 {
207 } ;
208
209} ;
210
211
212
213/*=============================================================================
214|
215| NAME
216|
217| Parser
218|
219| DESCRIPTION
220|
221| Abstract base class for parsing using an LALR(1) parser.
222|
223| EXAMPLE
224|
225| Example of how to use the PolyParser child class for polynomials,
226|
227| // Create the parser.
228| PolyParser<PolySymbol, PolyValue> parser() ;
229|
230| // Sample polynomial.
231| string s = "2 x ^ 3 + 3 x + 4, 5" ;
232|
233| try
234| {
235| PolyValue v = parser.parse( s ) ;
236| cout << "Parsing the string = " << s << endl ;
237| cout << "We have a polynomial = " << v << endl ;
238| }
239| catch( ParserError & e )
240| {
241| cout << "Parser error: " << e.what() << endl ;
242| }
243|
244| NOTES
245|
246| I used a parser generator I wrote in Common LISP to generate the LALR(1) parse tables. See my
247| notes,
248| http://seanerikoconnor.freeservers.com/ComputerScience/Compiler/ParserGeneratorAndParser/LRandLALRParserGeneratorAndParser.html
249| You can also use Flex and Bison (Yacc) to generate the equivalent tables, but I like my own parser generator.
250|
251| While the LALR(1) parser machinery stays the same in this base class, the grammar,
252| i.e. parse tables and error messages, will be redefined in the child class, also the syntax
253| directed translation. We try to use the base class lexical analyzer for all child classes, if possible.
254|
255+============================================================================*/
256
257template< typename SymbolType, typename ValueType >
258class Parser
259{
260 // The basic member functions to create and initialize the parser,
261 // make copies and do a syntax-directed translation of a string.
262 public:
263 // Default construct the parser.
264 Parser() ;
265
266 // Destructor. Virtual so derived class destructor
267 // automatically calls its base class destructor.
268 virtual ~Parser() ;
269
270 // Copy constructor.
271 Parser( const Parser & parser ) ;
272
273 // Assignment operator.
274 virtual Parser & operator=( const Parser & parser ) ;
275
276 // Parse an input string, doing a syntax-directed translation, returning the value.
277 ValueType parse( string s ) ;
278
279 // Pure virtual functions to be defined only in the child classes, since the lexical analyzer, parse tables and
280 // syntax-directed translation depend uniquely on the grammar.
281 //
282 // Defined within the scope of the Parser class or its derived classes only,
283 // so not visible from outside the parser.
284 protected:
285 // Parse string into tokens.
286 virtual void tokenize( string sentence ) = 0 ;
287
288 // NOTE: call this in the child class constructor. It fills in the ACTION, GOTO, and ERROR_MESSAGE tables.
289 virtual void initializeTables() = 0 ;
290
291 // Reduce using production A -> beta, computing A's value from values of the tokens in beta.
292 virtual void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) = 0 ;
293
294 // These member values and functions are called directly in the child classes.
295 protected:
296 // Number of states, productions, etc.
297 int num_states_ ; // Includes the 0 state.
298 int num_productions__ ; // Number of productions.
299 int num_terminals_ ; // Number of terminal symbols.
300 int num_non_terminals_ ; // Number of nonterminal symbols.
301
302 void insertAction( int state, SymbolType terminal, Action actionType, int actionState ) ;
303 void insertGoto( int state, SymbolType nonterm, int newState ) ;
304 void insertProduction( int prodNum, SymbolType nonTerm, int rhsLength ) ;
305 void insertErrorMessage( int state, string errorMessage ) ;
306
307 protected:
308 // Parser stack containing symbols tokenized by lexical analyzer.
309 vector< Symbol<SymbolType, ValueType> > input_stack_ ;
310
311 // Parse stack containing symbols and states.
312 vector< Symbol<SymbolType, ValueType> > parse_stack_ ;
313
314 // Action table.
315 // action = ACTION_TABLE[ <state at top of parse stack> ][ <lookahead input token> ]
316 vector< vector<ActionState> > action_table_ ;
317
318 // GOTO table.
319 // For reduce actions,
320 // new goto state to push = GOTO[ current state ][ nonterminal production symbol ]
321 vector< vector<int> > goto_table_ ;
322
323 // Error messages.
324 vector<string> error_message_ ;
325
326 // Productions: Left side non-terminal symbol + production number.
327 vector< Symbol<SymbolType, ValueType> > production_ ;
328} ;
329
330
331
332// --------------- Derived class parsers and their symbols and values ---------
333
334
335
336/*=============================================================================
337 |
338 | NAME
339 |
340 | PolyValue
341 |
342 | DESCRIPTION
343 |
344 | Define a symbol with a polynomial value. Used in the parser's syntax directed
345 | translation and also for the final polynomial value returned from the
346 | parser.
347 |
348 +============================================================================*/
349
350class PolyValue
351{
352 public:
353 PolyValue() ;
354
355 PolyValue( const PolyValue & v ) ;
356
357 PolyValue & operator=( const PolyValue & v ) ;
358
359 friend ostream & operator<<( ostream & , const PolyValue & v ) ;
360
361 // Allow direct access to this simple data type for convenience.
362 public:
363 ppuint scalar_ ; // Number which can be a polynomial coefficient, power of x, or the modulus p.
364 // Let it be signed for maximum flexibility. We'll check if it exceeds the allowed
365 // range for internal primitive polynomial computations in the parser.
366 vector<ppuint> f_ ; // Polynomial f(x).
367} ;
368
369
370/*=============================================================================
371 |
372 | NAME
373 |
374 | PolySymbol
375 |
376 | DESCRIPTION
377 |
378 | Define polynomial symbol (tokens). Used in the parser's syntax directed
379 | translation.
380 |
381 +============================================================================*/
382
383enum class PolySymbol
384{
385 // Terminal symbols or tokens.
386 Dollar = 0, Integer, Comma, Ecks, Plus, Exp,
387 NumTerminals,
388
389 // Nonterminal symbols.
390 S, Mod, Poly, Term, Multiplier, Power,
391 NumSymbols
392} ;
393
394/*=============================================================================
395|
396| NAME
397|
398| PolyParser
399|
400| DESCRIPTION
401|
402| Child class for parsing a polynomial grammar inheriting from abstract
403| base class Parser.
404|
405| EXAMPLE
406|
407| Here is the LALR(1) grammar. Note the use of epsilon productions to
408| make the grammar simpler.
409|
410| Productions:
411|
412| S -> Poly Mod
413| Mod -> comma integer | EPSILON
414| Poly -> Poly + Term | Term
415| Term -> Multiplier Power
416| Multiplier -> integer | EPSILON
417| Power -> x | x ^ integer | EPSILON
418|
419| Terminal symbols:
420|
421| comma + integer ^ x $
422|
423+============================================================================*/
424
425template< typename SymbolType, typename ValueType >
426class PolyParser : public Parser< SymbolType, ValueType >
427{
428 public:
429 // Construct the parser.
430 PolyParser() ;
431
432 // Destructor.
433 ~PolyParser() ;
434
435 // Copy constructor.
436 PolyParser( const PolyParser< SymbolType, ValueType > & parser ) ;
437
438 // Assignment operator.
439 PolyParser< SymbolType, ValueType > & operator=( const PolyParser< SymbolType, ValueType > & parser ) ;
440
441 // Command line argument parsing.
442 void parseCommandLine( int argc, const char * argv[] ) ;
443
444 // Allow direct access to these command line parameters for convenience.
445 bool test_polynomial_for_primitivity_ ;
446 bool list_all_primitive_polynomials_ ;
447 bool print_operation_count_ ;
448 bool print_help_ ;
449 bool slow_confirm_ ;
450 ppuint p ;
451 int n ;
452 Polynomial test_polynomial_ ;
453
454 // Location of the factorization tables.
455 static string current_working_dir_ ;
456
457 private:
458 // Parse string into tokens.
459 void tokenize( string sentence ) ;
460
461 // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables.
462 void initializeTables() ;
463
464 // Syntax directed translation. Reduce using production A -> beta, computing A's value from values of
465 // the tokens in beta.
466 void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ;
467
468 // Even though these member variables are declared in the base class as protected, they are not visible in this
469 // child class. Because we have a templated class, we must tell the C++ compiler explicitly that these symbols
470 // are from the base class.
471 using Parser<SymbolType, ValueType>::num_states_ ;
472 using Parser<SymbolType, ValueType>::num_productions__ ;
473 using Parser<SymbolType, ValueType>::num_terminals_ ;
474 using Parser<SymbolType, ValueType>::num_non_terminals_ ;
475
476 using Parser<SymbolType, ValueType>::insertAction ;
477 using Parser<SymbolType, ValueType>::insertGoto ;
478 using Parser<SymbolType, ValueType>::insertProduction ;
479 using Parser<SymbolType, ValueType>::insertErrorMessage ;
480
481 using Parser< SymbolType, ValueType >::input_stack_ ;
482 using Parser< SymbolType, ValueType >::parse_stack_ ;
483 using Parser< SymbolType, ValueType >::action_table_ ;
484 using Parser< SymbolType, ValueType >::goto_table_ ;
485 using Parser< SymbolType, ValueType >::error_message_ ;
486 using Parser< SymbolType, ValueType >::production_ ;
487} ;
488
489// Static member variable needs to be initialized outside of the class declaration.
490template< typename SymbolType, typename ValueType >
491string PolyParser<SymbolType, ValueType>::current_working_dir_ = "" ;
492
493
494// --------------- Derived class parsers and their symbols and values ---------
495
496
497
498/*=============================================================================
499 |
500 | NAME
501 |
502 | FactorizationValue
503 |
504 | DESCRIPTION
505 |
506 | Define a parser symbol which containst the number string in one line of
507 | the factorization table and also the prime factorization itself.
508 | This is used in the parser's syntax directed translation and also for
509 | the final value returned from the parser.
510 |
511 +============================================================================*/
512
513template< typename IntType >
514class FactorizationValue
515{
516 public:
517 FactorizationValue() ;
518
519 FactorizationValue( const FactorizationValue<IntType> & v ) ;
520
521 FactorizationValue( const IntType & p, const int & count ) ;
522
523 FactorizationValue<IntType> & operator=( const FactorizationValue<IntType> & v ) ;
524
525 // Print out the factorization.
526 // Define this templated non-member friend function here, otherwise the compiler will generate
527 // a link error. If the compiler sees only a declaration here, and not the complete implementation,
528 // it doesn't know yet it's a template.
529 // See https://isocpp.org/wiki/faq/templates#template-friends
530 friend ostream & operator<<( ostream & out, const FactorizationValue<IntType> & factorization )
531 {
532 if (factorization.factor_.size() > 0)
533 {
534 for (auto & f : factorization.factor_)
535 out << f ;
536 }
537 if (factorization.numberString_.length() > 0)
538 out << factorization.numberString_ ;
539
540 return out ;
541 }
542
543 static IntType numberStringToInteger( const string & numberString ) ;
544
545 // Allow direct access to data for convenience.
546 public:
547 string numberString_ ; // Sequence of digits representing an integer.
548 vector< PrimeFactor<IntType> > factor_ ; // Vector of distinct prime factors to powers.
549} ;
550
551
552/*=============================================================================
553 |
554 | NAME
555 |
556 | FactorizationSymbols
557 |
558 | DESCRIPTION
559 |
560 | Define factoriziton symbols (tokens). Used in the parser's syntax directed
561 | translation.
562 |
563 +============================================================================*/
564
565enum class FactorizationSymbol
566{
567 // Terminal symbols or tokens.
568 Dollar = 0, Integer, Period, Caret, Backslash,
569 NumTerminals,
570
571 // Nonterminal symbols.
572 S, Factorization, Factor, BigInteger,
573
574 // Count number of total symbols.
575 NumSymbols
576} ;
577
578
579
580/*=============================================================================
581|
582| NAME
583|
584| FactorizationParser
585|
586| DESCRIPTION
587|
588| Child class for parsing a factorization table of Cunningham numbers p ^ n - 1
589| inherited from abstract base class Parser.
590|
591| EXAMPLE
592|
593| Each table for p has one line containing the factorization of p ^ n - 1 of the type
594|
595| n <num factors p1 ^ m1 . p2 ^ m2 ...
596|
597| Dots separate the factors. Long numbers are continued on the next line with backslashes.
598| For example, in the table for p = 3, the line below is for n = 398 and has the 12 factors
599| 2 ^ 4, 3, 3583 ... 11881576435363115110426317067813673631182057431802975257340612015708984828478898969687279497327343
600|
601| 398 12 2^4.3.3583.4588543.34266607.2146612951394313997.8670122322845042\
602| 61471.3742361194240057889227626965547117.118815764353631151\
603| 104263170678136736311820574318029752573406120157089\
604| 84828478898969687279497327343
605|
606| Here is the LALR(1) grammar.
607|
608| Productions:
609|
610| S -> integer integer Factorization
611| Factorization -> Factorization . Factor | Factor
612| Factor -> BigInteger ^ BigInteger | BigInteger
613| BigInteger -> BigInteger \ integer | integer
614|
615| Terminal symbols:
616| integer . ^ \ $
617|
618+============================================================================*/
619
620template< typename SymbolType, typename ValueType >
621class FactorizationParser : public Parser< SymbolType, ValueType >
622{
623 public:
624 // Construct the parser.
625 FactorizationParser() ;
626
627 // Destructor.
628 ~FactorizationParser() ;
629
630 // Copy constructor.
631 FactorizationParser( const FactorizationParser< SymbolType, ValueType > & parser ) ;
632
633 // Assignment operator.
634 FactorizationParser< SymbolType, ValueType > & operator=( const FactorizationParser< SymbolType, ValueType > & parser ) ;
635
636 private:
637 // Parse string into tokens.
638 void tokenize( string sentence ) ;
639
640 // Fill in the ACTION, GOTO, and ERROR_MESSAGE tables.
641 void initializeTables() ;
642
643 // Syntax directed translation. Reduce using production A -> beta, computing A's value from values of
644 // the tokens in beta.
645 void syntaxDirectedTranslation( int productionNum, int topIndex, Symbol<SymbolType,ValueType> & as ) ;
646
647 // Even though these member variables are declared in the base class as protected, they are not visible in this
648 // child class. We have to tell the compiler explicitly that these symbols are base class templated types so it
649 // knows what they are.
650 using Parser<SymbolType, ValueType>::num_states_ ;
651 using Parser<SymbolType, ValueType>::num_productions__ ;
652 using Parser<SymbolType, ValueType>::num_terminals_ ;
653 using Parser<SymbolType, ValueType>::num_non_terminals_ ;
654
655 using Parser<SymbolType, ValueType>::insertAction ;
656 using Parser<SymbolType, ValueType>::insertGoto ;
657 using Parser<SymbolType, ValueType>::insertProduction ;
658 using Parser<SymbolType, ValueType>::insertErrorMessage ;
659
660 using Parser< SymbolType, ValueType >::input_stack_ ;
661 using Parser< SymbolType, ValueType >::parse_stack_ ;
662 using Parser< SymbolType, ValueType >::action_table_ ;
663 using Parser< SymbolType, ValueType >::goto_table_ ;
664 using Parser< SymbolType, ValueType >::error_message_ ;
665 using Parser< SymbolType, ValueType >::production_ ;
666} ;
667
668#endif // __PPPARSER_H__