Confusing Number Parser

closed account (NUj6URfi)
I have the code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
#include <iostream>
#include <fstream>
#include <cstring>
#include <cctype>
#include <cstdarg>
#include <cmath>
#include <sstream>
#include <stack>
#include <vector>

using namespace std;

#ifndef MAX_PATH
#define MAX_PATH  260
#endif

int firstint;
int secondint;
bool addition;
bool subtraction;
bool multiplication;
bool division;
char filename[MAX_PATH + 1];
char input_line[MAX_PATH + 1];
int argvcount;
string randomaccess;
int i;
int stringlength;

struct Token{
	enum TokenType{
		null=0,
		END=1,
		INTEGER='0',
		PLUS='+',
		MINUS='-',
		MUL='*',
		DIV='/',
		POW='^',
		LPAREN='(',
		RPAREN=')',
		expr=128
	} type;
	double intValue;
	Token(TokenType type=END):type(type),intValue(0){}
	Token(long val):type(INTEGER),intValue(val){}
	Token(char character){
		this->type=(TokenType)character;
	}
};

struct Rule{
	Token reduces_to;
	std::vector<Token> constraints;
	Token lookahead;
	Rule(const Token &to,const Token &la,unsigned constraints,...){
		this->reduces_to=to;
		this->lookahead=la;
		va_list list;
		va_start(list,constraints);
		this->constraints.reserve(constraints);
		for (unsigned a=0;a<constraints;a++)
			this->constraints.push_back(va_arg(list,Token::TokenType));
	}
	bool matches(const std::vector<Token> &stack,const Token &lookahead){
		if (stack.size()<this->constraints.size() ||
				this->lookahead.type!=Token::null && this->lookahead.type!=lookahead.type)
			return 0;
		const Token *array=&stack[stack.size()-this->constraints.size()];
		for (unsigned a=0,size=this->constraints.size();a<size;a++)
			if (array[a].type!=this->constraints[a].type)
				return 0;
		return 1;
	}
};

class Parser{
	std::stringstream stream;
	std::vector<Token> stack;
	bool result;
	std::vector<Rule> rules;
	Token read(){
		char character;
		while (!this->stream.eof() && isspace(character=this->stream.peek()))
			this->stream.get();
		if (this->stream.eof())
			return Token::END;
		character=this->stream.peek();
		if (isdigit(character)){
			std::string str;
			str.push_back(this->stream.get());
			while (isdigit(this->stream.peek()))
				str.push_back(this->stream.get());
			long temp=atol(str.c_str());
			return temp;
		}
		return (char)this->stream.get();
	}
	bool reduce(const Token &lookahead){
		long rule_index=-1;
		unsigned max=0;
		for (unsigned a=0;a<this->rules.size();a++){
			if (this->rules[a].matches(this->stack,lookahead) && this->rules[a].constraints.size()>max){
				rule_index=a;
				max=this->rules[a].constraints.size();
			}
		}
		if (rule_index<0 || this->rules[rule_index].reduces_to.type==Token::null)
			return 0;
		Rule &rule=this->rules[rule_index];
		Token new_token(rule.reduces_to);
		Token *redex=&this->stack[this->stack.size()-rule.constraints.size()];
		switch (rule_index){
			case 0: //expr <- INTEGER
				new_token.intValue=redex[0].intValue;
				break;
			case 1: //expr <- '(' expr ')'
			case 2: //expr <- '+' expr
				new_token.intValue=redex[1].intValue;
				break;
			case 3: //expr <- '-' expr
				new_token.intValue=-redex[1].intValue;
				break;
			case 4: //impossible
			case 5: //expr <- expr '^' expr
				new_token.intValue=pow((double)redex[0].intValue,(double)redex[2].intValue);
				break;
			case 6: //expr <- expr '*' expr
				new_token.intValue=redex[0].intValue*redex[2].intValue;
				break;
			case 7: //expr <- expr '/' expr
				new_token.intValue=redex[0].intValue/redex[2].intValue;
				break;
			case 8: //impossible
			case 9: //impossible
			case 10: //expr <- expr '+' expr
				new_token.intValue=redex[0].intValue+redex[2].intValue;
				break;
			case 11: //impossible
			case 12: //impossible
			case 13: //expr <- expr '-' expr
				new_token.intValue=redex[0].intValue-redex[2].intValue;
				break;
		}
		for (unsigned a=0;a<rule.constraints.size();a++)
			this->stack.pop_back();
		this->stack.push_back(new_token);
		return 1;
	}
	bool run_state(){
		Token next_token=this->read();
		while (this->reduce(next_token));
		switch (next_token.type){
			case Token::END:
				this->result=(this->stack.size()==1);
				return 0;
			case Token::INTEGER:
			case Token::PLUS:
			case Token::MINUS:
			case Token::MUL:
			case Token::DIV:
			case Token::RPAREN:
			case Token::LPAREN:
			case Token::POW:
				this->stack.push_back(next_token);
				return 1;
			default:
				this->result=0;
				return 0;
		}
	}
	void initializeRules(){
		this->rules.clear();
		/*rule 0*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	1,	Token::INTEGER	));

		/*rule 1*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::LPAREN,	Token::expr,	Token::RPAREN	));

		/*rule 2*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	2,	Token::PLUS,	Token::expr	));
		/*rule 3*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	2,	Token::MINUS,	Token::expr	));

		/*rule 4*/	this->rules.push_back(Rule(	Token::null,	Token::POW,		3,	Token::expr,	Token::POW,	Token::expr	));
		/*rule 5*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::POW,	Token::expr	));

		/*rule 6*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::MUL,	Token::expr	));

		/*rule 7*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::DIV,	Token::expr	));

		/*rule 8*/	this->rules.push_back(Rule(	Token::null,	Token::MUL,		3,	Token::expr,	Token::PLUS,	Token::expr	));
		/*rule 9*/	this->rules.push_back(Rule(	Token::null,	Token::DIV,		3,	Token::expr,	Token::PLUS,	Token::expr	));
		/*rule 10*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::PLUS,	Token::expr	));

		/*rule 11*/	this->rules.push_back(Rule(	Token::null,	Token::MUL,		3,	Token::expr,	Token::MINUS,	Token::expr	));
		/*rule 12*/	this->rules.push_back(Rule(	Token::null,	Token::DIV,		3,	Token::expr,	Token::MINUS,	Token::expr	));
		/*rule 13*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::MINUS,	Token::expr	));
	}
public:
	Parser(const std::string &str)
			:stream(str){
		this->result=0;
		this->initializeRules();
	}
	bool eval(double &res){
		while (this->run_state());
		if (this->result)
			res=this->stack.front().intValue;
		else
			this->stack.clear();
		return this->result;
	}
};


int main(int argc, char *argv[]) {
    cout << "What file do you want to compile?";
    cin.getline(filename, 100);
    ifstream file_in(filename);
    argvcount = 0; 
    i = 0;
    while(!file_in.eof()) {       
        file_in.getline(input_line, MAX_PATH);
                      Parser evaluator("2^2^2");
	double res=0;
	std::cout <<(evaluator.eval(res)?"ACCEPT":"ABORT")<<std::endl;
	std::cout <<res<<std::endl;

        if (strncmp(input_line, "print ", 6) == 0) {
            randomaccess = input_line + 6;
            cout << randomaccess; 
            stringlength = randomaccess.length();
        }
        if (strncmp(input_line, "nline", 5) == 0) {
           argvcount++;
           randomaccess = input_line + 5;
            cout << "\n"; 
            stringlength = randomaccess.length();
        }
           
        if (strncmp(input_line, "program(end)", 12) == 0) {
            randomaccess = input_line + 12;
            cin.get();
            return 0;}      }
}



with my file that I will run through this being

1
2
3
4
5
6
7

print "HI"
nline
print 2 + 2
program(end)



My intent is that this will print hi, go to a new line, solve the equation and print the answer. The number parser I found isn't mine, I was directed too it though and I don't understand it. Please help me achieve my goal.
helios' parser only handles arithmetic equations; there's no way it can understand print, nline, etc.

If you want to understand how the code works you first need to learn a bit about parser design. For example: "Introduction to Shift-Reduce Parsing"
http://www.cs.binghamton.edu/~zdu/parsdemo/srintro.html

Note that in practice, parsers like this are not hand-written; esp. when dealing with more complex grammars than basic arithmetic. They are usually generated by a tool like Bison. For an introduction to this tool-based approach, see Albatross' article "Bison Tracking"
http://www.cplusplus.com/articles/4LzyhbRD/

In the short term, it could have a go at extending your string-based interpreter to handle basic aithmetic (to match helios' parser.), as you were talking about in this thread:
http://www.cplusplus.com/forum/lounge/111673/

Note that to get lines like print 2 + 2 to work cleanly you are going to have to get some form of basic tokenization to work, as the TheIdeasMan mentioned in the other thread, and get you program to work with a stack. Basic string manipulation might work with 2 + 2 but with longer expressions it will be a pain

(Aside: the shunting yard algorithm might help here:
http://en.wikipedia.org/wiki/Shunting-yard_algorithm )

Also, you should consider reading your source file into a vector of strings (to start with), as this will allow you to implement simple looping like early forms of Basic (which might be a good role model for your language to start with).

Later on the vector should contain statement objects: you parse the string to tokens (tokenization/lexing); you parse the token to create statements (parsing) which can be executed. (These statements could be both simple, single statements or compound ones made or other smaller statements.)

Edit: then, rather than treat each line separately, as an entry in the vector, you can parse to a tree, which is what real compilers do (an abstract syntax tree
http://en.wikipedia.org/wiki/Abstract_syntax_tree )

Andy
Last edited on
closed account (NUj6URfi)
Thanks.
Topic archived. No new replies allowed.