Simple Interpreter

Pages: 123
Well at the moment I am commenting the big parts so that I understand it all.

Do you see how I could make the script parse a file though?
So I've just added ^ (power) to the code.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#include <string>
#include <cctype>
#include <iostream>
#include <map>
#include <strstream>

using namespace std;

// Pointer to input string
istream* input;

// No of errors = 0 by default
int no_of_errors;

// Function to output error
double error(const char* s) {
    no_of_errors++;
    cerr << "Error: " << s << '\n';
    return 1;
}

// Different tokens
enum Token_value {
	NAME,		NUMBER,		END,
	PLUS='+',	MINUS='-',	MUL='*',	DIV='/', POW='^',
	PRINT=';',	ASSIGN='=',	LP='(',		RP=')'
};

Token_value curr_tok = PRINT;
double number_value;
string string_value;

// For each function in the token_value enumeration...
Token_value get_token() {
	char ch;

	// Skip whitespace except \n
	do {
		if(!input->get(ch)) return curr_tok = END;
	} while (ch!='\n' && isspace(ch));

	// Switch each character
	switch (ch) {
        // Print
    	case ';':
    	case '\n':
    		return curr_tok=PRINT;
	// Operators
    	case '*':
    	case '/':
    	case '+':
    	case '-':
    	case '(':
    	case ')':
    	case '=':
        case '^':
    		return curr_tok=Token_value(ch);
	// Numbers AND Decimal point
    	case '0': case '1': case '2': case '3': case '4':
    	case '5': case '6': case '7': case '8': case '9':
    	case '.':
    		input->putback(ch);
    		*input >> number_value;
    		return curr_tok=NUMBER;
	// Variable declaration
    	default:			// NAME, NAME=, or error
    		if (isalpha(ch)) { // If ch is alphabetic
    			string_value = ch;
    			while (input->get(ch) && isalnum(ch))
    				string_value += ch;	// string_value.push_back(ch);
    							// to work around library bug
    			input->putback(ch);
    			return curr_tok=NAME;
    		}
    		error("Bad token");
    		return curr_tok=PRINT;
	}
}

// Create a map type consisting of string and double values
map <string, double> table;

// Expressions
double expr(bool);

// Handle Primaries
double prim(bool get) {
	if (get) get_token();

	switch (curr_tok) {
        // Floating point constants
    	case NUMBER:
    	{	double v = number_value;
    		get_token();
    		return v;
    	}
    	// Variables
    	case NAME:
    	{	double& v = table[string_value];
    		if (get_token() == ASSIGN) v = expr(true);
    		return v;
    	}
    	// Unary Minus
    	case MINUS:
    		return -prim(true);
	// Left Parenthesis
    	case LP:
    	{	double e = expr(true);
    		if (curr_tok != RP) return error(") expected");
    		get_token();		// eat ')'
    		return e;
    	}
    	default:
    		return error("Primary expected");
	}
}

// Calculate the powers
double Pow(double n, double p) {
	double result = 1.0; 
	for(int j=0; j<p; j++) {
		result *= n;
	}
	return result;       
}

// Multiply and divide
double term(bool get) {
	double left = prim(get);

	// Keep looping - that wont happen though as we HAVE to find a token
	for (;;) {
		switch (curr_tok) {
		// Multiply
    		case MUL:
    			left *= prim(true);
    			break;
   		// Divide
    		case DIV:
    			if (double d = prim(true)) {
    				left /= d;
    				break;
    			}
    			return error("Divide by 0");
   		// Power
		case POW:
			return Pow(left, prim(true));
			break;
    		default:
    			return left;
		}
    }
}

// Add and subtract
double expr(bool get) {
	double left = term(get);

	// Keep looping - that wont happen though as we HAVE to find a token
	for (;;) {
		switch (curr_tok) {
		// Plus
    		case PLUS:
    			left += term(true);
    			break;
		// Minus
    		case MINUS:
    			left -= term(true);
    			break;
    		default:
    			return left;
		}
    }
}

int main(int argc, char* argv[]) {
    cout << "Ceres Maths Parser V1" << endl;
    cout << "Written by James Brooks 2009\n" << endl;
	switch (argc) {
        // Direct input
    	case 1:
    		input = &cin;
    		break;
    	// Argument String
    	case 2:
    		input = new istrstream(argv[1]);
    		break;
	    // Otherwise we have a problem
    	default:
    		error("Too many arguments");
    		return 1;
	}

	// Insert predefined values
	table["pi"] = 3.1415926535897932385;
	table["e"] = 2.7182818284590452354;

	while (*input) {
		get_token();
		if (curr_tok == END) break;
		if (curr_tok == PRINT) continue;
		cout << expr(false) << '\n';
	}

	if (input != &cin) delete input;
	return no_of_errors;
}

I tried adding functions however I have no idea how to do that. I mean proper functions which allow me to do something like:
 
Tan(x)

Also, could someone explain to me what ";" does in the code?

James
Last edited on
';' should act in the same way of the newline, it stops the current operation and makes the program showing the result:
1
2
3
case ';':
case '\n':
    return curr_tok=PRINT;

For a function as sin cos etc, handle it in 'prim'
eg:
1
2
3
4
5
6
7
8
9
10
11
case NAME:
{
    if ( string_value == "sin" )
        return sin ( prim() );
    else
    {
        double& v = table[string_value];
        if (get_token() == ASSIGN) v = expr(true);
        return v;
    }
}
Last edited on
Bazzy, could you show me how this would be implemented please?
Didn't I show you?
Check my code example and compare it to what you have (line 98 on the code you posted)
Oh I see! Thanks Bazzy!

Do you know how I could wrap the "arguments" in brackets? Also, instead of command input, what about parsing a file?
Last edited on
To make your program accept only parentheses to contain arguments, check for the next token to be LP
eg:
1
2
3
4
5
6
7
8
9
10
if ( string_value == "sin" )
{
    if ( get_token() != LP ) return error ( "( expected" );
    
    // Now as in case LP
    double v = expr(true);
    if (curr_tok != RP) return error(") expected");
    get_token();  // eat ')'
    return  sin( v );
}


Your code can already parse a file:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
int main(int argc, char* argv[]) {
    cout << "Ceres Maths Parser V1" << endl;
    cout << "Written by James Brooks 2009\n" << endl;
	switch (argc) {
        // Direct input
    	case 1:
    		input = &cin;
    		break;
    	// Argument String
    	case 2:
    		input = new istrstream(argv[1]); // if you pass a filename as argument to your program it would be used as input
    		break;
	    // Otherwise we have a problem
    	default:
    		error("Too many arguments");
    		return 1;
	}

I got parenthesis to work straight after posting, typical, but thanks anyway!

I tried parsing a file however it doesn't work. Surely the file would need to be opened for it to work?
The file gets open when its name is passed to the file stream constructor.
How are you passing the filename to your program?
So I would need to change:
 
input = new istrstream(argv[1])

To, ifstream?

I'm dragging the file on to the EXE.
More or less I have the same problem. I don't know if I have to open another topic or not. I add my question here becouse usually, in forums, admins like to have same questions in same topics.

I need to program, like jbrooksuk, an INTERPRETER (not a parser) of my own script. I read (before enterning here) bison documentations but Bison cannot solve my problems.

Bison is fine if you have to add a syntax check of your script. But I don't need error checking (becouse scripts will be produced with a program, so errors are impossible to do).
The most great problem is one that Bison DON'T solve... is the runtime interpreting (even if you use Bison you have to write it by your own so it is useless to use Bison) and, most of all, Conditional Brantches (if you read documentations it says only "find your way. Is too much hard to explain").

for example if I have this script code

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
command 1
command 2
if (condition 1) {
  command 3
  command 4
  if (condition 2) {
    command 5
    loop(1) {
      command 6
      command 7
      if (condition 3) {
        endloop(1)
      }
    }
  }
}


This is only an example... it could be also in XML grammar. The problem is not the final structure of the script. Is to execute end interpreting it correctly in run-time.

for example command 3 will be executed only if condition 1 is true
for example if condition 1 is true ALL the code inside its { } will be execute (other conditional brantches will be evalued and if true executed, if false skipped).
Loop and endloop works as IDs loop(ID) is a while(1) that will be executed until an endloop(ID) encountered (so loop(1) ended when find endloop(1) and quit out of { } of loop(1) )

Any suggestion will be appreciated. It is fine also a solution that suggest to "build" a script if it is too hard to interpret it directly from source (like C++ do when first compile and after link). In this case I need suggestion how to "build" a binary that reproduces all conditions and loop that is easyer to be interpreted in runtime.
That's actually what I was looking at for, an interpreter, I even mentioned it. I'm just working with the calculator link because it's kind of what I need.

I too, still need if statements etc.
So I would need to change:
input = new istrstream(argv[1])
To, ifstream?

Yes ( I didn't realise that it was a istrstream )

I too, still need if statements etc.

Is not really hard to add an if statement, you can do this is a similar way of a function:
Pseudocode:
1
2
3
4
5
6
7
8
9
if ( string_value == "if" )
{
   // get parentheses as with 'sin'

   if ( expr() )
       Read_and_execute_the_next_block // You may add the braces token to enclose a block
   else
      Skip_the_next_block
}
Last edited on
Bazzy, Changing istrstream to ifstream just caused errors. I even changed line 13 to ifstream too.
Oh, I've done it now. Thanks very much for the help Bazzy.

Just one last thing. How would you implement user-defined functions?

Code should look like this:
1
2
3
Func Add(x, y) {
      Return x+y
}
That is a bit more challenging and there should be many different ways to accomplish it.
Here is what your parser should do to store and use a function:
1
2
3
4
5
6
7
// function declaration
if You_find_the_keyword "Func"
    read_the_next_token
    if it_isnt_a_NAME    return error
    store_the_function_name_in_a_table // ( as the table you have for variables )
    read_the_parameters_and_save_them
    read_the_body_and_save_it

1
2
3
4
5
6
7
8
9
10
11
12
13
// function call
if Your_token_is_a_NAME
    if is_a_keyword
        do_whatever_you_need // ( as the part above  )
    if is_in_the_variable_table
        return variable_value
    if is_in_the_function_table
       check_for_parentheses
       while function_needs_argument
           get_the_next_value_until_you_find_a_comma
           set_the_parameter_to_the_argument_value
       check_for_RP
       execute_the_function_body


The function may be stored in a class
eg:
1
2
3
4
5
6
struct function
{
    string name;
    map<string,double> parameters;
    string body;
};
When executing the function body use an istringstream constructed with function::body as input stream (you should add the assignment of the argument values to the parameters to the function body) and parse it
Thanks bazzy, I'll have a go at this now!
Using the struct "function", how do I store the function name and parameters to the parameters map?
If you store the functions in a map with string as key type, you can avoid having the 'name' member in the function structure.
To assign parameters you can do something like this:
1
2
3
4
map<string,function> func_table;

func_table["Add"].parameters["x"] = 0;
func_table["Add"].parameters["y"] = 0;

When you need to assign arguments to the parameters, you can use iterators:
1
2
for ( map<string,function>::iterator i=func_table["Add"].parameters.begin() ; i != func_table["Add"].parameters.end(); i++ )
    i->second = expr(true); // Notice: you need to break the expression at a comma, not at a semicolon  


some useful links:
http://www.cplusplus.com/reference/stl/map/operator%5B%5D/
http://www.cplusplus.com/reference/stl/map/begin/
http://www.cplusplus.com/reference/stl/map/find/
Last edited on
Sorry about this, I'm not sure where you got func_table from, do I create that as well?
Pages: 123