Confusing Number Parser

Forum

Forum
Windows Programming
Confusing Number Parser

Confusing Number Parser

closed account (NUj6URfi)

I have the code

#include <iostream>
#include <fstream>
#include <cstring>
#include <cctype>
#include <cstdarg>
#include <cmath>
#include <sstream>
#include <stack>
#include <vector>

using namespace std;

#ifndef MAX_PATH
#define MAX_PATH  260
#endif

int firstint;
int secondint;
bool addition;
bool subtraction;
bool multiplication;
bool division;
char filename[MAX_PATH + 1];
char input_line[MAX_PATH + 1];
int argvcount;
string randomaccess;
int i;
int stringlength;

struct Token{
	enum TokenType{
		null=0,
		END=1,
		INTEGER='0',
		PLUS='+',
		MINUS='-',
		MUL='*',
		DIV='/',
		POW='^',
		LPAREN='(',
		RPAREN=')',
		expr=128
	} type;
	double intValue;
	Token(TokenType type=END):type(type),intValue(0){}
	Token(long val):type(INTEGER),intValue(val){}
	Token(char character){
		this->type=(TokenType)character;
	}
};

struct Rule{
	Token reduces_to;
	std::vector<Token> constraints;
	Token lookahead;
	Rule(const Token &to,const Token &la,unsigned constraints,...){
		this->reduces_to=to;
		this->lookahead=la;
		va_list list;
		va_start(list,constraints);
		this->constraints.reserve(constraints);
		for (unsigned a=0;a<constraints;a++)
			this->constraints.push_back(va_arg(list,Token::TokenType));
	}
	bool matches(const std::vector<Token> &stack,const Token &lookahead){
		if (stack.size()<this->constraints.size() ||
				this->lookahead.type!=Token::null && this->lookahead.type!=lookahead.type)
			return 0;
		const Token *array=&stack[stack.size()-this->constraints.size()];
		for (unsigned a=0,size=this->constraints.size();a<size;a++)
			if (array[a].type!=this->constraints[a].type)
				return 0;
		return 1;
	}
};

class Parser{
	std::stringstream stream;
	std::vector<Token> stack;
	bool result;
	std::vector<Rule> rules;
	Token read(){
		char character;
		while (!this->stream.eof() && isspace(character=this->stream.peek()))
			this->stream.get();
		if (this->stream.eof())
			return Token::END;
		character=this->stream.peek();
		if (isdigit(character)){
			std::string str;
			str.push_back(this->stream.get());
			while (isdigit(this->stream.peek()))
				str.push_back(this->stream.get());
			long temp=atol(str.c_str());
			return temp;
		}
		return (char)this->stream.get();
	}
	bool reduce(const Token &lookahead){
		long rule_index=-1;
		unsigned max=0;
		for (unsigned a=0;a<this->rules.size();a++){
			if (this->rules[a].matches(this->stack,lookahead) && this->rules[a].constraints.size()>max){
				rule_index=a;
				max=this->rules[a].constraints.size();
			}
		}
		if (rule_index<0 || this->rules[rule_index].reduces_to.type==Token::null)
			return 0;
		Rule &rule=this->rules[rule_index];
		Token new_token(rule.reduces_to);
		Token *redex=&this->stack[this->stack.size()-rule.constraints.size()];
		switch (rule_index){
			case 0: //expr <- INTEGER
				new_token.intValue=redex[0].intValue;
				break;
			case 1: //expr <- '(' expr ')'
			case 2: //expr <- '+' expr
				new_token.intValue=redex[1].intValue;
				break;
			case 3: //expr <- '-' expr
				new_token.intValue=-redex[1].intValue;
				break;
			case 4: //impossible
			case 5: //expr <- expr '^' expr
				new_token.intValue=pow((double)redex[0].intValue,(double)redex[2].intValue);
				break;
			case 6: //expr <- expr '*' expr
				new_token.intValue=redex[0].intValue*redex[2].intValue;
				break;
			case 7: //expr <- expr '/' expr
				new_token.intValue=redex[0].intValue/redex[2].intValue;
				break;
			case 8: //impossible
			case 9: //impossible
			case 10: //expr <- expr '+' expr
				new_token.intValue=redex[0].intValue+redex[2].intValue;
				break;
			case 11: //impossible
			case 12: //impossible
			case 13: //expr <- expr '-' expr
				new_token.intValue=redex[0].intValue-redex[2].intValue;
				break;
		}
		for (unsigned a=0;a<rule.constraints.size();a++)
			this->stack.pop_back();
		this->stack.push_back(new_token);
		return 1;
	}
	bool run_state(){
		Token next_token=this->read();
		while (this->reduce(next_token));
		switch (next_token.type){
			case Token::END:
				this->result=(this->stack.size()==1);
				return 0;
			case Token::INTEGER:
			case Token::PLUS:
			case Token::MINUS:
			case Token::MUL:
			case Token::DIV:
			case Token::RPAREN:
			case Token::LPAREN:
			case Token::POW:
				this->stack.push_back(next_token);
				return 1;
			default:
				this->result=0;
				return 0;
		}
	}
	void initializeRules(){
		this->rules.clear();
		/*rule 0*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	1,	Token::INTEGER	));

		/*rule 1*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::LPAREN,	Token::expr,	Token::RPAREN	));

		/*rule 2*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	2,	Token::PLUS,	Token::expr	));
		/*rule 3*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	2,	Token::MINUS,	Token::expr	));

		/*rule 4*/	this->rules.push_back(Rule(	Token::null,	Token::POW,		3,	Token::expr,	Token::POW,	Token::expr	));
		/*rule 5*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::POW,	Token::expr	));

		/*rule 6*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::MUL,	Token::expr	));

		/*rule 7*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::DIV,	Token::expr	));

		/*rule 8*/	this->rules.push_back(Rule(	Token::null,	Token::MUL,		3,	Token::expr,	Token::PLUS,	Token::expr	));
		/*rule 9*/	this->rules.push_back(Rule(	Token::null,	Token::DIV,		3,	Token::expr,	Token::PLUS,	Token::expr	));
		/*rule 10*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::PLUS,	Token::expr	));

		/*rule 11*/	this->rules.push_back(Rule(	Token::null,	Token::MUL,		3,	Token::expr,	Token::MINUS,	Token::expr	));
		/*rule 12*/	this->rules.push_back(Rule(	Token::null,	Token::DIV,		3,	Token::expr,	Token::MINUS,	Token::expr	));
		/*rule 13*/	this->rules.push_back(Rule(	Token::expr,	Token::null,	3,	Token::expr,	Token::MINUS,	Token::expr	));
	}
public:
	Parser(const std::string &str)
			:stream(str){
		this->result=0;
		this->initializeRules();
	}
	bool eval(double &res){
		while (this->run_state());
		if (this->result)
			res=this->stack.front().intValue;
		else
			this->stack.clear();
		return this->result;
	}
};


int main(int argc, char *argv[]) {
    cout << "What file do you want to compile?";
    cin.getline(filename, 100);
    ifstream file_in(filename);
    argvcount = 0; 
    i = 0;
    while(!file_in.eof()) {       
        file_in.getline(input_line, MAX_PATH);
                      Parser evaluator("2^2^2");
	double res=0;
	std::cout <<(evaluator.eval(res)?"ACCEPT":"ABORT")<<std::endl;
	std::cout <<res<<std::endl;

        if (strncmp(input_line, "print ", 6) == 0) {
            randomaccess = input_line + 6;
            cout << randomaccess; 
            stringlength = randomaccess.length();
        }
        if (strncmp(input_line, "nline", 5) == 0) {
           argvcount++;
           randomaccess = input_line + 5;
            cout << "\n"; 
            stringlength = randomaccess.length();
        }
           
        if (strncmp(input_line, "program(end)", 12) == 0) {
            randomaccess = input_line + 12;
            cin.get();
            return 0;}      }
}

with my file that I will run through this being


print "HI"
nline
print 2 + 2
program(end)

My intent is that this will print hi, go to a new line, solve the equation and print the answer. The number parser I found isn't mine, I was directed too it though and I don't understand it. Please help me achieve my goal.

andywestken (4087)

helios' parser only handles arithmetic equations; there's no way it can understand print, nline, etc.

If you want to understand how the code works you first need to learn a bit about parser design. For example: "Introduction to Shift-Reduce Parsing"
http://www.cs.binghamton.edu/~zdu/parsdemo/srintro.html

Note that in practice, parsers like this are not hand-written; esp. when dealing with more complex grammars than basic arithmetic. They are usually generated by a tool like Bison. For an introduction to this tool-based approach, see Albatross' article "Bison Tracking"
http://www.cplusplus.com/articles/4LzyhbRD/

In the short term, it could have a go at extending your string-based interpreter to handle basic aithmetic (to match helios' parser.), as you were talking about in this thread:
http://www.cplusplus.com/forum/lounge/111673/

Note that to get lines like print 2 + 2 to work cleanly you are going to have to get some form of basic tokenization to work, as the TheIdeasMan mentioned in the other thread, and get you program to work with a stack. Basic string manipulation might work with 2 + 2 but with longer expressions it will be a pain

(Aside: the shunting yard algorithm might help here:
http://en.wikipedia.org/wiki/Shunting-yard_algorithm )

Also, you should consider reading your source file into a vector of strings (to start with), as this will allow you to implement simple looping like early forms of Basic (which might be a good role model for your language to start with).

Later on the vector should contain statement objects: you parse the string to tokens (tokenization/lexing); you parse the token to create statements (parsing) which can be executed. (These statements could be both simple, single statements or compound ones made or other smaller statements.)

Edit: then, rather than treat each line separately, as an entry in the vector, you can parse to a tree, which is what real compilers do (an abstract syntax tree
http://en.wikipedia.org/wiki/Abstract_syntax_tree )

Andy

Last edited on

closed account (NUj6URfi)

Thanks.

Topic archived. No new replies allowed.

C++

Forum

Confusing Number Parser