1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
|
#include <regex.h>

#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using std::cout;
using std::endl;
using std::ifstream;
using std::ostream;
using std::string;
using std::vector;
/**
 * A single lexical token: a type label, the matched text, and the position
 * (line number and column) recorded for it by whoever created the token.
 *
 * A default-constructed token has type "UNINITIALIZED", an empty name,
 * line 0 and column -1.
 */
class Token
{
    string Type, Name;
    int LineNo, LineCol;

public:
    Token()
        : Type("UNINITIALIZED"), Name(""), LineNo(0), LineCol(-1) {}

    // String arguments are taken by value and moved into place, so callers
    // passing temporaries pay for no extra copy.
    Token(string type, string name, int lineno, int linecol)
        : Type(std::move(type)), Name(std::move(name)), LineNo(lineno), LineCol(linecol) {}

    void SetType   (string type) { Type = std::move(type); }
    void SetName   (string name) { Name = std::move(name); }
    void SetLineNo (int lineno ) { LineNo = lineno; }
    void SetLineCol(int linecol) { LineCol = linecol; }

    // Accessors are const so they can be used on const tokens and temporaries.
    string GetType   () const { return Type; }
    string GetName   () const { return Name; }
    int    GetLineNo () const { return LineNo; }
    int    GetLineCol() const { return LineCol; }
};

/**
 * Writes a token as "(Type, Name, LineNo, LineCol)".
 *
 * Takes the token by const reference so that const tokens and temporaries
 * can be printed as well as mutable lvalues.
 *
 * @return the stream that was written to, to allow chaining.
 */
ostream& operator<<(ostream &out, const Token &token)
{
    out << "(" << token.GetType() << ", " << token.GetName() << ", "
        << token.GetLineNo() << ", " << token.GetLineCol() << ")";
    return out;
}
// Forward declarations; both functions are defined after main().

// Appends every line read from the stream to the vector.
void ReadInFile(vector<string>&, ifstream&);
// Appends the tokens found in the given line to the token vector; the bool
// says whether the function's internal line counter advances afterwards.
void Lex(string&, vector<Token>&, bool);
/**
 * Entry point: lexes the file named by the first command-line argument and
 * prints one token per line to standard output.
 *
 * @return 0 on success; 1 if no file name was given or the file could not
 *         be opened (a diagnostic is written to standard error).
 */
int main(int argc, char *argv[])
{
    // argv[1] is only valid when an argument was actually supplied.
    if(argc < 2)
    {
        std::cerr << "Usage: " << (argc > 0 ? argv[0] : "lexer") << " <source-file>\n";
        return 1;
    }

    ifstream File(argv[1]);
    if(!File)
    {
        std::cerr << "Error: cannot open '" << argv[1] << "'\n";
        return 1;
    }

    vector<string> FileContents;
    ReadInFile(FileContents, File);

    // Every entry of FileContents is one complete source line, so the line
    // counter inside Lex advances after each call.
    vector<Token> TokenList;
    for(auto &SourceLine : FileContents)
        Lex(SourceLine, TokenList, true);

    // '\n' instead of endl: same output without flushing after every token.
    for(auto &Tok : TokenList)
        cout << Tok << '\n';

    return 0;
}
/**
 * Reads the stream line by line until getline fails, appending each line
 * (without its terminating newline) to FileContents.
 *
 * Existing elements of FileContents are kept; a stream that yields no lines
 * leaves the vector unchanged.
 */
void ReadInFile(vector<string> &FileContents, ifstream &File)
{
    for(string Current; getline(File, Current); )
    {
        FileContents.push_back(Current);
    }
}
/**
 * Tokenizes one line of text and appends the tokens to TokenList in
 * left-to-right order.
 *
 * Recognized tokens, tried at each position in this order:
 *   - STRING : a double-quoted run with no embedded quote, quotes included
 *              in the token text (the empty string "" is accepted).
 *   - NUMBER : one or more digits, optionally followed by '.' and one or
 *              more digits.
 *   - KEYWORD: the identifier "function".
 *   - NAME   : any other identifier ([_A-Za-z][_A-Za-z0-9]*).
 * Any other character (whitespace, punctuation, an unterminated quote) is
 * skipped without producing a token.
 *
 * Each token records the current line number and the 0-based offset of its
 * first character within Line.
 *
 * This replaces an implementation that ran each regex unanchored over the
 * whole line and recursed on a truncated copy. That version read an
 * unset regmatch_t when nothing matched, underflowed Line.size() - 1 on
 * empty lines, reported the same text under several token types, and
 * overwrote the caller's Line.
 *
 * @param Line       The text to tokenize. It is not modified; the non-const
 *                   reference is kept only for interface compatibility.
 * @param TokenList  Receives the tokens; existing elements are kept.
 * @param NewLine    When true, the internal line counter is advanced after
 *                   the line has been tokenized. Pass false if the next call
 *                   continues the same source line.
 */
void Lex(string &Line, vector<Token> &TokenList, bool NewLine)
{
    // The line counter is shared by every call, so lines are numbered in the
    // order in which they are passed in, starting from 1.
    static int LineNo = 1;

    const auto IsDigit = [](char Ch) { return Ch >= '0' && Ch <= '9'; };
    const auto IsNameStart = [](char Ch) {
        return Ch == '_' || (Ch >= 'A' && Ch <= 'Z') || (Ch >= 'a' && Ch <= 'z');
    };

    const string::size_type Length = Line.size();
    string::size_type Pos = 0;

    while(Pos < Length)
    {
        const string::size_type Start = Pos;
        const char First = Line[Start];
        const char *Type = nullptr; // stays null when no token starts here

        if(First == '"')
        {
            // A string literal needs its closing quote on the same line.
            const string::size_type Close = Line.find('"', Start + 1);
            if(Close != string::npos)
            {
                Pos = Close + 1;
                Type = "STRING";
            }
        }
        else if(IsDigit(First))
        {
            while(Pos < Length && IsDigit(Line[Pos]))
                ++Pos;
            // Consume a fractional part only if a digit follows the '.',
            // so "1." lexes as NUMBER "1" followed by a skipped '.'.
            if(Pos + 1 < Length && Line[Pos] == '.' && IsDigit(Line[Pos + 1]))
            {
                ++Pos;
                while(Pos < Length && IsDigit(Line[Pos]))
                    ++Pos;
            }
            Type = "NUMBER";
        }
        else if(IsNameStart(First))
        {
            while(Pos < Length && (IsNameStart(Line[Pos]) || IsDigit(Line[Pos])))
                ++Pos;
            // Keywords are whole identifiers, so "functional" stays a NAME.
            Type = (Line.compare(Start, Pos - Start, "function") == 0) ? "KEYWORD" : "NAME";
        }

        if(Type == nullptr)
        {
            ++Pos; // unrecognized character: skip it and keep scanning
            continue;
        }

        TokenList.push_back(Token(Type, Line.substr(Start, Pos - Start),
                                  LineNo, static_cast<int>(Start)));
    }

    if(NewLine)
        ++LineNo;
}
|