Reading segments from text file

I have a text file containing:
int Bob, 2you, john, AdamSmith;
float taxYear=2013, taxRate=29.2.3;

And I need to read it in a way where I analyze:
int
Bob
2you
john
AdamSmith
float
taxyear
2013
taxRate
29.2.3
,
;

So I need to break each segment of the text file to its individual part, and then analyze each part to see if its a special character, identifier, reserved word etc. It's like a basic parsing program.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
#include <iostream>
#include <fstream>
#include <string>
#include <algorithm>

using namespace std;

const string text = "Data.txt";

void readFile(string text);
bool isInt(string text);
bool isIden(string text);
bool isReal(string text);
bool isSpecial(string text);
bool isResv(string text);


int main()
{
	cout << "Opening text file to store data" << endl;
	readFile(text);

system("PAUSE");
return 0;
}
void readFile(string text)
{
	string line;
	string lower;
	char buffer;
	bool trueInt = false;
	ifstream f(text);
	if (f.is_open())
	{
		while (f >> ws, getline(f, line, ' ')) //eliminate leading white space, read line per line until hitting comma
		{
			if (isInt(line))
			{
				cout << line << "\t INTEGER" << endl;
			}
			else if (isIden(line))
			{
				cout << line << "\t IDENTIFIER" << endl;
			}
			else if (isResv(line))
			{
				cout << line << "\t RESERVED WORD" << endl;
			}
			else if (isReal(line))
			{
				cout << line << "\t REAL NUMBER" << endl;
			}
			else if (isSpecial(line))
			{
				cout << line << "\t SPECIAL CHARACTER" << endl;
			}
			else
			{
				cout << line << "\t INVALID TOKEN" << endl;
			}

			/*if (line == "int" || line == "float")
			{
			cout << line << "\t" << "RESERVED WORD" << endl;
			}
			else
			{
			for (int i = 0; i < line.length(); i++)
			{
			if (line[i] >= 48 && line[i] <= 57) //to check if int
			{
			if (i == line.length() - 1) //if it meets demand of each char being digit
			{
			cout << line << "\t" << "INTEGER NUMBER" << endl;
			}
			}
			else if ((line[0] >= 65 && line[0] <= 90) || (line[0] >= 97 && line[i] <= 122)) //if first index is letter
			{
			//now check if each index is either letter and digit, but not decimal or space
			if ((line[i] >= 65 && line[i] <= 90) || (line[i] >= 97 && line[i] <= 122) || (line[i] >= 48 && line[i] <= 57) && (line[i] != '.' || line[i] != ' '))
			{
			if (i == line.length() - 1)
			{
			cout << line << "\t" << "IDENTIFIER" << endl;
			}
			}
			}

			}
			}*/

		}
	}
	f.close();

}
bool isInt(string text)
{
	text.erase(remove(text.begin(), text.end(), ','), text.end());//remove comma from line
	bool isInt = false;
	while (isInt == false)
	{
		for (int i = 0; i < text.length(); i++)
		{
			if (text[i] >= 48 && text[i] <= 57) //if it is digit, dont do anything
			{
			}
			else
			{
				return isInt; //immediately return false if not a digit
			}
		}
		isInt = true;
		return isInt;
	}
}

bool isIden(string text)
{
	bool isIden = false;
	text.erase(remove(text.begin(), text.end(), ','), text.end());//remove comma from line
	text.erase(remove(text.begin(), text.end(), '='), text.end());//remove equal from line
	while (isIden == false)
	{
		if ((text[0] >= 65 && text[0] <= 90) || (text[0] >= 97 && text[0] <= 122))
		{
			for (int i = 1; i < text.length(); i++)
			{
				if (((text[i] >= 65 && text[i] <= 90) || (text[i] >= 97 && text[i] <= 122)) || (text[i] >= 48 && text[i] <= 57)) //if letter or digit don't do anything
				{
				}
				else
				{
					return isIden; 
				}
			}
			isIden = true;
			return isIden;
		}
		else //if first char is not a letter 
		{ 
			return isIden;
		}
	}

}

bool isResv(string text)
{
	text.erase(remove(text.begin(), text.end(), ','), text.end());//remove comma from line
	text.erase(remove(text.begin(), text.end(), '='), text.end());//remove equal from line
	if (text == "int" || text == "float")
	{
		return true;
	}
	else
	{
		return false;
	}
	
}

bool isSpecial(string text)
{
	if (text == ";" || text == ",")
	{
		return true;
	}
	else
	{
		false;
	}
}

bool isReal(string text)
{
	bool isReal = false;
	int dec = 0;
	while (isReal == false)
	{
		for (int i = 0; i < text.length(); i++)
		{
			if (text[i] >= 48 && text[i] <= 57 && dec == 0)
			{
			}
			if (text[i] >= 48 && text[i] <= 57 && dec == 1)
			{

			}
			if (text[i] == '.')
			{
				dec++;
			}
			else
			{
				return false;
			}
		}
		isReal = true;
		return isReal;
	}
}



So the correct output for my code should be:

int RESERVED WORD
Bob IDENTIFIER
, SPECIAL CHARACTER
2you INVALID
Adam.Smith INVALID
float RESERVED WORD
2013 INTEGER NUMBER
taxYear IDENTIFIER
taxRate IDENTIFIER
29.2.3 INVALID
; SPECIAL CHARACTER


but this code gives me this output:
int RESERVED WORD
Bob, IDENTIFIER //So I need to remove the comma
2you, SPECIAL CHARACTER //It's not a special character, it's invalid
john, IDENTIFIER //It is correct but I need to remove the comma
Adam.Smith; NOTHING //Should say invalid
float SPECIAL CHARACTER //Wrong it should be a reserved word
taxYear= 2013, IDENTIFIER //taxYear and 2013 should be 2 seperate segments
taxRate=29.2.3; SPECIAL CHARACTER //taxRate and 29.2.3 should be seperate segments
Line 100, 120, 177: Not a good practice to name local variables the same as your function. The compiler knows the difference, however, it's confusing to the reader.

Line 171: This line does nothing. You're missing the return. No guarantee what this function will return if line 165 is false.

Line 125, 129: See isalpha() and isalnum() for a better way to test.

Line 183, 186: See isdigit().

Last edited on
Thank you, I've changed my code to use those functions instead of manually putting the conditions, and it did clear up some errors but the output is still wrong:

int RESERVED WORD //correct
Bob, IDENTIFIER //correct but I need to remove the comma still
2you, INVALID //correct but I need to remove the comma
john, IDENTIFIER //correct but need to remove the comma
AdamSmith; //Doesn't say anything, supposed to say it is INVALID
float INVALID //Wrong this is a reserved word
taxYear=2013 IDENTIFIER //wrong, taxYear is an identifier and 2013 is an integer number, they are supposed to analyzed as 2 seperate strings not as one whole one
TaxRate=29.2.3 INVALID //wrong, same as above

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
void readFile(string text)
{
	string line;
	string lower;
	char buffer;
	bool trueInt = false;
	ifstream f(text);
	if (f.is_open())
	{
		while (f >> ws, getline(f, line, ' ')) //eliminate leading white space, read line per line until hitting comma
		{
			if (isResv(line))
			{
				cout << line << "\t RESERVED WORD" << endl;
			}
			else if (isIden(line))
			{
				cout << line << "\t IDENTIFIER" << endl;
			}
			else if (isInt(line))
			{
				cout << line << "\t INTEGER NUMBER" << endl;
			}
			else if (isReal(line))
			{
				cout << line << "\t REAL NUMBER" << endl;
			}
			else if (isSpecial(line))
			{
				cout << line << "\t SPECIAL CHARACTER" << endl;
			}
			else
			{
				cout << line << "\t INVALID TOKEN" << endl;
			}

			/*if (line == "int" || line == "float")
			{
			cout << line << "\t" << "RESERVED WORD" << endl;
			}
			else
			{
			for (int i = 0; i < line.length(); i++)
			{
			if (line[i] >= 48 && line[i] <= 57) //to check if int
			{
			if (i == line.length() - 1) //if it meets demand of each char being digit
			{
			cout << line << "\t" << "INTEGER NUMBER" << endl;
			}
			}
			else if ((line[0] >= 65 && line[0] <= 90) || (line[0] >= 97 && line[i] <= 122)) //if first index is letter
			{
			//now check if each index is either letter and digit, but not decimal or space
			if ((line[i] >= 65 && line[i] <= 90) || (line[i] >= 97 && line[i] <= 122) || (line[i] >= 48 && line[i] <= 57) && (line[i] != '.' || line[i] != ' '))
			{
			if (i == line.length() - 1)
			{
			cout << line << "\t" << "IDENTIFIER" << endl;
			}
			}
			}

			}
			}*/

		}
	}
	f.close();

}
bool isInt(string text)
{
	text.erase(remove(text.begin(), text.end(), ','), text.end());//remove comma from line
	bool isInt = false;
	while (isInt == false)
	{
		for (int i = 0; i < text.length(); i++)
		{
			if (text[i] >= 48 && text[i] <= 57) //if it is digit, dont do anything
			{
			}
			else
			{
				return isInt; //immediately return false if not a digit
			}
		}
		isInt = true;
		return isInt;
	}
}

bool isIden(string text)
{
	bool isIden = false;
	text.erase(remove(text.begin(), text.end(), ','), text.end());//remove comma from line
	text.erase(remove(text.begin(), text.end(), '='), text.end());//remove equal from line
	while (isIden == false)
	{
		if (isalpha(text[0])) //if first letter is a word 
		{
			for (int i = 1; i < text.length(); i++)
			{
				if (isalnum(text[i])) //if letter or digit don't do anything
				{
				}
				else
				{
					return isIden; 
				}
			}
			isIden = true;
			return isIden;
		}
		else //if first char is not a letter 
		{ 
			return isIden;
		}
	}

}

bool isResv(string text)
{
	text.erase(remove(text.begin(), text.end(), ','), text.end());//remove comma from line
	text.erase(remove(text.begin(), text.end(), '='), text.end());//remove equal from line
	if (text == "int" || text == "float")
	{
		return true;
	}
	else
	{
		return false;
	}
	
}

bool isSpecial(string text)
{
	if (text == ";" || text == ",")
	{
		return true;
	}
	else
	{
		return false;
	}
}

bool isReal(string text)
{
	bool isReal = false;
	int dec = 0;
	while (isReal == false)
	{
		for (int i = 0; i < text.length(); i++)
		{
			if (isdigit(text[i]) && dec == 0) //continue when no decimals found
			{
			}
			if (isdigit(text[i]) <= 57 && dec == 1) //continue when 1 decimal found
			{
			}
			if (text[i] == '.')
			{
				dec++;
			}
			else
			{
				return false; //return false when more than 1 decimal found
			}
		}
		isReal = true;
		return isReal;
	}
}
Last edited on
Line 161: You don't want to compare the result of isdigit() to 57. isdigit() returns a bool (0,1) which will always be less than 57.

Line 80: Missed using isdigit() here.


I fixed those 2 errors, but the output is still the same :/
Still need help! I can't seem to find the problem, I'm sure the conditions work as intended, maybe it's the way my program reads the file?
Lookin for any help!
Your problem is with the parsing. The , and ; are special characters so you can't dump them.
I would do it like this:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <vector>

using namespace std;

const string text = "Data.txt";

string readFile (string fileName);
bool isInt (string text);
bool isIden (string text);
bool isReal (string text);
bool isSpecial (string text);
bool isResv (string text);

void parseToken (const string& input, vector<string> &tokens);
void displayToken (const vector<string> &tokens);

int main ()
{
  cout << "Opening text file to store data" << endl;
  string input = readFile (text);
  vector<string> tokens;
  parseToken (input, tokens);
  displayToken (tokens);
  system ("PAUSE");
  return 0;
}

void parseToken (const string& input, vector<string> &tokens)
{
  string buffer;

  for (char ch : input)
  {
    switch (ch)
    {
      case ',':
      case ';': // special char
        if (!buffer.empty ())
        {
          tokens.push_back (buffer);
          buffer.clear ();
        }
        buffer = ch;
        tokens.push_back (buffer);
        buffer.clear ();
        break;
        case ' ': // separator
        case '=':
          if (!buffer.empty ())
          {
            tokens.push_back (buffer);
            buffer.clear ();
          }
          break;
       default:
        buffer += ch;
    }
  }
}

void displayToken (const vector<string> &tokens)
{
  for (string s : tokens)
  {
    // TODO analyze and print the tokens
  }
}

string readFile (string fileName)
{
  ifstream src(fileName);

  if (!src)
  {
    cerr << "\aFILE ERROR" << "\n\n";
    exit (EXIT_FAILURE);
  }

  ostringstream oss;
  string line;
  while (getline (src, line))
  {
    
    oss << line;
  }
  return oss.str ();
}
bool isInt (string text)
{
  for (char ch : text)
  {
    if (!isdigit (ch))
    {
      return false;
    }
  }
  return true;
}

bool isIden (string text)
{
  bool isIden = false;
  while (isIden == false)
  {
    if (isalpha (text[0])) //if first letter is a word 
    {
      for (int i = 1; i < text.length (); i++)
      {
        if (isalnum (text[i])) //if letter or digit don't do anything
        {
        }
        else
        {
          return isIden;
        }
      }
      isIden = true;
      return isIden;
    }
    else //if first char is not a letter 
    {
      return isIden;
    }
  }

}

bool isResv (string text)
{
  if (text == "int" || text == "float")
  {
    return true;
  }
  else
  {
    return false;
  }

}

bool isSpecial (string text)
{
  if (text == ";" || text == ",")
  {
    return true;
  }
  else
  {
    return false;
  }
}

bool isReal (string text)
{
  bool isReal = false;
  int dec = 0;
  while (isReal == false)
  {
    for (int i = 0; i < text.length (); i++)
    {
      if (isdigit (text[i]) && dec == 0) //continue when no decimals found
      {
      }
      if (isdigit (text[i]) <= 57 && dec == 1) //continue when 1 decimal found
      {
      }
      if (text[i] == '.')
      {
        dec++;
      }
      else
      {
        return false; //return false when more than 1 decimal found
      }
    }
    isReal = true;
    return isReal;
  }
}
Topic archived. No new replies allowed.