Formatting txt file: unwanted empty line and need space before and after punctuation

Hey, All!
Goals:
1. Getting rid of comments *** DONE!
2. Ensure a space before and after punctuation marks **NOT CONSISTENTLY COMPLETE
2. Getting rid of extra blank spaces and blank lines **NOT CONSISTENTLY COMPLETE


Problems with my output:
1. There is a blank line (line 8) even though the others have been ignored
2. Some of the punctuation has been formatted with the desired space before and space after, but not all the instances have been corrected.

Note: I used several methods (replace, insert, etc) for formatting the punctuation but none produce uniform results.

Any suggestions are appreciated. I have a long list of tasks to perform on this baby but have been tracing this for several days and am stucker than stuck. Thank you in advance!

Before text file: http://pastebin.com/Db0hxM23

After running program: http://pastebin.com/SWXzEQGv

Actual code:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
#include <iostream>
#include <string>
#include <fstream>
#include <cstdio>

using namespace std;

//user-declared functions
void strip_normal_comment(string &text);
void strip_comments_blanks(const string file_name);
void spaces_for_punctuation(string &text);


int main ()
{
	strip_comments_blanks("Finali.txt");
	return 0;
}

//------------------------------------------------------
//Name: void strip_normal_comment(string &text)
//Purpose: Remove a "normal comment" (//block) from text
//------------------------------------------------------
void strip_normal_comment(string &text)
{
	//Does not continue if line is less than/equal to a character.
	//The comment itself (//) has two characters
	if (text.length() <= 1)
		return;

	for (size_t i = 0; i < text.length(); i++)
	{
		//If matches
		if (text[i] == '/' && text[i + 1] == '/')
		{
			//Then erase everything starting with the position 'i'.
			//This position represents the place of the first '/'
			text.erase(i);
			return;
		}
	}
}

void spaces_for_punctuation(string &text)
{
	
	size_t found;

	//Does not continue if line is less than a character.
	if (text.length() < 1)
		return;

	for (size_t i = 0; i < text.length(); i++)
	{
		if (text[i] == ';')
		{
			found = text.find(";");
			//Then add a space before and after the semicolon.
			text.insert(found, " "); 
			text.insert(found + 2, " ");  
			return;
		}

		if (text[i] == ',')
		{
			found = text.find(",");
			//Then add a space before and after the comma.
			text.replace(found, 1, " , ");
			return;
		}

		if (text[i] == '(')
		{
			found = text.find("(");
			size_t q = i + 1;

			//Then add a space before and after the open parens.
			if (text[i] == ' ')
			{ 
				//do nothing before the (
				return;
			} else
			{
				//insert space before ( if isn't there
				text.insert(i, " ");
				q++;
				return;
			}
				
			if(text[q] == ' ')
			{
				//do nothing after the (
				return;
			} else
			{
				//insert space after ( if isn't there
				text.insert(q, " "); 
				return;
			}
			
			return;
		}

		if (text[i] == ')')
		{
			found = text.find(")");
			//Then add a space before and after the close parens.
			text.insert(found, " "); 
			text.insert(found + 2, " ");  
			return;
		}

		if (text[i] == '*')
		{
			found = text.find('*');
			//Then add a space before and after the mult. sign
			text.insert(found, " "); 
			text.insert(found + 2, " "); 
			return;
		}

		if (text[i] == '-')
		{
			found = text.find("-");
			//Then add a space before and after the minus. sign
			text.insert(found, " "); 
			text.insert(found + 2, " "); 
			return;
		}

		if (text[i] == '+')
		{
			found = text.find('+');
			//Then add a space before and after the add sign
			text.insert(found, " "); 
			text.insert(found + 2, " "); 
			return;
		}

		if (text[i] == '=')
		{
			found = text.find('=');
			//Then add a space before and after the equal sign.
			text.replace(found, 1, " = ");
			return;
		}
	}
}

//------------------------------------------------------
//Name: string trim(const string str, const string whitespace = " \t")
//Purpose: finds where the whitespace starts and ends
//------------------------------------------------------
string trim(const string str, const string whitespace = " \t")
{
    const auto strBegin = str.find_first_not_of(whitespace);
    
	if (strBegin == std::string::npos)
        return ""; // no content

    const auto strEnd = str.find_last_not_of(whitespace);
    const auto strRange = strEnd - strBegin + 1;

    return str.substr(strBegin, strRange);
}

//------------------------------------------------------
//string reduce(const string str, const string fill = " ", const string whitespace = " \t")
//Purpose: Remove a "normal comment" (//block) from text
//------------------------------------------------------
string reduce(const string str, const string fill = " ", const string whitespace = " \t")
{
    // trim first
    auto result = trim(str, whitespace);

    // replace sub ranges
    auto beginSpace = result.find_first_of(whitespace);
    while (beginSpace != std::string::npos)
    {
        const auto endSpace = result.find_first_not_of(whitespace, beginSpace);
        const auto range = endSpace - beginSpace;

		//shifts characters over to the left
        result.replace(beginSpace, range, fill);

        const auto newStart = beginSpace + fill.length();
        beginSpace = result.find_first_of(whitespace, newStart);
    }

    return result;
}


//------------------------------------------------------
//Name: void strip_comments_blanks(const string file_name)
//Purpose: removes comments
//Functions called: void strip_normal_comment(string &text)
//------------------------------------------------------
void strip_comments_blanks(const string file_name)
{
	//open the file, and check for existance
	ifstream ifs;
	ifs.open(file_name.c_str());

	if(ifs.fail())
			cout<< "Cannot find file " << file_name << "!\n";
	else
	{
		ofstream created;
		//result file (the one with no comments nor extra spaces) will 
		//be called uncommented_formatted-YourFileName.extension

		string created_name = "uncommented_formatted-" + file_name;
		created.open(created_name.c_str());

		string line; 
		string temp;
		size_t pos = 0;
		size_t pos2 = 0;

		bool matched_pos = false;

		//infinite loop.  a "break" inside this while will tell it when
		//to stop.

		while(true)
		{
			if (!matched_pos)
			{
				//Extract line, if not at end of file
				if(getline(ifs, line))
				{
					temp = line; 
					strip_normal_comment(temp);
					spaces_for_punctuation(temp);
				}else
					break;
			}
			if(!temp.empty())		
			{
				created<< reduce(temp) << '\n';	
			}
		}
		created.close();	
	}
	ifs.close();//close file.
}
There is a blank line (line 8) even though the others have been ignored

Did you observe that the only blank line left is the line where comment was stripped.
So, after removing comment, you should probably check if that line is a blank line. If yes, then you need to take appropriate action.

Some of the punctuation has been formatted with the desired space before and space after, but not all the instances have been corrected.

I think the problem is, you are returning from the function once you are taking care of some punctuation. So, if there are multiple punctuations in the same line, they are not getting addressed. eg:
1
2
3
4
5
6
7
8
	if (text[i] == '*')
		{
			found = text.find('*');
			//Then add a space before and after the mult. sign
			text.insert(found, " "); 
			text.insert(found + 2, " "); 
			return;
		}

Here, once * is taken care of, it does not check for further punctuation in the line.


These should solve most of the problems. If you are still facing issues, please reply with the latest output after the above changes are made.

Lemme know how it goes.
you should probably check if that line is a blank line. If yes, then you need to take appropriate action.


You are right! Line wasn't empty but had spaces so it was printed to the new file.

you are returning from the function once you are taking care of some punctuation. So, if there are multiple punctuations in the same line, they are not getting addressed.


This was the assignment I learned what "continue" was =) Thank you for pointing that out. Also, I learned about regex, but am having trouble with it. I commented out the portion that formatted the spacing for "(", ")", "*", "+" because it would build but cause me to abort when I ran it. I'm not sure why it works will with the other characters but not those four.

Current code:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190

#include <iostream>
#include <string>
#include <fstream>
#include <cstdio>
#include <regex>
#include <algorithm>

using namespace std;
using namespace std::tr1;

//user-declared functions
void strip_normal_comment(string &text);
void strip_comments_blanks(const string file_name);
string punctuation(string &text);

int main ()
{
	strip_comments_blanks("Finali.txt");
	return 0;
}

//------------------------------------------------------
//Name: void strip_normal_comment(string &text)
//Purpose: Remove a "normal comment" (//block) from text
//------------------------------------------------------
void strip_normal_comment(string &text)
{
	//Does not continue if line is less than/equal to a character.
	//The comment itself (//) has two characters
	if (text.length() <= 1)
		return;

	for (size_t i = 0; i < text.length(); i++)
	{
		//If matches
		if (text[i] == '/' && text[i + 1] == '/')
		{
			//Then erase everything starting with the position 'i'.
			//This position represents the place of the first '/'
			text.erase(i);
			return;
		}
	}
}

string punctuation(string &text)
{
	string rsmi, rcma, reql, rmin, rcln, rpar, rcpn, rmlt, rpls;
	string smi, cma, eql, min, cln, par, cpn, mlt, pls;

	regex osmi(";");
	rsmi = " ; ";
	smi = regex_replace(text, osmi, rsmi);

	regex ocma(",");
	rcma = " , ";
	cma = regex_replace(smi, ocma, rcma);

	regex oeql("=");
	reql = " = ";
	eql = regex_replace(cma, oeql, reql);

	regex omin("-");
	rmin = " - ";
	min = regex_replace(eql, omin, rmin);

	regex ocln(":");
	rcln = " : ";
	cln = regex_replace(min, ocln, rcln);
	
	/*regex opar("(");
	rpar = " ( ";
	par = regex_replace(cln, opar, rpar);

	regex ocpn(")");
	rcpn = " ) ";
	cpn = regex_replace(cln, ocpn, rcpn);

	regex omlt("*");
	rmlt = " * ";
	mlt = regex_replace(cln, omlt, rmlt);

	regex opls("+");
	rpls = " + ";
	pls = regex_replace(min, opls, rpls);*/

	return cln;
	//return pls;
}

//------------------------------------------------------
//Name: string trim(const string str, const string whitespace = " \t")
//Purpose: finds where the string starts and ends
//------------------------------------------------------
string trim(const string str, const string whitespace = " \t")
{
    const auto strBegin = str.find_first_not_of(whitespace);
    
	if (strBegin == std::string::npos)
        return ""; // no content

    const auto strEnd = str.find_last_not_of(whitespace);
    const auto strRange = strEnd - strBegin + 1;

    return str.substr(strBegin, strRange);
}

//------------------------------------------------------
//string reduce(const string str, const string fill = " ", const string whitespace = " \t")
//Purpose: Remove a "normal comment" (//block) from text
//------------------------------------------------------
string reduce(const string str, const string fill = " ", const string whitespace = " \t")
{
    // trim first
    auto result = trim(str, whitespace);

    // replace sub ranges
    auto beginSpace = result.find_first_of(whitespace);
    while (beginSpace != std::string::npos)
    {
        const auto endSpace = result.find_first_not_of(whitespace, beginSpace);
        const auto range = endSpace - beginSpace;

		//shifts characters over to the left
        result.replace(beginSpace, range, fill);

        const auto newStart = beginSpace + fill.length();
        beginSpace = result.find_first_of(whitespace, newStart);
    }

    return result;
}

//------------------------------------------------------
//Name: void strip_comments_blanks(const string file_name)
//Purpose: removes comments
//Functions called: void strip_normal_comment(string &text)
//------------------------------------------------------
void strip_comments_blanks(const string file_name)
{
	//open the file, and check for existance
	ifstream ifs;
	ifs.open(file_name.c_str());

	if(ifs.fail())
			cout<< "Cannot find file " << file_name << "!\n";
	else
	{
		ofstream created;
		//result file (the one with no comments nor extra spaces) will 
		//be called uncommented_formatted-YourFileName.extension

		string created_name = "uncommented_formatted-" + file_name;
		created.open(created_name.c_str());

		string line; 
		string temp;
		size_t pos = 0;
		size_t pos2 = 0;

		bool matched_pos = false;

		//infinite loop.  a "break" inside this while will tell it when
		//to stop.

		while(true)
		{
			if (!matched_pos)
			{
				//Extract line, if not at end of file
				if(getline(ifs, line))
				{
					temp = line; 
					strip_normal_comment(temp);
					temp.erase(temp.begin(), find_if(temp.begin(), temp.end(), not1(ptr_fun<int, int>(isspace))));
				}else
					break;
			}

			if(temp.length() > 0)
			{
				created << reduce(punctuation(temp)) << '\n';
			}
		}
		created.close();	
	}
	ifs.close();//close file.
}


Result: http://pastebin.com/VegVZ1TC
Last edited on
. I commented out the portion that formatted the spacing for "(", ")", "*", "+" because it would build but cause me to abort when I ran it. I'm not sure why it works will with the other characters but not those four.


They are special characters within regex. They are used for other purposes and will only "act" like themselves when you "hug" them with [] brackets. Final code: http://pastebin.com/PNy3q81D
Topic archived. No new replies allowed.