I/O records seperated by {}

New to file I/O and c++ in general, would greatly appreciate any help or if someone can point me in the general direction.
The goal is to read in a file where the records are confined by a pair of {}. However, one record is not necessarily on one line in the input file. Next you remove white space and any duplicate records. The unique records are written on to a plain text file.

For example:

input1.txt
{id:1234567,first:Mary,last:Green}
{id:1234568, first:Peter, last:Morgan}
{id:1234567, first:Mary, last:Green}

output1.txt
{id:1234567,first:Mary,last:Green}
{id:1234568,first:Peter,last:Morgan}

input2.txt
{id:1234567,
first:Mary,last:Green,GPA:4.0} {id:1234568, first:Peter,
last:White , GPA:3.8}


{id:1234567, first:Mary, last:Green, GPA:3.9}

output2.txt
{id:1234567,first:Mary,last:Green,GPA:4.0}
{id:1234568,first:Peter,last:White,GPA:3.8}
{id:1234567,first:Mary,last:Green,GPA:3.9}

This is my code so far. It only works for input1.txt, because I do not know how to parse an input file into seperate records based on a pair of {}. I only know how to parse a file by lines using getline().

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65

#include <iostream>
#include <fstream>
#include <string>
using namespace std;
int main()
{
	string arr[100]; // creates array to hold names
	int size = 100;
	short loop = 0; //short for loop for input
	string line; //this will contain the data read from the file
	ifstream myfile("input1.txt"); //opening the file.
	ofstream myOutput("output1.txt");
	

	
	if (myfile.is_open()) //if the file is open
	{
		while (!myfile.eof()) //while the end of file is NOT reached
		{
			getline(myfile, line); //get one line from the file
			for (int i = 0; i < line.length(); i++)
			{
				if (line[i] == ' ') line.erase(i, 1); //erase if there is a white space
			}
			arr[loop] = line;
			loop++;
		}

		//get rid of duplicates
		for (int i = 0; i < size - 1; i++)
		{
			// cout << myArr[i] << '\n';
			for (int j = i + 1; j < size; j++)
			{
				// cout << '\t' << myArr[j] << '\n';

				if (arr[i] == arr[j]) // then this is a duplicate 
					arr[j] = "";
			}

		}

		//cout
		for (int i = 0; i < size; i++)
		{
			cout << arr[i] << endl; //and output it
		}
		//output in txt file
		if (myOutput.is_open())
		{
			for (int i = 0; i < size; i++)
			{
				myOutput << arr[i] << "\n";
			}
			myOutput.close();
		}

		myfile.close(); //closing the file
	}
	else cout << "Unable to open file"; //if the file is not open output
	system("PAUSE");
	return 0;
}
 
Last edited on
> how to parse an input file into seperate records based on a pair of {}.
> I only know how to parse a file by lines using getline().

There is an overload of std::getline() which accepts a delimiter character.
For instance, std::getline( stm, str, '}' ) would read everything up till a '}' is encountered.
( the '}' is extracted but not stored into the string).

Using the facilities available in the standard library would make the code
a. easier to write
b. easier to read and understand
c. less error-prone
d. potentially, more efficient.

For instance, strongly favour std::vector<> over c-style arrays.
https://cal-linux.com/tutorials/vectors.html

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
#include <iostream>
#include <string>
#include <fstream>
#include <cctype>
#include <vector>
#include <algorithm>
#include <sstream>

// get one delimited record
std::istream& get_delimited_record( std::istream& stm, std::string& str, char delim_begin, char delim_end )
{
    // 1. read all characters upto delim_begin into str, extract and discard the delimiter
    // 2. if step 1 was successful, read all characters upto delim_end into str
    // use std::getline with a delimiter:
    // http://en.cppreference.com/w/cpp/io/basic_istream/getline overload (2)
    std::getline( stm, str, delim_begin ) && std::getline( stm, str, delim_end ) ;

    return stm ;
}

// return string with white spaces and non-printable characters removed
std::string remove_ws( std::string str )
{
    std::string result ;
    // range base loop : http://www.stroustrup.com/C++11FAQ.html#for
    for( char c : str ) if( !std::isspace(c) && std::isprint(c) ) result += c ;
    return result ;
}

// get all delimited records with white spaces removed (ignore empty records)
std::vector<std::string> get_records( std::istream& stm, char delim_begin, char delim_end )
{
    std::vector<std::string> recs ;

    std::string rec ;
    while( get_delimited_record( stm, rec, delim_begin, delim_end ) )
    {
        rec = remove_ws(rec) ;
        if( !rec.empty() ) recs.push_back(rec) ; // ignore empty records
    }

    return recs ;
}

int main()
{
    // create an input stream for testing purposes
    // http://en.cppreference.com/w/cpp/io/basic_istringstream
    std::istringstream input_file( R"(
    {id:1234567,
       first:Mary,last:Green,GPA:4.0} {id:1234568, first:Peter,
    last:White , GPA:3.8} {       }


      {id:1234567, first:Mary, last:Green, GPA:3.9}
    {
        id:1234568,
        first:Peter,
        last:White ,
        GPA:3.8
    } {}

             {id:1234567, first:Mary  ,last:Green,GPA:4.0}

    )"
    ) ;

    // dump the contents of the input stream (for debug support)
    // http://en.cppreference.com/w/cpp/io/basic_istringstream/str
    std::cout << "input\n-----\n" << input_file.str() << "\n\n" ;

    const char rec_begin = '{' ;
    const char rec_end = '}' ;

    // get all the records with white spaces removed
    auto records = get_records( input_file, rec_begin, rec_end ) ;

    // sort the records (case sensitive)
    // http://en.cppreference.com/w/cpp/algorithm/sort
    // http://en.cppreference.com/w/cpp/iterator/begin
    std::sort( std::begin(records), std::end(records) ) ;

    // remove duplicate records (case sensitive)
    // http://en.cppreference.com/w/cpp/algorithm/unique
    // http://en.cppreference.com/w/cpp/container/vector/erase overload (2)
    records.erase( std::unique( std::begin(records), std::end(records) ), std::end(records) ) ;

    std::cout << "output\n-----\n" ;
    // write the non-duplicated records to output
    // range base loop : http://www.stroustrup.com/C++11FAQ.html#for
    // auto: http://www.stroustrup.com/C++11FAQ.html#auto
    for( const auto& rec : records ) std::cout << rec_begin << rec << rec_end << '\n' ;
}

http://coliru.stacked-crooked.com/a/f6bee32a1bcfd6b0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <set>
#include <algorithm>
#include <iterator>
using namespace std;

string readFile( string filename );
vector<string> getDelimitedStrings( const string &text, string first, string last );
template <typename T> void removeDuplicates( vector<T> &V );
void writeFile( string filename, const vector<string> &items );

//======================================================================

int main()
{
   string infile  = "input.txt";
   string outfile = "output.txt";
   string allText = readFile( infile );                                   // read file, removing whitespace and line feeds
   vector<string> items = getDelimitedStrings( allText, "{", "}" );       // extract records
   removeDuplicates( items );                                             // remove duplicates
   writeFile( outfile, items );                                           // write file
}

//======================================================================

string readFile( string filename )
{
   string result;
   ifstream in( filename );
// in >> noskipws;      // uncomment if you wanted to retain white space
   remove_copy( istream_iterator<char>{in}, {}, back_inserter( result ), '\n' );
   in.close();
   return result;
}

//======================================================================

vector<string> getDelimitedStrings( const string &text, string first, string last )
{                                                  
   vector<string> collection;
   unsigned int pos = 0, start;

   while ( true )
   {
      start = text.find( first, pos );
      if ( start == string::npos ) return collection;
      pos = text.find( last, start );
      pos += last.size();
      collection.push_back( text.substr( start, pos - start ) );
   }
}

//======================================================================

template <typename T> void removeDuplicates( vector<T> &V )
{
   set<T> S;
   V.erase( remove_if( V.begin(), V.end(), [&S]( T val ){ return !S.insert( val ).second; } ), V.end() );
}

//======================================================================

void writeFile( string filename, const vector<string> &items )
{
   ofstream out( filename );
   for ( string s : items ) out << s << endl;
   out.close();
}

//====================================================================== 


input.txt:
{id:1234567,
first:Mary,last:Green,GPA:4.0} {id:1234568, first:Peter,
last:White , GPA:3.8}

{id:1234567,
first:Mary,last:Green,GPA:4.0} {id:1234568, first:Peter,
last:White , GPA:3.8}


{id:1234567, first:Mary, last:Green, GPA:3.9}


output.txt:
{id:1234567,first:Mary,last:Green,GPA:4.0}
{id:1234568,first:Peter,last:White,GPA:3.8}
{id:1234567,first:Mary,last:Green,GPA:3.9}
Topic archived. No new replies allowed.