Can't write/read Strings (objects containing Strings) to/from a binary file !

HI Everyone .
I created another topic here : http://www.cplusplus.com/forum/beginner/107976/ and I learnt much from it .
But now I am facing a new problem .
this is ALMOST how my "Student" class looks like :
1
2
3
4
5
6
7
8
9
10
11
// Student record as posted in the question: a name, an ID and a
// registration date. Only the setters are shown in this listing.
class Student
{
public:
    // Stores a copy of `input` as the student's name.
    void SetName(std::string input)
    {
        Name = input;
    }

    // Stores a copy of `input` as the student's ID.
    void SetID(std::string input)
    {
        ID = input;
    }

    // Forwards day / month / year to the embedded Date object.
    void SetDateReged(int D, int M, int Y)
    {
        DateReged.SetDate(D, M, Y);
    }

private:
    std::string Name;
    std::string ID;
    Date DateReged;
};

I am asked to write a program for managing students of a university .
The program should be able to write the current state of Students info into a binary file and load them in the program later .
As you see there are strings in my class .
I learnt that strings will cause problems while manipulating with binary files , because strings will return "The Address Of Where The Characters Are Saved" and NOT "The Characters Itself" .
Now I am looking for a method or function so I'll be able to write/read strings to/from binary files .
This is what I did at first :
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
// Single-function version from the question: writes every Student to a
// binary file at a user-supplied path and reads them back in the same run.
// ReturnStusBinary() is defined elsewhere (not shown in this listing).
int main ()
{
    string Path ;
    cout << "\n\nEnter the path in which you wnat to save the current state of institution : " ;
    cin >> Path ;
    fstream fstu ("student.txt" , ios :: in) ; // fstu = File Student
    // The record count is the first value stored in student.txt.
    int NStus ;
    fstu >> NStus ;
    // The rewind has no visible effect: the stream is closed on the next line.
    fstu.seekg (0) ;
    fstu.close () ;
    fstream stubin (Path , ios :: out | ios :: binary) ;
    vector <Student> stus = ReturnStusBinary () ;
    vector <Student> :: iterator it ;
    Student tmp ;
    // The file begins with the record count as a raw int.
    // NOTE(review): the count written is NStus from student.txt, not
    // stus.size() - confirm the two always agree.
    stubin.write ((char *) & NStus , sizeof (int)) ;
    // CHECKED ; 'stus' contains all the students correctly .
    for (it = stus.begin () ; it != stus.end () ; it ++)
    {
        tmp = * it ;
        // NOTE(review): this copies the object's in-memory bytes. Student (as
        // posted) has std::string members, which are not trivially copyable,
        // so a raw write/read of it is undefined behaviour; the characters a
        // string refers to are presumably not part of these bytes (see the
        // replies in this thread).
        stubin.write ((char *) & tmp , sizeof (Student)) ;
    }
    stubin.close () ;
    NStus = 0 ;
    fstream stubinin (Path , ios :: in | ios :: binary) ;
    vector <Student> stusin ;
    stubinin.read ((char *) & NStus , sizeof (int)) ;
    // CHECKED ; NStus shows the right number .
    for (int i = 0 ; i < NStus ; i ++)
    {
        // NOTE(review): the round trip appearing to work here presumably
        // relies on the original objects in 'stus' still being alive in this
        // process - TODO confirm.
        stubinin.read ((char *) & tmp , sizeof (Student)) ;
        stusin.push_back (tmp) ;
    }
    stubinin.close () ;
    // CHECKED ; 'stusin' contains all the students correctly .
    return 0 ;
}

The whole thing was written in main.cpp and main function .
There was no problem , as you can see in comments I checked whether the students are written/read to/from the file correctly .
Then I separated them into two functions :
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
// Asks the user for an output path, writes the record count followed by
// every Student returned by ReturnStusBinary() (defined elsewhere) to that
// file in binary mode, and returns the path that was entered.
string BinarySaver ()
{
    string Path ;
    cout << "\n\nEnter the path in which you wnat to save the current state of institution : " ;
    cin >> Path ;
    fstream fstu ("student.txt" , ios :: in) ; // fstu = File Student
    // The record count is the first value stored in student.txt.
    int NStus ;
    fstu >> NStus ;
    // The rewind has no visible effect: the stream is closed on the next line.
    fstu.seekg (0) ;
    fstu.close () ;
    fstream stubin (Path , ios :: out | ios :: binary) ;
    vector <Student> stus = ReturnStusBinary () ;
    vector <Student> :: iterator it ;
    Student tmp ;
    // The file begins with the record count as a raw int.
    // NOTE(review): the count written is NStus from student.txt, not
    // stus.size() - confirm the two always agree.
    stubin.write ((char *) & NStus , sizeof (int)) ;
    for (it = stus.begin () ; it != stus.end () ; it ++)
    {
        tmp = * it ;
        // NOTE(review): raw byte copy of a Student. The class has std::string
        // members, which are not trivially copyable, so this is undefined
        // behaviour; the string contents are presumably not part of the bytes
        // written here (see the replies in this thread).
        stubin.write ((char *) & tmp , sizeof (Student)) ;
    }
    stubin.close () ;
    return Path ;
}

// Opens the binary file at `Path`, reads the record count and then that many
// Student objects, printing each one's registration date and first name.
// The loaded vector is local and is discarded when the function returns.
// GetDateReged() / GetFirstName() are not part of the Student listing shown
// earlier in the post; they are presumably declared in the full class.
void BinaryLoader (string Path)
{
    // NOTE(review): NStus has no initializer and the read below is not
    // checked, so the loop bound is indeterminate if the read fails.
    int NStus ;
    Student tmp ;
    fstream stubin (Path , ios :: in | ios :: binary) ;
    vector <Student> stus ;
    stubin.read ((char *) & NStus , sizeof (int)) ;
    for (int i = 0 ; i < NStus ; i ++)
    {
        // NOTE(review): overwrites the bytes of a live Student. The class has
        // std::string members, which are not trivially copyable, so this is
        // undefined behaviour; this is the step the poster reports as
        // producing no string output.
        stubin.read ((char *) & tmp , sizeof (Student)) ;
        cout << tmp.GetDateReged ().ToString () ;
        cout << tmp.GetFirstName () ;
        stus.push_back (tmp) ;
    }
    // Declared but never used.
    vector <Student> :: iterator it ;
}
// Two-step version: save first, then load from the path the saver returned.
int main ()
{
    std::string savedPath = BinarySaver () ;
    BinaryLoader (savedPath) ;
    return 0 ;
}

The 'BinaryLoader' can't read objects from the files , as you can see I put two 'COUTs' in the for loop : 1 - To show the Date (Which is non-string) 2 - To show FirstName (which is a string) and the Date is printed correctly on output , but there is no FirstName (and no other strings like : LastName , Certificate , ID and ...)

What I'm looking for is a function that lets me write a copy of the strings' characters into the file instead of their address.
Thank you so much .
Sorry if I wrote a lot , I just wanted to make it clear . :|
> I learnt that strings will cause problems while manipulating with binary files ,
> because strings will return "The Address Of Where The Characters Are Saved"
> and NOT "The Characters Itself" .
¿why are you making the same mistake, then?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#include"iostream"
#include"fstream"
#include"string"
using namespace std;
// Small test type with one int and one string, both public.
class ABC
{
public:
	int a;
	std::string g;

	// A new object starts with a == 0 and g == "success".
	ABC() : a(0), g("success")
	{
	}
};

// Reads two ABC objects back from "trial.txt" as raw bytes and prints their
// members, one per line.
// NOTE(review): ABC has a std::string member, so overwriting the objects'
// bytes with ifs.read() is undefined behaviour; any apparent success is
// implementation- and run-dependent (see the later replies in this thread).
void read()
{
	 // Both objects are heap-allocated and never deleted in this function.
	 ABC* c=new ABC();
     ABC* d=new ABC(); 
	ifstream ifs("trial.txt",ios::binary);
     // Each read copies sizeof(ABC) bytes straight over a constructed object.
     ifs.read((char*)c,sizeof(*c));
   ifs.read((char*)d,sizeof(*d));
   
   cout<<c->a<<"\n";
   cout<<c->g<<"\n";
   cout<<d->a<<"\n";
   cout<<d->g<<"\n";
   ifs.close();
}
// Writes two ABC objects to "trial.txt" as raw bytes: a default-constructed
// one, then one with g == "Failure" and a == 20.
// NOTE(review): ABC has a std::string member, so dumping sizeof(ABC) bytes
// is not a portable way to store it; the string's characters are not
// guaranteed to be part of those bytes (see the later replies in this thread).
void write()
{
	
   ABC a;
   ABC b;
  
   b.g="Failure";
   b.a=20;

   ofstream ofs("trial.txt",ios::binary);
   ofs.write((char*)&a,sizeof(a));
   ofs.write((char*)&b,sizeof(b));
   ofs.close();
   // Resets the stream's error flags; the stream is not used again afterwards.
   ofs.clear();

}
// Runs the write step followed by the read step.
// Returns 0 on success and 1 if either step throws a std::exception.
int main()
{
  try {
    write();
    read();
    return 0;
  }
  // Catch by const reference: catching by value would slice a derived
  // exception down to std::exception and lose its what() message.
  catch (const std::exception& e)
  {
    std::cerr << e.what() << "\n";
    return 1;  // report the failure to the caller instead of claiming success
  }
}


ive written a similar code it works perfectly fine for me,
only problem i see in ur code is with the following lines:
1
2
3
4
5
int Nstus;
 stubin.read ((char *) & NStus , sizeof (int)) ;
  Student tmp ;
and 
 stubin.read ((char *) & tmp , sizeof (Student)) ;

When I tried to do that, it was giving the addresses and some exceptions, so I allocated memory for the variables/objects I am reading; modify your code suitably as per my example.
ps: forgot to add delete statements, kindly add that if u follow my example, there are some technical critics like milchy baouy.
I'm going to assume you haven't already read the article I linked to in the other thread you mentioned (which actually does mention how to read/write strings to a binary file). I'll give a link at the end of this post....

But first... some background.

When you do a raw write of a block of memory, write() will look at the pointer you give it and blindly start copying X bytes to the file. This sort of works for POD (plain old data) types... but it utterly fails for complex types (like strings).

Let's take a look at why.

****Why you should not read/write complex non-POD structs/classes****

Reason #1: Complex types may contain dynamically allocated memory or other pointers

here's a simplistic example:

1
2
3
4
5
6
7
8
9
// Owns a heap array of 10 ints. The object itself stores only the pointer,
// not the ints. No copy constructor or copy assignment is defined here.
class Foo
{
public:
    // Allocates the array.
    Foo() : data(new int[10]) { }

    // Releases the array allocated by the constructor.
    ~Foo() { delete[] data; }

private:
    int* data;  // address of the array; the ints live outside the object
};


Here... our Foo class conceptually contains information for 10 ints (~40 bytes). Yet if you do sizeof(Foo)... it'll probably give you the size of one pointer (~4 bytes).

This is because the Foo class does not contain the data it's referring to... it merely contains a pointer to it. Therefore... a naive write to a file would simply write the pointer and not the actual data.

Attempting to read that data later would just result in having a pointer that points to random memory.

This is similar to what is happening with strings. The string data is actually not in the string class... but rather it is allocated dynamically.

#2: Non POD types may contain VTables and other "hidden" data that you absolutely must not touch

Trivial example:

1
2
3
4
5
6
// Minimal polymorphic type: one int plus a virtual destructor.
class Foo
{
public:
    int x;  // the only declared data member

    // Having a virtual member is what makes the class polymorphic.
    virtual ~Foo() { }
};


sizeof(Foo) is likely going to be larger than sizeof(int) because Foo is now polymorphic... meaning it has a VTable. VTables are black magic and you absolutely must not tinker with them or you risk destroying your program.

But again... a naive read/write doesn't acknowledge that... and will simply try to read/write the full object... vtable and all. Resulting in massive screw ups.





So yeah. Naive reads/writes do not work with complex types unless they are POD.

But if you notice before I said POD types only "sort of" work. What do I mean by that?

****Why you should not read/write POD structs/classes****

Well let's take a look at another trivial example:

1
2
3
4
5
6
// Plain-old-data struct used to illustrate compiler-inserted padding.
// The byte counts below are typical values, not guarantees.
struct Foo
{
    char a;  // 1 byte
    int b;   // typically 4 bytes (the size of int is implementation-defined)
    char c;  // 1 byte
};


Here we have a POD struct. It would not suffer from any of the problems previously mentioned. I added comments to show how many bytes each individual var might take (technically this may vary, but it's typical).

So if a struct is just a collection of all these vars... you would expect the size of the struct to be equal to the sum of all of them... right? so sizeof(Foo) would be 6?

Well... on my machine sizeof(Foo) is 12. SURPRISE!

What's happening is that the compiler is adding padding to the struct so that variables are aligned on certain memory boundaries. This makes accessing them faster.

So when you do a naive, raw write to a file, it will also write the padding bytes. Of course when you read it... you'll read the padding bytes and it'll work as you'd expect.

So why did I say it only sorta works?

Well consider the following situation.

- You run your program and save a bunch of files.
- You port your program to another platform and/or change or update your compiler
- This new compiler happens to assign different padding to the struct
- You run the newly compiled program and try to load the files you saved in the old version of your program


Since the padding changed, the data is read differently (more or less data is read, or the padding is in different spots) - so the read fails and you get garbage.


There are ways you can tell the compiler to leave off the padding. But that raises other problems I won't get into now. Let's just say that memory alignment is important.


So okay... simply put... it's not a great idea to read/write structs in full. So just reading/writing individual vars works... right?

Well.....

****Why you should not read/write any variable larger than 1 byte****

There are 2 things you have to watch out for.

#1: ill-defined size of variables. int might be 4 bytes depending on your platform/compiler... or it might be 2 bytes or it might be 8 bytes.

So reading/writing a full int suffers the same problems as the 'padding' scenario above. If you have a file saved with version X of your program, then rebuild in version Y where int suddenly changed size on you.... your file will not load any more.

This can be solved by using the <cstdint> types like uint8_t, uint16_t, etc which all are guaranteed to have a certain byte size.


#2: endianness. Memory consists of a series of bytes. How an int is stored in memory is how it is stored in the file when you do a raw write. But how the int is stored in memory varies depending on the machine you're running on.

x86/x64 machines are little endian. So if you have int foo = 1;, foo will look like this in memory:
01 00 00 00

So if you save 'foo' to a file on your x86 machine.. then hand that file off to your buddy who is running a big endian machine... he'll read it back the same way.

However.. on a big endian machine.. 01 00 00 00 is not 1.... it's 0x1000000.. or 16777216

So yeah... your load fails and your program explodes.



This is why I make it a point to never read/write anything larger than a single byte to a binary file. Doing so ensures that your file will always work.

Of course you might not care about the finer details.. and you might only care about getting it to work for your mini program that you don't intend to give to anyone besides yourself. In which case you can probably get away with read/writing POD structs just fine.

Just know that doing so is not future proof. Nor is it portable.



With that in mind.... I wrote an article that explains how to do all your binary file IO with just reading/writing individual bytes. This includes how to read/write strings.

The article is here:

http://www.cplusplus.com/articles/DzywvCM9/

I'm happy to answer questions about it, or to clarify anything in this post if it's unclear.
Last edited on
The best approach is to serialize the object and then write it to a file. In Java it's easy: you just have to mark the class as serializable, and then objects of the class will be serializable unless you are inheriting from a non-serializable class. Serialization preserves the state of the object. I think it must be possible to serialize objects in C++ as well; maybe other experts can help you out here!
There is no way to mark a class as serializable in C++, sadly. You either have to implement it yourself or use a library, like Boost.Serialization
http://www.boost.org/doc/libs/1_54_0/libs/serialization/doc/index.html

And support in other libraries includes CArchive (MFC), QDataStream (Qt), and wxXMLSerializer (wxWidgets).

You generally serialize to either a byte stream (as does the Boost library) or to text (XML, JSON, ... or a custom format).

In your case you could easily use a text-based approach as everything except for the date is already a string. And your Date class has a ToString method, so you just need a method to convert a date string back to a value and you're away.

Andy

stus.txt (and stus_2.txt, shown by diff tool to be identical)

Evan%Turing%2013/10/27%TURINGE%Baccalaureate%Computing
Jill%Gates%2012/4/16%GATESJ%Baccalaureate%Business Studies
Bill%Ewing%2012/5/26%EWINGB%Baccalaureate%Marketing


output

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
FirstName   = Evan
LastName    = Turing
DateReged   = 2013/10/27
ID          = TURINGE
Certificate = Baccalaureate
Major       = Computing

FirstName   = Jill
LastName    = Gates
DateReged   = 2012/4/16
ID          = GATESJ
Certificate = Baccalaureate
Major       = Business Studies

FirstName   = Bill
LastName    = Ewing
DateReged   = 2012/5/26
ID          = EWINGB
Certificate = Baccalaureate
Major       = Marketing


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
#include <iostream>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
using namespace std;

// #include "Date.h"

const char chDelim = '%';

// Simple calendar date (day, month, year) with conversion to and from text.
// The text form is "Year/Month/Day", e.g. "2013/10/27". The individual
// components are not range-checked.
class Date {
private:
    int Day;
    int Month;
    int Year;

public:
    // Default date is 0/0/0.
    Date()
    : Day(0), Month(0), Year(0) {}

    // Builds a date from day, month and year (in that argument order).
    Date(int D , int M , int Y)
    : Day(D), Month(M), Year(Y) {}

    // Replaces all three components at once.
    void SetDate (int D , int M , int Y) {
        Day   = D;
        Month = M;
        Year  = Y;
    }

    // Returns the date formatted as "Year/Month/Day" without zero padding.
    std::string ToString () const {
        std::ostringstream oss;
        oss << Year << '/' << Month << '/' << Day;
        return oss.str();
    }

    // Parses text of the form "Year<sep>Month<sep>Day", where <sep> is any
    // single non-whitespace character (ToString() emits '/').
    // Returns true and updates the date on success. Returns false and leaves
    // the date unchanged when the three numbers and two separators cannot be
    // extracted. Anything after the day number is ignored.
    bool Parse (const std::string& str) {
        std::istringstream iss(str);
        // Parse into temporaries so a failed parse cannot leave the object
        // half-updated.
        int parsedYear  = 0;
        int parsedMonth = 0;
        int parsedDay   = 0;
        char delim1 = '\0';
        char delim2 = '\0';
        iss >> parsedYear >> delim1 >> parsedMonth >> delim2 >> parsedDay;
        if (iss.fail()) {
            return false;
        }
        Year  = parsedYear;
        Month = parsedMonth;
        Day   = parsedDay;
        return true;
    }
};

// #include "Person.h"

// A person with a first name, a last name and a registration date.
// Serialized as "FirstName<delim>LastName<delim>Date", where <delim> is the
// file-level chDelim constant and Date uses Date::ToString() format.
class Person
{
private :
    std::string FirstName ;
    std::string LastName ;
    Date DateReged ;
public :
    // The class has virtual functions and is used as a base class, so it gets
    // a virtual destructor to make deletion through a Person* well defined.
    virtual ~Person () {}

    void SetFirstName (std::string input) {FirstName = input ;}
    void SetLastName (std::string input) {LastName = input ;}
    void SetDateReged (int Dayin , int Monthin , int Yearin) {
        DateReged.SetDate (Dayin , Monthin , Yearin) ;
    }
    std::string GetFirstName () const {return FirstName ;}
    std::string GetLastName () const {return LastName ;}
    std::string GetDateRegedString () const {return DateReged.ToString () ;}
    Date GetDateReged () const {return DateReged ;}

    // Returns first name, last name and date, each followed by a newline.
    virtual std::string returnInfo () const {
        return GetFirstName () + "\n" + GetLastName () + "\n"
            + GetDateRegedString () + "\n" ;
    }

    // Writes the three fields separated by chDelim (no trailing delimiter or
    // newline). Returns false if the stream is in a failed state afterwards.
    bool WriteToStream(std::ostream& os) const {
        os << FirstName << chDelim << LastName << chDelim << DateReged.ToString();
        return !os.fail();
    }

    // Reads the three chDelim-separated fields in the order WriteToStream
    // emits them. Returns false as soon as a field cannot be extracted or
    // the date text is rejected by Date::Parse.
    bool ReadFromStream(std::istream& is) {
        if (!std::getline(is, FirstName, chDelim)) {
            return false;
        }
        if (!std::getline(is, LastName, chDelim)) {
            return false;
        }
        std::string dateText;
        // The date may be the last field on the line; reaching end-of-input
        // after extracting characters is not a failure.
        if (!std::getline(is, dateText, chDelim)) {
            return false;
        }
        return DateReged.Parse(dateText);
    }
} ;

// #include "Student.h"

// A Person with an ID, a certificate and a major.
// Serialized as the Person fields followed by
// "<delim>ID<delim>Certificate<delim>Major", where <delim> is chDelim.
class Student : public Person
{
private :
    std::string ID ;
    std::string Certificate ;
    std::string Major ;
public :
    void SetID (std::string input) {ID = input ;}
    void SetCertificate (std::string input) {Certificate = input ;}
    void SetMajor (std::string input) {Major = input ;}
    std::string GetID () const {return ID ;}
    std::string GetCertificate () const {return Certificate ;}
    std::string GetMajor () const {return Major ;}

    // Returns the Person info followed by ID, certificate and major, each on
    // its own line.
    virtual std::string returnInfo () const {
        std::string personInfo = Person::returnInfo () ;
        return personInfo + GetID () + "\n" + GetCertificate () + "\n"
            + GetMajor () + "\n" ;
    }

    // Writes the Person part, then the three Student fields, all separated by
    // chDelim. Returns false if the Person part fails or the stream is in a
    // failed state afterwards.
    bool WriteToStream(std::ostream& os) const {
        if (!Person::WriteToStream(os)) {
            return false;
        }
        os << chDelim << ID << chDelim << Certificate << chDelim << Major;
        return !os.fail();
    }

    // Reads the Person part, then ID, Certificate and Major.
    // Returns false when the Person part fails or a field is missing.
    // An empty Major at the very end of the input is accepted, because that
    // is what WriteToStream produces for a student without a major.
    bool ReadFromStream(std::istream& is) {
        if (!Person::ReadFromStream(is)) {
            return false;
        }
        if (!std::getline(is, ID, chDelim)) {
            return false;
        }
        if (!std::getline(is, Certificate, chDelim)) {
            return false;
        }
        // If end-of-input was hit while reading Certificate, there was no
        // delimiter after it, so the Major field is absent altogether.
        if (is.eof()) {
            return false;
        }
        if (!std::getline(is, Major, chDelim)) {
            // Extraction sets failbit when nothing is left after the final
            // delimiter. That is an empty Major rather than an error, provided
            // the stream simply ran out of input.
            if (is.bad() || !is.eof()) {
                return false;
            }
            is.clear(is.rdstate() & ~std::ios_base::failbit);
        }
        return true;
    }
} ;

// Prints one student as six aligned "Label = value" lines on standard output.
void dump(const Student& stu) {
    std::cout << "FirstName   = " << stu.GetFirstName() << std::endl;
    std::cout << "LastName    = " << stu.GetLastName() << std::endl;
    std::cout << "DateReged   = " << stu.GetDateRegedString() << std::endl;
    std::cout << "ID          = " << stu.GetID() << std::endl;
    std::cout << "Certificate = " << stu.GetCertificate() << std::endl;
    std::cout << "Major       = " << stu.GetMajor() << std::endl;
}

// Prints every element of `vec` through the matching single-element dump()
// overload, with one blank line after each element.
template<typename TElem>
void dump(const std::vector<TElem>& vec) {
    typedef typename std::vector<TElem>::const_iterator Iter;
    for (Iter it = vec.begin(); it != vec.end(); ++it) {
        dump(*it);
        std::cout << std::endl;
    }
}

// Reads one record per line from `is` and appends each successfully parsed
// record to `vec`.
//
// TElem must be default-constructible, copyable and provide
// `bool ReadFromStream(std::istream&)`.
//
// Blank lines are skipped. A line whose record fails to parse is not
// appended and makes the function return false. False is also returned when
// reading stopped for any reason other than reaching end-of-input (for
// example, the stream was never opened successfully).
template<typename TElem>
bool ReadFromStream(std::istream& is, std::vector<TElem>& vec) {
    bool allParsed = true;
    std::string line;
    while(std::getline(is, line)) {
        if(line.empty()) {
            continue; // tolerate blank lines, e.g. at the end of the file
        }
        std::istringstream iss(line);
        TElem elem;
        if(elem.ReadFromStream(iss)) {
            vec.push_back(elem);
        } else {
            allParsed = false; // keep going so the remaining good records load
        }
    }
    // A normal end of the loop leaves eofbit set; anything else means the
    // stream failed before all of the input was consumed.
    return allParsed && is.eof();
}

// Writes every element of `vec` to `os`, one record per line.
//
// TElem must provide `bool WriteToStream(std::ostream&) const`.
//
// Stops at the first element that reports failure and returns false; output
// written up to that point stays in the stream. Also returns false if the
// stream is in a failed state once all records have been written.
template<typename TElem>
bool WriteToStream(std::ostream& os, const std::vector<TElem>& vec) {
    typedef typename std::vector<TElem>::const_iterator Iter;
    for(Iter it = vec.begin(); it != vec.end(); ++it) {
        if(!it->WriteToStream(os)) {
            return false;
        }
        os << '\n'; // plain newline: no need to flush after every record
    }
    return !os.fail();
}

// Loads students from "stus.txt", prints them, and writes them back out to
// "stus_2.txt". Returns 0 on success and 1 if either file cannot be used.
int main() {
    std::vector<Student> stus;
    {
        std::ifstream ifs("stus.txt");
        if(!ifs) {
            std::cerr << "Could not open stus.txt for reading" << std::endl;
            return 1;
        }
        // A failed load is reported but not fatal: whatever was loaded is
        // still printed and saved.
        if(!ReadFromStream(ifs, stus)) {
            std::cerr << "Warning: some records in stus.txt could not be read"
                      << std::endl;
        }
    }
    dump(stus);
    {
        std::ofstream ofs("stus_2.txt");
        const bool written = WriteToStream(ofs, stus);
        ofs.flush(); // surface buffered write errors before checking the state
        if(!written || !ofs) {
            std::cerr << "Could not write stus_2.txt" << std::endl;
            return 1;
        }
    }
    return 0;
}
Last edited on
Topic archived. No new replies allowed.