Printing from a File Problem

Hey everyone, I've been having a problem with this all day, and after bashing my face against the wall for the past, oh, 5 hours, I thought I'd ask you. I've been having problems actually getting the words pulled out of their file. It appears that I am able to pull all of them out as a single line, so I try to sift through that line, split it at the spaces, and deposit each word, once it has been cleaned of punctuation (by another function, which works fine), into a map. I keep getting far fewer words than I put in. Any ideas or suggestions as to why this might be?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
  void get_words( map <string, int >&m )
{
// Reads "prog4.d" line by line. Each time a space is seen in the current line
// a substring is extracted, passed through clean_entry, and the cleaned
// result's count in m is incremented.
// If the file cannot be opened, prints a message and exits with status -1.

string::iterator parse;
string s1, s2, line;
ifstream myfiles("prog4.d");

if (myfiles.is_open())
{
 // NOTE(review): eof() is tested before the read, so a getline that fails at
 // end of file is still followed by one more pass over `line`.
 while (!myfiles.eof())
 {
// [] (char c) -> bool { return (isalnum(c)); });
 getline(myfiles, line, '\n') ;
 size_t pos = 0; // start offset of the next substring to extract
  for (parse = line.begin(); parse != line.end(); parse++) // walk the line one character at a time
  {                                                     // extraction is triggered only by a space,
                                                        // so text after the last space is never extracted
   if(*parse == ' ')
   {    // NOTE: find() with no start position searches from the beginning of
        // the line, so len is always the offset of the FIRST space in the
        // line, not the space that parse currently points at.
        size_t len = line.find(*parse);
        s1 = line.substr (pos, len-pos);
         if (s1.length() > 0)
         {
         clean_entry(s1, s2); // s2 receives the cleaned form of s1
         m[s2]++;             // count the cleaned word

         s1.clear();
         s2.clear(); // reset both strings before the next extraction
         pos = len;} // pos now equals the first-space offset, so every later
                     // substr(pos, len-pos) on this line has length 0
    }
   }
  }
  myfiles.close();
}
else
        {
        cout << "Could not open file" << endl;
        exit (-1);
        }
}

void print_words( const map <string, int >& m ) //prints the words
{
PRN p (m.size());
int i =1;

for (map <string, int>::const_iterator it = m.begin(); it != m.end(); it++)
{if (i != NO_ITEMS)
 { cout  << left << setw(ITEM_W) << it->first  << ": " << it->second << " ";
   i++;}
 else
 {cout << left << setw(ITEM_W) << it->first  <<  ": " << it->second << endl;
  i = 1;}
}
cout << endl;
cout << "Number of distinct words: " << m.size() << endl;


Here's the output I should be getting

1                : 1    100              : 1    a                : 8
about            : 4    absorb           : 1    and              : 2
another          : 1    any              : 1    are              : 2
as               : 1    at               : 1    available        : 1
be               : 1    because          : 1    become           : 2
best             : 1    book             : 4    bring            : 1
c                : 6    called           : 1    can              : 2
causing          : 1    change           : 1    chip             : 1
choice           : 1    comma            : 1    computer         : 3
content          : 1    damage           : 1    deliberately     : 1
design           : 1    devoted          : 1    do               : 1
encouraged       : 1    enjoy            : 1    etc              : 1
expand           : 1    experiment       : 1    exposed          : 1
fabrication      : 1    familiar         : 1    familiarity      : 1
fingertips       : 1    for              : 2    forget           : 1
games            : 1    get              : 1    greater          : 1
have             : 2    having           : 1    heart            : 1
help             : 1    however          : 1    if               : 3
illustrated      : 1    in               : 4    increase         : 1
involved         : 1    is               : 6    it               : 5
just             : 1    keywords         : 1    knowledge        : 1	
language         : 5    learn            : 1    made             : 1
more             : 2    most             : 1    not              : 1
of               : 2    omit             : 1    on               : 1
one              : 2    or               : 2    over             : 1
period           : 1    play             : 1    preface          : 1
preferably       : 1    processing       : 1    proficient       : 1
programming      : 1    programs         : 2    quite            : 1
read             : 1    required         : 1    secrets          : 1
seeking          : 1    semicolon        : 1    simply           : 1
speed            : 1    statement        : 1    such             : 1
terminal         : 1    terminate        : 1    text             : 1
that             : 1    the              : 11   then             : 1
thing            : 1    this             : 5    thoroughly       : 1
time             : 1    to               : 15   typing           : 1
understand       : 1    understanding    : 1    want             : 2
way              : 1    well             : 1    what             : 1
whatever         : 1    will             : 3    wise             : 1
with             : 5    without          : 1    word             : 1
worry            : 1    you              : 14   your             : 3

no of words in input stream : 219
no of words in output list  : 120



and here's the output I am getting:


a               : 1 about           : 1 deliberately    : 1
fabrication     : 1 get             : 1 in              : 1
it              : 1 knowledge       : 1 language        : 2
over            : 1 play            : 1 seeking         : 1
semicolon       : 1 this            : 1 to              : 1
understand      : 1 you             : 3
Number of distinct words: 17
12345678101112131415161720no of words in input stream: 0

ignore the bottom string of numbers, that's a class problem I am currently working on solving. Any and all ideas are appreciated.
Well, I looked at the code, but haven't fixed it. On the other hand, I do think it is unnecessarily complicated. Did you consider something like this:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// Builds a word-frequency table from the file "prog4.d".
//
// Every whitespace-separated token is passed through clean_entry and the
// cleaned word's count in `m` is incremented. Tokens that clean down to an
// empty string (for example a lone "," or "???") are skipped so that they do
// not show up in the table under an empty key.
//
// m: map from cleaned word to number of occurrences; existing entries are
//    kept and incremented.
//
// If the file cannot be opened, prints "Could not open file" and exits with
// status -1.
void get_words(std::map<std::string, int>& m)
{
    std::ifstream myfiles("prog4.d");

    if (!myfiles.is_open())
    {
        std::cout << "Could not open file" << std::endl;
        std::exit(-1);
    }

    std::string s1, s2;
    while (myfiles >> s1)        // operator>> skips all whitespace, newlines included
    {
        s2.clear();              // never carry a previous word into this iteration
        clean_entry(s1, s2);     // s2 receives the cleaned form of s1
        if (!s2.empty())         // punctuation-only tokens leave nothing to count
        {
            ++m[s2];
        }
    }
}
Check out comments:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56

// Instrumented copy of get_words: same logic as the version under discussion,
// with extra output showing each line, each character visited, and the
// position reported for every space. The code is intentionally left
// unfixed; the boxed comments explain what goes wrong.
void get_words( map <string, int >&m ) {
   string::iterator parse;
   string s1, s2, line;
   ifstream myfiles("prog4.d");

   if (myfiles.is_open()) {
       while (!myfiles.eof()) {
          getline(myfiles, line, '\n') ;
          size_t pos = 0; // start offset of the next substring to extract
          cout << "Working on line: " << line << endl;
          for (parse = line.begin(); parse != line.end(); parse++) // walk the line one character at a time
          {                                                     // a substring is extracted only when a space is seen
             cout << "'" << *parse << "'";
             if(*parse == ' ') {    
                // ******************************************************
                // *
                // * This line is the start of your issue.  You keep finding
                // * the same space (the first one) over and over.  You
                // * need to start the search from one past the position
                // * of the previous space.
                // * Try using:
                // *       size_t find (char c, size_t pos = 0)
                // * The second argument is the one you want to update
                // * each time a new space is found.
                // *
                // ******************************************************
                size_t len = line.find(*parse); // offset of the first space in the line
               
                // ******************************************************
                // * Notice that you are finding the same space over and
                // * over: the printout below prints the same value
                // * for len again and again.
                // ******************************************************
                cout << "\nFound space at pos: " << len << endl;
                s1 = line.substr (pos, len-pos);
                if (s1.length() > 0) {
                   clean_entry(s1, s2); // s2 receives the cleaned form of s1
                   m[s2]++;             // count the cleaned word
   
                   s1.clear();
                   s2.clear(); // reset both strings before the next extraction
                   pos = len;
                } // pos now equals len, so later substr(pos, len-pos) calls on this line are empty
                else {
                   cout << "Zero substr" << endl;
                }
             }
          }
          cout << endl;
      }
      myfiles.close();
   } else {
     cout << "Could not open file" << endl;
     exit (-1);
   }
}
It's repeating on the same space over and over again, and then when it changes down to the next line, it takes a different space. I see. Thanks a ton for that explanation. Certainly will help me in solving this thing. Let me try some of the recommended changes and see what happens.
Also, notice that if a word doesn't have a space after it, no substr is ever produced for it:

Input ----> Output
1
2
3
First   None of the other words
Second   in the line are processed
Third

first     : 1 second    : 1 
Number of distinct words: 2

Only the first word in each line is found, except on the last line: its word doesn't have a space after it and is therefore not parsed.
Hey, so I've updated the code to something a tad more simple as I realized the problem but I've been stuck on this one problem with it. I am now getting an std::out_of_range error. For clarity's sake, the message I am reading in looks like this:

                                                        PREFACE

This is a book about the computer language called C. If you are
seeking a book to increase your typing speed, expand on your
knowledge of word processing, or learn the secrets of chip
fabrication and design, this is not the one for you. However , if
you want to become thoroughly familiar with the C programming
language, then you have made a wise choice. For this book is devoted
to just that--to help you become proficient in C.

        It is one thing to read about a language; it is quite another to
get involved in it.. The best and most time-effective way to absorb
a language such as C is to have 1 terminal or computer available to
you , preferably at you fingertips. You will be exposed  to well
over 100 C programs in this book.

        You are encouraged to experiment with the programs illustrated
in this text. Omit keywords, change a comma to a period, ,
deliberately forget to terminate a statement with the required
semicolon, etc. You can do whatever you want without having to worry
about causing any damage to the computer, because you simply can't.
Play "what if" games to your heart's content. Familiarity with the
language will bring with it greater understanding ;the more you
understand about "C", the more you will enjoy it. ???



Now a bit more detail: I am getting many more strings read in now that I've updated the code, but I am not getting PREFACE and then it ends at the ' , ' in the sentence "However , if you want to become thoroughly familiar" and throws the error. I made some debugging measures to figure out what was wrong and I know what is wrong, but I have no clue how to fix this.

Here's the new code. Any ideas?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
void get_words( map <string, int >&m )
{
//calls the ifstream to read in strings
//sends them through clean entry
//inputs everything from clean entry into a map


string s1, s2, line;
ifstream myfiles("prog4.d");


if (myfiles.is_open())
{
 while (!myfiles.eof())
 {


 getline(myfiles, line) ;
 line += '\n';
 size_t index = 0;
  for (unsigned int i = 0; i <= line.size(); i++) //creating iterator to move through and
  {                                                     //get words without spaces
   if(line[i] == ' ' || line[i] == '\n' )                                                
   {    size_t len = i; //creates the string the second that it sees a space
        cout << "Space found at position: " << len << endl;
        cout << "Index: " << index << " " << "Length afterwards: " << len-index
        << endl;
      cout << "string: " << s1 << endl;
         if (s1.length() > 0)
         {
         clean_entry(s1, s2); //cleans the string and adds it to the map
         m[s2]++;

         s1.clear();
         s2.clear(); //clears the string so that it can be refilled
         index = len;} //puts pos to the front where the process can restart.
    }

   }
  }
  myfiles.close();
}
else
        {
        cout << "Could not open file" << endl;
Last edited on
If you make the changes described in the comments, you will get the right answer.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#include <iomanip>
#include <iostream>
#include <algorithm>
#include <string>
#include <cctype>
#include <list>
#include <map>
#include <fstream>
using namespace std;
// Extracts the first run of alphanumeric characters from s1 and stores it,
// converted to lower case, in s2.
//
// s1: raw token, possibly wrapped in or containing punctuation
//     (e.g. "\"C\",", "that--to", "heart's").
// s2: output; overwritten with the cleaned word. Left empty when s1 contains
//     no alphanumeric character at all.
void clean_entry(const std::string& s1, std::string& s2) {
   // <cctype> functions have undefined behavior for negative char values, so
   // every character is viewed as unsigned char before classification.
   const auto is_word_char = [](unsigned char c) { return std::isalnum(c) != 0; };

   // First alphanumeric character: skips leading punctuation/whitespace.
   const auto start = std::find_if(s1.begin(), s1.end(), is_word_char);

   // The end of the word must be searched for from `start`, not from the
   // beginning of the token; otherwise leading punctuation is mistaken for
   // the end and trailing punctuation is kept.
   const auto finish = std::find_if_not(start, s1.end(), is_word_char);

   // Build a temporary first so the call stays safe even if s1 and s2 refer
   // to the same string.
   s2 = std::string(start, finish);
   std::transform(s2.begin(), s2.end(), s2.begin(),
                  [](unsigned char c) { return static_cast<char>(std::tolower(c)); });
}

// Builds a word-frequency table from the file "prog4.d".
//
// Each line is split at blanks; every non-empty piece is passed through
// clean_entry and the cleaned word's count in `m` is incremented. Pieces that
// clean down to an empty string (punctuation only) are not counted.
//
// m: map from cleaned word to number of occurrences; existing entries are
//    kept and incremented.
//
// If the file cannot be opened, prints "Could not open file" and returns
// without touching `m`.
void get_words(std::map<std::string, int>& m) {
   std::ifstream myfiles("prog4.d");
   if (!myfiles.is_open()) {
      std::cout << "Could not open file" << std::endl;
      return;
   }

   const char* const delimiters = " \t\r";
   std::string s1, s2, line;

   // Using getline as the loop condition stops as soon as a read fails, so no
   // stale or empty "extra" line is processed at end of file.
   while (std::getline(myfiles, line)) {
      std::size_t index = 0;                       // start of the current piece
      while (index < line.size()) {
         // End of the current piece: next delimiter, or end of line so that
         // the last word on a line is not lost.
         std::size_t len = line.find_first_of(delimiters, index);
         if (len == std::string::npos) {
            len = line.size();
         }

         if (len > index) {                        // skip runs of delimiters
            s1 = line.substr(index, len - index);
            s2.clear();
            clean_entry(s1, s2);                   // s2 receives the cleaned form of s1
            if (!s2.empty()) {
               ++m[s2];
            }
         }
         index = len + 1;                          // continue one past the delimiter
      }
   }
}

// Prints the word table three entries per row, followed by the number of
// distinct words.
//
// Each entry is written as the word left-aligned in 18 columns, ": ", and the
// count left-aligned in 3 columns. Entries within a row are separated by a
// single blank; the third entry of a row ends the line.
void print_words(const std::map<std::string, int>& m) {
   const int per_row = 3;
   const int word_width = 18;
   const int count_width = 3;

   int column = 1;   // 1-based position of the next entry within its row
   for (const auto& entry : m) {
      std::cout << std::left << std::setw(word_width) << entry.first << ": "
                << std::setw(count_width) << entry.second;
      if (column == per_row) {
         std::cout << std::endl;
         column = 1;
      } else {
         std::cout << " ";
         ++column;
      }
   }

   // Terminates a partially filled last row (or adds a blank line after a full one).
   std::cout << std::endl;
   std::cout << "Number of distinct words: " << m.size() << std::endl;
}

// Program entry point: fills a word-count map via get_words and prints it
// via print_words.
int main() {
   std::map<std::string, int> word_counts;

   get_words(word_counts);
   print_words(word_counts);

   return 0;
}
Thanks a ton for your response. I got it to work. Really appreciate the time taken to help my noob ass. Haha.
Topic archived. No new replies allowed.