Counting unique words in a text file

I want to count unique words in a text file and display their words and frequencies. Line 60 happens to be the problem. And I don't know how to solve this problem.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
  #include <iostream>
#include <fstream>
#include <cassert>
#include <cstring>

using namespace std;

const int MAX_CHARS = 60 ;
const int MAX_WORDS = 30000 ;



int sequential_search ( int data [], int from, int to, int search_value )
{
    // pre-condition
    assert(0 <= from && 0 <= to) ;

    if ( from > to )
        return to + 1 ;
    int position = from ;
    while ( position <= to && data [ position ] != search_value )
        position++ ;
    return position ;
}

bool open_text_file ( ifstream& infile )
{
    // pre-conditions
    assert( !infile.is_open ()) ;

    // post-condition
    // Result is true only if the file is opened

    cout << "Please enter a file name here: " ;
    char filename [80] ;
    cin.getline( filename, 80, '\n') ;
    infile.open( filename ) ;
    return infile.is_open () ;

}

bool read_word (ifstream& infile, char word [MAX_CHARS])
{
    // pre-conditions
    assert(infile.is_open () ) ;

    // post-condition
    // Result is true only if word has been filled with the next word from infile

    infile >> word ;
    return infile ;

}

bool tally_word ( char word [MAX_CHARS], char words [MAX_WORDS][MAX_CHARS], int freqs [MAX_WORDS], int& no )
{
    // pre-conditions
    assert( no >= 0 && no < MAX_WORDS ) ;

    int pos = sequential_search ( words, 0, no, word ) ;
    if ( pos < no )
    {
        freqs [pos]++ ;
        return true ;
    }
    else if ( pos < MAX_WORDS )
    {
        strcpy ( words [pos], word ) ;
        freqs [pos] = 1 ;
        no++ ;
        return true ;
    }
    return false ;

}

int count_words ( ifstream& infile, char words [MAX_WORDS][MAX_CHARS], int freqs [MAX_WORDS] )
{
    // pre-conditions
    assert( infile.is_open ()) ;

    // post-condition
    // Result is the number of counted words stored in words and freqs

    int no = 0 ;
    char next [MAX_CHARS] ;
    while ( read_word ( infile, next ) && tally_word ( next, words, freqs, no ) && no < MAX_WORDS ) ;
    return no ;
}




void display_frequencies ( char words [MAX_WORDS][MAX_CHARS], int freqs [MAX_WORDS], int no )
{
    // pre-conditions
    assert( no >= 0 && no < MAX_WORDS ) ;

    // Post-condition
    // All words and their frequencies have been printed

    for ( int i = 0 ; i < no ; i++ )
    {
        cout << words [i] << '\t'
             << freqs [i] << '\n' ;
    }
}




int main ()
{
    ifstream file ;
    if ( open_text_file ( file ) )
    {
        char words [MAX_WORDS][MAX_CHARS] ;
        int freqs [MAX_WORDS] ;
        int nr = count_words( file, words, freqs ) ;
        display_frequencies( words, freqs, nr ) ;
        file.close () ;
        return 0 ;
    }
    else return 1 ;
}
Last edited on
Weird, you're passing 'word' as the fourth argument sequential_search(), which is of type char[].
But the fourth argument of sequential_search() is of type int.
Yeah I figured that, I want int pos to be equal to position which is returned in sequential_search. But can't figure out how to do that.
Anyone any suggestions how I could do this?
Anyone any suggestions how I could do this?


Use a map<string,int>

The map will ensure sorting and uniqueness of the strings; the int part can keep count.
1
2
3
4
5
6
#include <map>
.....
map<string,int> MyMap;
.....
MyMap[word]++;
...

The problems are line 13:
 
int sequential_search ( int data [], int from, int to, int search_value )

and line 21
 
    while ( position <= to && data [ position ] != search_value )


which should be:
 
int sequential_search ( char data [MAX_WORDS][MAX_CHARS], int from, int to, char * search_value )
and
 
    while ( position <= to && strcmp( data [ position ] , search_value) )




Edit. Agree with @lastchance suggestion to use std::string and std::map.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <map>

using namespace std;

int main ()
{
    ifstream file( "words.txt");
    
    if ( !file )
        return 1 ;
        
    map<string,int> MyMap;   
    
    for ( string next; file >> next;  )  MyMap[next]++;    

    for ( const auto & item : MyMap )
        cout << setw(30) << item.first << setw(8)  << item.second << '\n' ;
}
Last edited on
Topic archived. No new replies allowed.