Using BST for concordance program

The problem is that I want it to ignore punctuation when adding strings to the BST. But it seems to take the letter(s) after the punctuation and consider it as another word. Ex It's and Its should be the same word (punc ignored) but the program will see the word It's as the word "it" and the word 's'. Also the last word in the text file is not showing up in the list. Hope that's good enough explanation. Im sure there's just one tiny thing missing from my program to fix this. If you need any more info about the program let me know. Thanks so much

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/*


File Name: CBST.cpp

This program will read an input file of text and builds a concordance of the words in the file.
After the file has been read and processed, the program will display the list of words (in alphabetical order)
and how many times the word shows up in the input file. This program will ignore punctuation, capital letters
and whitespace.
*/



#include <iostream>
#include <iomanip>
#include <fstream>
#include <cstring>
#include "BST.h"
using namespace std;

typedef char Word[MAX+1];

void make_list(ifstream& infile, Word& array);
//@param ifstream& infile: A stream to read an input file
//@param Word& array: an array of type Word that will hold a single word (string) from the input file
//@output: A BST that contains every word from the input file with the correct word frequency

void read_word(ifstream& infile, Word array);
//@param ifstream& infile: A stream to read an input file
//@param Word array: an array containing a Word that will be used as an input and output
//@output: the result will remove any punctuation and will turn all characters to uppercase


int main(){

    char file_name[100];
    typedef char Word[MAX+1];
    ifstream infile;
    Word array;

    cout << "Enter a file name: ";
    cin >> file_name;               // Get file name

    infile.open(file_name);         // Open file
    if(!infile)                     // If we couldn't open the file...
    {
        cout << "Failed to open file." << endl;
        return 0;
    }

    make_list(infile, array);       // Make the concordance

    infile.close();                 // Close the file

    return 0;
}


void make_list(ifstream& infile, Word& array){

    BST bst;

    read_word(infile, array);               // Read a word
    while(!infile.eof())                    // While the file isn't empty...
    {

        bst.insert(array);     // Insert into BST

        read_word(infile, array);           // Read another word
    }

    cout << bst;

    cout << "The file contains " << bst.length() << " distinct words." << endl;
}



void read_word(ifstream& infile, Word array){

    char ch;
    int i = 0;

    infile.get(ch);

    while(!infile.eof() && isalpha(ch) && !isspace(ch) && !ispunct(ch))
    {
        if(i > MAX-1)
        {
            while(!isspace(ch) && !ispunct(ch))
                infile.get(ch);
            break;
        }

        ch = toupper(ch);


        array[i] = ch;
        i++;
        infile.get(ch);
    }
    if(i != 0)
        array[i] = '\0';
}


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
/*


File Name: BST.h

This is a BST ADT header file that is made for this concordance program
*/

#ifndef CONCORDANCE_H
#define CONCORDANCE_H
#include <iostream>
#include <cstdlib>

const int MAX = 8;

class BST
{
    public:
        typedef char Item[MAX+1];

        // Constructor
        BST();

        // Destructor
        ~BST();

        // Modification Member Functions
        void insert(Item entry); // Inserts an Item entry into the BST. Duplicates will be ignored

        // Constant Member Functions
        int length(); // Returns the number of Nodes in the BST

        // Friend Functions
        //Output function
        friend std::ostream& operator << (std::ostream& out_s, BST& c);

    private:
        // Data Members
        struct Node
        {
            Item data;
            int count;
            Node *left;
            Node *right;
        };


        Node *root;


        void destroy(Node *r); //Deletes all Nodes and pointers in the BST

        void print(Node *p, std::ostream& out_s); // Outputs the data and count from each Node struct in the BST to the terminal

        void help_insert(Node* &t, Item entry); // Shell insert function

        int count(Node *r); //Returns the number of Nodes in the BST


};
#endif 


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/*

File Name: BST.cpp

This is a BST ADT implementation file that is made for this concordance program
*/
#include "BST.h"
#include <iostream>
#include <iomanip>
#include <cstring>
using namespace std;

BST::BST(){

root = NULL;

}

void BST::destroy(Node *r)
{
    if(r != NULL)
    {
        destroy(r -> left);
        destroy(r -> right);
        delete r;
    }
}

BST::~BST()
{
    destroy(root);
}

void BST::help_insert(Node* &t, Item entry)
{
    if ( t == NULL )
    {
        t = new Node;
        strcpy(t -> data, entry);
        t -> count = 1;
        t -> left = NULL;
        t -> right = NULL;
    }
    else if ( strcmp(entry, t -> data) < 0)
        help_insert ( t -> left, entry);
    else if ( strcmp(entry, t -> data) > 0)
        help_insert ( t -> right, entry);
    else // duplicate
        {
        t -> count++;
        return;

    }
}

void BST::insert(Item entry)
{
    help_insert(root, entry);
}

int BST::count(Node *r)
{
    if(r == NULL)
        return 0;
    else
        return count(r -> left) + 1 + count(r -> right);
}

int BST::length()
{
    return count(root);
}

void BST::print(Node *p, ostream& out_s)
{
    if(p != NULL)
    {
        print(p -> left, out_s);
        out_s << left << setw(10) << p -> data << right << setw(9) << p -> count << endl;
        print(p -> right, out_s);
    }
}

ostream& operator << (ostream& out_s, BST& c)
{

    out_s << "Word" << setw(10) << " " << "Count" << setw(8) << endl;
    out_s << "--------------------" << endl;

    c.print(c.root, out_s);

    out_s << "--------------------" << endl;

    return out_s;
}
If you treat punctuation the exact same way as you do whitespace, then you shouldn't be surprised that it marks the end of a word... like whitespace.

Looping on eof is the wrong thing to do.

1
2
3
4
5
6
7
8
9
10
11
12
void read_word(ifstream& infile, Word array) 
{
    infile >> std::ws;          // skip past leading whitespace.
    unsigned char_count = 0;

    char ch;
    while (char_count < MAX && infile.get(ch) && !isspace(ch))
        if (!ispunct(ch))
            array[char_count++] = ch;

    array[char_count] = '\0';
}


Topic archived. No new replies allowed.