Problem: Text Concordance using BST

Hey GUYS, I have this assignment and it is due tomorrow night and I really need your help because I can't get my program to work, so please help me find what is wrong with it. First this is my assignment:
The Problem:
Given a text document, produce a listing of all the distinct words present in the text.
This problem has many applications in linguistics in general and in Computational Linguistics in particular. Text concordance is used to construct a Corpus, a body of the words most commonly used in a modern standard language, e.g., Modern Standard English (MSE) and Modern Standard Arabic (MSA). It is also used in Morphological Analysis of text, in Text Compression and in author and style analysis of text.
Because we do not know in advance how many distinct words are there in the document, the straightforward way to do this is to build a Dictionary of the distinct words (as keys) and their frequencies in the text (as data). However, the number of elements in the Dictionary may grow so large that accessing a word (e.g. by using sequential search) is not efficient. The solution is to maintain an array of Binary Search Trees (BST’s); each BST is for words beginning with a given alphabetical letter.
Implement a BST Template Class and develop a program that would read a text file and generate the text concordance. Consider the words in the file to be separated either by one or more blanks or by end-of-line. Also, words will not be case sensitive.
The user should be able to:
• Search and obtain the probability of a given word in the text and,
• List the highest probability word in each tree.

A text file “I Robot.txt” is provided for testing your program.

Second this is my header file for thebinary search tree
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
/ FILE: bintree.h
// DEFINITION OF TEMPLATE CLASS BINARY SEARCH
// TREE


#ifndef BINTREE_H
#define BINTREE_H

// Specification of the class

template <class keyType, class dataType>

class binaryTree
{   

   public:
	        
      // Public Member functions ...
      // CREATE AN EMPTY TREE
      binaryTree();

      // INSERT AN ELEMENT INTO THE TREE
      bool insert (const keyType &, const dataType &);

      // CHECK IF THE TREE IS EMPTY
      bool empty() const;

      // SEARCH FOR A KEY IN THE TREE
      bool search (const keyType &) const;

	  // RETRIEVE DATA FOR A GIVEN KEY
      bool retrieve (const keyType &, dataType &) const;
	  
	  // TRAVERSE A TREE
      void traverse() const;

	  // MAX OF A TREE
	  void MAX( keyType &,  dataType &)const;

	  // Iterative Pre-order Traversal
	  void preorder () const;

	  // Iterative Level-order Traversal
	  void levelorder () const;

	  // GRAPHIC OUTPUT
      void graph() const;

	  // REMOVE AN ELEMENT FROM THE TREE
      void remove (const keyType &);

      
   private:
      // Node Class
	   class treeNode
	   {
		public:
			keyType key; 		// key 
			dataType data;		// Data
			treeNode *left;		// left subtree	       
			treeNode *right;	// right subtree
	   }; // end of class treeNode declaration

	   typedef treeNode * NodePointer;
	 // Data member ....
      NodePointer root;
	  
      // Private Member functions ...

      // Searches a subtree for a key
      bool search2 (NodePointer , const keyType &) const;
	  
      // Searches a subtree for a key and retrieves data
      bool retrieve2 (NodePointer , const keyType & , dataType &) const;
	  
	  // Inserts an item in a subtree
      bool insert2 (NodePointer &, const keyType &, const dataType &);

	  // Traverses a subtree
      void traverse2 (NodePointer ) const; 

	  // Get the Max of a tree according to the data type
	  void MAX2( NodePointer, keyType &,  dataType &)const;

	  // Graphic output of a subtree
	  void graph2 (int ,NodePointer ) const;

	  // LOCATE A NODE CONTAINING ELEMENT AND ITS PARENT
      void parentSearch ( const keyType &k, bool &found, 
							NodePointer &locptr, NodePointer &parent) const;


}; 

#endif // BINTREE_H 

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#include <iostream>
#include <iomanip>
#include "Stackt.h"
#include "Queuet.h"

using namespace std;

// Member functions ...

// constructor - create an empty tree
template <class keyType, class dataType>
binaryTree<keyType, dataType>::binaryTree()
{ root = NULL; }

//____________ Public search __________________
// Searches for the item with same key as k
//  in a binary search tree.
// Pre : k is defined.
// Returns true if key is located,
//   otherwise, returns false.
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::search(const keyType &k) const  
{
   return search2(root, k); 
} // end of public search   

//____________ Private search __________________
// Searches for the item with same key as k 
// in the subtree pointed to by aRoot. Called
// by public search.
// Pre : k and aRoot are defined.
// Returns true if key is located,
// otherwise, returns false.
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::search2(NodePointer aRoot,
									 const keyType &k)	const
{
    if (aRoot == NULL)      
		return false;                     
    else if (k == aRoot->key)
		return true; 
    else if (k < aRoot->key)
		return search2(aRoot->left, k); 
    else
		return search2(aRoot->right, k); 
} // end of private search

//____________ Public retrieve __________________
// Searches for the item with same key as k
//  and retrieves data part if found
// Pre : k is defined.
// Returns true if key is located,
//   otherwise, returns false.
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::retrieve(const keyType &k,
										     dataType &d) const  
{
   return retrieve2(root, k, d); 
} // end of public retrieve   

//____________ Private retrieve __________________
// Searches for the item with same key as k 
// in the subtree pointed to by aRoot and retrieves
// data part if key is found.Called by public retrieve.
// Pre : k and aRoot are defined.
// Returns true if key is located,
// otherwise, returns false.
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::retrieve2(NodePointer aRoot,
									 const keyType &k,
									 dataType &d)	const
{
    if (aRoot == NULL)      
		return false;                     
    else if (k == aRoot->key)
	{ d = aRoot->data; return true;} 
    else if (k < aRoot->key)
		return retrieve2(aRoot->left, k, d); 
    else
		return retrieve2(aRoot->right,k, d); 
} // end of private retrieve

//____________ Public insert __________________
// Inserts element into a binary search tree.
// Pre : key k is defined.
// Post: Inserts element if k is not in the tree.
// Returns true if the insertion is performed.
// If there is a node with the same key value 
// as k, returns false.
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::insert(const keyType &k, const dataType &d)
{    
	return insert2 (root, k , d);
} // end of public insert

Last edited on
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
//____________ Private insert __________________
// Inserts element in the tree pointed to by 
// aRoot.Called by public insert.
// Pre : aRoot k and d are defined.
// Post: If a node with same key as k is found,
// returns false. If an empty tree is reached,
// inserts element as a leaf node and returns true.
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::insert2(NodePointer &aRoot, 
									  const keyType &k, const dataType &d)
{  

  // Check for empty tree.
  if (aRoot == NULL)
  { // Attach new node
    aRoot = new treeNode; 
    aRoot->left = NULL; 
    aRoot->right = NULL;
    aRoot->key = k;
	aRoot->data = d;
    return true;
  }
  else if (k == aRoot->key)
    return false;  
  else if (k < aRoot->key)
    return insert2 (aRoot->left, k, d); 
  else
    return insert2 (aRoot->right, k, d); 
} // end of private insert

//____________ Public empty __________________ 
// Check if tree is empty
// Pre : none
// Post: Returns true if tree is empty, false 
// otherwise
template <class keyType, class dataType>
bool binaryTree<keyType, dataType>::empty() const
{
   return(root == NULL);
} // end of empty

//____________ Public traverse__________________ 
// Traverses a binary search tree in key order.
// Pre : none
// Post: Each element of the tree is displayed.
// Elements are displayed in key order.
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::traverse() const
{
   traverse2 (root);
} // end of public traverse

//____________ Private traverse__________________
// Traverses the binary search tree pointed to
// by aRoot in key order. Called by traverse.
// Pre : aRoot is defined.
// Post: displays each node in key order.
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::traverse2 (NodePointer aRoot) const       
{
   if (aRoot != NULL)
   { // recursive in-order traversal
     traverse2 (aRoot->left);           
     cout << aRoot->key << " " << aRoot->data << endl; 
     traverse2 (aRoot->right); 
   }
   
} // end of private traverse

//____Public pre-order traversal (Iterative)______
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::preorder () const       
{
	Stackt<NodePointer> s;
	NodePointer t = root;
	s.push(t);
	while(!s.stackIsEmpty())
	{
		s.pop(t); cout << t->key << endl;
		if ( t->right != NULL ) s.push(t->right);
		if ( t->left  != NULL ) s.push (t->left);
	}
}

//____Public Level-order traversal (Iterative)______
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::levelorder () const       
{
	Queuet<NodePointer> q;
	NodePointer t = root;
	q.enqueue(t);
	while(!q.queueIsEmpty())
	{
		q.dequeue(t); cout << t->key << endl;
		if ( t->left  != NULL ) q.enqueue (t->left);
		if ( t->right != NULL ) q.enqueue (t->right);
	}
}


//____________ Public graph__________________ 
// Graphic output of a BST
// Pre : none
// Post: Graphical representation is displayed.
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::graph() const
{
	graph2 (0 , root);
}// end of public graph

//____________ Private graph__________________ 
// Graphic output of a subtree pointed to by 
// aRoot with indent spaces
// Pre : none
// Post: Graphical representation is displayed.
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::graph2(int indent, NodePointer aRoot) const
{
	if (aRoot != NULL)
	{ // recursive in-order traversal
      graph2 (indent+8, aRoot->right);           
      cout << setw(indent) << " " << aRoot->key << endl; 
      graph2 (indent+8, aRoot->left); 
	}
}// end of private graph
	  
//____________ Public remove __________________
// Remove an element from the binary search tree
// Pre : k is defined.
// Post: if k is present, its node will be
// removed and tree will be modified
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::remove (const keyType &k)
{
	bool found;
	NodePointer x,parent;
	// Search for element and its parent
	parentSearch (k, found, x, parent);
	if (!found)
	{
		cout << "Item not in BST\n";
		return;
	}
	// else, element is found
	if ((x->left != NULL)&&(x->right != NULL))
	{	// Node has two children
		// Find inorder successor and its parent
		NodePointer xSucc = x->right;
		parent = x;
		while (xSucc->left != NULL) // descend left
		{ parent = xSucc; xSucc = xSucc->left; }
		// Move contents of xSucc to x and change x
		// to point to successor, which will be removed
		x->key = xSucc->key; x->data = xSucc->data;
		x = xSucc;
	} // end if node has two children
	// Now, node has 0 or 1 child
	NodePointer subtree = x->left; // subtree of x
	if (subtree == NULL) subtree = x->right;
	if (parent == NULL) root = subtree; //remove root
	else if (parent->left == x) //parent left child 
		parent->left = subtree;
	else parent->right = subtree; // right child
	delete x;
} // end of public remove

//____________ Private parentSearch __________________
// Locate a node containing key k and its parent
// Pre : none.
// Post: locptr points to node containing k
// or is NULL if not found, and parent points to 
// its parent. Used by remove
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::parentSearch (const keyType &k, 
											bool &found, 
											NodePointer &locptr,
				   							NodePointer &parent) const
{
	locptr = root;  parent = NULL; found = false;
	while (!found && locptr != NULL)
	{
		if (k < locptr->key) // descend left
		{
			parent = locptr;	locptr = locptr->left;
		}
		else if (locptr->key < k) // descend right
		{
			parent = locptr;	locptr = locptr->right;
		}
		else found = true; // el found
	}// end while
} // end of private parentSearch

//____________ Public MAX__________________ 
// Traverses a binary search tree in key order.
// Gets the max of counter in the tree.
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::MAX(keyType & word, dataType & count) const
{
	MAX2(root, word, count);
} // end of public MAX

//____________ Private MAX__________________
// Traverses the binary search tree pointed to
// Gets the max of counter in the tree.
template <class keyType, class dataType>
void binaryTree<keyType, dataType>::MAX2(NodePointer aRoot, keyType & word, dataType & count) const       
{
   if (aRoot != NULL)
   { 
	   MAX2(aRoot->left, word, count);           
	   count= (aRoot->data > count) ? aRoot->data: count;
	   word=(aRoot->data > count) ? aRoot->key: word;
	   MAX2(aRoot->right, word, count); 
   }
}
MY Program
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// File: BSTappl.cpp
// Test class template binaryTree

#include <iostream>
#include <string>
#include <fstream>// Including fstream
#include "binaryTree.h"
# define dosname "I Robot.txt"
using namespace std;
string line;
void uppercase(string &);// function that changes lower case to upper case
//---------------------------------
int main()
{
	
	int counter;// data
	char ans;// choice of the user of the menu
	binaryTree<string, int> BST [26];// Creating an array of 26 trees
	
	int maxcount=0;// data
	string maxwrd=" ";//key  //Hadeel
	
	ifstream infile;// Declaring an input file.
	infile.open("in.txt");// Opening the source file
	
	infile>>line;// Reading the first word
	
	while(!infile.eof()) // while loop that runs untill the end of file
	{
		uppercase(line);// calling the function

		char FrstLtr=line.at(0);//get the 1st letter of the string
		
		if(!BST[int(FrstLtr)-int('A')].search(line))//seach for the word in the tree of the letter
		{
			BST[int(FrstLtr)-int('A')].insert(line, 1);//if not found insert in the tree
		}
		else//if the word is found
		{
			
			BST[int(FrstLtr)-int('A')].retrieve(line, counter);//retrieve the word
			BST[int(FrstLtr)-int('A')].remove(line);//delete the word
			counter++;
			BST[int(FrstLtr)-int('A')].insert(line, counter);// insert with new counter
		}
			infile>>line;
	}

	cout<<"Welcome"<<endl;


	do
	{

	
		cout<<" You can choose whether you want to search the probability of the occurance of a word in the text"<<endl;
		cout<<"Or you can choose to list the word with the highest occurance probability in every tree"<< endl;
		cout<<"Type S for search or L for list and E if you want to exit"<<endl;

		cin>>ans;

		if((ans=='s')||(ans=='S'))// if the user chooses to search a word

		{
	
			string wrd;
			cin>>wrd;// input the word that he wants to search
			uppercase(wrd);
			char FrstLtr2=wrd.at(0);//get the 1st letter of the string
			if(BST[int(FrstLtr2)-int('A')].search(wrd)) // searching the word in the tree of the first letter
			{
				//if (BST.retrieve(wrd, counter)) cout <<"The probability of the"<<word<<"is"<<counter<<endl;//Hadeel
				if (BST[int(FrstLtr2)-int('A')].retrieve(wrd, counter)) cout <<"The probability of the"<<wrd<<"is"<<counter<<endl;//Hadeel
			}
			else
				cout<<"The word is not found"<<endl;
		}
		else if ((ans=='l')||(ans=='L'))
		{
	
			for(int i=0; i<26;i++)
	
			{
				//BST.MAX(maxwrd, maxcount);// gets the word with highest probability//Hadeel
				BST[i].MAX(maxwrd, maxcount);// gets the word with highest probability
				cout<<" The word with highest probability of" << maxcount << "is" <<maxwrd << " in Tree " << i<< endl;
			}

		}
		else if ((ans=='e')||(ans=='E'))
			cout<<"Thank you for using Text Concordance program"<<endl;
		else
			cout<<"Please entre a vaild answer...."<<endl;
	
	}//while((ans!='e')||(ans=='E'))
	while((ans!='e')||(ans=='E'));//Hadeel
		
		infile.close();


	
			return 0;
}
//------------------------------------------------------------------------------------------------------
// A void function that changes all lower case letters of a word in the text to upper case.

void uppercase(string & line)
{
	//for(int i=0; i<line.length; i++)
	for(int i=0; i<line.length(); i++)//Hadeel
	
		{
			char c=line.at(i);// c is the first letter of the word.
			c=toupper(c);// using toupper function.
			line[i]=c;// returning the letter to the word.
	}
}
//--------------------------------------------------------------------------------------------------------
Topic archived. No new replies allowed.