to lower case

Having trouble with this dictionary spellchecker.

I am able to hash the dictionary, and search the input for properly spelled words. I am having trouble with double spaces, and upper case letter. I can't seem to figure out the logic that I need.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <stdio.h>
#include <string.h>
#include <ctype.h>


using namespace std;

const int Error = 1;

//-----------------------------------------------------------------------------

// Given a character string and the size of the hash table, add up the ascii 
// values of each char and return the sum mod table size.

int hash1( const char *key, int tableSize )
  {
  unsigned int hashVal = 0;

  while( *key != '\0' ) hashVal += *key++;

  return hashVal % tableSize;
  }

//------------------------------------------------------------------------------

// Given a character string, and the size of the hash table, this algorithm 
// assumes the key has at least 2 chars plus the null byte.

int hash2( const char *key, int tableSize )
  {
  // 27 is the number of letters plus the blank in the English alphabet; 729 is
  // 27 squared. 

  return ( key[ 0 ] + 27 * key[ 1 ] + 729 * key[ 2 ] ) % tableSize;
  }

//------------------------------------------------------------------------------

// Loop through the key, 1 char at a time. Add the char's value to hashVal shifted
// left by 5 (which is the same as hashVal*32).

int hash3( const char *key, int tableSize )
  {
  unsigned int hashVal = 0;

  while( *key != '\0' ) hashVal = ( hashVal << 5 ) + *key++;

  return hashVal % tableSize;
  }

//------------------------------------------------------------------------------
//-----------------------------------------------------------------------------

// Given a name, an array and a hash key, insert name into the array at location
// hash key. If there is a collison, use quadratic probing to determine where to 
// place the name.


void insert(char *word, char *hashTable[], int key, int tableSize)
  {
  if(hashTable[key] == NULL)
    {
    //cout << "\nInserting " << word << " at index " << key << endl;

    hashTable[key] = strdup(word);
    }
  else
    {
    int i = 1, j = 1;

    //cout << "\nCollision at index " << key << " doing a quadratic probe ..." << endl;

    while((i < tableSize) && (hashTable[key+j]))
      {
      j = ++i*i;
      }
      
    if(i > tableSize)
      {
      cout << "Error, collision index out of range." << endl;
      }
    else
      {
      //cout << "\nInserting " << word << " at index " << (key+j) << endl;
      hashTable[key+j] = strdup(word);
      }
    } 
  }

//-----------------------------------------------------------------------------

// Given a name to find, the hash table and the table size, return a pointer
// to the node containing name (or NULL if not found).

char *findWord(char *word, char *hashTable[], int tableSize)
  {
  int key = hash1(word, tableSize);

  if(hashTable[key] == NULL) return(NULL);   // Name not in list.

  if(strcmp(hashTable[key], word) == 0)
    {
    return (hashTable[key]);
    }
  else
    {
    int i = 1, j = 1;

    while((i < tableSize) && (hashTable[key+j]) && (strcmp(hashTable[key+j], word) != 0))
      {
      j = ++i*i;
      }

    if(hashTable[key+j] == NULL)
      {
      return (NULL);
      }
    else if(strcmp(hashTable[key+j], word) == 0)
      {
      return(hashTable[key+j]);
      }

    return(NULL);
    }
  }

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

int main( )
{

ifstream dictionary_File("dictionary.txt", ios::in); // Open dictionary

if(dictionary_File.is_open()) // Error check opening file
 {
  cout << "\nDictionary file open." << endl;
 }
else
 {
  cout << "\nDictionary file did not open. Exiting." << endl;
  return(Error);
 }

//-----------------------------------------------------------------------------

ifstream check_File("check.me", ios::in); // Open check file

if(check_File.is_open()) // Error check opening file
 {
  cout << "\nCheck file open." << endl;
 }
else
 {
  cout << "\nCheck file did not open. Exiting." << endl;
  return(Error);
 }

//-----------------------------------------------------------------------------
  
  int tableSize = 104729;     // A prime number.
  char **word;
  char **spellCheck;
  char *hashTable[tableSize];
  char *str;
  const int size = 45428;
  const int num = 47;

//-----------------------------------------------------------------------------
  
  for(int i = 0; i < tableSize; i++) hashTable[i] = NULL;

//-----------------------------------------------------------------------------
  
  string str_word;
  int i=0; 

  word = new char*[size]; // Create array in memory

  while(getline(dictionary_File, str_word))
   {
    word[i] = new char[str_word.size() + 1];   // Allocate
    copy(str_word.begin(), str_word.end(), word[i]);
    //word[str_word.size()] = '\0';

    i++;
   }//end while

//-----------------------------------------------------------------------------  

  // Insert each name into nameList;

  cout << "\nHashing..." << endl;

  for(int i = 0; i < size; i++)
   {
    insert(word[i], hashTable, hash1(word[i], tableSize), tableSize);
   }

//-----------------------------------------------------------------------------

  // Search hashTable for each word.

  cout << "\nSpell Checking..." << endl;  
  spellCheck = new char*[num];
  int j=0;

     while(getline(check_File, str_word, ' '))  // Getting words from file
      {
       for(int i=0; i < str_word.length(); i++) // Remove punctuation
        if(ispunct(str_word[i])) str_word[i] = ' ';        

       for(int i=0; i < str_word.length(); i++) // Remove spaces
        if(isspace(str_word[i])) str_word[i] = '\0';
         
       spellCheck[j] = new char[str_word.size()];   // Allocate
       copy(str_word.begin(), str_word.end(), spellCheck[j]);
       
       j++;       
      }//end while

    cout << "To this point" << endl;

    cin.ignore(); // Pause
    cin.get();
    
    for(int i=0; i<num; i++)
    {
     str = findWord(spellCheck[i], hashTable, tableSize);
     
     if(str)
      {
       cout << str << " was found\n" << endl;
      }
     else                                                  //HELP HERE
      {
       cout << "Searching for: " << spellCheck[i] << endl;
       //for(int i=0; i < spellCheck[i].length(); i++) // Remove punctuation
        spellCheck[i] = tolower(*spellCheck[i]);
       cout << "After tolower: " << spellCheck[i] << endl;
        if(findWord(spellCheck[i], hashTable, tableSize))
         {
          cout << "got here" << endl;
          cout << spellCheck[i] << " was found\n" << endl;
         }
        else cout << spellCheck[i] << " was not found\n" << endl;
      }
      
    }//end for    

  cout << endl;

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
 
//-----------------------------------------------------------------------------



//-----------------------------------------------------------------------------
 cout << "\nEnd of Program" << endl;
 cin.ignore(); // Pause
 cin.get();
 return(0);
}//end main

//----------------------------------------------------------------------------- 
Input file looks like this:

This is a silly file containing red hens and blue ducks named John. Where oh
where have the horses gone to the Moon or to the sun what the hell
is this Paul guy talking about he is very odd on a sunny day with dogs barking.

As of right now, my output is this.


C:\Users\Zero>g++ HashProblem.cpp
HashProblem.cpp: In function 'int main()':
HashProblem.cpp:261:47: error: invalid conversion from 'int' to 'char*' [-fpermi
ssive]
         spellCheck[i] = tolower(*spellCheck[i]);
                                               ^

C:\Users\Zero>
Look closely at that line. Notice how one of them is *spellCheck[i]and the other is spellCheck[i]

Awesome that worked for the tolower case. It's always the simple things that get overlooked.

Any idea on how to avoid spaces, for some reason it isnt wanting to recognize the 'a' in the input.

Here's the output after changing that last error:


C:\Users\Zero>g++ HashProblem.cpp

C:\Users\Zero>a.exe

Dictionary file open.

Check file open.

Hashing...

Spell Checking...
To this point


Searching for: This
After tolower: this
got here
this was found

is was found

Searching for: a
After tolower: a
 was not found

silly was found

file was found

containing was found

red was found

hens was found

and was found

blue was found

ducks was found

named was found

John was found

Searching for: ê
After tolower: ê
 was not found

Searching for: Where
After tolower: where
got here
where was found

oh was found

have was found

the was found

horses was found

gone was found

to was found

the was found

Moon was found

or was found

to was found

the was found

sun was found

what was found

the was found

hell was found

Searching for:
After tolower:
 was not found

this was found

Paul was found

guy was found

talking was found

about was found

he was found

is was found

very was found

odd was found

on was found

Searching for: a
After tolower: a
 was not found

sunny was found

day was found

with was found

dogs was found

barking was found



End of Program
I am not sure why this


Searching for: a
After tolower: a
 was not found


or this


Searching for: ê
After tolower: ê
 was not found


is happening, I used isspace, so maybe something im doing wrong there?
I think the major issue is this.
1
2
3
  string str_word;

  word = new char*[size]; // Create array in memory 
Why not simply use the string as a dynamic array that it already is? Because now you have memory leaks.

As far as the converting the letters to spaces I am not exactly sure why that is happening. Though the isspace function should just tell if you if it is a space or not then return true or false.
If I don't allocate it there, it goes out of scope and anything that I put into word is temporary and goes away. I had trouble with that earlier.

I'm using a teachers code, he's pretty strict about it, otherwise i would have done all of this with strings instead.

I'm not sure as to why it is even reading the spaces at all. It gets to the point in the file where it is formatted:

word.space space word, so this
1
2
for(int i=0; i < str_word.length(); i++) // Remove spaces
        if(isspace(str_word[i])) str_word[i] = '\0';


should remove it, or replace the spaces with null instead.

It reads the letter 'a', but doesn't input it, i am unsure why.
The thing is,
1
2
3
4
5
       for(int i=0; i < str_word.length(); i++) // Remove punctuation
        if(ispunct(str_word[i])) str_word[i] = ' ';        

       for(int i=0; i < str_word.length(); i++) // Remove spaces
        if(isspace(str_word[i])) str_word[i] = '\0';
You make all the punctuation into spaces, then you make all of those spaces null terminators, when you output a c-string it outputs until the null terminator is found. So the problem may lie there.
Well, the input file is like this:

This_is_a_silly_file_containing_red_hens_and_blue_ducks_named_John._Where_oh\n
where_have_the_horses_gone_to_the_Moon_or_to_the_sun_what_the_hell_\n
is_this_Paul_guy_talking_about_he_is_very_odd_on_a_sunny_day_with_dogs_barking.


So, if I get: John.

I convert it to John_
which I convert to John\0
so the output is John

but, I'm getting this: a_
which is converting to a
but im getting an output of _
or NULL.

It makes no sense.
Whats up with all the underscores? You could simply use spaces. Also, it looks like the problem lies somewhere in the find word function.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
char *findWord(char *word, char *hashTable[], int tableSize)
  {
  int key = hash1(word, tableSize);

  if(hashTable[key] == NULL) return(NULL);   // Name not in list.

  if(strcmp(hashTable[key], word) == 0)
    {
    return (hashTable[key]);
    }
  else
    {
    int i = 1, j = 1;

    while((i < tableSize) && (hashTable[key+j]) && (strcmp(hashTable[key+j], word) != 0))
      {
      j = ++i*i;
      }

    if(hashTable[key+j] == NULL)
      {
      return (NULL);
      }
    else if(strcmp(hashTable[key+j], word) == 0)
      {
      return(hashTable[key+j]);
      }

    return(NULL);
    }
  }
It may be returning null for 'A' since it is not found in the hash? That looks to be the problem that it returns null.
I just put all of the underscores to clarify for spaces, theyre not really in the file itself. Maybe use a different hashing function will do better.
Okay that makes a little more sense and yeah you'll wanna probably debug the find word function and the hash function.Or out of curiosity is the 'a' in the dictionary file?
If I output the spellCheck[i] in a for loop, it outputs the 'a' though.

 
for(int i=0; i<num; i++) cout << spellCheck[i] << endl;



To this point


This
is
a
silly
file
containing
red
hens
and
blue
ducks
named
John
ê
Where
oh
have
the
horses
gone
to
the
Moon
or
to
the
sun
what
the
hell

this
Paul
guy
talking
about
he
is
very
odd
on
a
sunny
day
with
dogs
barking
Looks like the period got replaced with an 'ê' might want to ignore those characters when hashing or where ever that change was made.

Try outputting before line 247 then right after and see if the results vary. It's pretty much just trying to debug, I don't see any obvious things wrong but someone else may spot some.

It seems though that 'a' was not added to the dictionary. Since it is not finding it on line 237. Also shouldn't
1
2
      //for(int i=0; i < spellCheck[i].length(); i++) // Remove punctuation
        spellCheck[i] = tolower(*spellCheck[i]);
be
1
2
for(int j = 0; j < lengthOfString; ++j)
    spellCheck[i][j] = tolower(spellCheck[i][j]);
Last edited on
I changed that to:
*spellCheck[i] = tolower(*spellCheck[i]); // Conversion

My updated code:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <stdio.h>
#include <cctype>

using namespace std;

const int Error = 1;

//-----------------------------------------------------------------------------

// Given a character string and the size of the hash table, add up the ascii 
// values of each char and return the sum mod table size.

int hash1( const char *key, int tableSize )
  {
  unsigned int hashVal = 0;

  while( *key != '\0' ) hashVal += *key++;

  return hashVal % tableSize;
  }

//------------------------------------------------------------------------------

// Given a character string, and the size of the hash table, this algorithm 
// assumes the key has at least 2 chars plus the null byte.

int hash2( const char *key, int tableSize )
  {
  // 27 is the number of letters plus the blank in the English alphabet; 729 is
  // 27 squared. 

  return ( key[ 0 ] + 27 * key[ 1 ] + 729 * key[ 2 ] ) % tableSize;
  }

//------------------------------------------------------------------------------

// Loop through the key, 1 char at a time. Add the char's value to hashVal shifted
// left by 5 (which is the same as hashVal*32).

int hash3( const char *key, int tableSize )
  {
  unsigned int hashVal = 0;

  while( *key != '\0' ) hashVal = ( hashVal << 5 ) + *key++;

  return hashVal % tableSize;
  }

//------------------------------------------------------------------------------
//-----------------------------------------------------------------------------

// Given a name, an array and a hash key, insert name into the array at location
// hash key. If there is a collison, use quadratic probing to determine where to 
// place the name.


void insert(char *word, char *hashTable[], int key, int tableSize)
  {
  if(hashTable[key] == NULL)
    {
    //cout << "\nInserting " << word << " at index " << key << endl;

    hashTable[key] = strdup(word);
    }
  else
    {
    int i = 1, j = 1;

    //cout << "\nCollision at index " << key << " doing a quadratic probe ..." << endl;

    while((i < tableSize) && (hashTable[key+j]))
      {
      j = ++i*i;
      }
      
    if(i > tableSize)
      {
      cout << "Error, collision index out of range." << endl;
      }
    else
      {
      //cout << "\nInserting " << word << " at index " << (key+j) << endl;
      hashTable[key+j] = strdup(word);
      }
    } 
  }

//-----------------------------------------------------------------------------

// Given a name to find, the hash table and the table size, return a pointer
// to the node containing name (or NULL if not found).

char *findWord(char *word, char *hashTable[], int tableSize)
  {
  int key = hash1(word, tableSize);

  if(hashTable[key] == NULL) return(NULL);   // Name not in list.

  if(strcmp(hashTable[key], word) == 0)
    {
    return (hashTable[key]);
    }
  else
    {
    int i = 1, j = 1;

    while((i < tableSize) && (hashTable[key+j]) && (strcmp(hashTable[key+j], word) != 0))
      {
      j = ++i*i;
      }

    if(hashTable[key+j] == NULL)
      {
      return (NULL);
      }
    else if(strcmp(hashTable[key+j], word) == 0)
      {
      return(hashTable[key+j]);
      }

    return(NULL);
    }
  }

//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------
//-----------------------------------------------------------------------------

int main( )
{

ifstream dictionary_File("dictionary.txt", ios::in); // Open dictionary

if(dictionary_File.is_open()) // Error check opening file
 {
  cout << "\nDictionary file open." << endl;
 }
else
 {
  cout << "\nDictionary file did not open. Exiting." << endl;
  return(Error);
 }

//-----------------------------------------------------------------------------

ifstream check_File("check.me", ios::in); // Open check file

if(check_File.is_open()) // Error check opening file
 {
  cout << "\nCheck file open." << endl;
 }
else
 {
  cout << "\nCheck file did not open. Exiting." << endl;
  return(Error);
 }

//-----------------------------------------------------------------------------
  
  int tableSize = 104729;     // A prime number.
  char **word;
  char **spellCheck;
  char *hashTable[tableSize];
  char *str;
  const int size = 45428;
  const int num = 47;

//-----------------------------------------------------------------------------
  
  for(int i = 0; i < tableSize; i++) hashTable[i] = NULL;

//-----------------------------------------------------------------------------
  
  string str_word;
  int i=0; 

  word = new char*[size]; // Create array in memory

  while(getline(dictionary_File, str_word))
   {
    word[i] = new char[str_word.size() + 1];   // Allocate
    copy(str_word.begin(), str_word.end(), word[i]);
    //word[str_word.size()] = '\0';

    i++;
   }//end while

//-----------------------------------------------------------------------------  

  // Insert each name into hashTable;

  cout << "\nHashing..." << endl;

  for(int i = 0; i < size; i++)
   {
    insert(word[i], hashTable, hash1(word[i], tableSize), tableSize);
   }

//-----------------------------------------------------------------------------

  // Search hashTable for each word.

  cout << "\nSpell Checking..." << endl;  
  spellCheck = new char*[num];
  int j=0;

     while(getline(check_File, str_word, ' '))  // Getting words from file
      {

       for(int i=0; i < str_word.length(); i++) // Remove punctuation
        if(ispunct(str_word[i])) str_word[i] = '\0';        

       for(int i=0; i < str_word.length(); i++) // Remove spaces
        if(isspace(str_word[i])) str_word[i] = '\0';
         
       spellCheck[j] = new char[str_word.size()];   // Allocate
       copy(str_word.begin(), str_word.end(), spellCheck[j]);
       
       j++;       
      }//end while

    cout << "\nTo this point" << endl;

    cin.ignore(); // Pause
    cin.get();

    for(int i=0; i<num; i++) cout << spellCheck[i] << endl;
    
    for(int i=0; i<num; i++)
     {
      str = findWord(spellCheck[i], hashTable, tableSize);
     
     if(str)
      {
       cout << str << " was found\n" << endl;
      }
     else // Convert to lowercase if not found
      {
       *spellCheck[i] = tolower(*spellCheck[i]); // Conversion
 
        if(str = findWord(spellCheck[i], hashTable, tableSize))
         {
          cout << spellCheck[i] << " was found\n" << endl;
         }
        else cout << spellCheck[i] << " was not found\n" << endl;
      }
      
    }//end for    

  cout << endl;

//-----------------------------------------------------------------------------
 cout << "\nEnd of Program" << endl;
 cin.ignore(); // Pause
 cin.get();
 return(0);
}//end main

//----------------------------------------------------------------------------- 
My updated output:
C:\Users\Zero>g++ HashProblem.cpp

C:\Users\Zero>a.exe

Dictionary file open.

Check file open.

Hashing...

Spell Checking...

To this point


This
is
a
silly
file
containing
red
hens
and
blue
ducks
named
John
ê
Where
oh
have
the
horses
gone
to
the
Moon
or
to
the
sun
what
the
hell

this
Paul
guy
talking
about
he
is
very
odd
on
a
sunny
day
with
dogs
barking
this was found

is was found

 was not found

silly was found

file was found

containing was found

red was found

hens was found

and was found

blue was found

ducks was found

named was found

John was found

 was not found

where was found

oh was found

have was found

the was found

horses was found

gone was found

to was found

the was found

Moon was found

or was found

to was found

the was found

sun was found

what was found

the was found

hell was found

 was not found

this was found

Paul was found

guy was found

talking was found

about was found

he was found

is was found

very was found

odd was found

on was found

 was not found

sunny was found

day was found

with was found

dogs was found

barking was found



End of Program
The thing is, you output everything before line 236. Try outputting again on line 237. You can see that the problem lies somewhere in there. Then from there you will have to go back and narrow exactly where in there the problem is. Somewhere in the find function it is returning a null pointer for 'a'.
Last edited on
Topic archived. No new replies allowed.