Need Help, important and probably simple

I have no idea why it is doing what it is doing. I have it searching through the entire string letter by letter, looking for spaces, punctuation, etc... yet it still is continuing on with the space.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <stdio.h>
#include <cctype>
#include <algorithm>

using namespace std;

const int Error = 1;

//-----------------------------------------------------------------------------

// Given a character string and the size of the hash table, add up the ascii 
// values of each char and return the sum mod table size.

int hash1( const char *key, int tableSize )
  {
  unsigned int hashVal = 0;

  while( *key != '\0' ) hashVal += *key++;

  return hashVal % tableSize;
  }

//------------------------------------------------------------------------------

// Given a character string, and the size of the hash table, this algorithm 
// assumes the key has at least 2 chars plus the null byte.

int hash2( const char *key, int tableSize )
  {
  // 27 is the number of letters plus the blank in the English alphabet; 729 is
  // 27 squared. 

  return ( key[ 0 ] + 27 * key[ 1 ] + 729 * key[ 2 ] ) % tableSize;
  }

//------------------------------------------------------------------------------

// Loop through the key, 1 char at a time. Add the char's value to hashVal shifted
// left by 5 (which is the same as hashVal*32).

int hash3( const char *key, int tableSize )
  {
  unsigned int hashVal = 0;

  while( *key != '\0' ) hashVal = ( hashVal << 5 ) + *key++;

  return hashVal % tableSize;
  }

//------------------------------------------------------------------------------
//-----------------------------------------------------------------------------

// Given a name, an array and a hash key, insert name into the array at location
// hash key. If there is a collison, use quadratic probing to determine where to 
// place the name.


void insert(char *word, char *hashTable[], int key, int tableSize)
  {
  if(hashTable[key] == NULL)
    {
    //cout << "\nInserting " << word << " at index " << key << endl;

    hashTable[key] = strdup(word);
    }
  else
    {
    int i = 1, j = 1;

    //cout << "\nCollision at index " << key << " doing a quadratic probe ..." << endl;

    while((i < tableSize) && (hashTable[key+j]))
      {
      j = ++i*i;
      }
      
    if(i > tableSize)
      {
      cout << "Error, collision index out of range." << endl;
      }
    else
      {
      //cout << "\nInserting " << word << " at index " << (key+j) << endl;
      hashTable[key+j] = strdup(word);
      }
    } 
  }

//-----------------------------------------------------------------------------

// Given a name to find, the hash table and the table size, return a pointer
// to the node containing name (or NULL if not found).

char *findWord(char *word, char *hashTable[], int tableSize)
  {
  int key = hash1(word, tableSize);

  if(hashTable[key] == NULL) return(NULL);   // Word not in list.

  if(strcmp(hashTable[key], word) == 0)
    {
    return (hashTable[key]);
    }
  else
    {
    int i = 1, j = 1;

    while((i < tableSize) && (hashTable[key+j]) && (strcmp(hashTable[key+j], word) != 0))
      {
      j = ++i*i;
      }

    if(hashTable[key+j] == NULL)
      {
      return (NULL);
      }
    else if(strcmp(hashTable[key+j], word) == 0)
      {
      return(hashTable[key+j]);
      }
    return(NULL);
    }
  }

//-----------------------------------------------------------------------------

int main()
{

ifstream dictionary_File("dictionary.txt", ios::in); // Open dictionary

if(!dictionary_File.is_open()) // Error check opening file
 {
  cout << "\nDictionary file did not open. Exiting." << endl;
  return(Error);
 }

//-----------------------------------------------------------------------------

ifstream check_File("check.me", ios::in); // Open check file

if(!check_File.is_open()) // Error check opening file
 {
  cout << "\nCheck file did not open. Exiting." << endl;
  return(Error);
 }

//-----------------------------------------------------------------------------
  
  int tableSize = 104729;     // A prime number.
  char **word;
  char **spellCheck;
  char *hashTable[tableSize];
  char *str;
  const int size = 45428;
  const int num = 47;

//-----------------------------------------------------------------------------
  
  for(int i = 0; i < tableSize; i++) hashTable[i] = NULL;

//-----------------------------------------------------------------------------
  
  string str_word;
  int i=0; 

  word = new char*[size]; // Create in memory

  while(getline(dictionary_File, str_word))
   {
    word[i] = new char[str_word.length()]; // Allocate
    copy(str_word.begin(), str_word.end(), word[i]);

    i++;
   }//end while

//-----------------------------------------------------------------------------  

  // Insert each name into hashTable

  for(int i = 0; i < size; i++)
   {
    insert(word[i], hashTable, hash1(word[i], tableSize), tableSize);
   }

//-----------------------------------------------------------------------------

  // Search hashTable for each word.
  // Format for dictionary search
 
  spellCheck = new char*[num];
  int j=0;

     while(getline(check_File, str_word, ' ')) // Getting words from file
      {
       for(int i=0; i < str_word.length(); i++)
        {
         if(str_word[i] == ' ')   str_word[i] = '\0';    // Replace space with NULL char
         if(ispunct(str_word[i])) str_word.erase(i, 1);  // Remove punctuation          
         if(isspace(str_word[i])) str_word.erase(i, 1);  // Remove spaces
         if(str_word[i] == '\0')  str_word.erase(i, 1);  // Remove NULL
         if(str_word[i] == '\n')  str_word.erase(i, 1);  // Remove \n newline
         if(str_word[i] == '\t')  str_word.erase(i, 1);  // Remove \t tabs 
        }       
         
         spellCheck[j] = new char[str_word.length()];    // Allocate
         strcpy(spellCheck[j], str_word.c_str());
       
       j++;       
      }//end while

//-----------------------------------------------------------------------------    
    
    for(int i=0; i<num; i++)
     {
      str = findWord(spellCheck[i], hashTable, tableSize); // Finding words
     if(str)
      {
       continue; // Continue on to the next word
      }
     else // Convert to lowercase if not found
      {
       *spellCheck[i] = tolower(*spellCheck[i]); // Conversion
 
        if(str = findWord(spellCheck[i], hashTable, tableSize)) // Re-search
         {
          continue; // Continue on to the next word
         }
        else cout << spellCheck[i] << " is misspelled.\n" << endl;
      }
      
    }//end for    

  cout << endl;

//-----------------------------------------------------------------------------

 return(0); // Success
}//end main

//-----------------------------------------------------------------------------  


Output:

C:\Users\Zero>g++ HashProblem.cpp

C:\Users\Zero>a.exe
a is misspelled.

rohn is misspelled.

 is misspelled.

ohwhere is misspelled.

a is misspelled.



C:\Users\Zero>


However, when it was run on a Macbook this was the output:

 was is misspelled.

ohwhere is misspelled.


Why can I not get rid of the space, if I put in a statement like:

1
2
3
if(str_word == " ") 
       //or
      if(str_word == ' ')


It does nothing to change it. I am completely baffled.
for(int i=0; i < str_word.length(); i++) This is problemtic. You are erasing parts of the string in your loop which messes it up.

Example: consider line , 1. Look what happens:
first iteration. i = 0, line is , 1, line.size() is 3.
    symbol line[i] is ',': punctuation so it is erased. No other conditions works.
second iteration. i = 1, line is  1, line.size() is 2;
    symbol line[1] is '1': it is fine, so we go to the next iteration
third iteration. i = 2, line is  1, line.size() is 2. Stop iterating.

Resulting line is  1 (space-one).

1) make custom function checking if character needs to be removed.
1
2
3
4
5
bool need_to_remove(char c)
{
    return c == ' ' || c == '\0' || c == '\n' || c == '\t' || 
           ispunct(c) || isspace(c);
}


2) erase all character at once using erase-remove idiom:
1
2
str_word.erase( std::remove_if(str_word.begin(), str_word.end(), need_to_remove)
                str_word.end() );
Last edited on
That's a good method, I like it.

Although, even after doing this method, this is my output.


C:\Users\Zero>a.exe

a is misspelled.

rohn is misspelled.

 is misspelled.

ohwhere is misspelled.

a is misspelled.



C:\Users\Zero>
while(getline(check_File, str_word, ' ')) If you want to read space-char separated word, use operator>>. Even if you have reasons to use getline, skip leading whitespaces: while(getline(check_File >> std::ws, str_word, ' '))  

Also this new char[str_word.length()]; is wrong: you forgot space for '\0' terminator.
Thanks for the idea to remove the whitespace before. It works, I also found out that my array for num was just one too big, so it would crash trying to access it.

Even though it is fixed, and should do as it needs too, I am still curious as to why it would even read the whitespace to begin with. I would assume that since I have a delimiter of 'space', that it will begin reading immediately after that until the next whitespace, so within the file, the position it would have difficulty with would be here:

rohn.'space''space''space'Where

It would get to rohn., skip the first space, read in the second space, stop at the third space, and start reading Where, making a string of 'space'.

What I don't understand is why his Mac is reading without the 'a is misspelled.' and my computer is reading with 'a is misspelled'. That's weird to me.
Topic archived. No new replies allowed.