Urgent! Need help assigning a hash value to text in a hashtable

I need help writing a program in Visual Studio that accesses a .txt file, which has to be read from the .cpp in int main() (as I have it doing so) and then assigns a hash value to it and displays it. As well the program needs to display the number of collisions(must be a max of 6).
1
2
3
4
5
6
7
/***
Example output:
James  789
Tom    333

333    Number of collisions: 1
***/

Given the code below, what do I need to integrate, add or change to accomplish this. I am completely stuck and unsure how to accomplish this
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
//HashT.h
#pragma once
#pragma warning (disable:4996)
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
using namespace std;

const short SIZE_KEY = 32;
const short SIZE_VALUE = 256;
const short DEFAULT_TABLESIZE = 31;

typedef struct Metadata
{
	struct Metadata(char* key, char *value)
	{
		strcpy(this->key, key);
		strcpy(this->value, value);
		next = NULL;
	}

	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	struct Metadata* next;
} MDATA;

class Hashtable
{
	int tablesize;
	int size;
	MDATA** table; //same as Metadata* table[]?;
	long hashString(char* key);
	MDATA* find(char* key);
	MDATA* current_entry;
	int current_index;
public:
	Hashtable(int tablesize = DEFAULT_TABLESIZE);
	bool put(char* key, char* value);
	bool get(char* key, char* value);
	bool contains(char* key);
	int getSize();
	void initIterator();
	bool hasNext();
	void getNextKey(char* key);
};

//main.cpp
#include "HashT.h"
Hashtable::Hashtable(int tablesize)
{
	size = 0;
	this->tablesize = tablesize;        //31
	table = new MDATA*[tablesize];   //31

	for (int i = 0; i < tablesize; i++)
		table[i] = NULL;                 //31 arrays
}

MDATA* Hashtable::find(char* key)
{
	int bucket = hashString(key);
	MDATA* temp = table[bucket];

	while (temp != NULL)
	{
		if (strcmp(key, temp->key) == 0)
			return temp;
		temp = temp->next;
	}
	return NULL;
}

long Hashtable::hashString(char* key)        //used to return hash number 
{
	int n = strlen(key);
	long h = 0;

	for (int i = 0; i < n; i++)
		h = (h << 2) + key[i];

	return abs(h % tablesize);
}

bool Hashtable::put(char* key, char* value)
{
	if (find(key) != NULL)
		return false;

	MDATA* entry = new MDATA(key, value);
	int bucket = hashString(key);
	entry->next = table[bucket];
	table[bucket] = entry;
	size++;
	return true;
}

bool Hashtable::get(char* key, char* value)
{
	MDATA* temp = find(key);

	if (temp == NULL)
	{
		value[0] = '\0';
		return false;
	}

	else
	{
		strcpy(value, temp->value);
		return true;
	}
}

bool Hashtable::contains(char* key)
{
	if (find(key) == NULL)
		return false;
	else
		return true;
}

int Hashtable::getSize()
{
	return size;
}

void Hashtable::initIterator()
{
	current_entry = NULL;
	current_index = tablesize;

	for (int i = 0; i < tablesize; i++)
	{
		if (table[i] == NULL)
			continue;
		else
		{
			current_entry = table[i];
			current_index = i;
			break;
		}
	}
}

bool Hashtable::hasNext()
{
	if (current_entry == NULL)
		return false;
	else
		return true;
}

void Hashtable::getNextKey(char* key)
{
	if (current_entry == NULL)
	{
		key[0] = '\0';
		return;
	}
	strcpy(key, current_entry->key);

	if (current_entry->next != NULL)
		current_entry = current_entry->next;

	else
	{
		for (int i = current_index + 1; i < tablesize; i++)
		{
			if (table[i] == NULL)
				continue;
			current_entry = table[i];
			current_index = i;
			return;
		}
		current_entry = NULL;
		current_index = tablesize;
	}
}

void displayAll(Hashtable* hashT);

int main()
{
	Hashtable* hashT = new Hashtable();
	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	int table = 31;
	int h = 30;
	
	ifstream in("Dictionary.txt");

	if (!in)
	{
		cout << "Cannot open input file.\n";
		return 1;
	}

	char str[500];

	while (in)
	{
		in.getline(str, 500);  // delim defaults to '\n'
		if (in) cout << str << endl;

	}

	for (int i = 0; i < table; i++)
	{
		h = abs(i%table);
		cout << h << endl;
		
		strcpy(key, h);
		strcpy(value, "str");
		if (!hashT->contains(key))
		{
			cout << "adding node - key: " << key << " value: " << value << endl;
			hashT->put(key, value);
		}

	}

	strcpy(key, "235");
	strcpy(value, "Joe");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	strcpy(key, "415");
	strcpy(value, "Henry");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	strcpy(key, "999");
	strcpy(value, "James");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	displayAll(hashT);

	system("pause");
	return 0;
}

void displayAll(Hashtable* hashT)
{
	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	cout << "Current nodes in hashtable: " << endl;
	hashT->initIterator();
	while (hashT->hasNext())
	{
		hashT->getNextKey(key);
		hashT->get(key, value);
		cout << "key: " << key << " value: " << value << endl;
	}
}

//Dictionary.txt
Alpha Cnetauri Base
Antimatter containment vessel
Barnards Star
Battleship
Beverly Crusher
Data
Deana Troi
Earth
Enterprise
Explorer Class
Frigates
Geordi La Forge
Gunships
Hikaru Sulu
Impulse Engines
Intrepid
James T Kirk
Jean Luc Picard
Klingon Warbird
Leonard McCoy
/**
This is not the full list in the Dictionary.txt file, there are 39 total words
**/
Last edited on
would it work if I did something like
1
2
3
4
5
6
7
8
9
10
11
12
13
14
if(i=0; i<500;)
{
    for(str[i]<29)
    {
          strcpy(key, "");
	  strcpy(value, "");

	  if (!hashT->contains(key))
	  {
		  cout << "adding node - key: " << key << " value: " << value << endl;
		  hashT->put(key, value);
	  }
     }
}
it may, i didnt study your code.
what I usually do here is use the SHA1 algorithm on the string, and you can modulo (% operator) that into your hash table's max size. If the table is large enough, this should work just fine for most generic text data. And the sha1 code is available all over the web for free, so it shouldnt take but a moment to get it. A lot of the sha1 code generates the value as a hex string instead of a number, but you can convert it easily enough...


this is doing a lot of work ... c++ has a built in hash table container, just so you know.
Its good to do it the hard way once, to understand it better.
Last edited on
I was think about putting
1
2
3
4
5
6
7
ifstream in("Dictionary.txt");

	if (!in)
	{
		cout << "Cannot open input file.\n";
		return 1;
	}

or
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
	char str[500];

	while (in)
	{
		in.getline(str, 500); 
		if (in)
		{
			str = str%table;
			cout << str << endl;
		}

	}
	
	/**for (int j = 0; j < table; j++)
	{
		j = abs(j%table);
		cout << j << endl;

		strcpy(key, j);
		strcpy(value, "str");
		if (!hashT->contains(key))
		{
			cout << "adding node - key: " << key << " value: " << value << endl;
			hashT->put(key, value);
		}

	}**/

but neither return what I was trying to accomplish
if the sha1 did not work, a really, really cheezy way to do it...

union cheese
{
unsigned long long ll;
char c[8];
};

cheese x;
if(strlen(yourstring >7)
memcpy(x.c, yourstring, 8);
else
{
x.ll = 0; //important! if you don't do this stray bytes will creep in and mess up the hash.
strcpy(x.c, yourstring)
}
hashT->put(x.ll % hashtablesize, yourstring);

strings that are identical for the first 8 chars will collide ON TOP OF collides from the modulus function based off table size. This means the approach is quick, but poor. It also has all kinds of memory gotchas ... I think i did it right, but memcpy and c-strings etc are not exactly good things to use, and I think unions are frowned upon as well unless absolutely needed to serialize something. Worse, data will 'bunch' around words, that is, assuming the strings are english words, the distributions of letters etc will make the keys very "not random" and poorish.

this is, by the way, exactly the same as casting a char * off an integer and dumping the bytes in directly to create a number. The union just bypasses the pointer magic code for you, or the bit shifting magic code, all 3 approaches do the same thing in the end.


Last edited on
so in terms of my code, where would i implement what you just wrote? I asked my instructor and they said to either add the dictionary.txt to a 2 dimensinal array or save it to a linked list. Since my hashtable is written for a linked list I am going to try and save the text as a linked list as well.
Last edited on
you would put the union off to the side; its like a class or type def.

The usage of it should be wrapped in a function that just takes a string and returns an integer.
then a clean way to do it is to call that function in your hashT.put function. Then put only needs to take a string, and it puts it in the data structure. But as you have it written, you will have to call the function first, get the value, and pass it to the put function, or do it in-line:

hashT->put( function(yourstring), yourstring);


And once you have this much working you can mess with it to get the collisions down, doing something like srand(x.ll) rand() (this will generate a key that is better. rand() gives the same sequence every time for the same srand(value), so calling srand each time turns it into a hash function). After doing this, your collisions and table spread of the data should be "decent".

The Sha1 approach is still 1000 times better, but this is just a few lines of simple code that more or less works, bad as it is.

(I say more or less because it is still not a super good hash function, and you will still see problems because of the way rand is coded, but its good enough for schoolwork or small data sets).

Unimportant, but just to say it... you are tying one of the fastest and most powerful tools (hash tables) to one of the slowest, clunkiest tools (lists). Lists are all the time allocating memory and doing mostly not-necessary pointer manipulations, and they are sluggish because of it. Hash tables tend to dump into a fixed sized bucket with direct access and are very, very fast. Putting the two together kills a lot of the advantages of the hash table. You do what the professor (or in the workplace, requirements etc) call for, but be AWARE of what you are doing to yourself and try to see if there is a reason to do it or if you need a re-design.


Last edited on
I don't think we're allowed to use unions. I've updated the code here based on suggestions from my instructor but don't know what to do next

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
  #pragma once
#pragma warning (disable:4996)
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
using namespace std;

const short SIZE_KEY = 32;
const short SIZE_VALUE = 256;
const short DEFAULT_TABLESIZE = 31;

typedef struct Metadata
{
	struct Metadata(char* key, char *value)
	{
		strcpy(this->key, key);
		strcpy(this->value, value);
		next = NULL;
	}

	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	struct Metadata* next;
} MDATA;

class Hashtable
{
	int tablesize;
	int size;
	MDATA** table;
	long hashString(char* key);
	MDATA* find(char* key);
	MDATA* current_entry;
	int current_index;
public:
	Hashtable(int tablesize = DEFAULT_TABLESIZE);
	bool put(char* key, char* value);
	bool get(char* key, char* value);
	bool contains(char* key);
	bool hasNext();
	int getSize();
	void Iterator();
	void getNextKey(char* key);
	void text();
};

struct dictionary
{
	string words;
	dictionary* next;
};
dictionary *list;
dictionary *p, *q;

//main.cpp
#include "Hash.h"
Hashtable::Hashtable(int tablesize)
{
	size = 0;
	this->tablesize = tablesize;        //31
	table = new MDATA*[tablesize];   //31

	for (int i = 0; i < tablesize; i++)
		table[i] = NULL;                 //31 arrays
}

MDATA* Hashtable::find(char* key)
{
	int bucket = hashString(key);
	MDATA* temp = table[bucket];

	while (temp != NULL)
	{
		if (strcmp(key, temp->key) == 0)
			return temp;
		temp = temp->next;
	}
	return NULL;
}

//used to return hash number
long Hashtable::hashString(char* key)    
{
	int n = strlen(key);
	long h = 0;

	for (int i = 0; i < n; i++)
	{
		h = (3 * h + key[i]);
		cout << h;      //Gives me weird outputs like 459..,5908...
	}

	return abs(h % tablesize);
}

bool Hashtable::put(char* key, char* value)
{
	if (find(key) != NULL)
		return false;

	MDATA* entry = new MDATA(key, value);
	int bucket = hashString(key);
	entry->next = table[bucket];
	table[bucket] = entry;
	size++;
	return true;
}

int Hashtable::getSize()
{
	return size;
}

bool Hashtable::get(char* key, char* value)
{
	MDATA* temp = find(key);

	if (temp == NULL)
	{
		value[0] = '\0';
		return false;
	}

	else
	{
		strcpy(value, temp->value);
		return true;
	}
}

bool Hashtable::contains(char* key)
{
	if (find(key) == NULL)
		return false;
	else
		return true;
}

void Hashtable::Iterator()
{
	current_entry = NULL;
	current_index = tablesize;

	for (int i = 0; i < tablesize; i++)
	{
		if (table[i] == NULL)
			continue;
		else
		{
			current_entry = table[i];
			current_index = i;
			break;
		}
	}
}

bool Hashtable::hasNext()
{
	if (current_entry == NULL)
		return false;
	else
		return true;
}

void Hashtable::getNextKey(char* key)
{
	if (current_entry == NULL)
	{
		key[0] = '\0';
		return;
	}
	strcpy(key, current_entry->key);

	if (current_entry->next != NULL)
		current_entry = current_entry->next;

	else
	{
		for (int i = current_index + 1; i < tablesize; i++)
		{
			if (table[i] == NULL)
				continue;
			current_entry = table[i];
			current_index = i;
			return;
		}
		current_entry = NULL;
		current_index = tablesize;
	}
}

void displayAll(Hashtable* hashT)
{
	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	cout << "Current nodes in hashtable: " << endl;
	hashT->Iterator();
	while (hashT->hasNext())
	{
		hashT->getNextKey(key);
		hashT->get(key, value);
		cout << "key: " << key << " value: " << value << endl;
	}
}

int main()
{
	Hashtable* hashT = new Hashtable();
	char key[SIZE_KEY];
	char value[SIZE_VALUE];

	ifstream inputFile;
	list = NULL;
	p = new dictionary;

	inputFile.open("Dictionary.txt");

	while (!inputFile.eof())
	{
		p = new dictionary;
		getline(inputFile, p->words);
		cout << p->words << endl;

		strcpy(key, " ");
		strcpy(value, p->words);   //IS GIVING ME AN ERROR BECAUSE OF MISMATCHED TYPES

		if (!hashT->contains(key))
		{
			cout << "key: " << key << " value: " << value << endl;
			hashT->put(key, value);
		}
	}
	if (list == NULL)
		list = p;
	else
	{
		q = list;
		while (q->next != NULL)
			q = q->next;
		q->next = p;
	}
	inputFile.close();
	/******************************************************
	FOR TESTING PURPOSES
	string line_;
	ifstream file_("Dictionary.txt");
	if (file_.is_open())
	{
	while (getline(file_, line_))
	{
	cout << line_ << '\n';
	}
	file_.close();
	}
	
	strcpy(key, "111");
	strcpy(value, "Bob");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	strcpy(key, "415");
	strcpy(value, "Henry");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}
	**************************************************/

	displayAll(hashT);

	system("pause");
	return 0;
}

//Dictionary.txt file (full list contains 39 lines of words
/****************************************************************
Alpha Cnetauri Base
Antimatter containment vessel
Barnards Star
Battleship
Beverly Crusher
Data

What sample output should look like:
Battleship - 236
Data - 111
****************************************************************/
Last edited on
yes. string and char* are not the same at all. string is a c++ object with a lot of functions and things. char* is a block of memory. You should generally use strings in c++ unless you want a block of bytes as bytes.

If you are going to use strings, you can still build a number, and without the union, with simple code.

string s = "test";
unsigned long long ll;
unsigned char * cp = (unsigned char *)(&ll); //the union made this look nicer but does the same thing. I said that above.

ll = 0;
for(int I = 0; I < s.length() && I <8; I++)
cp[I] = s[I];

and convert ll to a key again as before.

you can do the same thing with bit shifting, and lose the pointers entirely. its all the same. (this one I am not sure of... its late, and bit shifting is annoying to think about... but the idea is right, if not the code snip)

ll = 0;
for(int I = 0; I < s.length() && I <8; I++)
{
ll = ll << 8;
ll+= s[I];
}


Last edited on
It's really important to know and understand how char* works as it used a lot in c++. Otherwise it can affect you and might make you make mistakes which cause with errors. Trust me, c errors are the worst kind and are hard to be detected. You can always use some program like checkmarx and others as help if it's getting too hard dealing with those errors.
Good luck!
Ben.
I made some major changes to my code and finally got it to do what I wanted it to do. One of the notes from my instructor was that I shouldn't have opened the .txt file in main() but rather use a constructor or member function in my class.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
#pragma once
#include <iostream>
using namespace std;
#include <fstream>
#include <string>


const int TABLESIZE = 31;

class HashTable
{
	struct Node
	{
		int		collision;
		string	word;
		Node*	next;
	};
	Node* head[TABLESIZE];
public:
	int HashString(string hashkey);
	void put(string dictionary);
	void display();
};

//main file

#include "HashT.h"

int HashTable::HashString(string hashkey)
{
	long hash = 0;

	for (int i = 0; i < hashkey.length(); i++)
		hash = ((hash * i) + (hashkey[i])) % TABLESIZE;

	return abs(hash);
}


void HashTable::put(string dictionary)
{
	int	key = 0;

	Node *newNode, *nodePtr;
	string		temp;
	ifstream	text;


	for (int i = 0; i < TABLESIZE; i++)
		head[i] = NULL;

	text.open(dictionary);
	while (!text.eof())
	{
		newNode = new Node;
		getline(text, temp);

		if (!temp.empty())
		{
			key = HashString(temp);

			newNode->word = temp;
			newNode->next = NULL;
		}

		if (!head[key])
		{
			head[key] = newNode;
			newNode->collision = 0;
		}
		else
		{
			newNode->collision = 1;
			nodePtr = head[key];
			while (nodePtr->next)
			{
				nodePtr = nodePtr->next;
				newNode->collision++;
			}
			nodePtr->next = newNode;
		}
	}
	text.close();
}

void HashTable::display()
{
	Node *displayHash;
	displayHash = new Node;

	for (int i = 0; i < TABLESIZE; i++)
	{
		if (i < TABLESIZE)
		{
			displayHash = head[i];
		}

		while (displayHash)
		{
			if (!displayHash->word.empty())
				cout << "HashKey " << i << '\t' << displayHash->word << "\n" << "Collision:" << '\t' << displayHash->collision << endl;

			displayHash = displayHash->next;
		}
	}
}

int main()
{
	string	file = "dictionary.txt";
	HashTable obj1;

	obj1.put(file);
	obj1.display();

	system("pause");
	return 0;
}
Topic archived. No new replies allowed.