Urgent! Need help assigning a hash value

Forum

Forum
General C++ Programming
Urgent! Need help assigning a hash value

Urgent! Need help assigning a hash value to text in a hashtable

Apr 17, 2017 at 9:34pm

I need help writing a program in Visual Studio that accesses a .txt file, which has to be read from the .cpp in int main() (as I have it doing so) and then assigns a hash value to it and displays it. As well the program needs to display the number of collisions(must be a max of 6).

/***
Example output:
James  789
Tom    333

333    Number of collisions: 1
***/

Given the code below, what do I need to integrate, add or change to accomplish this. I am completely stuck and unsure how to accomplish this

//HashT.h
#pragma once
#pragma warning (disable:4996)
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
using namespace std;

const short SIZE_KEY = 32;
const short SIZE_VALUE = 256;
const short DEFAULT_TABLESIZE = 31;

typedef struct Metadata
{
	struct Metadata(char* key, char *value)
	{
		strcpy(this->key, key);
		strcpy(this->value, value);
		next = NULL;
	}

	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	struct Metadata* next;
} MDATA;

class Hashtable
{
	int tablesize;
	int size;
	MDATA** table; //same as Metadata* table[]?;
	long hashString(char* key);
	MDATA* find(char* key);
	MDATA* current_entry;
	int current_index;
public:
	Hashtable(int tablesize = DEFAULT_TABLESIZE);
	bool put(char* key, char* value);
	bool get(char* key, char* value);
	bool contains(char* key);
	int getSize();
	void initIterator();
	bool hasNext();
	void getNextKey(char* key);
};

//main.cpp
#include "HashT.h"
Hashtable::Hashtable(int tablesize)
{
	size = 0;
	this->tablesize = tablesize;        //31
	table = new MDATA*[tablesize];   //31

	for (int i = 0; i < tablesize; i++)
		table[i] = NULL;                 //31 arrays
}

MDATA* Hashtable::find(char* key)
{
	int bucket = hashString(key);
	MDATA* temp = table[bucket];

	while (temp != NULL)
	{
		if (strcmp(key, temp->key) == 0)
			return temp;
		temp = temp->next;
	}
	return NULL;
}

long Hashtable::hashString(char* key)        //used to return hash number 
{
	int n = strlen(key);
	long h = 0;

	for (int i = 0; i < n; i++)
		h = (h << 2) + key[i];

	return abs(h % tablesize);
}

bool Hashtable::put(char* key, char* value)
{
	if (find(key) != NULL)
		return false;

	MDATA* entry = new MDATA(key, value);
	int bucket = hashString(key);
	entry->next = table[bucket];
	table[bucket] = entry;
	size++;
	return true;
}

bool Hashtable::get(char* key, char* value)
{
	MDATA* temp = find(key);

	if (temp == NULL)
	{
		value[0] = '\0';
		return false;
	}

	else
	{
		strcpy(value, temp->value);
		return true;
	}
}

bool Hashtable::contains(char* key)
{
	if (find(key) == NULL)
		return false;
	else
		return true;
}

int Hashtable::getSize()
{
	return size;
}

void Hashtable::initIterator()
{
	current_entry = NULL;
	current_index = tablesize;

	for (int i = 0; i < tablesize; i++)
	{
		if (table[i] == NULL)
			continue;
		else
		{
			current_entry = table[i];
			current_index = i;
			break;
		}
	}
}

bool Hashtable::hasNext()
{
	if (current_entry == NULL)
		return false;
	else
		return true;
}

void Hashtable::getNextKey(char* key)
{
	if (current_entry == NULL)
	{
		key[0] = '\0';
		return;
	}
	strcpy(key, current_entry->key);

	if (current_entry->next != NULL)
		current_entry = current_entry->next;

	else
	{
		for (int i = current_index + 1; i < tablesize; i++)
		{
			if (table[i] == NULL)
				continue;
			current_entry = table[i];
			current_index = i;
			return;
		}
		current_entry = NULL;
		current_index = tablesize;
	}
}

void displayAll(Hashtable* hashT);

int main()
{
	Hashtable* hashT = new Hashtable();
	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	int table = 31;
	int h = 30;
	
	ifstream in("Dictionary.txt");

	if (!in)
	{
		cout << "Cannot open input file.\n";
		return 1;
	}

	char str[500];

	while (in)
	{
		in.getline(str, 500);  // delim defaults to '\n'
		if (in) cout << str << endl;

	}

	for (int i = 0; i < table; i++)
	{
		h = abs(i%table);
		cout << h << endl;
		
		strcpy(key, h);
		strcpy(value, "str");
		if (!hashT->contains(key))
		{
			cout << "adding node - key: " << key << " value: " << value << endl;
			hashT->put(key, value);
		}

	}

	strcpy(key, "235");
	strcpy(value, "Joe");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	strcpy(key, "415");
	strcpy(value, "Henry");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	strcpy(key, "999");
	strcpy(value, "James");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	displayAll(hashT);

	system("pause");
	return 0;
}

void displayAll(Hashtable* hashT)
{
	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	cout << "Current nodes in hashtable: " << endl;
	hashT->initIterator();
	while (hashT->hasNext())
	{
		hashT->getNextKey(key);
		hashT->get(key, value);
		cout << "key: " << key << " value: " << value << endl;
	}
}

//Dictionary.txt
Alpha Cnetauri Base
Antimatter containment vessel
Barnards Star
Battleship
Beverly Crusher
Data
Deana Troi
Earth
Enterprise
Explorer Class
Frigates
Geordi La Forge
Gunships
Hikaru Sulu
Impulse Engines
Intrepid
James T Kirk
Jean Luc Picard
Klingon Warbird
Leonard McCoy
/**
This is not the full list in the Dictionary.txt file, there are 39 total words
**/

Edit & run on cpp.sh

Last edited on Apr 19, 2017 at 6:02am

Apr 18, 2017 at 6:35pm

F95 (64)

would it work if I did something like

if(i=0; i<500;)
{
    for(str[i]<29)
    {
          strcpy(key, "");
	  strcpy(value, "");

	  if (!hashT->contains(key))
	  {
		  cout << "adding node - key: " << key << " value: " << value << endl;
		  hashT->put(key, value);
	  }
     }
}

Apr 18, 2017 at 6:52pm

jonnin (11493)

it may, i didnt study your code.
what I usually do here is use the SHA1 algorithm on the string, and you can modulo (% operator) that into your hash table's max size. If the table is large enough, this should work just fine for most generic text data. And the sha1 code is available all over the web for free, so it shouldnt take but a moment to get it. A lot of the sha1 code generates the value as a hex string instead of a number, but you can convert it easily enough...

this is doing a lot of work ... c++ has a built in hash table container, just so you know.
Its good to do it the hard way once, to understand it better.

Last edited on Apr 18, 2017 at 6:53pm

Apr 19, 2017 at 6:02am

F95 (64)

I was think about putting

ifstream in("Dictionary.txt");

	if (!in)
	{
		cout << "Cannot open input file.\n";
		return 1;
	}

	char str[500];

	while (in)
	{
		in.getline(str, 500); 
		if (in)
		{
			str = str%table;
			cout << str << endl;
		}

	}
	
	/**for (int j = 0; j < table; j++)
	{
		j = abs(j%table);
		cout << j << endl;

		strcpy(key, j);
		strcpy(value, "str");
		if (!hashT->contains(key))
		{
			cout << "adding node - key: " << key << " value: " << value << endl;
			hashT->put(key, value);
		}

	}**/

but neither return what I was trying to accomplish

Apr 19, 2017 at 1:57pm

jonnin (11493)

if the sha1 did not work, a really, really cheezy way to do it...

union cheese
{
unsigned long long ll;
char c[8];
};

cheese x;
if(strlen(yourstring >7)
memcpy(x.c, yourstring, 8);
else
{
x.ll = 0; //important! if you don't do this stray bytes will creep in and mess up the hash.
strcpy(x.c, yourstring)
}
hashT->put(x.ll % hashtablesize, yourstring);

strings that are identical for the first 8 chars will collide ON TOP OF collides from the modulus function based off table size. This means the approach is quick, but poor. It also has all kinds of memory gotchas ... I think i did it right, but memcpy and c-strings etc are not exactly good things to use, and I think unions are frowned upon as well unless absolutely needed to serialize something. Worse, data will 'bunch' around words, that is, assuming the strings are english words, the distributions of letters etc will make the keys very "not random" and poorish.

this is, by the way, exactly the same as casting a char * off an integer and dumping the bytes in directly to create a number. The union just bypasses the pointer magic code for you, or the bit shifting magic code, all 3 approaches do the same thing in the end.

Last edited on Apr 19, 2017 at 4:51pm

Apr 19, 2017 at 6:57pm

F95 (64)

so in terms of my code, where would i implement what you just wrote? I asked my instructor and they said to either add the dictionary.txt to a 2 dimensinal array or save it to a linked list. Since my hashtable is written for a linked list I am going to try and save the text as a linked list as well.

Last edited on Apr 19, 2017 at 7:23pm

Apr 19, 2017 at 8:11pm

jonnin (11493)

you would put the union off to the side; its like a class or type def.

The usage of it should be wrapped in a function that just takes a string and returns an integer.
then a clean way to do it is to call that function in your hashT.put function. Then put only needs to take a string, and it puts it in the data structure. But as you have it written, you will have to call the function first, get the value, and pass it to the put function, or do it in-line:

hashT->put( function(yourstring), yourstring);

And once you have this much working you can mess with it to get the collisions down, doing something like srand(x.ll) rand() (this will generate a key that is better. rand() gives the same sequence every time for the same srand(value), so calling srand each time turns it into a hash function). After doing this, your collisions and table spread of the data should be "decent".

The Sha1 approach is still 1000 times better, but this is just a few lines of simple code that more or less works, bad as it is.

(I say more or less because it is still not a super good hash function, and you will still see problems because of the way rand is coded, but its good enough for schoolwork or small data sets).

Unimportant, but just to say it... you are tying one of the fastest and most powerful tools (hash tables) to one of the slowest, clunkiest tools (lists). Lists are all the time allocating memory and doing mostly not-necessary pointer manipulations, and they are sluggish because of it. Hash tables tend to dump into a fixed sized bucket with direct access and are very, very fast. Putting the two together kills a lot of the advantages of the hash table. You do what the professor (or in the workplace, requirements etc) call for, but be AWARE of what you are doing to yourself and try to see if there is a reason to do it or if you need a re-design.

Last edited on Apr 19, 2017 at 8:28pm

Apr 20, 2017 at 3:54am

F95 (64)

I don't think we're allowed to use unions. I've updated the code here based on suggestions from my instructor but don't know what to do next

  #pragma once
#pragma warning (disable:4996)
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
using namespace std;

const short SIZE_KEY = 32;
const short SIZE_VALUE = 256;
const short DEFAULT_TABLESIZE = 31;

typedef struct Metadata
{
	struct Metadata(char* key, char *value)
	{
		strcpy(this->key, key);
		strcpy(this->value, value);
		next = NULL;
	}

	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	struct Metadata* next;
} MDATA;

class Hashtable
{
	int tablesize;
	int size;
	MDATA** table;
	long hashString(char* key);
	MDATA* find(char* key);
	MDATA* current_entry;
	int current_index;
public:
	Hashtable(int tablesize = DEFAULT_TABLESIZE);
	bool put(char* key, char* value);
	bool get(char* key, char* value);
	bool contains(char* key);
	bool hasNext();
	int getSize();
	void Iterator();
	void getNextKey(char* key);
	void text();
};

struct dictionary
{
	string words;
	dictionary* next;
};
dictionary *list;
dictionary *p, *q;

//main.cpp
#include "Hash.h"
Hashtable::Hashtable(int tablesize)
{
	size = 0;
	this->tablesize = tablesize;        //31
	table = new MDATA*[tablesize];   //31

	for (int i = 0; i < tablesize; i++)
		table[i] = NULL;                 //31 arrays
}

MDATA* Hashtable::find(char* key)
{
	int bucket = hashString(key);
	MDATA* temp = table[bucket];

	while (temp != NULL)
	{
		if (strcmp(key, temp->key) == 0)
			return temp;
		temp = temp->next;
	}
	return NULL;
}

//used to return hash number
long Hashtable::hashString(char* key)    
{
	int n = strlen(key);
	long h = 0;

	for (int i = 0; i < n; i++)
	{
		h = (3 * h + key[i]);
		cout << h;      //Gives me weird outputs like 459..,5908...
	}

	return abs(h % tablesize);
}

bool Hashtable::put(char* key, char* value)
{
	if (find(key) != NULL)
		return false;

	MDATA* entry = new MDATA(key, value);
	int bucket = hashString(key);
	entry->next = table[bucket];
	table[bucket] = entry;
	size++;
	return true;
}

int Hashtable::getSize()
{
	return size;
}

bool Hashtable::get(char* key, char* value)
{
	MDATA* temp = find(key);

	if (temp == NULL)
	{
		value[0] = '\0';
		return false;
	}

	else
	{
		strcpy(value, temp->value);
		return true;
	}
}

bool Hashtable::contains(char* key)
{
	if (find(key) == NULL)
		return false;
	else
		return true;
}

void Hashtable::Iterator()
{
	current_entry = NULL;
	current_index = tablesize;

	for (int i = 0; i < tablesize; i++)
	{
		if (table[i] == NULL)
			continue;
		else
		{
			current_entry = table[i];
			current_index = i;
			break;
		}
	}
}

bool Hashtable::hasNext()
{
	if (current_entry == NULL)
		return false;
	else
		return true;
}

void Hashtable::getNextKey(char* key)
{
	if (current_entry == NULL)
	{
		key[0] = '\0';
		return;
	}
	strcpy(key, current_entry->key);

	if (current_entry->next != NULL)
		current_entry = current_entry->next;

	else
	{
		for (int i = current_index + 1; i < tablesize; i++)
		{
			if (table[i] == NULL)
				continue;
			current_entry = table[i];
			current_index = i;
			return;
		}
		current_entry = NULL;
		current_index = tablesize;
	}
}

void displayAll(Hashtable* hashT)
{
	char key[SIZE_KEY];
	char value[SIZE_VALUE];
	cout << "Current nodes in hashtable: " << endl;
	hashT->Iterator();
	while (hashT->hasNext())
	{
		hashT->getNextKey(key);
		hashT->get(key, value);
		cout << "key: " << key << " value: " << value << endl;
	}
}

int main()
{
	Hashtable* hashT = new Hashtable();
	char key[SIZE_KEY];
	char value[SIZE_VALUE];

	ifstream inputFile;
	list = NULL;
	p = new dictionary;

	inputFile.open("Dictionary.txt");

	while (!inputFile.eof())
	{
		p = new dictionary;
		getline(inputFile, p->words);
		cout << p->words << endl;

		strcpy(key, " ");
		strcpy(value, p->words);   //IS GIVING ME AN ERROR BECAUSE OF MISMATCHED TYPES

		if (!hashT->contains(key))
		{
			cout << "key: " << key << " value: " << value << endl;
			hashT->put(key, value);
		}
	}
	if (list == NULL)
		list = p;
	else
	{
		q = list;
		while (q->next != NULL)
			q = q->next;
		q->next = p;
	}
	inputFile.close();
	/******************************************************
	FOR TESTING PURPOSES
	string line_;
	ifstream file_("Dictionary.txt");
	if (file_.is_open())
	{
	while (getline(file_, line_))
	{
	cout << line_ << '\n';
	}
	file_.close();
	}
	
	strcpy(key, "111");
	strcpy(value, "Bob");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}

	strcpy(key, "415");
	strcpy(value, "Henry");

	if (!hashT->contains(key))
	{
		cout << "adding node - key: " << key << " value: " << value << endl;
		hashT->put(key, value);
	}
	**************************************************/

	displayAll(hashT);

	system("pause");
	return 0;
}

//Dictionary.txt file (full list contains 39 lines of words
/****************************************************************
Alpha Cnetauri Base
Antimatter containment vessel
Barnards Star
Battleship
Beverly Crusher
Data

What sample output should look like:
Battleship - 236
Data - 111
****************************************************************/

Edit & run on cpp.sh

Last edited on Apr 20, 2017 at 3:55am

Apr 20, 2017 at 4:47am

jonnin (11493)

yes. string and char* are not the same at all. string is a c++ object with a lot of functions and things. char* is a block of memory. You should generally use strings in c++ unless you want a block of bytes as bytes.

If you are going to use strings, you can still build a number, and without the union, with simple code.

string s = "test";
unsigned long long ll;
unsigned char * cp = (unsigned char *)(&ll); //the union made this look nicer but does the same thing. I said that above.

ll = 0;
for(int I = 0; I < s.length() && I <8; I++)
cp[I] = s[I];

and convert ll to a key again as before.

you can do the same thing with bit shifting, and lose the pointers entirely. its all the same. (this one I am not sure of... its late, and bit shifting is annoying to think about... but the idea is right, if not the code snip)

ll = 0;
for(int I = 0; I < s.length() && I <8; I++)
{
ll = ll << 8;
ll+= s[I];
}

Last edited on Apr 20, 2017 at 4:52am

Apr 23, 2017 at 4:26pm

benhart (104)

It's really important to know and understand how char* works as it used a lot in c++. Otherwise it can affect you and might make you make mistakes which cause with errors. Trust me, c errors are the worst kind and are hard to be detected. You can always use some program like checkmarx and others as help if it's getting too hard dealing with those errors.
Good luck!
Ben.

Apr 29, 2017 at 6:44pm

F95 (64)

I made some major changes to my code and finally got it to do what I wanted it to do. One of the notes from my instructor was that I shouldn't have opened the .txt file in main() but rather use a constructor or member function in my class.

#pragma once
#include <iostream>
using namespace std;
#include <fstream>
#include <string>


const int TABLESIZE = 31;

class HashTable
{
	struct Node
	{
		int		collision;
		string	word;
		Node*	next;
	};
	Node* head[TABLESIZE];
public:
	int HashString(string hashkey);
	void put(string dictionary);
	void display();
};

//main file

#include "HashT.h"

int HashTable::HashString(string hashkey)
{
	long hash = 0;

	for (int i = 0; i < hashkey.length(); i++)
		hash = ((hash * i) + (hashkey[i])) % TABLESIZE;

	return abs(hash);
}


void HashTable::put(string dictionary)
{
	int	key = 0;

	Node *newNode, *nodePtr;
	string		temp;
	ifstream	text;


	for (int i = 0; i < TABLESIZE; i++)
		head[i] = NULL;

	text.open(dictionary);
	while (!text.eof())
	{
		newNode = new Node;
		getline(text, temp);

		if (!temp.empty())
		{
			key = HashString(temp);

			newNode->word = temp;
			newNode->next = NULL;
		}

		if (!head[key])
		{
			head[key] = newNode;
			newNode->collision = 0;
		}
		else
		{
			newNode->collision = 1;
			nodePtr = head[key];
			while (nodePtr->next)
			{
				nodePtr = nodePtr->next;
				newNode->collision++;
			}
			nodePtr->next = newNode;
		}
	}
	text.close();
}

void HashTable::display()
{
	Node *displayHash;
	displayHash = new Node;

	for (int i = 0; i < TABLESIZE; i++)
	{
		if (i < TABLESIZE)
		{
			displayHash = head[i];
		}

		while (displayHash)
		{
			if (!displayHash->word.empty())
				cout << "HashKey " << i << '\t' << displayHash->word << "\n" << "Collision:" << '\t' << displayHash->collision << endl;

			displayHash = displayHash->next;
		}
	}
}

int main()
{
	string	file = "dictionary.txt";
	HashTable obj1;

	obj1.put(file);
	obj1.display();

	system("pause");
	return 0;
}

Edit & run on cpp.sh

Topic archived. No new replies allowed.

C++

Forum

Urgent! Need help assigning a hash value to text in a hashtable