Problem with hashing logic

I'm having trouble figuring out the logic behind searching for an object that has been hashed. It's nothing like traversing a link or vector to find a field that matches my search variable which is the state name. Does anybody know of an example program that hashes and returns an object?

StateData class
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16

	bool StateData::operator==(StateData &left)
	{          return (this->name == left.name);		  }


	bool StateData::operator!=(StateData &left)
	{          return (this->name != left.name);     }


	StateData StateData::getStateData(string n)
		{ 
			if (name == n)
				return this;    
	}



hashT class
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
private:
    elemType *HTable;		//pointer to the hash table
    int *indexStatusList;	//pointer to the array indicating
							//the status of a position in the
							//hash table
    int length;				//number of items in the hash table
    int HTSize;				//maximum size of the hash table
};


template <class elemType>
void hashT<elemType>::insert(int hashIndex, elemType& rec)
{
	int pCount;
	int inc;

	pCount = 0;
	inc = 1;

	while(indexStatusList[hashIndex] == 1
		  && HTable[hashIndex] != rec
		  && pCount < HTSize / 2)
	{
		cout <<"inc = " << inc << endl;
		pCount++;
		hashIndex = (hashIndex + inc ) % HTSize;
//		cout << "new hashIndex = " << hashIndex << endl;
		inc = inc + 2;
	}


	if(indexStatusList[hashIndex] != 1)
	{
		HTable[hashIndex] = rec;
		cout << "HTable["<< hashIndex <<"]" <<" = " << rec << endl;
		indexStatusList[hashIndex] = 1;
		length++;
	}
	else
		if(HTable[hashIndex] == rec)
			cerr<<"Error: No duplicates are allowed"<<endl;
		else
			cerr<<"Error: The table is full. "
			    <<"Unable to resolve the collision"<<endl;
}

template <class elemType>
void hashT<elemType>::search(int& hashIndex, const elemType& rec, bool& found)
{
	int pCount;
	int inc;

	pCount = 0;
	inc = 1;

	while(indexStatusList[hashIndex] != 0
		  && HTable[hashIndex] != rec 
		  && pCount < HTSize / 2)
	{
		pCount++;
		hashIndex = (hashIndex + inc ) % HTSize;
		inc = inc + 2;
	}

	if(indexStatusList[hashIndex] == 1 && HTable[hashIndex] == rec )
	{
		hashIndex = hashIndex;
		found = true;
	}
	else
		found = false;
}

template <class elemType>
bool hashT<elemType>::isItemAtEqual(int hashIndex, const elemType& rec)
{ 
	return(HTable[hashIndex] == rec);
}

template <class elemType>
void hashT<elemType>::retrieve(int hashIndex, elemType& rec)
{	
	if(indexStatusList[hashIndex] == 1)
		rec = HTable[hashIndex];
}

template <class elemType>
void hashT<elemType>::remove(int hashIndex, const elemType& rec)
{
	bool found;

	search(hashIndex,rec,found);

	if(found)
	{
		indexStatusList[hashIndex] = -1;
		length--;
	}
	else
		cerr<<"The item to be deleted is not in the list."<<endl;
}

template <class elemType>
void hashT<elemType>::print() const
{
	for(int i = 0; i < HTSize; i++)
		if(indexStatusList[i] != 0)
			cout<<i<<"  "<<indexStatusList[i]
				<<"  "<<HTable[i]<<endl;
}

template <class elemType>
hashT<elemType>::hashT(int size)
{
	assert(size > 0);

	HTable = new elemType[size];
	indexStatusList = new int[size];
	length = 0;
	HTSize = size;

	for(int i = 0; i < size; i++)
		indexStatusList[i] = 0;
}

template <class elemType>
hashT<elemType>::~hashT()
{
	delete [] HTable;
	delete [] indexStatusList;
}



Here is the main
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include <iostream>
#include <fstream>
#include <string>
#include "hashT.h"
#include "stateData.h"

using namespace std;

int hashFunc(string name, int size);

int main()
{
	hashT<StateData> HashTable(50);
	int size = 15;

	ifstream infile;

	infile.open("states.txt");
	if(!infile)
	{
		cout << "Error reading states file.\n";
		exit(1);
	}

	char ch;
	int year, order, key;
	string state, capitol;
	bool found;

	getline(infile, state);

	while(infile)
	{
		getline(infile, capitol);
		infile >> year;
		infile >> order;
		StateData temp;
		temp.setStateInfo(state, capitol, year, order);
		infile.get(ch);
		key = hashFunc(state, size);
		HashTable.insert(key, temp);

		getline(infile, state);
	}

	HashTable.print();

// here is the code in question

	cout<<"Enter item to be deleted: ";
	getline(cin, state);
	cout<<endl;
	key = hashFunc(state, size);   
// how do I determine dtemp in order to execute the remove function?
	HashTable.remove(key, dtemp);

	HashTable.print();


	infile.close();
	system("pause");
	return 0;

}

int hashFunc(string name, int size)
{
	int i, sum, len;
	i= 0;
	sum = 0;
	len = name.length();
	for (int k=0; k < 15 - len; k++)
		name = name + ' ';
	for (int k = 0; k < 5; k++)
	{
		sum = sum + static_cast<int>(name[i]) * 128 * 128
			+ static_cast<int>(name[i+1]) * 128
			+ static_cast<int>(name[i+2]);
		i = i+3;
	}
	return sum % size;
}
Giving this a bump
A hash table can be a simple array:
int table[10];

items are inserted via a hashing function:
1
2
3
4
int hashFunc(int value)
{
  return value % 10;  // the modulus just ensures a valid idx 
}


We can have code like this:
table[ hashFunc(2013) ] = 2013; // 2013 is stored @idx 3

The strength of the hash table is that items can be found quickly via that same hashing function:
std::cout << table[ hashFunc( 2013 ) ];


A concept to understand is that the actual number 2013 has not much to do where it is actually stored. Consider:
1
2
3
4
5
6
7
8
9
10
int hashFunc(int value)
{
  return value % 9;
}

//...

int table[9];
table [ hashFunc(2013) ] = 2013;
std::cout << table [ hashFunc(2013) ];  // the actual index of 2013 doesnt matter 


It gets tricky when different values map to the same index:
1
2
3
// going back to % 10
table[ hashFunc( 2013 ) ] = 2013;
table[ hashFunc( 13 ) ] = 13; // 13 just overwrote 2013  


This is where the index status list comes in, it tells you if that 2013 already existed.
1
2
3
4
// insert
int index = hashFunc(number);
while (status[index] is used) index++;
table[index] = number;


Given the code above, when 13 is inserted the table sees that table[3] already is used, and places 13 at index 4.

Finding the value is basically the same, make the index via hashFunc, and while the index doesnot equal what you are looking for, increase the index.
Last edited on
Topic archived. No new replies allowed.