Help with hash tables

Hello, I need to count the number of collisions I get when I use linear probing and quadratic probing. The problem is that the majority of the time I get results that are nearly identical or results that only slightly differ. Surely this can't be right. I shouldn't be getting nearly identical results most of the time, right? The amount of collisions does differ occasionally, but it usually only happens once in every five attempts. I'm really puzzled about why this is happening and would appreciate any suggestions. Thanks.


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#include <iostream>
#include <ctime>
#include <stdlib.h>
#include <functional>
#include <cmath>
#include <string>
#include <iomanip>

void fillArray(int[], int);
int linearProbeHash(int[], int);
int quadraticProbeHash(int[], int);
unsigned int hash(const std::string);

int main()
{
    int arr1[1000];
    int arr2[2000];
    int arr3[3000];
    int arr4[4000];
    int arr5[5000];
    int arr6[6000];
    int arr7[7000];
    int arr8[8000];
    int arr9[9000];
    int arr10[10000];

    int linearCollisions[10];
    int quadraticCollisions[10];
    int doubleCollisions[10];

    fillArray(arr1, 1000);
    fillArray(arr2, 2000);
    fillArray(arr3, 3000);
    fillArray(arr4, 4000);
    fillArray(arr5, 5000);
    fillArray(arr6, 6000);
    fillArray(arr7, 7000);
    fillArray(arr8, 8000);
    fillArray(arr9, 9000);
    fillArray(arr10, 10000);

    //Gather data for each hash function
    linearCollisions[0] = linearProbeHash(arr1, 1000);
    linearCollisions[1] = linearProbeHash(arr2, 2000);
    linearCollisions[2] = linearProbeHash(arr3, 3000);
    linearCollisions[3] = linearProbeHash(arr4, 4000);
    linearCollisions[4] = linearProbeHash(arr5, 5000);
    linearCollisions[5] = linearProbeHash(arr6, 6000);
    linearCollisions[6] = linearProbeHash(arr7, 7000);
    linearCollisions[7] = linearProbeHash(arr8, 8000);
    linearCollisions[8] = linearProbeHash(arr9, 9000);
    linearCollisions[9] = linearProbeHash(arr10, 10000);

    quadraticCollisions[0] = quadraticProbeHash(arr1, 1000);
    quadraticCollisions[1] = quadraticProbeHash(arr2, 2000);
    quadraticCollisions[2] = quadraticProbeHash(arr3, 3000);
    quadraticCollisions[3] = quadraticProbeHash(arr4, 4000);
    quadraticCollisions[4] = quadraticProbeHash(arr5, 5000);
    quadraticCollisions[5] = quadraticProbeHash(arr6, 6000);
    quadraticCollisions[6] = quadraticProbeHash(arr7, 7000);
    quadraticCollisions[7] = quadraticProbeHash(arr8, 8000);
    quadraticCollisions[8] = quadraticProbeHash(arr9, 9000);
    quadraticCollisions[9] = quadraticProbeHash(arr10, 10000);


    //Display all of the data
    std::cout << std::left << std::setw(40) << "Size: ";
    for(int i = 0; i < 10; ++i)
        std::cout << std::setw(10) << (i + 1) * 1000;

    std::cout << std::left << std::setw(41) << "\nLinear: ";
    for(int i = 0; i < 10; ++i)
        std::cout << std::setw(10) << linearCollisions[i];

    std::cout << std::left << std::setw(41) << "\nQuadratic: ";
    for(int i = 0; i < 10; ++i)
        std::cout << std::setw(10) << quadraticCollisions[i];

    return 0;
}

int quadraticProbeHash(int array[], int size)
{
    //Initialize every element in the hash table to nullptr
    //This makes it easier to tell if a collision has occurred
    int *hashTable[10001] = {nullptr};

    int collisionCount = 0;
    int index;
    int start;

    for(int i = 0; i < size; ++i)
    {
        char buffer [33];
        //index = intHash(array[i]) %10001;
        index = hash(std::string(itoa(array[i], buffer, 10))) % 10001;
        if(hashTable[index] == nullptr)
        {
            hashTable[index] = &array[i];
        }
        else
        {
            ++collisionCount;
            int j = 0;
            int position = index;
            do
            {
                if(hashTable[index] == nullptr)
                {
                    hashTable[index] = &array[i];
                }
                else
                {
                    index = (position + (int)std::pow(j, 2)) % 10001;
                }
                    j++;
            }while(hashTable[j] != nullptr);
        }
    }
    return collisionCount;
}

int linearProbeHash(int array[], int size)
{
    //Initialize every element in the hash table to nullptr
    //This makes it easier to tell if a collision has occurred
    int *hashTable[10001] = {nullptr};

    int collisionCount = 0;
    int index;

    for(int i = 0; i < size; ++i)
    {

        char buffer [33];
        index = hash(std::string(itoa(array[i], buffer, 10))) % 10001;
        if(hashTable[index] == nullptr)
        {
            hashTable[index] = &array[i];
        }
        else
        {
            ++collisionCount;
            int j = 0;
            do
            {
                if(hashTable[j] == nullptr)
                {
                    hashTable[j] = &array[i];
                }
                else
                    ++j;
            }while(hashTable[j] != nullptr);
        }
    }
    return collisionCount;
}
//Accepts an array and size of the array and fills the array with random integers.
void fillArray(int array[], int size )
{
    srand(time(0));
    for(int i = 0; i < size; ++i)
        array[i] = rand();
}

//This hash function was found in the book Data Structures and Algorithm Analysis in C++ (Textbook for the class)
unsigned int hash(const std::string key)
{
    unsigned int hash = 0;

    for(char ch :key)
        hash = 37 * hash + ch;

    return hash;
}
its probably the nature of the hash function you are using which is the same for both methods and the size of the arrays is the same. And, your hash function is questionable, as it would give too similar results for 'abcd' vs 'abce', this is not desirable.

Just for fun see what you get if you do this:

...
hash = 37 * hash + ch;
srand(hash);
hash ^= rand(); //xor is one of several good possible candidate modifications.

and, its <cstdlib>.
theres a c++ random that is better, but one thing at a time.




Last edited on
Topic archived. No new replies allowed.