Tips on bringing down runtime for HashTable

Hello recently in my first OOP c++ class we learned about unordered sets / hashing and created a program that loads the words of shakesspere and the dictionary. Then compares each word to see if it is in todays dictionary. Anyways I made the program and already exceed the time requirements however I was curious if anyone here has tips on how to optimize my code or code in general to make it run faster. Thank you.

We are'not supposed to change main our assignment was the .h but ill post it anyways, Thanks again!

Here is my .h file

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
 #ifndef HASHTABLE_H_INCLUDED
#define HASHTABLE_H_INCLUDED

#include <iostream>
#include <list>
#include <fstream>
#include <chrono>
#include <vector>
#include <algorithm>
#include <iterator>
#include <cstdlib>


using namespace std;
using std::chrono::system_clock;

const size_t HASHTABLE_SIZE = 1048576;

template<typename T>
class HashTable
{
public:

HashTable();
~HashTable(){delete[] hashTable;}
template<typename InputIterator> HashTable(InputIterator first, InputIterator last);
using iterator = list<T>*;
iterator begin(){ return hashTable; }
iterator end(){ return hashTable + HASHTABLE_SIZE; }
void insert(const T& x){hashTable[hash_it(x)].push_back(x);}
iterator find(const T& key)
{
    size_t hash = hash_it(key);
    list<T>& temp = hashTable[hash];
    if(std::find(temp.begin(),temp.end(), key) != temp.end()) ///want to compare each value to key
    {
        return hashTable + hash;
    }
    else
    {
        return end();
    }
}

private:

list<T>* hashTable;
size_t hash_it(const string& key)
{
    size_t hash = 0;
    for (size_t i=0, n = key.size(); i < n; i++)
    hash = (hash << 2) ^ key[i];
    return hash % HASHTABLE_SIZE;
}
size_t hash_it(const int& key)
{
    return key * (key+3) % HASHTABLE_SIZE;
}

};

///constructor
template<typename T>
HashTable<T>::HashTable()
{
    hashTable = new list<T>[HASHTABLE_SIZE];
}

template<typename T>
template<typename InputIterator>
HashTable<T>::HashTable(InputIterator first, InputIterator last)
{
    ///allocate
    ///insert each element from the container starting at first
    hashTable = new list<T>[HASHTABLE_SIZE];
    while(first != last)
    {
        insert(*first++);
    }
}

#endif // HASHTABLE_H_INCLUDED 


here is my main

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#include "HashTable.h"

int main()
{
    istream_iterator<string> eos1;

    cout << "Loading the complete works of Shakespeare into a vector..." << endl;
    ifstream f("shakespeare.txt");
    if(!f) cout << "Failed loading shakespeare.txt" << endl;
    istream_iterator<string> iit(f);
    vector<string> vShake(iit, eos1);

    cout << "Loading English dictionary into hash table as C++ strings..." << endl;
    ifstream f2("words_alpha.txt");
    if(!f2) cout << "Failed loading words_alpha.txt" << endl;
    istream_iterator<string> iit2(f2);
    HashTable<string> ht1(iit2, eos1);
    //ht1.loadTable(f2);

    int foundCount = 0;
    auto t1 = system_clock::now();
    for(const string& str : vShake)
    {
        auto it = ht1.find(str);
        if(it != ht1.end()) foundCount++;
    }
    auto t2 = system_clock::now();
    cout << foundCount << " out of " << vShake.size() << " total words matched.\n";
    cout << "Time to search HashTable: "
        << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
        << "ms" << endl;

    cout << "Loading up a vector with 500000 randoms [0:30000]" << endl;
    vector<int> vElev;
    for(size_t i = 0; i < 500000; i++)
        vElev.push_back(rand() % 30001);

    cout << "Loading elevations file into hash table as ints..." << endl;
    ifstream f3("map-input-844-480.dat");
    if(!f3) cout << "Failed loading map-input-844-480.dat" << endl;
    istream_iterator<int> iit3(f3);
    istream_iterator<int> eos2;
    HashTable<int> ht2(iit3, eos2);
    //ht2.loadTable(f3);

    foundCount = 0;
    t1 = system_clock::now();
    for(const int& x : vElev)
    {
        auto it = ht2.find(x);
        if(it != ht2.end()) foundCount++;
    }
    t2 = system_clock::now();
    cout << foundCount << " out of " << vElev.size() << " total elevations matched.\n";
    cout << "Time to search HashTable: "
        << std::chrono::duration_cast<std::chrono::milliseconds>(t2-t1).count()
        << "ms" << endl;

    return 0;
}
I don't get how your find works.
wouldn't find for a hash table just be

container[hashfunction(data)] ? No iteration should be happening in find, unless you are handling collisions, but it looks like it always iterates.

why a list, would a vector be more efficient here?

What part does your profiler show as being slow?
The number one tip to optimise your code; measure it to see what's taking so long, then make that bit faster.

Don't guess. Measure. There are many ways to profile.
Topic archived. No new replies allowed.