A newbie questions about STL performance

Hello, I am new to C++ and this forum!

Actually I am doing the practice problem of Google code jam:
https://code.google.com/codejam/contest/351101/dashboard#s=p2

The task is very simple. Given a message (string), output the T9 code (i.e. the sequence of buttons you need to press to input the message in an old mobile phone).
For example, if the given string is "hello world", the program should output "4433555 555666096667775553". The space is mandatory to indicate a pause.


Before I learned STL, I wrote very long and stupid code to get the task done. It checks the ASCII of the string to determine which button should be pressed, and then determine how many times the button should be pressed.

(In the code below, I repeat the whole process for one million times and find the average time it takes)

Stupid version:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include <iostream>
#include <string>
#include <fstream>
#include <ctime>

using namespace std;

int main()
{
    ifstream ifile("C-large-practice.in");
    ofstream ofile("output.txt");
    if(ifile.is_open()){
        clock_t t;
        t = clock();
        int M = 1000000;
        for(int m = 1; m <= M; ++m){
            int case_no;
            const int N = 1500;
            string line;
            ifile>>case_no;
            getline(ifile, line);
            for(int i=1; i<=case_no && getline(ifile, line); i++){
                string T9("");
                for(int j=0;j<=line.length()-1;j++){
                    if(line[j]== ' '){
                        if(T9[T9.length()-1] == '0'){
                            T9 += " ";
                        }
                        T9 += "0";
                    }else if(int(line[j])>=97 && int(line[j])<100){
                        if(T9[T9.length()-1] == '2'){
                            T9 += " ";
                        }
                        T9 += "2";
                    }else if(int(line[j])>=100 && int(line[j])<103){
                        if(T9[T9.length()-1] == '3'){
                            T9 += " ";
                        }
                        T9 += "3";
                    }else if(int(line[j])>=103 && int(line[j])<106){
                        if(T9[T9.length()-1] == '4'){
                            T9 += " ";
                        }T9 += "4";
                    }else if(int(line[j])>=106 && int(line[j])<109){
                        if(T9[T9.length()-1] == '5'){
                            T9 += " ";
                        }T9 += "5";
                    }else if(int(line[j])>=109 && int(line[j])<112){
                        if(T9[T9.length()-1] == '6'){
                            T9 += " ";
                        }T9 += "6";
                    }else if(int(line[j])>=112 && int(line[j])<116){
                        if(T9[T9.length()-1] == '7'){
                            T9 += " ";
                        }T9 += "7";
                    }else if(int(line[j])>=116 && int(line[j])<119){
                        if(T9[T9.length()-1] == '8'){
                            T9 += " ";
                        }T9 += "8";
                    }else if(int(line[j])>=119 && int(line[j])<123){
                        if(T9[T9.length()-1] == '9'){
                            T9 += " ";
                        }T9 += "9";
                    }
                    if(int(line[j])>=97 && int(line[j]) <=114){
                        for(int k=1; k<=((int(line[j])-97)%3); k++){
                                T9 += T9[T9.length()-1];
                        }
                    }
                    if(int(line[j])==115){//s
                                T9 += "777";
                    }
                    if(int(line[j])>=116 && int(line[j]) <=121){
                        for(int k=1; k<=((int(line[j])-98)%3); k++){
                                T9 += T9[T9.length()-1];
                        }
                    }
                    if(int(line[j])==122){//s
                                T9 += "999";
                    }
                }
                ofile<<"Case #"<<i<<": "<<T9<<endl;
            }
        }
        t = clock() - t;
        cout<<"Total time: "<<((float)t)/(CLOCKS_PER_SEC)<<endl;
        cout<<"Average time: "<<((float)t)/(CLOCKS_PER_SEC*M);
    }
    return 0;
}



Later I learned map and redid this problem. It's much shorter and very easy to understand.

STL version:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#include <iostream>
#include <fstream>
#include <map>
#include <ctime>


using namespace std;

int main(){

    ifstream ifile("C-large-practice.in");
    ofstream ofile("Output_large_practice.txt");

    if(ifile.is_open()){
        clock_t t;
        t = clock();
        int M = 1000000;
        for(int m = 1; m <= M; ++m){
            map<char, int> d;
            d['a'] = 2;
            d['b'] = 22;
            d['c'] = 222;
            d['d'] = 3;
            d['e'] = 33;
            d['f'] = 333;
            d['g'] = 4;
            d['h'] = 44;
            d['i'] = 444;
            d['j'] = 5;
            d['k'] = 55;
            d['l'] = 555;
            d['m'] = 6;
            d['n'] = 66;
            d['o'] = 666;
            d['p'] = 7;
            d['q'] = 77;
            d['r'] = 777;
            d['s'] = 7777;
            d['t'] = 8;
            d['u'] = 88;
            d['v'] = 888;
            d['w'] = 9;
            d['x'] = 99;
            d['y'] = 999;
            d['z'] = 9999;
            d[' '] = 0;

            string s;
            char c;
            int i = 0;
            getline(ifile, s);
            while(getline(ifile, s)){
                ofile<<"Case #"<<++i<<": ";
                for(int j = 0; j < s.length(); ++j){
                    if(j>=1 && d[s[j]]%10==d[s[j-1]]%10){
                        ofile<<" ";
                    }
                    ofile<<d[s[j]];
                }
                ofile<<"\n";
            }
        }
        t = clock() - t;
        cout<<"Total time: "<<((float)t)/(CLOCKS_PER_SEC)<<endl;
        cout<<"Average time: "<<((float)t)/(CLOCKS_PER_SEC*M);
    }

    return 0;

}



The performance result are: the stupid version takes around 10-7 s to finish; while the STL takes 10-5 s, which is significantly slower. I uploaded the results of both programs to Google and indeed both program are 'correct'.


So my question is: what's wrong with my STL version? Why is it so much slower? STL should have high performance!

Thank you for reading my long question!
First of all your STL program contains a bug

getline(ifile, s);
while(getline(ifile, s)){

You skip the first line of the file brcause getline is called twice.

Could you explain what is the purpose of this if-statement

1
2
3
                    if(j>=1 && d[s[j]]%10==d[s[j-1]]%10){
                        ofile<<" ";
                    }

Last edited on
This could be a great opportunity to learn what profiling is.

For example, profiling this with gcc/gprof on linux here it spends 55.71% of its time in GNU std::map's member _M_erase() and 43.70% of its time in _M_get_insert_hint_unique_pos().

The reason it does that is of course you destroying and rebuilding the map from scratch on every loop iteration.

Constructing your map once, before the main loop starts (at the same time you're constructing ifstream and ofstream, or even earlier) gives me 2.95e-08 on your second program and 4.02e-08 on the first.

note, map is not the best solution here: a vector/array lookup table would do it in a single memory access, at which point you'd be measuring pretty much nothing but file I/O.
Last edited on
vlad from moscow:

Thanks for your reply! I skip the first line because the input file offered by Google is the number of strings to be processed. In my stupid version I read that number as case_no and use a for loop. In the STL version, I just skip this because I assume the file is perfect and just use a while loop to save a little typing. This may not be a good assumption in other circumstances indeed.

1
2
3
if(j>=1 && d[s[j]]%10==d[s[j-1]]%10){
                        ofile<<" ";
                    }


This if statement check whether a space (pause) is needed. s[j] is the input character at position j. d[s[j]] returns the corresponding button that should be pressed for character s[j] (d for dictionary). d[s[j]] % 10 returns which button should be pressed and ignore the number of times to be pressed. For example, if the input string is 'abc',
d[s[0]] is 2
d[s[1]] is 22
d[s[2]] is 222

But d[s[0]] %10, d[s[1]] %10, d[s[2]] %10 are all 2. So d[s[j]]%10==d[s[j-1]]%10 is true and a space is inserted. The j >= 1 condition skips checking s[0] and s[-1].


Cubbi:
Thanks your reply! I have tried to pull out the construction of map out of the loop and they give very similar result. My case is STL is only slightly slower. But at least they are of the same order of magnitude.

I will try your suggestion using vector/array.


BTW, is there any profiler for windows equivalent to gprof?

I redo the problem using vector:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
#include <vector>
#include <iostream>
#include <fstream>
#include <ctime>
#include <string>

using namespace std;

int main(){

    ifstream ifile("C-large-practice.in");
    ofstream ofile("Output_container_large_2.txt");

    if(ifile.is_open()){
        clock_t t;
        t = clock();
        int M = 1000000;
        for(int m = 1; m <= M; ++m){
            vector<int> d;
            d.push_back(0);     //space
            d.push_back(2);     //a
            d.push_back(22);    //b
            d.push_back(222);
            d.push_back(3);
            d.push_back(33);
            d.push_back(333);
            d.push_back(4);
            d.push_back(44);
            d.push_back(444);
            d.push_back(5);
            d.push_back(55);
            d.push_back(555);
            d.push_back(6);
            d.push_back(66);
            d.push_back(666);
            d.push_back(7);
            d.push_back(77);
            d.push_back(777);
            d.push_back(7777);
            d.push_back(8);
            d.push_back(88);
            d.push_back(888);
            d.push_back(9);
            d.push_back(99);
            d.push_back(999);
            d.push_back(9999);

            string s;
            int case_no = 0, ccode, pcode;      //current ascii code, previous ascii code
            getline(ifile, s);
            while(getline(ifile, s)){
                ofile<<"Case #"<<++case_no<<": ";
                for(int i = 0; i<s.length();++i){
                    if(s[i] == ' '){
                        ccode = 0;
                    }else{
                        ccode = int(s[i]) - 96;
                    }
                    if(i>0 && d[ccode] % 10 == d[pcode] % 10){
                        ofile<<' ';
                    }
                    pcode = ccode;
                    ofile<<d[ccode];
                }
                ofile<<endl;
            }
        }
        t = clock() - t;
        cout<<"Total time: "<<((float)t)/(CLOCKS_PER_SEC)<<endl;
        cout<<"Average time: "<<((float)t)/(CLOCKS_PER_SEC*M);
    }
    return 0;
}


The average time is about 10-6. It's an order faster than using map but still an order slower than my stupid version. If processing time is important, is it usually better not to use STL?
You are still recreating the vector every loop. You only need to make it once.
While the construction of a vector is less expensive than that of the map, you still shouldn't be constructing it every iteration of the loop.

[Edit: In fact, that's pretty much the only thing you're measuring since, after the first iteration of the outer for loop you've reached the end of the input file, so the loop beginning on line 51 will only ever be entered one time - which is on the first iteration of the for loop.]
Last edited on
By the way, your original code results in undefined behavior on lines 26, 31, 36, 41, 45, 49, 53, 57, 61 and 75. When t9.length() == 0, t9[t9.length()-1] is using an invalid index into t9.
Topic archived. No new replies allowed.