Code review

Hi devs,

I've got an old code of mine (pertaining to a slightly long time ago that I've forgotten most of it!). Tried to polish it with C++ modern stuff as much as I could.
Anyway, let's review it through a step-by-step approach: firstly from beginning up to line 45:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#include <iostream>
#include <vector>
#include <list>
#include <string>

// Write a find-and-replace operation for Documents

using Line = std::vector<char>;
using listIter = std::list<Line>::iterator;
using lineIter = Line::iterator;

class Text_iterator {
public:
	listIter ln;
	lineIter pos;

	Text_iterator(listIter ll, lineIter pp)
		: ln(ll), pos(pp) { }

	char& operator*() { return *pos; }

	Text_iterator& operator++();

	bool operator==(const Text_iterator& other) const
	{
		return ln == other.ln && pos == other.pos;
	}
	bool operator!=(const Text_iterator& other) const
	{
		return !(*this == other);
	}
};

//*****************************

Text_iterator& Text_iterator::operator++()
{
	++pos;    
	if (pos == ln->end()) {
		++ln;  // "ln" pointing to the next vector/line
		pos = ln->begin();
	}
	return *this;
}

//************************

class Document {
public:
	std::list<Line> lines;

	Document() { lines.push_back(Line{}); } 
	Text_iterator begin()
	{
		return Text_iterator(lines.begin(), lines.begin()->begin());
	}

	Text_iterator end()
	{
		listIter last = lines.end();
		--last;  // We know that the document is not empty
		return Text_iterator(last, last->end());
	}
};

//******************************************************

std::istream& operator>>(std::istream& is, Document& d)
{
	char ch;
	while (is.get(ch)) // why a loop and not an if?
	{
		d.lines.back().push_back(ch); 
		if (ch == '\n')
			d.lines.push_back(Line{}); // An empty vector of char with uninitialized values
	}

	if (d.lines.back().size()) d.lines.push_back(Line{});  // Add final empty line
	return is;
}

//******************************

Text_iterator my_find(Text_iterator first, Text_iterator last, const char& ch)
{
	for (auto p = first; p != last; ++p)
		if (*p == ch)
			return p;
	return last;
}

//*****************************************************************

bool match(Text_iterator p, Text_iterator last, const std::string& s)
{
	for (int i = 0; i < s.size(); i++, ++p)
		if (*p != s[i] || p == last) return false;

	return true;
}

//********************************************

Text_iterator find_txt(Text_iterator first, Text_iterator last, const std::string& s)
{
	if (s.empty()) return last;  // can't find an empty string
	char first_char = s[0];

	while (true) {
		auto p = my_find(first, last, first_char);
		if (p == last || match(p, last, s)) return p;
		first = ++p;
	}
}

//*****************************************
template<class Iter>
void my_advance(Iter& p, int n)
{
	while (n > 0) { ++p; --n; }
}

//*************************************

bool erase_line(Document& d, int n)
{
	if (n < 0 || d.lines.size() - 1 <= n) return false;

	auto p = d.lines.begin();
	my_advance(p, n);
	d.lines.erase(p);
	return true;
}

//********************************

void replace(Text_iterator p, Document& d, Line s)
{
	p.ln = d.lines.erase(p.ln);
	d.lines.insert(p.ln, s);
}

//*************************

int main()
{
	int num, n;
	Line l;
	bool b = true;
	std::string s;
	Document d;
	auto p = d.begin();

	std::cout << "Please enter characters for your list. Type ^z at the end.\n";
	std::cin >> d;
	std::cin.clear();

	std::cout << "1: Searching a line.\n";
	std::cout << "2: Erasing a line.\n";
	std::cout << "3: Replacing a line.\n";
	std::cout << "4: Printing the valuse.\n";
	std::cout << "0: Exit.\n";

	while (b)
	{
		if (!(std::cin >> n)) {
			std::cout << "Out of Range or bad input!\n";
			std::cin.clear();
			std::cin.ignore(INT_MAX, '\n');
		}

		switch (n)
		{
		case 1:
			std::cout << "Enter your line to search.\n";
			std::cin.ignore();
			getline(std::cin, s);
			p = find_txt(d.begin(), d.end(), s);
			if (p == d.end()) std::cout << "Not found!\n";
			else std::cout << "Found!\n\n";
			break;

		case 2:
			std::cout << "Please enter the number of the line: ";
			if (std::cin >> num && erase_line(d, num - 1)) std::cout << "Erased!\n";
			else {
				std::cout << "Out of Range or bad input!\n";
				std::cin.clear();
				std::cin.ignore(INT_MAX, '\n');
			}
			break;

		case 3:
			std::cout << "Please enter your current line: ";
			std::cin.ignore();
			getline(std::cin, s);
			p = find_txt(d.begin(), d.end(), s);
			if (p == d.end()) { std::cout << "Ther is no such a line!\n"; break; }

			std::cout << "Enter your new line: ";
			getline(std::cin, s);
			copy(s.begin(), s.end(), back_inserter(l));
			l.push_back('\n');
			replace(p, d, l);
			break;

		case 4:
			for (p = d.begin(); p != d.end(); ++p)
				std::cout << *p;
			break;

		case 0: b = false; break;

		default: std::cout << "Something wrong was entered. Try again!\n";
			break;
		}
	}

	system("pause");
	return 0;
}


My problems:

1) Can we use lambdas for this code? If so, where please?
2) On line 38 why do we at once increment pos? The same question for ++ln on line 40. How do we know incrementing them doesn't go out of range there?


Last edited on
Find and replace are part of C++. If you are playing to learn, ok, but you can do a lot with what is built into the language and may want to exercise wrapping those calls up into an interface instead of DIY the internal parts.

lambdas are fed into any number of <algorithm>s and you can use them in your own code I guess. Std transform basically can replace any loop with its own internal loop and a lambda telling it what to do inside. But do you need this anywhere? I don't recommend adding one just to add one.

38: the operator is the increment operator. That is what it does, the function you are building. If you think it can go out of bounds, you need to check it, yes. It seems like a good idea to do so to me. 40 I do not know if it needs a check or not. Try to figure it out, and if it needs one, add it.

main is kind of bulky, but is any of it worth having later or all just driver code to test with? I didn't see anything you should explicitly make a function or keep to go with the class. Its probably ok as is.

prefer {} initialize now: bool b{true};
calling your class iterator may confuse someone as that is a c++ thing. The class isnt really an iterator. Bad name?
1) I don't aim at using std::transform because the loops in the code look more obvious and simpler that using that algorithm. I don't need a lambda in that code either. Right?

2) The program works well with no error even with code inside Text_iterator& Text_iterator::operator++() function! I don't know how that function works!

3) My next question is: what does this Document() { lines.push_back(Line{}); } do on line 50?
Does it mean that when an object of the class is created an empty element (here a vector<char>) will be added to the list? But will really anything be added to the list, since the default value of char is undefined so the vector/element added is almost nothing! That's weird, I don't understand that code.

calling your class iterator may confuse someone as that is a c++ thing. The class isnt really an iterator. Bad name?
4) You mean Text_iterator?
Last edited on
1) right, you will know when you need a lambda, I think. Its usually obvious.

2) well, ok. I don't either, to be honest ... you have to either accept it as working and not screw with it, or dig in and unravel what it does. Lots of code out there that works fine if you don't mess with it, even if you don't understand it. Problem happen when you need to modify it and can't figure it out....

3) its a constructor for the class, setting up the list but honestly I am not sure what the purpose of this statement is. The author felt it was necessary, but I don't see why.

4) yes. that name would annoy some people. I honestly don't worry about such things, but a lot of coders will have some sort of episode over stuff like this.
> My next question is: what does this Document() { lines.push_back(Line{}); } do on line 50?
> Does it mean that when an object of the class is created an empty element will be added to the list?

Yes.


> But will really anything be added to the list

An empty string would be added. Since it contains nothing, there would be no characters in it.
This is done to ensure that the list is initially not empty; that even immediately after construction, lines.begin() would yield an iterator to a valid string, and the behaviour of Document::begin() is well-defined.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
class Document {
public:
	std::list<Line> lines;

	Document() { lines.push_back(Line{}); } 

	Text_iterator begin()
	{
                // note that this would engender undefined behaviour if lines is empty
                //               ( we try to access lines.begin()->begin() )
		return Text_iterator(lines.begin(), lines.begin()->begin());
	}
        
        // ...
};



Quiz: why is it that Document::lines is a std::list and not a std::vector?
Could we use a vector instead?
Last edited on
An empty string would be added. Since it contains nothing, there would be no characters in it.
And the size is 0! printing only white space when it's output! So it's a shadow of a vector which is fortunately unequal to empty! (So that the document owns something at the beginning)

Quiz: why is it that Document::lines is a std::list and not a std::vector? Could we use a vector instead?
Yes, but consider we have a long text with long lines. Replacing/erasing a line in the middle of such a text would be more costly when using a vector than a list. Agree? As well as, the content of the program is to use a list for the lines and a vector for each line.

I scrutinised the code starting from main() until I reached line 177: p = find_txt(d.begin(), d.end(), s);
Then it goes to p = find_txt(d.begin(), d.end(), s);, and after that my_find. Eventually on line 85 we reach for (auto p = first; p != last; ++p) where the operator++() for p will be called on line 34.

We have a bound where p can travers within, starting from begin() (which is the first position in a document that is not empty) ending in end which is one beyond the last char in the document. So we can increment p primarily at the very beginning of that function. (It simply doesn't (and can't) go beyond last). But the problem is that if on line 37. Why, by any coincidence, should the condition pos == ln->end() yield up true!?

The next question is why even do we need the operator Text_iterator& operator++();? On line 85, for (auto p = first; p != last; ++p), p is an iterator to the first char in the whole document which is per se capable of being incremented/decremented/dereferenced and so on. Not?


> Yes, but consider we have a long text with long lines.
> Replacing/erasing a line in the middle of such a text would be more costly when using a vector than a list.
> Agree?

No. A list would be more efficient if and only if the numbers involved are very large.

This post contains some measurements (and some extracts from observations by Stroustrup):
https://www.cplusplus.com/forum/beginner/206320/#msg976016
The precise very large number where the list would become more efficient depends on the implementation.

Try again: so, why is it a list and not a vector?


> why even do we need the operator Text_iterator& operator++();?
> On line 85, for (auto p = first; p != last; ++p), p is an iterator to the first char in the whole document

p is an iterator to the first char in the whole document, yes, but it is an input iterator of type Text_iterator; the iterator must be incrementable.
frek wrote:
I've got an old code of mine (pertaining to a slightly long time ago that I've forgotten most of it!).

I don't know how [the code] works!

This illustrates a point about programming. Most of the time we simply cannot know what the writer of the code was thinking (even if we did wrote it).

Therefore, we have to be able to interpret the code back to logic. For self-written code it should be slightly easier, unless "How do I think?" is a too difficult question.

If you wrote the code, then you cannot have used constructs that you did not know at the time?


While it is too late for old code, do keep this in mind for the code that you write now. Avoid "clever tricks". Keep it simple so that future you (and everyone else) can decipher it.
A list would be more efficient if and only if the numbers involved are very large.
The numbers involved, in my program, would be those vectors (elements of the list), yeah? Each vector is a line, so by long lines I meant long vectors, hence "large numbers"!

Yet, I don't know why that if condition is mandatory inside the function Text_iterator& Text_iterator::operator++()!

yes, but it is an input iterator of type Text_iterator; the iterator must be incrementable.
Got it, right.
But the terms input/output iterators bewilder me. I looked them up on the Web but still they're not vivid for me which is the input one and which the output! For instance, what kind of iterator is it in the following code?

1
2
3
4
5
6
7
8
9
10
11
12
13
14
int main() {

        std::vector<int> v1 = { 1, 2, 3, 4, 5 };
        std::vector<int>::iterator it;

        for (it = v1.begin(); it != v1.end(); ++it) {
            *it = 1;
            std::cout << (*it) << " ";
        }
            std::cout << '\n';

        system("pause");
        return 0;
    }

Last edited on
it is a Random Access Iterator. There are 5 types of iterator:

input
output
forward
bi-directional
random

See http://www.cplusplus.com/reference/iterator/

and reverse iterator http://www.cplusplus.com/reference/iterator/reverse_iterator/

plus other pre-defined iterators.
Last edited on
The numbers involved, in my program, would be those vectors (elements of the list), yeah? Each vector is a line, so by long lines I meant long vectors, hence "large numbers"!
1
2
using Line = std::vector<char>;
std::list<Line> lines;

In the pointed to text there is an example "N to be much larger than 500,000".

What is that "large number"? That N?

The N refers to lines.size(). It does not matter whether each Line is just one byte of a megabyte. The discussion is about how many Line objects you have.


Even if elements would matter, they are vectors. What is the size of a vector?
1
2
3
4
5
6
7
8
9
#include <vector>
#include <iostream>

int main() {
    std::vector<char> small;
    std::vector<double> big(1000000);
    std::cout << "empty " << sizeof(small) << '\n';
    std::cout << "big   " << sizeof(big) << '\n';
}

empty 24
big   24

Q: How can an empty vector that has not even one char take as many bytes as a vector that contains million double values?

A: A vector is merely a sugar-coated pointer. Pointers are all small. Your "Line" (an element of lines) is a small object no matter how much text one line has.
@seeplus
OK, nice resource, albeit it couldn't cover all my ambiguities.
For instance, how did you recognise that it I used above is of type random-access, please? And where each of them are expected to be used in code?

@keskiverto
Categorically speaking we hence can say: If that much larger number for N (the size of the vector/list which is the number of elements each of them contains regardless of the size of the elements themselves) is roughly 800,000, std::vector is the best container to be used for small to middle numbers of that N. Therefore, if N is <=400,000 our choice must be std::vector. Right?

... to interpret the code back to logic
I don't know how to interpret the code back to logic!

Once again I examined that if condition if (pos == ln->end()) this time by debugger (F10 and F11), but yet I can't figure out how exactly it's correct. ln is fed by an iterator to the first element of the list whilst pos is fed by an iterator to the first element of the vector. So
1
2
ls = lines.begin()
pos = lines.begin()->begin()


If we increment ln, it goes to the next vector in the list and if we increment pos it will go to the next char in the vector.
ln->end(), based of the definition points to one element beyond the last element of the list. But in reality ln->end() is one beyond the last element of the vector not the list!!!

PS: Without that if and its body, the program crashes when it reaches the end of the first line at printing the values!
> For instance, what kind of iterator is it in the following code?

Looking at just the loop, an input iterator.
1
2
3
4
5
        for (it = v1.begin(); it != v1.end(); ++it) {

            *it = 1;
            std::cout << (*it) << " ";
        }


Stroustrup in 'The C++ Programming Language':
Input iterator: We can iterate forward using ++ and read each element (repeatedly) using ∗ .
We can compare input iterators using == and != . This is the kind of iterator that istream
offers


The iterator provided by the vector supports all these operations; ergo it is (it can be used as) an input iterator.


> And where each of them are expected to be used in code?

It depends on what operations on the iterators are required; for instance the implementation of an algorithm.
For instance, std::count requires input iterator, std::reverse requires bidirectional iterator etc.
But we cannot write into an input iterator, can we!? (line 3)

One last question about the program: is the list menu of selections coded properly in main() for the user? Aren't those many std::cin.clear();, std::cin.ignore(INT_MAX, '\n'); and so on ugly? What about the termination character sign? I used ^z (ctrl+z) I dislike this too.
Last edited on
you have to clear the buffer if you mix getline and cin statements. It looks right, but I suspect you can reduce the number of them and clean it up some. If you can move the getlines together and the cins together you would need less of them. Or getline everything and turn the text back to numbers. Or make a little function that gets the info and clears the buffer so it only appears once in the code. Lots of ways to make it better.
Last edited on
> But we cannot write into an input iterator, can we!? (line 3)

No, we can't. I made a bad mistake, it is used as a (mutable) forward iterator here.

Stroustrup:
Forward iterator: We can iterate forward repeatedly using ++ and read and write (unless the elements are const ) elements repeatedly using ∗ . If a forward iterator points to a class object, we can use −> to refer to a member. We can compare forward iterators using == and != . This is the kind of iterator forward_list offers


Note that the iterator that vector provides is a LegacyContiguousIterator https://en.cppreference.com/w/cpp/named_req/ContiguousIterator,
it supports all these operations (and more), it can be used as a forward iterator.


> Aren't those many std::cin.clear();, std::cin.ignore(INT_MAX, '\n'); and so on ugly?

Write a few small functions. In general, favour writing functions to factor our repeated similar code fragments.

For instance:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
int get_int( const std::string& prompt, int minv = INT_MIN, int maxv = INT_MAX )
{
    std::cout << prompt << " [" << minv << ',' << maxv << "]: " ;

    int value ;
    if( std::cin >> value )
    {
        if( value >= minv && value <= maxv ) return value ;
        else std::cout << "error: value is out of range." ;
    }
    else std::cout << "error: non-numeric input." ;

    std::cout << " try again\n" ;
    std::cin.clear() ;
    std::cin.ignore( 1'000'000, '\n' ) ;
    return get_int( prompt, minv, maxv ) ;
}

std::string get_line( const std::string& prompt )
{
    std::cout << prompt << ": " ;

    std::string line ;
    // ignore empty lines
    if( std::getline( std::cin, line) && !line.empty() ) return line ;

    std::cin.clear() ;
    return get_line(prompt) ;
}

enum selection { EXIT = 0, SEARCH, ERASE, REPLACE, PRINT };

selection get_selection()
{
    std::cout << "\n\nmenu\n----------\n"
              << SEARCH << ": Search.\n"
              << ERASE << ": Erase a line.\n"
              << REPLACE << ": Replace a line.\n"
              << PRINT << ": Print all lines.\n"
	          << EXIT << ": Exit.\n\n" ;

	return selection( get_int( "enter selection", EXIT, PRINT ) ) ;
}

int main()
{
    selection sel = EXIT ;
    while( ( sel = get_selection() ) != EXIT )
    {
        switch(sel)
        {
            case SEARCH:
            {
                const auto line = get_line( "enter text of the line to search" ) ;
                // ...
            }
            break ;

            case ERASE :
            {
                const int max_line_num = d.lines.size()-1 ;
                const auto line_num = get_int( "enter number of the line to erase", 0, max_line_num ) ;
                // ...
            }
            break ;
            
            // etc. 



> What about the termination character sign? I used ^z (ctrl+z)

End of file (Ctrl+D on Unix, Ctrl+Z on windows) is a common way of signalling end of input from stdin
Guys, the issue about that if statement is arguably easy! My bad!

ln->begin() equals lines.begin()->begin() that is, it pointes to the first element (char) of the vector, and ln->end() points to one beyond the last element in the current vector not the list!
Topic archived. No new replies allowed.