Finding duplicate files using Windows and native C++

This is a console program, but I've borrowed a lot of the code (such as the recursive directory traversal) from Windows examples, which is why I'm asking the question here.

I want to write an algorithm that finds duplicate files: first it has to find files of the same size, and if their hashes also match, the files are displayed as output.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
class duplicateFinder {
private:
	bool isDuplcateFound = false;
	multimap<int, wchar_t*> duplicate;
	multimap<int, wchar_t*>::iterator iterate;
	multimap<char**, int> Hash;
	multimap<char**, int> confirmation;
public:
	bool processDrive(const wchar_t* drive);
	int getFileSize(const char* file);
	void display();
};

int duplicateFinder::getFileSize(const char* file)
{
	ifstream mySource;
	mySource.open(file, ios_base::binary);
	mySource.seekg(0, ios_base::end);
	int size = mySource.tellg();
	mySource.close();
	return size;
}

bool duplicateFinder::processDrive(const wchar_t* sDir)
{
	// referred http://www.stackoverflow.com/questions/2314542/listing-directory-contents-using-c-and-windows
	//Map creation and usage
	WIN32_FIND_DATA fdFile;
	HANDLE hFind = NULL;

	wchar_t sPath[2048];
	wsprintf(sPath, L"%s\\*.*", sDir);

	if ((hFind = FindFirstFile(sPath, &fdFile)) == INVALID_HANDLE_VALUE)
	{
		wprintf(L"Path not found: [%s]\n", sDir);
		return false;
	}

	do
	{
		
		if (wcscmp(fdFile.cFileName, L".") != 0
			&& wcscmp(fdFile.cFileName, L"..") != 0)
		{
			
			wsprintf(sPath, L"%s\\%s", sDir, fdFile.cFileName);
			if (fdFile.dwFileAttributes &FILE_ATTRIBUTE_DIRECTORY)
			{
				wprintf(L"Directory: %s\n", sPath);
				processDrive(sPath); 
			}
			else
			{
				//wprintf(L"File: %s\n", sPath);
				char** arr;
				char* hash = new char[MAX_PATH];
				memset(hash, 0, MAX_PATH);
				int correction;
				correction = wcstombs(hash, sPath, MAX_PATH);
				iterate = duplicate.find(getFileSize(hash));
				if (iterate == duplicate.end())
				{
					duplicate.insert(pair<int, wchar_t*>(getFileSize(hash), sPath));
				}
			}
		}
	} while (FindNextFile(hFind, &fdFile));
	FindClose(hFind);
	return isDuplcateFound;
}

void duplicateFinder::display()
{
	for (multimap<int, wchar_t*>::iterator it = duplicate.begin(); it != duplicate.end(); ++it)
		wcout << it->first << " => " << it->second << '\n';

}


The multimap ends up holding all the files. How can I make it keep only the files that have the same size?

Thank you for your time
Last edited on
You may use a map<> with the file size as the key and a vector of names as the value.
Hi, sorry for the late reply. Could you explain a bit more? Even if I choose a container as the value (a vector, as you said), how could it display both files? My program will only display one of the duplicate files, not both.
Topic archived. No new replies allowed.