C++ error in MS Visual Studio: "Windows has triggered a breakpoint"

Hi all,

I want to extract all 3-grams (each 3-gram contains 3 bytes, and the window shifts by 1 byte each time) from the files in a directory and count the frequency of each 3-gram across those files. I have written a simple C++ program that walks the directory recursively, extracts the 3-grams of each binary file, and stores them as keys in a hash table.
Before adding a key, I look it up. If it is already in the hash table, I do not add it again and just increase its member value (the frequency of that 3-gram).

The program runs, but it stops with an error message saying: "Windows has triggered a breakpoint in my program. This may be due to a corruption of the heap, which indicates a bug in the program or the DLLs that it loads."

I would appreciate it if somebody could help me ..

Thanks,

#include "hash_table.h"
#include <string>
#include <windows.h>
#include <fstream>
#include <stdio.h>
#include <iostream>

#define MAX_BUFFER_SIZE 256
typedef CHashTable<int> CLongHashT;

using namespace std;

void makeVocabHash(string dir, CLongHashT HashTperAll, int N) {
/* N -> N-gram! */
HANDLE hFindFile;
WIN32_FIND_DATAA Win32FindData;
CHAR Directory[MAX_PATH];
int counter;
int i;
int countNgram;
string tmp;


fstream fileOpen;

// copying path to directory
sprintf(Directory,"%s\\*.*", &dir[0]);
if((hFindFile = FindFirstFileA(Directory, &Win32FindData)) == INVALID_HANDLE_VALUE){ // if directory not found (finding first file of directory)
return ; // error, directory not found
}

do{
if(strcmp(Win32FindData.cFileName, ".") != 0 && strcmp(Win32FindData.cFileName, "..") != 0){
sprintf(Directory, "%s\\%s", &dir[0], Win32FindData.cFileName);


// if found a file
if(! (Win32FindData.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) ) {
// is a file
fileOpen.open(Directory, ios::in | ios::binary | ios::ate);

//size of file
int end = fileOpen.tellg();
fileOpen.seekg (0, ios::beg);
int begin = fileOpen.tellg();
int size = end - begin;
char data[MAX_BUFFER_SIZE];

fileOpen.read(&data[0], size);
fileOpen.close();

counter = 0;

// reading N bytes, construct 3 grams and insert to hashT
while( (counter != ((size - N) + 1) ) ) {
for( i=0; i!=N; ++i)
tmp += data[i+counter];
// insert to hashT
if (HashTperAll.GetMember(tmp))
countNgram = *(HashTperAll.GetMember(tmp)) + 1;
else
countNgram = 1;

HashTperAll.AddKey(tmp, &countNgram );
tmp = "";
counter++;
}

}
else {
// is a directory
makeVocabHash(Directory, HashTperAll, N);
}
}
} while(FindNextFileA(hFindFile,&Win32FindData));//finding next file in directory

// closing handles
FindClose(hFindFile);
}
//---------------------------------------------------------------------
// Program entry point: prompts for a directory and runs the 3-gram scan on it.
// Declared as `int main()` because `void main()` is not standard C++ and is
// only accepted as a compiler extension.
int main()
{
    CLongHashT HashTDocs;

    cout<< "enter a path";
    string dir;
    cin >> dir; // reads a single whitespace-delimited token
    makeVocabHash(dir,HashTDocs, 3);

    return 0;
}

Last edited on
It may be a lot easier if libraries with an idiomatic C++ interface were used.
For instance, with the standard C++ library and boost filesystem
http://www.boost.org/doc/libs/1_49_0/libs/filesystem/v3/doc/index.htm

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#include <iostream>
#include <fstream>
#include <tuple>
#include <unordered_map>
#include <algorithm>
#include <boost/filesystem.hpp>

// One raw byte of file content.
using byte = unsigned char ;
// Three consecutive bytes, used as the key of the vocabulary map.
using trigram = std::tuple<byte,byte,byte> ;

// Hash functor for trigram keys: packs the three bytes into one 24-bit
// unsigned value (first byte most significant) and hashes that value with
// std::hash<unsigned int>.
struct hash_trigram
{
    std::size_t operator() ( trigram t ) const
    {
        const unsigned int packed =
              ( static_cast<unsigned int>( std::get<0>(t) ) << 16 )
            | ( static_cast<unsigned int>( std::get<1>(t) ) << 8 )
            |   static_cast<unsigned int>( std::get<2>(t) ) ;
        return std::hash<unsigned int>()( packed ) ;
    }
};

// Maps each trigram to the number of times it has been seen.
using vocab_map_t = std::unordered_map< trigram, int, hash_trigram > ;

// Counts every trigram (three consecutive bytes, window advancing by one
// byte) read from `stm` into `ht`, inserting new keys or incrementing
// existing counts. A stream that yields fewer than three bytes adds nothing.
void populate( vocab_map_t& ht, std::istream& stm )
{
    // Use unformatted get(): operator>> on a character type skips whitespace
    // under the stream's default skipws flag, which would silently drop
    // bytes such as 0x20 or 0x0A from binary data and corrupt the counts.
    char first, second, next ;
    if( !stm.get(first) || !stm.get(second) ) return ;

    byte a = static_cast<byte>(first) ;
    byte b = static_cast<byte>(second) ;
    while( stm.get(next) )
    {
        const byte c = static_cast<byte>(next) ;
        ++ht[ std::make_tuple( a, b, c ) ] ; // insert or increment count
        // slide the window left by one byte
        a = b ;
        b = c ;
    }
}

// Adds the trigram counts of `p` to `vocab_map`. A directory is visited
// entry by entry, recursing into each one; a regular file is opened in
// binary mode and handed to the stream overload. A path that does not exist,
// or is neither a directory nor a regular file, is ignored.
void populate( vocab_map_t& vocab_map, const boost::filesystem::path& p )
{
    namespace fs = boost::filesystem ;

    if( !fs::exists(p) ) return ;

    if( fs::is_directory(p) )
    {
        const fs::directory_iterator last ; // default-constructed == end iterator
        fs::directory_iterator entry(p) ;
        while( entry != last )
        {
            populate( vocab_map, entry->path() ) ;
            ++entry ;
        }
        return ;
    }

    if( fs::is_regular_file(p) )
    {
        std::ifstream file( p.string(), std::ios::binary ) ;
        populate( vocab_map, file ) ;
    }
}

// Demo driver: builds the trigram vocabulary for the current directory and
// prints the number of distinct trigrams followed by at most ten entries.
int main()
{
    const boost::filesystem::path test_path = "." ;
    vocab_map_t vocab_map ;

    populate( vocab_map, test_path ) ;

    // check it out by printing out ten entries
    std::cout << "vocab_map contains " << vocab_map.size() << " entries.\n" ;
    std::cout << "the first ten are:\n" ;

    int shown = 0 ;
    for( auto it = vocab_map.begin() ; it != vocab_map.end() && shown < 10 ; ++it, ++shown )
    {
        // widen to int so the bytes print as numbers, not characters
        const int a = std::get<0>( it->first ) ;
        const int b = std::get<1>( it->first ) ;
        const int c = std::get<2>( it->first ) ;
        std::cout << "    (" << a << ',' << b << ',' << c << "): " << it->second << '\n' ;
    }
}


Note: Visual Studio has an implementation of the filesystem library; that could be used in place of boost filesystem.
http://msdn.microsoft.com/en-us/library/hh874694.aspx
Last edited on
Topic archived. No new replies allowed.