urgent-large reference file

hi,
i am working on a code it has two parts:
for the first part i have 1500-4000 text files which have strings in them.some files are same and some have different strings and i have to find amongst those a subset of files which are 10-20 n number and they are covering all the strings in 1500 files. since the string are repeated alot in files, only some files can cover the variation of strings.
to implement this code i used the following logic:

1. Read all the coverage information from each test folder for Statement Coverage and put it into a String array, the array should contain only distinct element. Let this array be A

2. Now create a similar array B of boolean values of lenght A with all values False. This will be our array to check eveything is covered or not

3. Read coverage info one by one from test0 to test1590 and start including into the set. As you include one in the set, mark the corresponding elements in array B to be true.

4. Continue 3 till all the values in B is true.


my code has two parts,one of it which reads all the files and collects unique strings is as follows:
also the reference file is at first chosen as the file among 1500 files with maximum no of statements.


int comparefiles(const char *filename1,const char *filename2, int x);
int countlines(const char *filename);
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main()
{
int i=0,j=0,cmpx,nl1=0,nl2=0,dt;
char buffer[25000]={},buffer1[25000]={}, buffero[25000]={};
FILE *output;
char buf[20000]={};
char buf1[20000]={};
char bufo[20000]={};
char string2[]="/COND.txt";
char string[]="rctcas.txt";
//char string[]="TEST";
for(i=0;i<1;i++)
{
sprintf(buf,"/home/csgrads/akhan015/desktop/programs/benchmarks/tcas/coverage/test1260%s",string2);
printf("reading reference file %s \n",buf);


for(j=0;j<=1589;j++)
{
sprintf(buf1,"/home/csgrads/akhan015/desktop/programs/benchmarks/tcas/coverage/test%d%s", j,string2);
printf("reading inner loop %s \n",buf1); //sending reference file and another file to compare
dt=comparefiles(buf,buf1,j);
}
}
return 0;

}



int countlines(const char *filename) //count no of lines
{
FILE *fm;
char line[1024];
int NumberOfLines = 0;
fm=fopen(filename, "r");
while( fgets(line,sizeof(line),fm) != NULL)
NumberOfLines++;
return(NumberOfLines);
fclose(fm);
}


int comparefiles(const char *filename1,const char *filename2, int x)
{
FILE * fref;
FILE *output;
FILE * myfile1;
char bufo[20000]={};
char cx1[10000]={} ,cx2[10000]={},cx3[10000]={};
int cmpx,cmpx1;
signed int s=-1;
int nl1,nl2,nl3;
//fflush(fref);
fref= fopen(filename1, "r");
nl1=countlines(filename1);
myfile1= fopen(filename2,"r");
nl2=countlines(filename2);

if((fref== NULL) || (myfile1== NULL))

printf("Error occurs in the file \n");

else
{
int j = 0, k=0;
rewind(myfile1);
first:
while((fgets(cx2 ,30 ,myfile1)!= NULL)) //choose strings and compare and stop when all the strings from //a file match reference file, i.e. no unique string to add
{j++;
int i = 0;
rewind(fref);
while((fgets(cx1 ,30, fref)!= NULL))
{
i++;
if((cmpx=strcmp(cx2 ,cx1))== 0)
{
k++;
if(k==(nl2))
{printf("%d=%d FILES ARE SAME\n",k,nl2 );
return;
}

goto first;


}







}

if((j!=k)&&((cmpx=strcmp(cx2 ,cx1))!= 0)) //here a different statement is found //and checked whether it has been saved in rctcas during comparison with some other file
{

printf(" STATEMENT DOESNOT EXIST\n");

output=fopen("rctcas.txt","a+");
int l=0;
nl3=countlines("rctcas.txt");
second:
while((fgets(cx3 ,30, output)!= NULL))
{puts(cx2);
puts(cx3);
l++;
if((cmpx1=strcmp(cx2 ,cx3))== 0)
{

return;}
else
if(l==nl3)
{fputs(cx2,output);
fclose(output);}
}


}
}

return;
}

i am having two problems in my code:
first after executing 255 times the code gives segmentation fault. this could be because of buffer overflow. maybe using malloc can work but i am not sure how to use malloc as i am a beginner.

second the code creates a reference file which compares a large number of files, e.g. 1500 or 4000 etc and it extracts all the unique strings from the files and store them.whenever the loop runs,reference file creates a checklist to check that all strings are covered. any file being compared is added on the condition that it has atleast one string not covered by previous files.according to the observation the refernce file might contain about 100 unique strings but my reference file is giving 17000 strings because the code has some error. please help me i have to submit the code tomorrow and i cant find the fault. while using fopen i gave 'a+' but that after running two times gives a weird memory table and says aborted.
Last edited on
void comparefiles(const char *filename1,const char *filename2, int x);
int countlines(const char *filename);//these functions are to compare files passed as an argument and compare files.the countline function is to count the number of lines in the code.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

int main()
{
int i=0,j=0,cmpx,nl1=0,nl2=0,dt;
char buf[20000]={};//this buffer has the path copied in it for the reference file
char buf1[20000]={};//this buffer has the path copied in it for the files to be compared
char string2[]="/COND.txt";//the text file to be accessed
char string[]="rctcas.txt";//name of reference file

for(i=0;i<1;i++) //this loop opens reference file
{
sprintf(buf,"/home/csgrads/akhan015/desktop/programs/benchmarks/tcas/coverage/test1260%s",string2);
printf("reading reference file %s \n",buf);


for(j=0;j<=1589;j++)//this loop opens the 1500 files to be compared
{
sprintf(buf1,"/home/csgrads/akhan015/desktop/programs/benchmarks/tcas/coverage/test%d%s", j,string2);//variable paths
printf("reading inner loop %s \n",buf1);
comparefiles(buf,buf1,j);
}
}
return 0;

}



int countlines(const char *filename)
{
FILE *fm;
char line[1024];
int NumberOfLines = 0;
fm=fopen(filename, "r");
while( fgets(line,sizeof(line),fm) != NULL)
NumberOfLines++;
return(NumberOfLines);
fclose(fm);
}


void comparefiles(const char *filename1,const char *filename2, int x)
{
FILE * fref;
FILE *output;
FILE * myfile1;
char cx1[10000]={} ,cx2[10000]={},cx3[10000]={};
int cmpx,cmpx1;
signed int s=-1;
int nl1,nl2,nl3;
//fflush(fref);
fref= fopen(filename1, "r");//opening reference file
nl1=countlines(filename1);//counting no of lines in reference file
myfile1= fopen(filename2,"r");//opening file one of 1500 files
nl2=countlines(filename2);//counting lines

if((fref== NULL) || (myfile1== NULL))

printf("Error occurs in the file \n");


else
{
int j = 0, k=0;
rewind(myfile1);
first:
while((fgets(cx2 ,30 ,myfile1)!= NULL)) //pick 1 string from file to compare it with the refernce file
{j++; //j tells the no. of times this loop executes
int i = 0;
rewind(fref);
while((fgets(cx1 ,30, fref)!= NULL))//this loop compares a string with all the strings in reference file
{

i++;// no of times 2nd loop execute
if((cmpx=strcmp(cx2 ,cx1))== 0)
{

if(k==(nl2))// if all strins of a file are in the reference file then we go to another file and repeat the procedure
{printf("%d=%d FILES ARE SAME\n",k,nl2 );
return;
}
k++,//no of matches in both files
else
goto first;//continue comparing strings


}



}

if((j!=k)&&((cmpx=strcmp(cx2 ,cx1))!= 0))// if all the strings are picked and all match then j==k but here some string doesnt match
{

printf(" STATEMENT DOESNOT EXIST\n");

output=fopen("rctcas.txt","a+");

int l=0;
nl3=countlines("rctcas.txt");//count lines in the file
second:
while((fgets(cx3 ,30, output)!= NULL))// here before writing the string to the file i want to check it is not appended again i.e if the same string was there in some other file but not in reference that it would written again and again
{puts(cx2);
puts(cx3);
l++;// l shows how many times loop is executed
if((cmpx1=strcmp(cx2 ,cx3))== 0)
{
fclose(myfile1);
fclose(fref);
return;}
else
if(l==nl3)//if string does not exist and the whole file is checked
{fputs(cx2,output);
fclose(output);}}

}


}
}
fclose(myfile1);
fclose(fref);
return;
}



this code is easier to read.
Last edited on
Topic archived. No new replies allowed.