Signal: Segmentation fault (11) in MPI Program

Hi,
I am trying to do MPI communication in the context of Gauss elimination method. This is continued from:
http://www.cplusplus.com/forum/general/252734/

However, I have not used memcpy(..) in this program yet as suggested in the above link.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#include <stdio.h>
#include <math.h>
#include <mpi.h>
#include <time.h>
#include <stdlib.h>
//#include <vector.h>


//using namespace std;

// Sorts the input row into chunks to be scattered two all the processors.
void sortByProcess(/*vector<double>*/double* list1, double* list2, int count);

// Swaps two rows.
void swap(double** list, int count, int row1, int row2);


int rank, size;
int main(int argc, char * argv[])
{
  double sTime, eTime, rTime;
  /*ifstream*/ FILE* inFile;
  int num_rows = 3200;
  int num_cols = 3200;
  int cur_control = 0;
  double * send_buffer = NULL;
  double * recv_buffer = NULL;
  double ** data = NULL;
  double determinant;
  char strNum_rows[20]; 
  /*vector<double>*/double* file_buffer=NULL;

  // Just get the initialization of the program going.
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // If the input file is not given, print message and exit.
  if(argc < 2)
  {
    /*cout <<*/ printf("No input file given.\n");// << endl;
    MPI_Finalize();
    return 0;
  }
  // If the root node (0), then open the input file and read in the
  // number of rows.
  if(!rank)
  {
    printf("After rank inside  if @@@@@");
    inFile = fopen(argv[1], "r");
    fgets(strNum_rows, 20, inFile); 
    num_rows = atoi(strNum_rows);
    printf("num_rows???? =%d",num_rows);
    file_buffer = (double *) malloc(num_rows * sizeof(double *));//1

    if(file_buffer == NULL) {
       printf("malloc can't allocate memory for file_buffer");
       return -1;
    }

    /*???? inFile.open(argv[1]);
    inFile >> num_rows;
    file_buffer.resize(num_rows);*/
  }
  
  //printf("After rank outside @@@@@@@@#");
  
   send_buffer = (double *)malloc(num_rows * sizeof(double *));
   
   if(send_buffer == NULL) {
       printf("malloc can't allocate memory for send_buffer");
       return -1;
    }

  
/*?????send_buffer = new double[num_rows];*/
  //printf("After send_buffer #####@");
  // Broadcasts the number of rows to each processor.
  MPI_Bcast (&num_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
  num_cols = num_rows / size;
  // Allocate the memory on each processor.
  //printf("After Bcast #####@");

  data = (double **) malloc(num_cols * sizeof(double *));

  if(data == NULL) {
       printf("malloc can't allocate memory for data");
       return -1;
    }

  /*???? data #####@");*/
  


  for(int i = 0; i < num_cols; i++){
  data[i] = (double *) malloc(num_rows * sizeof(double *));
  if(data[i] == NULL) {
       printf("malloc can't allocate memory for data[%d]", i);
       return -1;
    }
   }

  /* ???  data[i] = new double[num_rows]; */

  for(int i = 0; i < num_cols; i++)
  {
    for(int j = 0; j < num_rows; j++)
      data[i][j] = 0;
  }

   //printf("Before recv_buffer $$$$$$$@");
   recv_buffer = (double *) malloc(num_cols * sizeof(double *));
   if(recv_buffer == NULL) {
       printf("malloc can't allocate memory for recv_buffer");
       return -1;
    }
  /*???? recv_buffer = new double[num_cols];*/
   
  

  // Scatter the data.
  for(int i = 0; i < num_rows; i++)
  {
    if(!rank)
    {
      for(int j = 0; j < num_rows; j++){
         fgets(strNum_rows, 20, inFile); 
         file_buffer[j] = atof(strNum_rows);
      }
        /*????? inFile >> file_buffer[j];*/
      sortByProcess(file_buffer, send_buffer, num_rows);
    }
     
    //printf("After sortByProcess ^^^^^^@");

    
    // Scatters the data so that each process gets the next value for their columns.
    MPI_Scatter(send_buffer, num_cols/* NOTE num_rows gives SCATTER ERROR &deviates from original code */, MPI_DOUBLE, recv_buffer, num_cols, MPI_DOUBLE, 0, MPI_COMM_WORLD   );
    for(int j = 0; j < num_cols; j++)
    {
      data[j][i] = recv_buffer[j];
    }
  }

    //printf("After Scatter  ^^^^^^@");
   fclose(inFile);
  /*delete []*/ free(recv_buffer);
  /*delete []*/ free(send_buffer);
                //free(file_buffer);
                 /*delete []*/ //free(send_buffer);
  for(int i = 0; i < num_cols; i++)
    /*delete []*/ free( data[i]);
  /*delete []*/ free(data);
  // Begin timing.
  MPI_Barrier(MPI_COMM_WORLD);
  sTime = MPI_Wtime();

   printf("After Barrier  ^^^^^^@");
   MPI_Finalize();
    return 0;
}


Its giving me following errors:
Are the following errors related to memcpy or they are due to MPI?


$ mpicc gaussian.c
zulfi@lc2530hz:~/c programs/MPI_PROG/GEGH$ mpirun -np 4 ./a.out matrix.3200.txt
[lc2530hz:05635] *** Process received signal ***
[lc2530hz:05635] Signal: Segmentation fault (11)
[lc2530hz:05635] Signal code: (128)
[lc2530hz:05635] Failing at address: (nil)
[lc2530hz:05636] *** Process received signal ***
[lc2530hz:05636] Signal: Segmentation fault (11)
[lc2530hz:05636] Signal code: (128)
[lc2530hz:05636] Failing at address: (nil)
[lc2530hz:05637] *** Process received signal ***
[lc2530hz:05637] Signal: Segmentation fault (11)
[lc2530hz:05637] Signal code: (128)
[lc2530hz:05637] Failing at address: (nil)
[lc2530hz:05636] [ 0] [lc2530hz:05637] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7f7ac9b11f20]
[lc2530hz:05637] [ 1] [lc2530hz:05635] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7fd882900f20]
[lc2530hz:05635] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7fdeb9110f20]
[lc2530hz:05636] [ 1] [ 1] /lib/x86_64-linux-gnu/libc.so.6(fclose+0xd4)[0x7fdeb9150324]
[lc2530hz:05636] [ 2] ./a.out(+0x116c)[0x/lib/x86_64-linux-gnu/libc.so.6(fclose+0xd4)[0x7f7ac9b51324]
[lc2530hz:05637] [ 2] ./a.out(+0x116c)[0x5651bd24c16c]
[lc2530hz:05637] [ 3] /lib/x86_64-linux-gnu/libc.so.6(fclose+0xd4)[0x7fd882940324]
[lc2530hz:05635] [ 2] 56176a25a16c]
[lc2530hz:05636] [ 3] /lib/x86_64-linux-gnu/libc.so.6(./a.out(+0x116c)[0x55ee0e31b16c]
[lc2530hz:05635] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0x/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x7f7ac9af4b97]
[lc2530hz:05637] __libc_start_main+0xe7)[0x7fdeb90f3b97]
[lc2530hz:05636] [ 4] ./a.out(+0xc1a)[0x56176a259c1a[ 4] ./a.out(+0xc1a)[0x5651bd24bc1a]
[lc2530hz:05637] *** End of error message ***
e7)[0x7fd8828e3b97]
[lc2530hz:05635] [ 4] ./a.out(+0xc1a]
[lc2530hz:05636] *** End of error message ***
)[0x55ee0e31ac1a]
[lc2530hz:05635] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 0 on node lc2530hz exited on signal 11 (Segmentation fault).
--------------------------------------------


Some body please guide me how to solve this problem.

Zulfi.

Last edited on
I'm sort of guessing here, but I think there's something wrong with your malloc lines.

You are using send_buffer and recv_buffer as dynamically allocated arrays of type double, but you are doing (double *)malloc(num_rows * sizeof(double *)) instead of
(double *)malloc(num_rows * sizeof(double)).

If you are meaning to allocate N elements of type double, you need to do N * sizeof(double).

(This might not be the only issue)
Last edited on

Hi,

Thanks for your response. God bless you.
I have modified the code based upon your comments but still I am getting the same errors. My modified code is:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178

#include <stdio.h>
#include <math.h>
#include <mpi.h>
#include <time.h>
#include <stdlib.h>
//#include <vector.h>


//using namespace std;

// Sorts the input row into chunks to be scattered two all the processors.
void sortByProcess(/*vector<double>*/double* list1, double* list2, int count);

// Swaps two rows.
void swap(double** list, int count, int row1, int row2);


int rank, size;
int main(int argc, char * argv[])
{
  double sTime, eTime, rTime;
  /*ifstream*/ FILE* inFile;
  int num_rows = 3200;
  int num_cols = 3200;
  int cur_control = 0;
  double * send_buffer = NULL;
  double * recv_buffer = NULL;
  double ** data = NULL;
  double determinant;
  char strNum_rows[20]; 
  /*vector<double>*/double* file_buffer=NULL;

  // Just get the initialization of the program going.
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  // If the input file is not given, print message and exit.
  if(argc < 2)
  {
    /*cout <<*/ printf("No input file given.\n");// << endl;
    MPI_Finalize();
    return 0;
  }
  // If the root node (0), then open the input file and read in the
  // number of rows.
  if(!rank)
  {
    printf("After rank inside  if @@@@@");
    inFile = fopen(argv[1], "r");
    fgets(strNum_rows, 20, inFile); 
    num_rows = atoi(strNum_rows);
    printf("num_rows???? =%d",num_rows);
    file_buffer = (double *) malloc(num_rows * sizeof(double ));//1

    if(file_buffer == NULL) {
       printf("malloc can't allocate memory for file_buffer");
       MPI_Finalize();
       return -1;
    }

    /*???? inFile.open(argv[1]);
    inFile >> num_rows;
    file_buffer.resize(num_rows);*/
  }
  
  //printf("After rank outside @@@@@@@@#");
  
   send_buffer = (double *)malloc(num_rows * sizeof(double ));
   
   if(send_buffer == NULL) {
       printf("malloc can't allocate memory for send_buffer");
       MPI_Finalize();
       return -1;
    }

  
/*?????send_buffer = new double[num_rows];*/
  //printf("After send_buffer #####@");
  // Broadcasts the number of rows to each processor.
  MPI_Bcast (&num_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);
  num_cols = num_rows / size;
  // Allocate the memory on each processor.
  //printf("After Bcast #####@");

  //data = (double **) malloc(num_cols * sizeof(double *));
  //calloc initializes the allocated memory to zero
  data = (double **)calloc(num_rows, sizeof(double*));

  if(data == NULL) {
       printf("malloc can't allocate memory for data");
       MPI_Finalize();
       return -1;
    }

  /*???? data #####@");*/
  


  for(int i = 0; i < num_cols; i++){
  data[i] = (double *) malloc(num_rows * sizeof(double ));
  if(data[i] == NULL) {
       printf("data[%d] = malloc(%lu)", i, num_rows * sizeof(double));
       printf("malloc can't allocate memory for data[%d]", i);
       //
       if(i>0) {
          for(int j = i-1; j>=0; j--)
             free( data[j]);
          free(data);
          MPI_Finalize();
          return -2;
       }
    }
   }

  /* ???  data[i] = new double[num_rows]; */

  for(int i = 0; i < num_cols; i++)
  {
    for(int j = 0; j < num_rows; j++)
      data[i][j] = 0;
  }

   //printf("Before recv_buffer $$$$$$$@");
   recv_buffer = (double *) malloc(num_cols * sizeof(double *));
   if(recv_buffer == NULL) {
       printf("malloc can't allocate memory for recv_buffer");
       MPI_Finalize();
       return -1;
    }
  /*???? recv_buffer = new double[num_cols];*/
   
  

  // Scatter the data.
  for(int i = 0; i < num_rows; i++)
  {
    if(!rank)
    {
      for(int j = 0; j < num_rows; j++){
         fgets(strNum_rows, 20, inFile); 
         file_buffer[j] = atof(strNum_rows);
      }
        /*????? inFile >> file_buffer[j];*/
      sortByProcess(file_buffer, send_buffer, num_rows);
    }
     
    //printf("After sortByProcess ^^^^^^@");

    
    // Scatters the data so that each process gets the next value for their columns.
    MPI_Scatter(send_buffer, num_cols/* NOTE num_rows gives SCATTER ERROR &deviates from original code */, MPI_DOUBLE, recv_buffer, num_cols, MPI_DOUBLE, 0, MPI_COMM_WORLD   );
    for(int j = 0; j < num_cols; j++)
    {
      data[j][i] = recv_buffer[j];
    }
  }

    //printf("After Scatter  ^^^^^^@");
   fclose(inFile);
  /*delete []*/ free(recv_buffer);
  /*delete []*/ free(send_buffer);
                //free(file_buffer);
                 /*delete []*/ //free(send_buffer);
  for(int i = 0; i < num_cols; i++)
    /*delete []*/ free( data[i]);
  /*delete []*/ free(data);
  // Begin timing.
  MPI_Barrier(MPI_COMM_WORLD);
  sTime = MPI_Wtime();

   printf("After Barrier  ^^^^^^@");
   MPI_Finalize();
    return 0;

}


The error messages are:



$ mpirun -np 4 ./a.out matrix.3200.txt
[lc2530hz:05234] *** Process received signal ***
[lc2530hz:05234] Signal: Segmentation fault (11)
[lc2530hz:05234] Signal code: (128)
[lc2530hz:05234] Failing at address: (nil)
[lc2530hz:05235] *** Process received signal ***
[lc2530hz:05235] Signal: Segmentation fault (11)
[lc2530hz:05235] Signal code: (128)
[lc2530hz:05235] Failing at address: (nil)
[lc2530hz:05236] *** Process received signal ***
[lc2530hz:05236] Signal: Segmentation fault (11)
[lc2530hz:05236] Signal code: (128)
[lc2530hz:05236] Failing at address: (nil)
[lc2530hz:05234] [ 0] [lc2530hz:05235] [ 0] [lc2530hz:05236] [ 0] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7f593d9cff20]
[lc2530hz:05236] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7fa95388af20]
[lc2530hz:05235] [ 1] /lib/x86_64-linux-gnu/libc.so.6(+0x3ef20)[0x7f4ac3b38f20]
[lc2530hz:05234] [ 1] /lib/x86_64-linux-gnu/libc.so.6(fclose+0xd4)[0x7f593da0f324]
[lc2530hz:05236] [ 2] /lib/x86_64-linux-gnu/libc.so.6(fclose+0xd4)[0x7fa9538ca324]
[lc2530hz:05235] [ 2] ./a.out(+0x125b)[0x557c08d7625b]
[lc2530hz:05235] /lib/x86_64-linux-gnu/libc.so.6(fclose+0xd4)[0x7f4ac3b78324]
[lc2530hz:05234] [ 2] ./a.out(+0x125b)[0x[ 3] ./a.out(+0x125b)[0x55def934725b]
[lc2530hz:05236] [ 3] 558ef74dd25b]
[lc2530hz:05234] [ 3] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x7f593d9b2b97]
[lc2530hz:05236] [ 4] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x7fa95386db97]
[lc2530hz:05235] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7)[0x7f4ac3b1bb97]
[lc2530hz:05234] [ 4] ./a.out(+0xc6a)[0x558ef74dcc6a]
[ 4] ./a.out(+0xc6a)[0x557c08d75c6a]
[lc2530hz:05235] *** End of error message ***
./a.out(+0xc6a)[0x55def9346c6a]
[lc2530hz:05236] *** End of error message ***
[lc2530hz:05234] *** End of error message ***
--------------------------------------------------------------------------
mpirun noticed that process rank 1 with PID 0 on node lc2530hz exited on signal 11 (Segmentation fault).
--------------------------------------------------------------------------
zulfi@lc2530hz:~/c programs/MPI_PROG/GEGH$


Some body please guide me.

Zulfi.

1. Add the -g option when you compile your code. This gets you debugging information.
So
./a.out(+0x125b)[0x557c08d7625b]
might become
main.c:160

2. Look carefully, you can see that fclose is mentioned many times.

3. if(!rank)
You use this to guard the file open, and file read.
But you DON'T use it to guard the file close.

Hi,

Thanks for your guidance.

This problem got solved.

You really deserve great appreciation for solving this problem.

God bless you.

Zulfi.
Topic archived. No new replies allowed.