MPI_Gether returns SUCCESS But Data in

Forum

Forum
UNIX/Linux Programming
MPI_Gether returns SUCCESS But Data in

MPI_Gether returns SUCCESS But Data in the Buffer M_Div is ZERO

Hi,

I am following the tutorial at:

http://mpitutorial.com/tutorials/mpi-scatter-gather-and-allgather/

I have done MPI_Scatter(...) to distribute data. After that I am doing division using a function computerDivion. computerDivision is returning a pointer and storing it in div_buffer.computeDivison is returning a pointer to array of double values of size num_cols.After that I am calling MPI_Gather(...). I want to store the data returned by each process into a pointer to pointer of 2Darray but the contents of 2D Array M_div is zero. Also MPI_Gather is return a SUCCESS value.

My code is given below:

#include <iostream>
#include <cmath>
#include <mpi.h>
#include <fstream>
#include <ctime>
#include <vector>
#define N 16

int rank, size;
double* computeDivisions(double* recv_buffer, int, int);
void GaussianElimination(double **,double *b ,double *y);
int main(int argc, char * argv[])
{
  double sTime, eTime, rTime;
  std::ifstream inFile;
  int num_rows = 4;
  int num_cols = 4;
  int num_processors =4;
  int cur_control = 0;
  double * send_buffer = NULL;
  double * recv_buffer = NULL;
  double ** data = NULL;
  double determinant;
  double *div_buffer=NULL;
  int irow =0; int icol=0; int iIndex =0;
  std::vector<double> file_buffer;
   double **M_div=NULL;


  double **M_A, *I_A, *I_B, *I_Y;
  double *Output, Pivot;
  I_B = NULL;
  I_B = new double[N];
  if(I_B == NULL){
    std::cout<< " I_A can't be allocated memory";
    MPI_Finalize();
    return -2;
  }
  I_A = NULL;
  I_A = new double[N];
  if(I_A == NULL){
    std::cout<< " I_A can't be allocated memory";
    MPI_Finalize();
    return -2;
  }
  I_Y = NULL;
  I_Y = new double[N];
  if(I_Y== NULL){
    std::cout<< " I_B can't be allocated memory";
    MPI_Finalize();
    return -2;
  }
  
 recv_buffer = new double[N];
 if(recv_buffer== NULL){
    std::cout<< " recv_buffer can't be allocated memory";
    return -2;
  }
   if(!rank)
  {
    
       
       std::cout<<"WE have "<<size <<" processors " <<std::endl;
       M_A[0][0] =2.0;M_A[0][1] =1.0; M_A[0][2] = -1.0; M_A[0][3] =2.0;  //A[0][3] = 12.0;
       M_A[1][0] =4.0;M_A[1][1] =5.0; M_A[1][2] =-3.0;   M_A[1][3] = 6.0;//A[1][3] = 0.0;
       M_A[2][0] =-2.0;M_A[2][1] =5.0; M_A[2][2] = -2.0; M_A[2][3]=6.0;//A[0][3] = -9;
       M_A[3][0] =4.0;M_A[3][1] =11.0; M_A[3][2] = -4.0; M_A[3][3]=8.0;//A[0][3] = -9;
       I_B[0] = 5; I_B[1] = 9; I_B[2] = 4; I_B[3]=2;
       
       //2d to 1d array is giving core dumped
       
       for(irow=0; irow<num_rows; irow++)
	  	  for(icol=0; icol<num_cols; icol++)
			  I_A[iIndex++] = M_A[irow][icol];

  

   }//if(!rank)

  

   MPI_Bcast (&num_rows, 1, MPI_INT, 0, MPI_COMM_WORLD);

   MPI_Scatter(I_A, num_cols, MPI_DOUBLE, recv_buffer, num_cols, MPI_DOUBLE, 0, MPI_COMM_WORLD);//data goes to each process

   //http://mpitutorial.com/tutorials/mpi-scatter-gather-and-allgather/
   
   div_buffer = computeDivisions(recv_buffer, rank, num_cols);

   if(rank==0){
     std::cout<<"printing div_buffer"<<std::endl;
     for(int j=0; j<num_cols; j++)
           std::cout<<div_buffer[j]<<" ";

    std::cout<<"printing of div_buffer ends"<<std::endl;
   }
   

  int ret_val = MPI_Gather(M_div, num_cols, MPI_DOUBLE, div_buffer, num_cols, MPI_DOUBLE, 0, MPI_COMM_WORLD);
  
  if(ret_val == MPI_SUCCESS)
  std::cout<<"MPI_Gather Success";
  else if(ret_val == MPI_ERR_COMM)
     std::cout<<"MPI_Gather : MPI_ERR_COMM";
  else if(ret_val == MPI_ERR_COUNT)
    std::cout<<"MPI_Gather : MPI_ERR_COUNT";
  else if(ret_val == MPI_ERR_TYPE)
    std::cout<<"MPI_Gather : MPI_ERR_TYPE";
  else if(ret_val == MPI_ERR_BUFFER)
    std::cout<<"MPI_Gather : MPI_ERR_BUFFER";


   if(rank==0){
     std::cout<<"printing M_div"<<std::endl;
     for(int i=0; i<num_rows;i++){   
        for(int j=0; j<num_cols; j++)
           std::cout<<M_div[i][j]<<" ";
        std::cout<<std::endl;
     }
     std::cout<<"printing of M_div ends"<<std::endl;
   }
  
    for(int i = 0; i < num_rows; i++)
       delete [] M_div[i];
    delete [ ] M_div;
    delete [ ] div_buffer;
    delete [] I_Y;
    delete [ ]I_B;
    for(int i = 0; i < num_rows; i++)
       delete [] M_A[i];
    delete [ ] M_A;// No need to delete A because A is not created dynamically
    MPI_Finalize();
    return 0;
}


double* computeDivisions(double* recv_buffer, int rank, int num_cols){
       
            for(int j= rank+1; j <=num_cols-1; ++j)
               recv_buffer[j] = recv_buffer[j]/recv_buffer[rank];
              
            
            return recv_buffer;
}

My output is:

$ mpirun -np 4 ./a.out
WE have 4 processors
printing div_buffer
2 0.5 -0.5 1 printing of div_buffer ends
MPI_Gather Successprinting M_div
0 0 0 0
0 0 0 0
0 0 0 0
0 0 0 0
printing of M_div ends
MPI_Gather SuccessMPI_Gather SuccessMPI_Gather Success

Some body please guide me.

Zulfi.

lastchance (6980)

@zak100
The code you have presented won't produce the output you claim. It doesn't even have statements to initialise the MPI system (MPI_Init etc.)

If you want help then:
- present ALL your code properly, so that we have a chance of running it and reproducing your problem;
- remove all the lines completely unnecessary to your problem, as otherwise we are just looking at an obscure walll of code;
- use proper indentation and stop mixing spaces and tabs randomly.

I suspect you have the send and receive buffers the wrong way round in MPI_Gather, but who knows: your code won't run.

For reference (noting positions of the sending and receiving buffers):

1
2
3

int MPI_Gather(const void *sendbuf, int sendcount, MPI_Datatype sendtype,
               void *recvbuf, int recvcount, MPI_Datatype recvtype,
               int root, MPI_Comm comm)

Obviously, only the root processor needs a valid receive buffer here.

In another thread @mbozzi asked you why you aren't using std::vector., and he has a point. If you need to get at the contiguous data of a std::vector V then you can use V.data() - see
http://www.cplusplus.com/reference/vector/vector/data/
Then you won't have to be continually creating arrays on the heap with new.

Last edited on

zak100 (67)

Hi,
Thanks for your response. I would take care of removing all unnecessary code next time

asked you why you aren't using std::vector., and he has a point

Do you mean I have to do it for all my declarations :

double *I_A, *I_B, *I_Y, *recv_buffer, *div_buffer;

For example for I_A I have to write following code:

1
2
3

std::vector<int> myvector_I_A (N);

int* I_A = myvector_I_A.data();

and then again for I_B like this:

1
2

std::vector<int> myvector_I_B (num_rows);
int* I_B = myvector_I_B.data();

Some body please guide me.

Zulfi.

Last edited on

lastchance (6980)

Here you go @zak100 (EDITED)

#include <iostream>
#include <iomanip>
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include "mpi.h"
using namespace std;

const double SMALL = 1.0E-30;     // Used to stop divide-by-zero
const double PZERO = 1.0E-10;     // Interpretation of "zero" in output

using vec = vector<double>;
using veci = vector<int>;
using matrix = vector<vec>;   

// Function prototypes
bool GaussElimination( matrix A, vec &X );
ostream &operator << ( ostream &strm, const vec &V );
matrix readData();
matrix randomData( int N );

// MPI stuff
int tag = 99;
MPI_Status stat;
int myrank, size;
veci procRow;   // Processor owning a given row
veci localRow;  // Number of row on its own processor

//======================================

int main( int argc, char * argv[] )
{
   MPI_Init( &argc, &argv );
   MPI_Comm_rank( MPI_COMM_WORLD, &myrank );
   MPI_Comm_size( MPI_COMM_WORLD, &size   );

   const int NTIMES = 1000;     // Repeat count
   int N = 128;                 // Generated size
   bool random = false;
   unsigned seed = 42;
// unsigned seed = time( 0 );
   srand( seed );
   matrix A;
   vec X;
   bool ok;


   // Set up matrix and RHS
   if ( random ) A = randomData( N );       // Randomly generated
   else          A = readData();            // Read from (e.g.) file


   // Solve (time multiple runs)
   clock_t start  = clock();
   for ( int times = 1; times <= NTIMES; times++ ) ok = GaussElimination( A, X );
   clock_t finish = clock();

   if ( myrank == 0 )
   {
      cout << "Time: " << (double)( finish - start ) / CLOCKS_PER_SEC << "\n\n";
      if ( ok ) cout << "Solution: " << X << '\n';
      else      cout << "Unable to solve\n";
   }

   MPI_Finalize();
}

//======================================

bool GaussElimination( matrix A, vec &X )
//-------------------------------------------------------------------
// Solve linear system by Gaussian elimination
// Note: A is the AUGMENTED matrix (last column is actually B); it has N rows and N+1 columns
// Note: Copy of A because first argument is passed by value
// Note: Each processor only has SOME of the rows
//-------------------------------------------------------------------
{
   int N = A[0].size() - 1;
   vec row( N + 1 );          
   X = vec( N );
   int proc;                           // Processor in charge of pivot row
   bool doPivoting = true;             // Slower, but more reliable
   struct ptype{ double Amax; int r; };

   //**********************
   // Forward elimination *
   //**********************
   for ( int i = 0; i < N; i++ )
   {
      proc = procRow[i];

      //************************************************************************
      // Partial pivoting: find row r below i with largest element in column i *
      //************************************************************************
      if ( doPivoting && i < N - 1 )
      {
         // Each processor updates its own maxA
         ptype p{ 0.0, i }, pmax;
         for ( int k = i; k < N; k++ )
         {
            if ( myrank == procRow[k] )
            {
               double val = abs( A[localRow[k]][i] );
               if ( val > p.Amax ) p = ptype{ val, k };
            }
         }                                                              
   
         // Global reduction to get the row to swap with
         MPI_Allreduce( &p, &pmax, 1, MPI_DOUBLE_INT, MPI_MAXLOC, MPI_COMM_WORLD );
         int r = pmax.r;
   
         // Swap (tails of) ith and rth rows
         if ( r != i )
         {
            int remote = procRow[r];
            if ( myrank == proc && myrank == remote )      // Swap rows on same processor
            {
               int ii = localRow[i];
               int rr = localRow[r];
               for ( int j = i; j <= N; j++ ) swap( A[ii][j], A[rr][j] );
            }
            else if ( myrank == proc )
            {
               int ii = localRow[i];
               MPI_Sendrecv_replace( A[ii].data() + i, N + 1 - i, MPI_DOUBLE, remote, tag, remote, tag, MPI_COMM_WORLD, &stat );
            }
            else if ( myrank == remote )
            {
               int rr = localRow[r];
               MPI_Sendrecv_replace( A[rr].data() + i, N + 1 - i, MPI_DOUBLE, proc  , tag, proc  , tag, MPI_COMM_WORLD, &stat );
            }
         }
      }

      //**************************
      // Reduction of lower rows *
      //**************************
      // Send tail of pivot row to other processors
      if ( myrank == proc ) row = A[localRow[i]];
      MPI_Bcast( row.data() + i, N + 1 - i, MPI_DOUBLE, proc, MPI_COMM_WORLD );
      if ( abs( row[i] ) < SMALL ) return false;           // Singular matrix

      for ( int r = i + 1; r < N; r++ )         
      {
         if ( myrank == procRow[r] )
         {
            int rr = localRow[r];   
            double multiple = A[rr][i] / row[i];
            for ( int j = i; j < N + 1; j++ ) A[rr][j] -= multiple * row[j];
         }
      }
   }

   //***********************
   // Backward elimination *
   //***********************
   for ( int i = N - 1; i >= 0; i-- )
   {
      proc = procRow[i];
      if ( myrank == proc )
      {
         int ii = localRow[i];
         X[i] = A[ii][N] / A[ii][i];   // Solution for the pivot row (also a multiplier)
      }
      MPI_Bcast( &X[i], 1, MPI_DOUBLE, proc, MPI_COMM_WORLD );
      for ( int r = i - 1; r >= 0; r-- )
      {
         if ( myrank == procRow[r] )
         {
            int rr = localRow[r];
            A[rr][N] -= A[rr][i] * X[i];
            A[rr][i] = 0.0;
         }
      }
   }

   return true;
}

//======================================

ostream &operator << ( ostream &strm, const vec &V )
{
   for ( auto x : V ) strm << setw( 12 ) << ( abs( x ) < PZERO ? 0.0 : x ) << ' ';
   return strm;
}

//======================================

matrix readData()
{
// ifstream in( "in.txt" );
   stringstream in( "4"
                    " 1  2  3  4     1"
                    "-2  5  5  7     2"
                    " 1  9 10  3     3"
                    " 2  2  4  3     4" );

   int N;
   if ( !myrank ) in >> N;
   MPI_Bcast( &N, 1, MPI_INT, 0, MPI_COMM_WORLD );
   procRow  = veci( N );
   localRow = veci( N );

   matrix A;
   vec row( N + 1 );

   for ( int i = 0; i < N; i++ )
   {
      procRow [i] = i % size;
      localRow[i] = i / size;

      int proc = procRow[i];
      if ( myrank == 0 )
      {
         for ( int j = 0; j < N + 1; j++ ) in >> row[j];
         if ( myrank == proc ) A.push_back( row );
         else                  MPI_Send( row.data(), N + 1, MPI_DOUBLE, proc, tag, MPI_COMM_WORLD );
      }
      else if ( myrank == proc ) 
      {
         MPI_Recv( row.data(), N + 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &stat );
         A.push_back( row );
      }
   }

   return A;
}

//======================================

matrix randomData( int N )
{
   procRow  = veci( N );
   localRow = veci( N );

   matrix A;
   vec row( N + 1 );

   for ( int i = 0; i < N; i++ )
   {
      procRow [i] = i % size;
      localRow[i] = i / size;

      int proc = procRow[i];
      if ( myrank == 0 )
      {
         for ( int j = 0; j < N + 1; j++ ) row[j] = rand() % 10;
         if ( myrank == proc ) A.push_back( row );
         else                  MPI_Send( row.data(), N + 1, MPI_DOUBLE, proc, tag, MPI_COMM_WORLD );
      }
      else if ( myrank == proc ) 
      {
         MPI_Recv( row.data(), N + 1, MPI_DOUBLE, 0, tag, MPI_COMM_WORLD, &stat );
         A.push_back( row );
      }
   }

   return A;
}

//======================================

Last edited on

zak100 (67)

Hi,

Thanks for your help. I found a sol of MPI_Gather(....). I have used different buffers. Thanks for this program. Actually I don't have to use pivoting or partial pivoting.I have to use a specific alg from the book, My code basically tries to implement that alg.

Zulfi.

Topic archived. No new replies allowed.

C++

Forum

MPI_Gether returns SUCCESS But Data in the Buffer M_Div is ZERO