help with OMP

Hi there,
I need help with the following problem:
Multiplication of a matrix by a vector: Ap = q.
This may sound trivial but in my case it is not simply because the dimensions of A are 250,000,000 by 250,000,000. The variables in my algorithm are:
dataMatrix (50,000,000 x 30) is a matrix that contains indices (addresses) for the matrix A
dataRecord is a row of dataMatrix
nrow is the index to the corresponding Rinv matrix, e.g Rinv(5,9,9)
the call to the function getASddress creates two matrices address containing the indices of A, say A(3,5) and coefficient contains the corresponding values of A(3,5).
p is an input vector and q is the output vector
My attempt to parallelise the algorithm failed with a crash of the program. Any help to fix the problem and improve the speed of the program will be greatly appreciated. Below is the code (note that I am using Blitz arrays):
int main() {

Array<int,2> dataMatrix, address;
Array<int,1> dataRecord;
Array<double,2> coefficient;
Array<double,3> Rinv;
Array<double,1> p, q, t1, t2;
dataMatrix.resize(5000000,30);
dataRecord.resize(30);
int newNumFactors = 20;
int missingValue = -1;
int numberOfTraits = 9;
int nrow;
address.resize(newNumFactors,numberOfTraits);
coefficient.resize(newNumFactors,numberOfTraits);
Rinv.resize(1000,numberOfTraits,numberOfTraits);
p.resize(10000000);
q.resize(10000000);
t1.resize(numberOfTraits);
t2.resize(numberOfTraits);
for (int i = 0; i < 1000; i++) {
Rinv(i,Range::all(),Range::all()) = 5.0;
}


dataMatrix = 1;
p = 4.0;
q = 0.0;
int numOfThreads = 5;
//#pragma omp parallel for num_threads(numOfThreads)
#pragma omp parallel
{
#pragma omp for
for (int ir = 0; ir < dataMatrix.rows(); ir++) {
dataRecord(Range::all()) = dataMatrix(ir, Range::all());
nrow = 63;
// find address and design matrix coefficients
// getAddress();
for (int i = 0; i < newNumFactors; i++) {
for (int k = 0; k < numberOfTraits; k++) {
address(i,k) = i + k + 100;
coefficient(i,k) = 0.707;
}
}

// calculate contributions
t1 = 0.0;
for (int i = 0; i < newNumFactors; i++) {
for (int k = 0; k < numberOfTraits; k++) {
if (address(i,k) == missingValue) {continue;}
t1(k) += coefficient(i,k)*p(address(i,k));
}
}

t2 = 0.0;
for (int k = 0; k < numberOfTraits; k++) {
for (int l = 0; l < numberOfTraits; l++) {
t2(k) += Rinv(nrow,k,l)*t1(l);
}
}
for (int i = 0; i < newNumFactors; i++) {
for (int j = 0; j < numberOfTraits; j++) {
if (address(i,j) == missingValue) {continue;}
q(address(i,j)) += coefficient(i,j)*t2(j);
}
}
} // end of data loop
}





return EXIT_SUCCESS;


}
Last edited on
Topic archived. No new replies allowed.