DLL Calculation does not agree with Exce

Forum

Forum
Beginners
DLL Calculation does not agree with Exce

DLL Calculation does not agree with Excel

Pages: 12

I'm trying to understand why the DLL built from the code below does not get the same result as Excel. In Excel I get: 0.892857143 From the DLL I get: 0.904762
The code was provided by someone here on the forum.

#include <iostream>
#include <iomanip>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>
#include <numeric>
#import "C:\Program Files (x86)\TradeStation 10.0\Program\tskit.dll" no_namespace
#include <atlbase.h>
using namespace std;

const double SMALL = 1.0E-30;          // used to stop divide-by-zero

using vec = vector<double>;         // vector
using matrix = vector<vec>;            // matrix

// Function prototypes
double poly(const vec& C, double x);
matrix transpose(const matrix& A);
matrix matmul(const matrix& A, const matrix& B);
vec    matmul(const matrix& A, const vec& V);
bool solve(const matrix& A, const vec& B, vec& X);
bool polynomialRegression(const vec& X, const vec& Y, int degree, vec& C, double& Rsquared);

//====================================================================== 
double __stdcall Poly3Fit(IEasyLanguageObject* elObj, char* arrayName)
{
    IEasyLanguageVariable* arr = elObj->Variables[arrayName];
    int i = arr->DimensionSize[0];
    
 // Test Data from 09.13.23
    //vec X = {833,836,839,842,845,848};
    vec X = {0, 1, 2, 3, 4, 5};
    vec Y = {4515,4515,4515,4516,4516,4516};
    
    vec C;
    int degree = 3;
    double Rsquared;
    if (polynomialRegression(X, Y, degree, C, Rsquared))
    {
        return Rsquared;
    }
    else
        return 0;
}

//======================================================================

double poly(const vec& C, double x)
{
    double result = 0.0;
    for (int i = C.size() - 1; i >= 0; i--) result = C[i] + result * x;
    return result;
}

//====================================================================== 

matrix transpose(const matrix& A)                        // Transpose a matrix
{
    int m = A.size(), n = A[0].size();
    matrix AT(n, vec(m));
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < m; j++) AT[i][j] = A[j][i];
    }
    return AT;
}

//======================================================================

matrix matmul(const matrix& A, const matrix& B)          // Matrix times matrix
{
    int rowsA = A.size(), colsA = A[0].size();
    int rowsB = B.size(), colsB = B[0].size();
    assert(colsA == rowsB);

    matrix C(rowsA, vec(colsB, 0.0));
    for (int i = 0; i < rowsA; i++)
    {
        for (int j = 0; j < colsB; j++)
        {
            for (int k = 0; k < colsA; k++) C[i][j] += A[i][k] * B[k][j];
        }
    }
    return C;
}

//======================================================================

vec matmul(const matrix& A, const vec& V)                // Matrix times vector
{
    int rowsA = A.size(), colsA = A[0].size();
    int rowsV = V.size();
    assert(colsA == rowsV);

    vec C(rowsA, 0.0);
    for (int i = 0; i < rowsA; i++)
    {
        for (int k = 0; k < colsA; k++) C[i] += A[i][k] * V[k];
    }
    return C;
}

//======================================================================

bool solve(const matrix& A, const vec& B, vec& X)
//--------------------------------------
// Solve AX = B by Cholesky factorisation of A (i.e. A = L.LT)
// Requires A to be SYMMETRIC
//--------------------------------------
{
    int n = A.size();

    // Cholesky-factorise A
    matrix L(n, vec(n, 0));
    for (int i = 0; i < n; i++)
    {
        // Diagonal value
        L[i][i] = A[i][i];
        for (int j = 0; j < i; j++) L[i][i] -= L[i][j] * L[i][j];
        L[i][i] = sqrt(L[i][i]);
        if (abs(L[i][i]) < SMALL) return false;

        // Rest of the ith column of L
        for (int k = i + 1; k < n; k++)
        {
            L[k][i] = A[k][i];
            for (int j = 0; j < i; j++) L[k][i] -= L[i][j] * L[k][j];
            L[k][i] /= L[i][i];
        }
    }

    // Solve LY = B, where L is lower-triangular and Y = LT.X
    vec Y = B;
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < i; j++) Y[i] -= L[i][j] * Y[j];
        Y[i] /= L[i][i];
    }

    // Solve UX = Y, where U = LT is upper-triangular
    X = Y;
    for (int i = n - 1; i >= 0; i--)
    {
        for (int j = i + 1; j < n; j++) X[i] -= L[j][i] * X[j];
        X[i] /= L[i][i];
    }

    return true;
}

//====================================================================== 

bool polynomialRegression(const vec& X, const vec& Y, int degree, vec& C, double& Rsquared)
{
    int N = X.size();   assert(Y.size() == N);
    matrix A(N, vec(1 + degree));
    for (int i = 0; i < N; i++)
    {
        double xp = 1;
        for (int j = 0; j <= degree; j++)
        {
            A[i][j] = xp;
            xp *= X[i];
        }
    }

    // Solve the least-squares problem for the polynomial coefficients C
    matrix AT = transpose(A);
    if (!solve(matmul(AT, A), matmul(AT, Y), C)) return false;

    // Calculate R^2
    vec AC = matmul(A, C);
    double sumYsq = 0, sumACsq = 0, sumY = 0.0;
    for (int i = 0; i < N; i++)
    {
        sumY += Y[i];
        sumYsq += Y[i] * Y[i];
        sumACsq += AC[i] * AC[i];
    }
    Rsquared = 1.0 - (sumYsq - sumACsq) / (sumYsq - sumY * sumY / N + SMALL);
    return true;
}

//====================================================================== 

// Interface routine that takes a (pointer to) an array Y, of size N,
// and returns the R^2 parameter for a cubic polynomial fit to the points
// (0,Y0), (1,y1), ..., (N-1,yN-1)

double PolyFitFunction(double* Y, int N)
{
    vec X(N);   for (int i = 0; i < N; i++) X[i] = i;
    vec F(Y, Y + N);
    vec C;
    double Rsquared;
    if (polynomialRegression(X, F, 3, C, Rsquared)) return Rsquared;
    else return 0.0;
}

//======================================================================

Last edited on

kigar64551 (784)

Why do you think the result should be exactly the same?

First of all, double is a 64-Bit IEEE-754 floating-point ("double precision") type. And floating-point math has limited precision, which means that there can (and usually is) some "rounding" error in the results! That's especially true, when you chain multiple operations.

👉 https://en.wikipedia.org/wiki/IEEE_754
👉 https://en.wikipedia.org/wiki/Round-off_error

Do you know for sure that Excel also uses "double precision" (64-Bit) floating-point math? If it uses a smaller floating-point type, e.g. "single precision" (32-Bit), then it will probably have bigger rounding error than your code. And, if it uses a bigger floating-point type, e.g. "quadruple precision" (128-Bit), then it will probably have smaller rounding error than your code. In both cases, the final results are going to differ!

Furthermore, as mentioned before, rounding errors accumulate over time, if you chain multiple (many) operations on floating-point values. And the exact order in which the individual operations are done can make a big difference of how rounding errors accumulate! So, if Excel chooses a different order of the operations (or even a completely different algorithm 😲) to do the "same" calculation as in your code, then this alone can result in very different rounding errors – and therefore in different final result.

Last but not least, the C/C++ compiler has various options that influence floating-point operations. Simply put, you can sacrifice some precision for more speed, or sacrifice some speed for more precision. See here for details, as far as MSVC is concerned:

https://learn.microsoft.com/en-us/cpp/build/reference/fp-specify-floating-point-behavior?view=msvc-170

Last edited on

seeplus (6457)

Another possible is sqrt() L121. This may not give the same precision as Excel.

Also see:
https://learn.microsoft.com/en-us/office/troubleshoot/excel/floating-point-arithmetic-inaccurate-result
https://en.wikipedia.org/wiki/Numeric_precision_in_Microsoft_Excel

Atom1 (52)

When I build it as an exe, the answers agree with Excel to 6 decimal places. When I build it as a dll then the answers reported by the calling app do not agree with Excel beyond 2 or 3 decimal places.

This is the code that produces the same answer as Excel.

#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <cassert>
#include <cmath>
using namespace std;

const double SMALL = 1.0E-30;          // used to stop divide-by-zero

using vec = vector<double>;         // vector
using matrix = vector<vec>;            // matrix

// Function prototypes
double poly(const vec& C, double x);
matrix transpose(const matrix& A);
matrix matmul(const matrix& A, const matrix& B);
vec    matmul(const matrix& A, const vec& V);
bool solve(const matrix& A, const vec& B, vec& X);
bool polynomialRegression(const vec& X, const vec& Y, int degree, vec& C, double& Rsquared);

//======================================================================

double poly(const vec& C, double x)
{
    double result = 0.0;
    for (int i = C.size() - 1; i >= 0; i--) result = C[i] + result * x;
    return result;
}

//====================================================================== 

matrix transpose(const matrix& A)                        // Transpose a matrix
{
    int m = A.size(), n = A[0].size();
    matrix AT(n, vec(m));
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < m; j++) AT[i][j] = A[j][i];
    }
    return AT;
}

//======================================================================

matrix matmul(const matrix& A, const matrix& B)          // Matrix times matrix
{
    int rowsA = A.size(), colsA = A[0].size();
    int rowsB = B.size(), colsB = B[0].size();
    assert(colsA == rowsB);

    matrix C(rowsA, vec(colsB, 0.0));
    for (int i = 0; i < rowsA; i++)
    {
        for (int j = 0; j < colsB; j++)
        {
            for (int k = 0; k < colsA; k++) C[i][j] += A[i][k] * B[k][j];
        }
    }
    return C;
}

//======================================================================

vec matmul(const matrix& A, const vec& V)                // Matrix times vector
{
    int rowsA = A.size(), colsA = A[0].size();
    int rowsV = V.size();
    assert(colsA == rowsV);

    vec C(rowsA, 0.0);
    for (int i = 0; i < rowsA; i++)
    {
        for (int k = 0; k < colsA; k++) C[i] += A[i][k] * V[k];
    }
    return C;
}

//======================================================================

bool solve(const matrix& A, const vec& B, vec& X)
//--------------------------------------
// Solve AX = B by Cholesky factorisation of A (i.e. A = L.LT)
// Requires A to be SYMMETRIC
//--------------------------------------
{
    int n = A.size();

    // Cholesky-factorise A
    matrix L(n, vec(n, 0));
    for (int i = 0; i < n; i++)
    {
        // Diagonal value
        L[i][i] = A[i][i];
        for (int j = 0; j < i; j++) L[i][i] -= L[i][j] * L[i][j];
        L[i][i] = sqrt(L[i][i]);
        if (abs(L[i][i]) < SMALL) return false;

        // Rest of the ith column of L
        for (int k = i + 1; k < n; k++)
        {
            L[k][i] = A[k][i];
            for (int j = 0; j < i; j++) L[k][i] -= L[i][j] * L[k][j];
            L[k][i] /= L[i][i];
        }
    }

    // Solve LY = B, where L is lower-triangular and Y = LT.X
    vec Y = B;
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < i; j++) Y[i] -= L[i][j] * Y[j];
        Y[i] /= L[i][i];
    }

    // Solve UX = Y, where U = LT is upper-triangular
    X = Y;
    for (int i = n - 1; i >= 0; i--)
    {
        for (int j = i + 1; j < n; j++) X[i] -= L[j][i] * X[j];
        X[i] /= L[i][i];
    }

    return true;
}

//====================================================================== 

bool polynomialRegression(const vec& X, const vec& Y, int degree, vec& C, double& Rsquared)
{
    int N = X.size();   assert(Y.size() == N);
    matrix A(N, vec(1 + degree));
    for (int i = 0; i < N; i++)
    {
        double xp = 1;
        for (int j = 0; j <= degree; j++)
        {
            A[i][j] = xp;
            xp *= X[i];
        }
    }

    // Solve the least-squares problem for the polynomial coefficients C
    matrix AT = transpose(A);
    if (!solve(matmul(AT, A), matmul(AT, Y), C)) return false;

    // Calculate R^2
    vec AC = matmul(A, C);
    double sumYsq = 0, sumACsq = 0, sumY = 0.0;
    for (int i = 0; i < N; i++)
    {
        sumY += Y[i];
        sumYsq += Y[i] * Y[i];
        sumACsq += AC[i] * AC[i];
    }
    Rsquared = 1.0 - (sumYsq - sumACsq) / (sumYsq - sumY * sumY / N + SMALL);
    return true;
}

//====================================================================== 

// Interface routine that takes a (pointer to) an array Y, of size N,
// and returns the R^2 parameter for a cubic polynomial fit to the points
// (0,Y0), (1,y1), ..., (N-1,yN-1)

// extern "C"  __declspec( dllexport ) double __stdcall PolyFitFunction( double *Y, int N )   // VERY UNSURE ABOUT THIS
double PolyFitFunction(double* Y, int N)
{
    vec X(N);   for (int i = 0; i < N; i++) X[i] = i;
    vec F(Y, Y + N);
    vec C;
    double Rsquared;
    if (polynomialRegression(X, F, 3, C, Rsquared)) return Rsquared;
    else                                                return 0.0;
}

//====================================================================== 

int main()
{
    //Test Data - Verified to produce correct answer
    vec X = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
    vec Y = { 4585.75, 4592.38, 4606.17, 4615.81, 4615.70, 4615.21, 4617.86, 4622.12, 4624.92, 4624.12, 4627.09 };

    // Data
 //   vec X = { 0, 1, 2, 3, 4, 5 };
    int N = X.size();
 //   vec Y(N);
 //   // Exact cubic polynomial
 //   for (int i = 0; i < N; i++) Y[i] = 10 + 8 * X[i] + 6 * X[i] * X[i] + 4 * X[i] * X[i] * X[i];
 //   // Fudge some values or you'll get perfect agreement
 //  Y[0] -= 10; Y[N - 1] += 20;    // Comment this out and you should get the original cubic


    vec C;
    int degree = 3;
    double Rsquared;
    if (polynomialRegression(X, Y, degree, C, Rsquared))
    {
        cout << "Coefficients in C0 + C1.X + C2.X^2 + ... : ";
        for (double e : C) cout << e << "  ";

        cout << "\n\nCheck fit: (Xi, Yi, poly(Xi) )\n";
        for (int i = 0; i < X.size(); i++) cout << X[i] << '\t' << Y[i] << '\t' << poly(C, X[i]) << '\n';

        cout << "\n\nR^2 = " << Rsquared << '\n';

        // Can we get the same answer via an interface function? (Assumes X is { 0, 1, 2, ... } )
        cout << "\nR^2 (by interface) = " << PolyFitFunction(Y.data(), N) << '\n';
    }
    else
    {
        cerr << "Unable to solve\n";
    }
    return 0;
}

Last edited on

kigar64551 (784)

Are you sure that you are using the exactly same compiler settings, especially regarding /fp, when building the EXE and the DLL?

Furthermore, I would suggest to add some diagnostic output to the code. For example, in each relevant function, dump (print) the input parameters right at the beginning of the function, and also dump (print) the final result right before the function returns.

(Maybe dump some intermediate values too, where it makes sense)

Build the EXE and DLL from the exactly same code, with diagnostic output. Then carefully compare the values between the "EXE" and the "DLL" version! Do the functions even get the exactly same input values? If so, where exactly do the values start to diverge ???

Last edited on

George P (5600)

Your program's numeric precision might be different than the precision Excel uses. Make multiple calculations on data that starts off with different precision will only compound the divergence.

No, I don't know what the precision of Excel's numeric data happens to be. :)

TheIdeasMan (6782)

The code was provided by someone here on the forum.

Well there is a recipe for subtle disaster right there.

Presumably all warnings were turned on, and the code compiled without any warnings?

Are you sure the algorithm used in the code is correct?

Do your input values form some corner case that the code does not deal with?

What the others are saying: Do some debugging, preferably with a debugger.

The precision of Excel is 15 significant figures (IEEE 754), aka double. This makes sense: float precision is very easily exceeded.
https://en.wikipedia.org/wiki/Numeric_precision_in_Microsoft_Excel

Some other tips:

ALWAYS initialise variables with a sensible value at declaration time.

When doing division, always check the divisor is in a suitable range, as well as not being zero.

lastchance (6980)

If you get the expected answer when compiling to an .exe but don’t when compiling to a .dll then you should probably check both your compilation script (and libraries) for making the .dll and also your calling routine.

BTW, the original code was mine and you could simply link the original:
https://cplusplus.com/forum/beginner/276718/#msg1194306

BTW2, who in their right mind would be trying to fit a cubic polynomial to this data:

1
2

    vec X = {0, 1, 2, 3, 4, 5};
    vec Y = {4515,4515,4515,4516,4516,4516};

In Excel I get: 0.892857143 From the DLL I get: 0.904762

Well, in my version of Excel I get 0.904764. If you graph it then it's a rubbish fit.

Last edited on

Atom1 (52)

I am satisfied that there is nothing wrong with the regression calculation (generously provided by lastchance). The tools I am working with involve passing an array to the dll using the Tradestation SDK (tskit.dll). That does seem to work as I am able to get the dll to return values that I am sending to it from Easylanguage. I think something is getting messed up in my translation of the array data to vectors, but I haven't been able to sort that out. I need to be able to examine all of the vector contents but I'm not quite sure of the best way to do that. Would the dll be able to send output to console?

Atom1 (52)

Here is the data when I'm getting back from the dll. It looks ok. This is just part of the data, but there are no anomalies. That's not very readable, sorry. What I've done is to have the dll return the last item in the vector instead of the r-squared value.

			Vector Y					VectorX
836	4515.65	1.00000	4515.25		836	4515.65	1.00000	2
839	4515.8925	1.00000	4515.65		839	4515.8925	1.00000	3
842	4516.22287	1.00000	4515.8925		842	4516.22287	1.00000	4
845	4516.47423	0.99736	4516.222875		845	4516.47423	0.99736	5
848	4516.72552	0.99818	4516.474231		848	4516.72552	0.99818	6
851	4516.75174	0.99626	4516.72552		851	4516.75174	0.99626	7
854	4516.78916	0.99672	4516.751744		854	4516.78916	0.99672	8
857	4516.9372	0.99326	4516.789157		857	4516.9372	0.99326	9
900	4517.21534	0.98919	4516.937199		900	4517.21534	0.98919	10
903	4517.29207	0.99090	4517.215339		903	4517.29207	0.99090	11
+

Last edited on

Atom1 (52)

I assume that the dll has no memory, so that each time it is called everything starts from scratch?

I did another test where at acts an an explicit polynomial which is created in the dll, and it returns 1.000000, so just confirming no problem with the regression calculation.

One thing adding confusion to this is that in Easylanguage, while arrays have a 0 element it is never used. I can now see that I have not been populating the array I'm passing correctly and then getting it converted to a vector. I believe once I get that sorted, it will all work.

Last edited on

kigar64551 (784)

I assume that the dll has no memory, so that each time it is called everything starts from scratch?

Depends on how the DLL is used! 😏

In any case, "local" (stack) variables will be new/separate every time that you call a function. They have "no memory".

Conversely, any "global" variables (as well as "static" variables declared inside a function) will keep their values across multiple function invocations – provided that the DLL containing those variables is not unloaded and re-loaded in between the function calls, of course!

If the application unloads the DLL after the first call and re-loads it before the second call, then even "global" variables may be reset 😮

Normally, you wouldn't do this. But who knows?

You can track when the DLL gets loaded and unloaded via your DllMain function:
https://learn.microsoft.com/de-de/windows/win32/dlls/dllmain

DLL_PROCESS_ATTACH means that the DLL was just loaded, DLL_PROCESS_DETACH means the DLL is about to be unloaded.

Last edited on

TheIdeasMan (6782)

lastchance wrote:
BTW, the original code was mine and you could simply link the original:

I am sorry for the possible negative implications of my comment.

newbieg (764)

Have you tried replacing all instances of "double" with "long double" yet?

Atom1 (52)

I have succeeded in getting the DLL to return results that agree somewhat with Excel and my existing method which agree exactly. I only did the Excel calculation on the first 10 cells. You can compare the Excel result using Linest function with the other two methods in the table. Part of my rationale for doing this was to see if the DLL was faster, and it is, but only by about 10%, so not really worth too much trouble. Part of this was just me wanting to know how to make the DLL. I will clean up the DLL code and post it later. Thanks to all for the help and suggestions.

				
	0.002667042	-0.052815857	0.474366927	4515.221381
	0.001431547	0.019633603	0.073505441	0.072211293
	0.989189097	0.079561048	#N/A	#N/A
	182.9984301	6	#N/A	#N/A
	3.475118449	0.037979762	#N/A	#N/A
				
				
				
Array Values			Old Method	DLL
833	0	4515.25		
836	1	4515.65		
839	2	4515.8925		
842	3	4516.22287		
845	4	4516.47423		
848	5	4516.72552	0.9981800000	0.9973580100
851	6	4516.75174	0.9962600000	0.9981824200
854	7	4516.78916	0.9967200000	0.9962635000
857	8	4516.9372	0.9932600000	0.9967223000
900	9	4517.21534	0.9891900000	0.9932622500

Last edited on

seeplus (6457)

Have you tried replacing all instances of "double" with "long double" yet

Note that with VS, double and long double both use 8 bytes for the type.
https://learn.microsoft.com/en-us/cpp/c-language/storage-of-basic-types?view=msvc-170

Atom1 (52)

Here is the code.

#include <iostream>
#include <iomanip>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <vector>
#include <numeric>
#import "C:\Program Files (x86)\TradeStation 10.0\Program\tskit.dll" no_namespace
#include <atlbase.h>
using namespace std;

const double SMALL = 1.0E-30;          // used to stop divide-by-zero

using vec = vector<double>;         // vector
using matrix = vector<vec>;            // matrix

// Function prototypes
double poly(const vec& C, double x);
matrix transpose(const matrix& A);
matrix matmul(const matrix& A, const matrix& B);
vec    matmul(const matrix& A, const vec& V);
bool solve(const matrix& A, const vec& B, vec& X);
bool polynomialRegression(const vec& X, const vec& Y, int degree, vec& C, double& Rsquared);

//====================================================================== 
double __stdcall Poly3Fit(IEasyLanguageObject* elObj, char* arrayName)
{
 
    IEasyLanguageVariable* arr = elObj->Variables[arrayName];
    int i = arr->DimensionSize[0];
    
    double y;
    int Len;
    double MA;
    //THIS IS JUST TO GET THE LENGTH OF THE INPUT DATA
    for (int n = 0; n < i; n++)
    {
        arr->SelectedIndex[0] = n;
        y = (arr->Value[0]);
        if (y != -1)
        {
            Len = n;
            MA = y;
        }
    }
    i = Len;
    vec X(i);
    vec Y(i);
    for (int n = 0; n < Len; n++)
    {
        arr->SelectedIndex[0] = n;
        Y[n] = (arr->Value[0]);
        X[n] = n;
    }
 
    double z;
    z = Y[1];//CHECK INPUT
    vec C;
    int degree = 3;
    double Rsquared;
    if (polynomialRegression(X, Y, degree, C, Rsquared))
    {
        return Rsquared;
        //return z;
    }
    else
        return 0;
}

//======================================================================

double poly(const vec& C, double x)
{
    double result = 0.0;
    for (int i = C.size() - 1; i >= 0; i--) result = C[i] + result * x;
    return result;
}

//====================================================================== 

matrix transpose(const matrix& A)                        // Transpose a matrix
{
    int m = A.size(), n = A[0].size();
    matrix AT(n, vec(m));
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < m; j++) AT[i][j] = A[j][i];
    }
    return AT;
}

//======================================================================

matrix matmul(const matrix& A, const matrix& B)          // Matrix times matrix
{
    int rowsA = A.size(), colsA = A[0].size();
    int rowsB = B.size(), colsB = B[0].size();
    assert(colsA == rowsB);

    matrix C(rowsA, vec(colsB, 0.0));
    for (int i = 0; i < rowsA; i++)
    {
        for (int j = 0; j < colsB; j++)
        {
            for (int k = 0; k < colsA; k++) C[i][j] += A[i][k] * B[k][j];
        }
    }
    return C;
}

//======================================================================

vec matmul(const matrix& A, const vec& V)                // Matrix times vector
{
    int rowsA = A.size(), colsA = A[0].size();
    int rowsV = V.size();
    assert(colsA == rowsV);

    vec C(rowsA, 0.0);
    for (int i = 0; i < rowsA; i++)
    {
        for (int k = 0; k < colsA; k++) C[i] += A[i][k] * V[k];
    }
    return C;
}

//======================================================================

bool solve(const matrix& A, const vec& B, vec& X)
//--------------------------------------
// Solve AX = B by Cholesky factorisation of A (i.e. A = L.LT)
// Requires A to be SYMMETRIC
//--------------------------------------
{
    int n = A.size();

    // Cholesky-factorise A
    matrix L(n, vec(n, 0));
    for (int i = 0; i < n; i++)
    {
        // Diagonal value
        L[i][i] = A[i][i];
        for (int j = 0; j < i; j++) L[i][i] -= L[i][j] * L[i][j];
        L[i][i] = sqrt(L[i][i]);
        if (abs(L[i][i]) < SMALL) return false;

        // Rest of the ith column of L
        for (int k = i + 1; k < n; k++)
        {
            L[k][i] = A[k][i];
            for (int j = 0; j < i; j++) L[k][i] -= L[i][j] * L[k][j];
            L[k][i] /= L[i][i];
        }
    }

    // Solve LY = B, where L is lower-triangular and Y = LT.X
    vec Y = B;
    for (int i = 0; i < n; i++)
    {
        for (int j = 0; j < i; j++) Y[i] -= L[i][j] * Y[j];
        Y[i] /= L[i][i];
    }

    // Solve UX = Y, where U = LT is upper-triangular
    X = Y;
    for (int i = n - 1; i >= 0; i--)
    {
        for (int j = i + 1; j < n; j++) X[i] -= L[j][i] * X[j];
        X[i] /= L[i][i];
    }

    return true;
}

//====================================================================== 

bool polynomialRegression(const vec& X, const vec& Y, int degree, vec& C, double& Rsquared)
{
    int N = X.size();   assert(Y.size() == N);
    matrix A(N, vec(1 + degree));
    for (int i = 0; i < N; i++)
    {
        double xp = 1;
        for (int j = 0; j <= degree; j++)
        {
            A[i][j] = xp;
            xp *= X[i];
        }
    }

    // Solve the least-squares problem for the polynomial coefficients C
    matrix AT = transpose(A);
    if (!solve(matmul(AT, A), matmul(AT, Y), C)) return false;

    // Calculate R^2
    vec AC = matmul(A, C);
    double sumYsq = 0, sumACsq = 0, sumY = 0.0;
    for (int i = 0; i < N; i++)
    {
        sumY += Y[i];
        sumYsq += Y[i] * Y[i];
        sumACsq += AC[i] * AC[i];
    }
    Rsquared = 1.0 - (sumYsq - sumACsq) / (sumYsq - sumY * sumY / N + SMALL);
    return true;
}

//====================================================================== 

// Interface routine that takes a (pointer to) an array Y, of size N,
// and returns the R^2 parameter for a cubic polynomial fit to the points
// (0,Y0), (1,y1), ..., (N-1,yN-1)

double PolyFitFunction(double* Y, int N)
{
    vec X(N);   for (int i = 0; i < N; i++) X[i] = i;
    vec F(Y, Y + N);
    vec C;
    double Rsquared;
    if (polynomialRegression(X, F, 3, C, Rsquared)) return Rsquared;
    else return 0.0;
}

//======================================================================

jonnin (11333)

when debugging this kind of problem, intermediate results are your friend.
Split the problem in halfish, and look at the result(s) you have at that point. Do they match on both platforms? If they match, split the next part in half and repeat. If not, split the first half and repeat. Its a little tedious but within 5 or so splits you should find something that didnt match and trace that back to where it is not initialized correctly or the math is bad or something.

you should also validate the 'correct' answer somehow (prove that it is actually correct or approximately correct within some significant digits). Nothing like debugging code that gives the right answer to discover the original was bugged up.

Last edited on

Atom1 (52)

Debugging was challenging because the only way to see what was going on was to send something back to the calling app. A more experienced person would be able to print from the DLL (if that is possible). I know the procedure gives the correct answer because it agrees with two other methods of calculating a 3rd order r squared. This is provided that each is operating on the same inputs. I believe my problem in the end was that I was not correctly defining the inputs to the DLL from my app. Not sure why, but since I did not see a big speed improvement there didn't seem much point in pursuing it.

kigar64551 (784)

A more experienced person would be able to print from the DLL (if that is possible)

You can use OutputDebugStringA() on Windows, or syslog() on Linux:
https://learn.microsoft.com/en-us/windows/win32/api/debugapi/nf-debugapi-outputdebugstringa
https://man7.org/linux/man-pages/man3/syslog.3.html

Use a tool like DebugView to show the outputs sent by OutputDebugStringA() function:
https://learn.microsoft.com/de-de/sysinternals/downloads/debugview

Last edited on

Pages: 12