I have a project that requires us to perform matrix multiplication via multi-threading, using the Windows API.
I've got the basic code running and everything is well under sane/ideal conditions. But if my input matrices go beyond 10x10 the program ends up failing on WaitOnMultipleObjects(...) and runs the main thread through without waiting for the rest to complete computation.
My solution computes for each element of the resultant matrix. Below is the code:
Attached are the rest of my source files in case you want to compile it.
#include "stdafx.h"
#include <cstdlib>
#include <iostream>
#include <Windows.h>
#include <process.h>
#include <strsafe.h>
#include "random.h"
using namespace std;
class Matrix {
private:
double **_matrixValues;
unsigned int _rowSize, _columnSize;
public:
Matrix() {
_rowSize = 0;
_columnSize = 0;
_matrixValues = 0;
}
Matrix(const unsigned int row_size, const unsigned int column_size) {
_rowSize = row_size;
_columnSize = column_size;
_matrixValues = (double**)malloc(sizeof(double*)*_rowSize);
for(unsigned int i =0 ; i <_rowSize;i++){
*(_matrixValues+i) = (double*)malloc(sizeof(double)*_columnSize);
for(unsigned int j = 0; j < _columnSize; ++j) {
_matrixValues[i][j] = 0;
}
}
}
/*~Matrix() {
for(int i = 0; i < _rowSize; i++)
free(_matrixValues[i]);
free(_matrixValues);
_rowSize = 0;
_columnSize = 0;
}*/
double getValue_at (const unsigned int row_index,
const unsigned int column_index){
if(row_index >= _rowSize || column_index >= _columnSize)
cerr << "[GETTER]Access Violation: Out of Bounds";
else
return _matrixValues[row_index][column_index];
}
void setValue_at (double value, const unsigned int row_index,
const unsigned int column_index) {
if((row_index >= _rowSize) || (column_index >= _columnSize))
cerr << "[SETTER]Access Violation: Out of Bounds";
else
_matrixValues[row_index][column_index] = value;
}
const unsigned int ColumnSize() { return _columnSize; }
const unsigned int RowSize() { return _rowSize; }
};
typedef struct ThreadedMatrixData {
Matrix *A, *B, *resultantMatrix;
unsigned int row_position, column_position;
DWORD theadId;
} ThreadedMatrixComputationData, *ThreadedMatrixComputationDataPtr;
#define MAX_THREADS 3
#define BUF_SIZE 1024
DWORD WINAPI ThreadedMultiplier( LPVOID lpParam );
void ErrorHandler(LPTSTR lpszFunction);
void print_usage();
void randomMatrixFill(Matrix &matrix);
Matrix ThreadedMatrixMultiplier(Matrix &leftHandMatrix, Matrix &rightHandMatrix);
int main(int argc, char* argv[])
{
const int a_row = 10, a_column = 4;
const int b_row = 4, b_column = 10;
Matrix A, B;
A = Matrix(a_row,a_column);
B = Matrix(b_row,b_column);
printf("Generating randomized matrices:\n");
randomMatrixFill(A);
randomMatrixFill(B);
printf("\tA:\n");
for(int i = 0; i < a_row; ++i) {
printf("\t");
for(int j = 0; j < a_column; ++j) {
printf("%f ", A.getValue_at(i,j));
}
printf("\n");
}
printf("\tB:\n");
for(int i = 0; i < b_row; ++i) {
printf("\t");
for(int j = 0; j < b_column; ++j) {
printf("%f ", B.getValue_at(i,j));
}
printf("\n");
}
printf("Performing threded Matrix Multiplication...\n");
Matrix C = ThreadedMatrixMultiplier(A,B);
printf("\nResultant Matrix C:\n");
for(unsigned int i = 0; i < C.RowSize(); ++i) {
printf("\t");
for(unsigned int j = 0; j < C.ColumnSize(); ++j) {
printf("%f ", C.getValue_at(i,j));
}
printf("\n");
}
system("PAUSE");
return 0;
}
void randomMatrixFill(Matrix &matrix){
RandNumGen X (-10.0, 10.0); // random coefs in range (-10,10)
for (unsigned int i=0; i < matrix.RowSize(); i++) {
for (unsigned int j=0; j < matrix.ColumnSize(); j++) {
matrix.setValue_at(X.generate(),i,j);
}
}
}
Matrix ThreadedMatrixMultiplier(Matrix &leftHandMatrix, Matrix &rightHandMatrix) {
//prepare new resultant matrix parameters
unsigned int returnValue_rowSize, returnValue_columnSize;
if(leftHandMatrix.ColumnSize() > rightHandMatrix.ColumnSize())
returnValue_columnSize = leftHandMatrix.ColumnSize();
else
returnValue_columnSize = rightHandMatrix.ColumnSize();
if(leftHandMatrix.RowSize() > rightHandMatrix.RowSize())
returnValue_rowSize = leftHandMatrix.RowSize();
else
returnValue_rowSize = rightHandMatrix.RowSize();
Matrix returnValue = Matrix(returnValue_rowSize, returnValue_columnSize);
const unsigned int totalThreads = returnValue_rowSize*returnValue_columnSize;
//prepare data and handlers for computation
ThreadedMatrixComputationDataPtr *threadedMatrixComputationData;
threadedMatrixComputationData = new ThreadedMatrixComputationDataPtr[totalThreads];
DWORD *dwThreadIdArray;
dwThreadIdArray = new DWORD[totalThreads];
HANDLE *hThreadArray;
hThreadArray = new HANDLE[totalThreads];
HANDLE *hThreadHandleAssertie;
hThreadHandleAssertie = new HANDLE[totalThreads];
printf("Creating %d threads to handle a %dx%d Matrix",
totalThreads, returnValue_rowSize,returnValue_columnSize);
// Allocate memory for thread data.
for(unsigned int i = 0; i < totalThreads; i++) {
threadedMatrixComputationData[i] = (ThreadedMatrixComputationDataPtr) HeapAlloc(
GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(ThreadedMatrixComputationData) );
if( threadedMatrixComputationData[i] == NULL ) {
// If the array allocation fails, the system is out of memory
// so there is no point in trying to print an error message.
// Just terminate execution.
ExitProcess(2);
}
// Provide references to data for each thread to work with.
threadedMatrixComputationData[i]->A = &leftHandMatrix;
threadedMatrixComputationData[i]->B = &rightHandMatrix;
threadedMatrixComputationData[i]->resultantMatrix = &returnValue;
threadedMatrixComputationData[i]->column_position = i%returnValue_columnSize;
threadedMatrixComputationData[i]->row_position = i/returnValue_columnSize;
// Time to actually create the thread to begin execution on its own, bitch.
hThreadArray[i] = CreateThread(
NULL, // default security attributes
0, // use default stack size
ThreadedMultiplier, // thread function name
threadedMatrixComputationData[i], // argument to thread function
0, // use default creation flags | run immediately
&dwThreadIdArray[i]); // returns the thread identifier
threadedMatrixComputationData[i]->theadId = dwThreadIdArray[i];
// Check the return value for success.
// If CreateThread fails, terminate execution.
// This will automatically clean up threads and memory.
if (hThreadArray[i] == NULL) {
ErrorHandler(TEXT("CreateThread"));
ExitProcess(3);
} else {
printf("\t->Launched Thread# %d, ID:%d\n", i, dwThreadIdArray[i]);
//WaitForSingleObject(hThreadArray[i], INFINITE); //waiting for each thread would solve the problem, but defeat the purpose of using multiple threads. A simple, but obviously wrong work-around.
}
} // End of multiplier thread creation loop.
DWORD res = WaitForMultipleObjects(totalThreads, hThreadArray, TRUE, INFINITE);
// Close all thread handles and free memory allocations.
// House cleaning after the thread parties! FTW.
//for(unsigned int i=0; i<totalThreads; i++) {
// CloseHandle(hThreadArray[i]);
// if(threadedMatrixComputationData[i] != NULL) {
// HeapFree(GetProcessHeap(), 0, threadedMatrixComputationData[i]);
// threadedMatrixComputationData[i] = NULL; // Ensure address is not reused.
// }
//}
return returnValue;
}
DWORD WINAPI ThreadedMultiplier( LPVOID lpParam ) {
HANDLE hStdout;
ThreadedMatrixComputationDataPtr threadedMatrixComputationDataInstance;
TCHAR msgBuf[BUF_SIZE];
size_t cchStringSize;
DWORD dwChars;
// Make sure there is a console to receive output results.
hStdout = GetStdHandle(STD_OUTPUT_HANDLE);
if( hStdout == INVALID_HANDLE_VALUE )
return 1;
// Cast the parameter to the correct data type.
// The pointer is known to be valid because
// it was checked for NULL before the thread was created.
threadedMatrixComputationDataInstance = (ThreadedMatrixComputationDataPtr)lpParam;
unsigned int leftHandSideOperandBound = threadedMatrixComputationDataInstance->A->ColumnSize();
unsigned int rightHandSideOperandBound = threadedMatrixComputationDataInstance->B->RowSize();
double product = 0;
for (unsigned int i=0; i<leftHandSideOperandBound; i++) {
// leftHandSideMatrix only requires access to a single row span
for(unsigned int j=0; j<rightHandSideOperandBound; j++) {
// rightHandSideMatrix only requires access to a single column span
product +=
threadedMatrixComputationDataInstance
->A->getValue_at(threadedMatrixComputationDataInstance->row_position,i) *
threadedMatrixComputationDataInstance
->B->getValue_at(j,threadedMatrixComputationDataInstance->column_position);
}
}
threadedMatrixComputationDataInstance->resultantMatrix->setValue_at(product,
threadedMatrixComputationDataInstance->row_position,
threadedMatrixComputationDataInstance->column_position);
// Print the parameter values using thread-safe functions.
StringCchPrintf(msgBuf, BUF_SIZE, TEXT("[%d]Finished computation at new matrix position: A(%d,-)xB(-,%d) -> C(%d, %d)\n"),
threadedMatrixComputationDataInstance->theadId,
threadedMatrixComputationDataInstance->row_position,
threadedMatrixComputationDataInstance->column_position,
threadedMatrixComputationDataInstance->row_position,
threadedMatrixComputationDataInstance->column_position);
StringCchLength(msgBuf, BUF_SIZE, &cchStringSize);
WriteConsole(hStdout, msgBuf, (DWORD)cchStringSize, &dwChars, NULL);
return 0;
}
void ErrorHandler(LPTSTR lpszFunction)
{
// Retrieve the system error message for the last-error code.
LPVOID lpMsgBuf;
LPVOID lpDisplayBuf;
DWORD dw = GetLastError();
FormatMessage(
FORMAT_MESSAGE_ALLOCATE_BUFFER |
FORMAT_MESSAGE_FROM_SYSTEM |
FORMAT_MESSAGE_IGNORE_INSERTS,
NULL,
dw,
MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT),
(LPTSTR) &lpMsgBuf,
0, NULL );
// Display the error message.
lpDisplayBuf = (LPVOID)LocalAlloc(LMEM_ZEROINIT,
(lstrlen((LPCTSTR) lpMsgBuf) + lstrlen((LPCTSTR) lpszFunction) + 40) * sizeof(TCHAR));
StringCchPrintf((LPTSTR)lpDisplayBuf,
LocalSize(lpDisplayBuf) / sizeof(TCHAR),
TEXT("%s failed with error %d: %s"),
lpszFunction, dw, lpMsgBuf);
MessageBox(NULL, (LPCTSTR) lpDisplayBuf, TEXT("Error"), MB_OK);
// Free error-handling buffer allocations.
LocalFree(lpMsgBuf);
LocalFree(lpDisplayBuf);
}