/*
Matrix Multiply
From PVM: Parallel Virtual Machine
A Users' Guide and Tutorial for Networked Parallel Computing
Geist et al.
Hauled over to MPI by t. bennet.
*/
/* defines and prototypes for the MPI library */
#include <mpi.h>
#include <stdio.h>
#include <stdlib.h>   /* malloc, free, atoi, exit, rand, srand */
#include <string.h>   /* memcpy */
/* Maximum number of children this program will spawn.  (Carried over
   from the PVM version; not used here.) */
#define MAXNTIDS 100
#define MAXROW 10
/* Message tags.  (Only BTAG is used in this MPI version.) */
#define ATAG 2
#define BTAG 3
#define DIMTAG 5
int myid;
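/* Fill the local blk x blk block of A with random values, zero the
   local block of C, and set the local block of B so that the global B
   is the identity matrix (identity on the diagonal blocks, zero
   elsewhere).  The product should therefore come out equal to A, which
   is what the check at the end of main() relies on. */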
void
InitBlock(float *a, float *b, float *c, int blk, int row, int col)
{
    int len, ind;
    int i, j;

    /* Seed each rank differently so the A blocks differ. */
    srand(myid);
    len = blk*blk;
    for (ind = 0; ind < len; ind++) {
        a[ind] = (float)(rand()%1000)/100.0;
        c[ind] = 0.0;
    }
    for (i = 0; i < blk; i++) {
        for (j = 0; j < blk; j++) {
            if (row == col)
                b[j*blk+i] = (i == j) ? 1.0 : 0.0;
            else
                b[j*blk+i] = 0.0;
        }
    }
}
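/* Multiply the blk x blk blocks a and b, accumulating into c
   (c += a * b). */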
void
BlockMult(float *c, float *a, float *b, int blk)
{
    int i, j, k;

    for (i = 0; i < blk; i++)
        for (j = 0; j < blk; j++)
            for (k = 0; k < blk; k++)
                c[i*blk+j] += (a[i*blk+k] * b[k*blk+j]);
}
int
main(int argc, char *argv[])
{
    /* Get set up. */
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);

    /* Number of tasks. */
    int ntask;
    MPI_Comm_size(MPI_COMM_WORLD, &ntask);

    /* Parameters. */
    int i, m, blksize;

    /* The local blocks of A, B and C, plus a buffer for the A block
       broadcast along the row. */
    float *a, *b, *c, *atmp;
    int row, col, up, down, left;
    /* Get the array dimensions. */
    if (argc != 3) {
        fprintf(stderr, "usage: mmult dim blocksize\n");
        MPI_Finalize();
        exit(3);
    }
    m = atoi(argv[1]);
    blksize = atoi(argv[2]);

    if (ntask != m*m) {
        fprintf(stderr,
                "number of tasks must equal number of regions.\n");
        MPI_Finalize();
        exit(4);
    }
    /* Find the tids in my row.  (PVM leftover: the row communicator
       created below takes over this job in MPI.) */
    //for (i = 0; i < m; i++)
    //    myrow[i] = pvm_gettid("mmult", (myid/m)*m + i);

    /* Allocate the memory for the local blocks. */
    a = (float*)malloc(sizeof(float)*blksize*blksize);
    b = (float*)malloc(sizeof(float)*blksize*blksize);
    c = (float*)malloc(sizeof(float)*blksize*blksize);
    atmp = (float*)malloc(sizeof(float)*blksize*blksize);

    /* Check for valid pointers. */
    if (!(a && b && c && atmp)) {
        fprintf(stderr, "%s: out of memory!\n", argv[0]);
        free(a); free(b); free(c); free(atmp);
        MPI_Finalize();
        exit(5);
    }
    /* Find my block's row and column, and the left end of my row. */
    row = myid/m;
    col = myid % m;
    left = row*m;

    /* Make a communicator for our row.  (This is gross.  Surely
       there's a reasonable way to do this.) */
    MPI_Group all;
    MPI_Comm_group(MPI_COMM_WORLD, &all);
    int range[3] = { left, left + m - 1, 1 };
    MPI_Group rowgrp;
    MPI_Group_range_incl(all, 1, &range, &rowgrp);
    MPI_Comm rowcom;
    MPI_Comm_create(MPI_COMM_WORLD, rowgrp, &rowcom);
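    /* The "reasonable way" the comment above wishes for: a single call
       to MPI_Comm_split, using the row number as the color and the
       column as the key.  A sketch (not used below, but it builds the
       same row communicator with the same rank ordering):

           MPI_Comm rowcom;
           MPI_Comm_split(MPI_COMM_WORLD, row, col, &rowcom);
    */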
    /* Calculate the world ranks of the neighbors above and below,
       wrapping around at the top and bottom of the process grid. */
    up = (row ? row-1 : m-1)*m + col;
    down = row == m-1 ? col : (row+1)*m + col;

    /* Initialize the blocks. */
    InitBlock(a, b, c, blksize, row, col);
    /* Do the matrix multiply: on each of the m steps, broadcast one
       block of A across the process row, multiply it into c, then
       roll the blocks of B up one process row. */
    for (i = 0; i < m; i++) {
        /* Mcast the block of matrix A.  The communicator rowcom
           determines the set of participants, which was set earlier
           to be our row; the root is the process in column (row+i)%m. */
        if (col == (row + i)%m)
            memcpy(atmp, a, sizeof(float)*blksize*blksize);
        MPI_Bcast(atmp, blksize*blksize, MPI_FLOAT, (row + i)%m,
                  rowcom);
        BlockMult(c, atmp, b, blksize);

        /* Rotate the blocks of B: send ours up and receive a new one
           from below.  (The combined send/receive avoids the deadlock
           that a blocking MPI_Send followed by MPI_Recv could cause
           for large blocks.) */
        MPI_Status st;
        MPI_Sendrecv_replace(b, blksize*blksize, MPI_FLOAT,
                             up, (i+1)*BTAG, down, (i+1)*BTAG,
                             MPI_COMM_WORLD, &st);
    }
    /* Check the result.  The global B is the identity, so each c block
       should come out exactly equal to the corresponding a block. */
    for (i = 0; i < blksize*blksize; i++)
        if (a[i] != c[i])
            printf("Error a[%d] (%g) != c[%d] (%g)\n", i, a[i], i,
                   c[i]);

    if (myid == 0) printf("Done.\n");
    free(a); free(b); free(c); free(atmp);
    MPI_Finalize();
    return 0;
}
Run this beast with something like:

    mpiexec -hosts 2 bennet.mc.edu 4 sandbox.mc.edu 5 mmult 3 100

Here, I've made the number of tasks (4 + 5) agree with the number of
squares (3 × 3).  The program will barf if they don't agree.  Giving the
grid dimension on the command line when the task count already implies
it is redundant, and I should fix it, but I hope no one is holding
their breath.  (Or you could fix it.)
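If you did want to fix it, one way (a sketch, untested here) is to drop
the dim argument and derive m from the task count, refusing to run
unless that count is a perfect square.  This uses sqrt() from math.h,
so the build would also need -lm:

    /* Sketch: derive the grid dimension from the task count instead of
       taking it on the command line.  Needs #include <math.h> and -lm. */
    m = (int)(sqrt((double)ntask) + 0.5);
    if (m * m != ntask) {
        fprintf(stderr, "number of tasks (%d) is not a perfect square\n",
                ntask);
        MPI_Finalize();
        exit(4);
    }
    blksize = atoi(argv[1]);   /* the block size is the only argument left */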