/* This program is placed in the PUBLIC DOMAIN. It is provided AS IS with absolutely no warranty or support. Use at your own risk. If you use it to produce results for publication, it would be unethical not to provide citation to the original publication. Title: Scaling of Beowulf-class Distributed Systems Authors: John Salmon, Christopher Stein, Thomas Sterling In Proceedings: SC'98 Publisher: IEEE Computer Society and ACM SIGARCH Availability: electronic only, CD-ROM or WWW URL: http://www.supercomp.org/sc98/ And here's the url of my copy of the paper: http://www.cacr.caltech.edu/~johns/papers/sc98/ */ /* Author: John Salmon Center for Advanced Computing Research Caltech 158-79 Pasadena, CA 91125 johns@cacr.caltech.edu http://www.cacr.caltech.edu/~johns/ Date: 10 Aug 1998 */ /* mpiload.c: A program to examine the interplay between communication and computation performance and system loading in MPI. The basic idea is to have every process talk to every other process, punctuated by periods of high-intensity floating point activity. The communication and the fp activity are timed separately. Individual timings are reported on stdout in a form that should be easily parsed by any generic statistics/plotting program. The intent is to produce scatter-plots of e.g., time vs. message length, or time vs #flops, but there's probably enough information on stdout to do some fairly sophisticated statistics as well, e.g., figuring out if the outliers are related to some particular kind of activity. Missing features: - --help argument (you'll have to read the source to do_args below) - positive confirmation of all important settings on stdout */ /* Changelog: Aug 10, 1998 initial 'release' Sep 13, 1998 - MPI_BYTE, not MPI_INT in dump_events. - fix typos in comments. 
- defensively use calloc instead of malloc for events - defensively call error if thislen too large - don't try to initialize a loop-automatic 'int tag=i;' */ /* NOTE(review): the five #include directives that follow carry no header names in this copy; the names appear to have been lost and must be restored before the file can build. */ #include #include #include #include #include /* require getopt_long, e.g., GNU getopt */ /* ? swampi.h, mpi.h ? */ #if defined(SWAMPI) #include "swampi.h" #include "Msgs.h" /* If we've got swampi, we can use Msgs too. */ #else #include "mpi.h" #endif #include "hwclock.h" /* Default values, see do_args for how to set them from the command line */ int msgmin = 16; /* lower bound of the random message length drawn in main */ int msgmax = 100000; /* about 0.1sec at 10MBps */ int fpmin = 10; /* about 0.1musec at 100Mflop */ int fpmax = 10000000; /* about 0.1sec at 100Mflop */ int nmsg = 20; /* finish pretty quickly */ int cyclelen = 0; /* set to nmsg unless set explicitly */ int nonrandom = 0; /* disable randomization of comm pattern */ unsigned int seed = 1; /* rng seed */ char *buf; /* message buffer handed to MPI_Send and MPI_Recv in main */ hwclock_t t0; /* clock reading taken in main after the start-up barriers; event times are stored relative to it */ int nproc, rank; /* filled in by MPI_Comm_size and MPI_Comm_rank in main */ int *recips, *ismaster; /* passed to create_recipients by main and freed at the end of main */ void banner(int argc, char **argv); void do_args(int argc, char **argv); void error(const char *fmt, ...); /* presumably printf-style; the definition is not visible in this part of the file -- TODO confirm */ /* Perform a random permutation, in place, of the nitem values in the array p[]. The index i walks down from nitem; at each step an index swapwith in the range 0..i-1 is derived from rand(), i is decremented, and p[swapwith] is exchanged with p[i]. The result therefore depends on the current state of the rand() stream. */ void new_perm(int nitem, int *p){ int i; i = nitem; while( i > 1 ){ int tmp; int swapwith; swapwith = (int) ((double)i*rand()/(RAND_MAX + 1.0)); --i; tmp = p[swapwith]; p[swapwith] = p[i]; p[i] = tmp; } } /* The visible part of create_recipients seeds rand() with seed_, discards two values and allocates a scratch array perm with nproc entries. NOTE(review): the rest of the function is missing from this copy, so how ncyc, recips[] and ismaster[] are used cannot be confirmed from here. */ void create_recipients(unsigned int seed_, int ncyc, int recips[], int ismaster[]){ int i; int *perm; /* SEED MUST BE THE SAME ON ALL PROCESSORS! */ srand(seed_); rand(); rand(); perm = malloc( nproc * sizeof(*perm)); /* This little bit of weirdness makes it lots easier to set up a machinelist for a -nonrandom run. The top half of the perm are the even numbered ranks and the bottom half of the perm are the odd numbered ranks, so 0<->1, 2<->3, etc. 
*/ /* NOTE(review): text is missing from this copy right after the loop header that starts below. The remainder of create_recipients and the opening of the next function (presumably record_event, which main calls) are not visible. The statements that survive store the event type, the two clock readings relative to t0, the length and the partner through the pointer ev. */ for(i=0; itype = type; ev->t1 = t1 - t0; ev->t2 = t2 - t0; ev->length = length; ev->partner = partner; } /* Print recorded events. From the visible part: each event prints its partner, its length and its start time (t1 scaled by hwsec_per_clock()), followed by the word slave and half of the t1..t2 interval, or the word master and the whole interval. NOTE(review): the loop header and the beginning of the first printf are missing from this copy, so the use of src is not visible. */ void dump_my_events(int src){ int i; for(i=0; ipartner, ev->length, ev->t1*hwsec_per_clock()); if( ev->type == slave ){ /* *0.5 because the interval measured both 'ping' and 'pong' */ printf(" slave %g\n", 0.5*(ev->t2 - ev->t1)*hwsec_per_clock()); }else{ /* len is flops, report the time it took to complete them. */ printf(" master %g\n", (ev->t2 - ev->t1)*hwsec_per_clock()); } } } /* On rank 0, print the column header line, print the local events with dump_my_events(0), then start a loop at i=1. NOTE(review): the loop body and whatever the other ranks do are missing from this copy. */ void dump_events(void){ if( rank == 0 ){ int i; printf("#myrank partnersrank lengt abstime s_or_m time\n"); dump_my_events(0); for(i=1; i3){ /* NOTE(review): text is missing from this copy just before this point: the end of dump_events and the head of the following function (called as logistic(nlogistic, &xa, &xb, &xc, 3.9) from doflops), including the declarations of x1..x3, mx1..mx3, b, niter and ret. What survives is the tail of a loop that runs while niter exceeds 3 and performs four updates per pass. */ /* Implement xi = xi * b * (1.0 - xi) */ /* each update of the three sequences costs 3 subtractions and 6 multiplications, i.e. 9 floating point operations */ mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; /* second iteration */ mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; /* third iteration */ mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; /* fourth iteration */ mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; niter -= 4; } /* Finish the iterations left over by the unrolled loop; the cases fall through on purpose so that case 3 performs three updates, case 2 two and case 1 one. */ switch(niter){ case 3: mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; /* no break: fall through */ case 2: mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; /* no break: fall through */ case 1: mx1 = 1.0 - x1; mx2 = 1.0 - x2; mx3 = 1.0 - x3; x1 *= b; x2 *= b; x3 *= b; x1 *= mx1; x2 *= mx2; x3 *= mx3; } /* hand the updated sequence values back through the pointer arguments */ *x1p = x1; *x2p = x2; *x3p = x3; return ret; /* ret is assigned in the part of the function that is missing from this copy */ } /* Current values of the three logistic sequences; doflops passes their addresses to logistic, so they carry over from one call to the next. */ static double xa=0.4, xb=0.5, xc=0.6; /* Execute the logistic function enough times to do at least the target number of times (on three sequences). 
Return the actual number of flops evaluated */ int doflops(int ntarget, hwclock_t *t1p, hwclock_t *t2p){ int ret; int nlogistic = (ntarget-1)/9; hwclock(t1p); ret = logistic(nlogistic, &xa, &xb, &xc, 3.9); hwclock(t2p); return ret; } int main(int argc, char **argv){ double logmsgfactor, logmsgmin; double logfpfactor, logfpmin; int i; MPI_Init(&argc, &argv); MPI_Comm_size(MPI_COMM_WORLD, &nproc); MPI_Comm_rank(MPI_COMM_WORLD, &rank); if( nproc%2 ){ error("Must have an even number of processors\n"); } do_args(argc, argv); create_recipients(seed, cyclelen, recips, ismaster); banner(argc, argv); /* From now on we want everybody to be producing their own 'private' stream of random numbers */ srand( (rank+1) * rand() ); /* Pre-compute a couple of constants that help us generate nice plots on log axes */ if( msgmax > 0 ){ logmsgmin = log((double)msgmin); logmsgfactor = log((double)msgmax/(double)msgmin)/(RAND_MAX+1.0); } if( fpmax > 0 ){ logfpmin = log((double)fpmin); logfpfactor = log((double)fpmax/(double)fpmin)/(RAND_MAX+1.0); } /* The barrier is logically superfluous, but we'd like to get everything started more-or-less together. 
*/ MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD); hwclock(&t0); #ifdef SWAMPI swampi_clear_stats(); #endif #if 0 for(i=0; i 0 ){ fptarget = rint( exp( logfpmin + (rand() * logfpfactor) ) ); nflops = doflops(fptarget, &t1, &t2); }else{ nflops = 0; hwclock(&t1); t2 = t1; } if( msgmax > 0 ){ thislen = rint( exp( logmsgmin + (rand() * logmsgfactor) )); if( thislen > msgmax ) error("thislen=%d, msgmax=%d!\n"); MPI_Send(&thislen, 1, MPI_INT, partner, tag+100, MPI_COMM_WORLD); MPI_Recv(buf, thislen, MPI_BYTE, partner, tag, MPI_COMM_WORLD, &stat); MPI_Send(buf, thislen, MPI_BYTE, partner, tag+200, MPI_COMM_WORLD); } record_event(master, t1, t2, partner, nflops); }else{ if( msgmax > 0 ){ MPI_Recv(&thislen, 1, MPI_INT, partner, tag+100, MPI_COMM_WORLD, &stat); hwclock(&t1); MPI_Send(buf, thislen, MPI_BYTE, partner, tag, MPI_COMM_WORLD); MPI_Recv(buf, thislen, MPI_BYTE, partner, tag+200, MPI_COMM_WORLD, &stat); hwclock(&t2); }else{ hwclock(&t1); t2 = t1; } record_event(slave, t1, t2, partner, thislen); } } #ifdef SWAMPI swampi_dump_stats(Msg_do); #endif dump_events(); MPI_Finalize(); free(buf); free(recips); free(ismaster); free(events); exit(0); } /* figure out exactly who we are and what we have been asked to do, and print it for posterity. */ void banner(int argc, char **argv){ char name[MPI_MAX_PROCESSOR_NAME]; int len; int i; MPI_Get_processor_name(name, &len); name[MPI_MAX_PROCESSOR_NAME-1] = '\0'; /* trust no one */ if( rank == 0 ){ MPI_Status stat; printf("#mpiload: communication pattern with %d msgs per processor\n", nmsg); printf("#"); for(i=0; i