/**
 * HPC Challenge Class 2 -- Global EP Stream Triad
 *
 * IBM Research
 * (C) Copyright IBM Corp. 2005, All Rights Reserved
 *
 */

/*
 * NOTE(review): the header names of the eight #include directives were
 * missing from the source under review. The list below was reconstructed
 * from the identifiers this file uses (printf, random/srandom, DBL_MAX,
 * gettimeofday, upc_all_alloc/upc_free); confirm it against the original
 * file before merging.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include <float.h>
#include <limits.h>
#include <sys/time.h>
#include <upc.h>

typedef unsigned long long u64Int;

/*
 * The vector size. The benchmark allocates 3 * N * sizeof(double) bytes
 * in total. Set N in the Makefile for different size machines.
 */
#ifndef N
#define N 2000000
#endif

/* Number of timed repetitions of the triad kernel. */
#define NTIMES 10

/* The three triad vectors, a = b + alpha * c. */
shared double *a;
shared double *b;
shared double *c;

/* Number of threads whose verification failed; written by thread 0 only. */
shared int errorCode;

/*
 * One verification flag per thread. Every thread stores only into the
 * element indexed by its own MYTHREAD, so no two threads ever update the
 * same shared object concurrently.
 */
shared int threadErrors[THREADS];

#define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) )
#define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) )

int checkTriad(void);
double mysecond(void);

/* *********************************************************************** */
/* main program                                                            */
/* *********************************************************************** */

/**
 * Allocates the three shared vectors, runs the triad kernel NTIMES times,
 * verifies the result and prints the achieved rate on thread 0.
 *
 * Returns 0 on normal completion and 1 when the vectors cannot be
 * allocated. A failed verification is reported on stdout but does not
 * change the return value.
 */
int main(void)
{
    double alpha = 3.0;
    double times[NTIMES];
    double minTime = DBL_MAX, maxTime = 0.0, avgTime = 0.0;
    double rateGBs = 0.0;
    const u64Int VectorSize = N;
    const u64Int totalBytes = VectorSize * 24;  /* 3 vectors * 8-byte doubles */
    u64Int i;
    unsigned k;

    // ---------------------------------------------------------------------
    // allocate the arrays
    // ---------------------------------------------------------------------
    a = (shared double *) upc_all_alloc(VectorSize, sizeof(double));
    b = (shared double *) upc_all_alloc(VectorSize, sizeof(double));
    c = (shared double *) upc_all_alloc(VectorSize, sizeof(double));

    upc_barrier;

    if (a == NULL || b == NULL || c == NULL) {
        /*
         * The storage is shared by all threads, so it must be released
         * exactly once: freeing the same space again from another thread
         * is undefined. upc_free(NULL) is a no-op, so no guards needed.
         */
        if (MYTHREAD == 0) {
            upc_free(c);
            upc_free(b);
            upc_free(a);
        }
        fprintf(stderr, "Failed to allocate memory (%llu).\n", VectorSize);
        return 1;
    }

    if (MYTHREAD == 0)
        printf("Memory in use: %llu MB per CPU, %llu MB total, %d threads\n",
               totalBytes / 1024 / 1024 / THREADS,
               totalBytes / 1024 / 1024,
               THREADS);

    // ---------------------------------------------------------------------
    // initialize the b and c arrays with a verifiable sequence
    // ---------------------------------------------------------------------
    upc_forall(i = 0; i < VectorSize; i++; i) {
        b[i] = 1.5 * i;
        c[i] = 2.5 * i;
    }
    upc_barrier;

    // ---------------------------------------------------------------------
    // timing loop, we select the best of NTIMES, excluding the first
    // ---------------------------------------------------------------------
    for (k = 0; k < NTIMES; k++) {
        times[k] = mysecond();

        upc_forall(i = 0; i < VectorSize; i++; i)
            a[i] = b[i] + alpha * c[i];

        /* the barrier is part of the timed region */
        upc_barrier;
        times[k] = mysecond() - times[k];
    }

    // ---------------------------------------------------------------------
    // verify
    // ---------------------------------------------------------------------
    threadErrors[MYTHREAD] = checkTriad() ? 1 : 0;
    upc_barrier;

    if (MYTHREAD == 0) {
        /* reduce the per-thread flags once every thread has stored its own */
        int failures = 0;
        int t;

        for (t = 0; t < THREADS; t++)
            failures += threadErrors[t];
        errorCode = failures;

        if (failures != 0)
            printf("verification failed %d\n", failures);
        else
            printf("verification successful\n");
    }
    upc_barrier;

    // ---------------------------------------------------------------------
    // select the best iteration and compute the rate
    // ---------------------------------------------------------------------
    for (k = 1; k < NTIMES; k++) {   /* note -- skip first iteration */
        avgTime = avgTime + times[k];
        minTime = Mmin(minTime, times[k]);
        maxTime = Mmax(maxTime, times[k]);
    }
    avgTime /= (double)(NTIMES - 1);

    /* a non-positive minimum time yields a negative rate as an error marker */
    rateGBs = (minTime > 0.0 ? 1.0 / minTime : -1.0);
    rateGBs *= 1e-9 * 3 * sizeof(double) * N;

    if (MYTHREAD == 0) {
        printf("Rate (GB/s) Avg time Min time Max time\n");
        printf("%11.4f %11.4f %11.4f %11.4f\n",
               rateGBs, avgTime, minTime, maxTime);
    }

    /*
     * Release the shared vectors exactly once. The barriers that follow
     * the verification step guarantee that no thread still reads them.
     */
    if (MYTHREAD == 0) {
        upc_free(a);
        upc_free(b);
        upc_free(c);
    }

    return 0;
}

/* *********************************************************************** */
/* Verification function                                                   */
/* *********************************************************************** */

/**
 * Spot-checks the triad result held in the shared vector a.
 *
 * Each thread seeds the random generator with its own MYTHREAD and tests
 * THREADS pseudo-randomly chosen elements against the value recomputed
 * from the initialization sequence (b[i] = 1.5*i, c[i] = 2.5*i, alpha = 3).
 *
 * Returns 0 when every sampled element matches, 1 on the first mismatch
 * (after printing a diagnostic line).
 */
int checkTriad(void)
{
    const u64Int VectorSize = N;
    u64Int index;
    double val;
    int k;

    // ---------------------------------------------------------------------
    // each thread verifies THREADS elements randomly spread throughout the
    // machine
    // ---------------------------------------------------------------------
    srandom(MYTHREAD);                                 // initialize the random generator

    for (k = 0; k < THREADS; k++) {
        index = random() % VectorSize;                 // compute the index
        val = (1.5 * index) + 3.0 * (2.5 * index);     // compute the expected val

        if (a[index] != val) {
            printf("%d: verification failed a[%llu]=%11.4f, val=%11.4f\n",
                   MYTHREAD, index, a[index], val);
            return 1;
        }
    }

    return 0;
}

/* *********************************************************************** */
/* Utility function to get the time                                        */
/* *********************************************************************** */

/**
 * Returns the current time reported by gettimeofday() as seconds, with
 * the microsecond part carried in the fraction.
 */
double mysecond(void)
{
    struct timeval tv;

    gettimeofday(&tv, 0);
    return tv.tv_sec + ((double) tv.tv_usec / 1000000);
}