/* NOTE(review): in this copy of the file the original line breaks are gone
 * and several stretches of text are missing: the header names of the include
 * directives, most of the read/write/cmp/copy kernels, the measuring loops of
 * the messen_* functions and the start of the function that ends in return 0.
 * Because the line breaks are gone, every line comment now runs to the end of
 * its physical line.  The code text is left exactly as found; the added
 * comments describe only what is visible and flag each gap. */
/*-----------------------------------------------------------------------*/ /* Program: Stream */ /* This program measures memory transfer rates in MB/s */ /* This is a heavily beefed up version of the original */ /* stream code by John D. McCalpin with some additions by Gunnar von Boehn */ /*-----------------------------------------------------------------------*/ # include # include # include # include # include /* NOTE(review): the five include directives before this comment carry no header name in this copy. FLT_MAX, printf and size_t are used further down, so float.h and stdio.h are presumably among them - TODO confirm against a pristine copy. */ /* The next comment holds disabled code: a SIGILL handler that uses siglongjmp, and a hasaltivec flag. */ /* #include #include static sigjmp_buf jmpbuf; static volatile sig_atomic_t canjump = 0; static void sigill_handler (int sig) { if (!canjump) { signal (sig, SIG_DFL); raise (sig); } canjump = 0; siglongjmp (jmpbuf, 1); } int hasaltivec=0; */ /* Sizing constants: N and OFFSET size the static arrays a and b, N and LOOPS scale the bytes_* tables, NTIMES is the second dimension of the times arrays in the messen_* functions. */ # define N 250000 # define LOOPS 40 # define NTIMES 3 # define OFFSET 0 /* Prototypes of four memory routines; none of them is defined in the visible text. */ void * memcpy_asmFC64(void *dst, const void *src, size_t len); void * moto_memcpy(void *dst, const void *src, size_t len); void * moto_memcmp(const void *dst, const void *src, size_t len); void * moto_memset(void *dst, const int c, size_t len); /* HLINE is a dashed separator string handed to printf. */ # define HLINE "----------------------------------------------------------------\n" /* MIN and MAX are defined only when not already provided; both evaluate their arguments more than once. */ # ifndef MIN # define MIN(x,y) ((x)<(y)?(x):(y)) # endif # ifndef MAX # define MAX(x,y) ((x)>(y)?(x):(y)) # endif /* Two static arrays of N+OFFSET doubles each - presumably the benchmark buffers; their use is not visible in this copy. */ static double a[N+OFFSET], b[N+OFFSET]; /* Timing slots, 40 entries each; the messen_* functions reset entries 0 to 19 before measuring. */ static double avgtime[40] = {0}, maxtime[40] = {0}, mintime[40] = {0}; /* Label tables, one string per test variant. label_cmp[j] is printed at the start of a result row in the visible code; the other tables are presumably used the same way by code that is missing here. One label per table ends in an asterisk - its meaning is not stated in the file. */ static char *label_read[20] = { "read 8 ", "read 32 ", "read 64 ", "read 32x2 ", "read 32x4 ", "read 32 CP3 ", "read 32 CP4 ", "read 32 CP5 *", "read 32 CP6 ", "read 32x4 CP3 ", "read 32x4 CP4 ", "read 32x4 CP5 ", "read 32x4 CP6 ", " ", " ", " " }; static char *label_write[10] = { "write 8 ", "write 32 ", "write 64 ", "write 32x2 ", "write 32x4 ", "memset 750 *", "memset 750 0 ", "libmoto memset", "glibc memset ", "glibc memset0 ", }; static char *label_cmp[20] = { "cmp 8 ", "cmp 32 ", "cmp 64 ", "cmp 32x2 ", "cmp 32x4 ", "cmp 32 CP2 ", "cmp 32 CP3 *", "cmp 32 CP4 ", "cmp 32 CP5 ", "cmp 32 CP6 ", "cmp 32x4 CP2 ", "cmp 32x4 CP3 ", "cmp 32x4 CP4 ", "cmp 32x4 CP5 ", "cmp 32x4 CP6 ", "libmoto memcmp", "glibc
memcmp " }; static char *label_copy[20] = { "copy 8 ", "copy 32 ", "copy 64 ", "copy 32x2 ", "copy 32x4 ", "copy 32 CP2 ", "copy 32 CP3 ", "copy 32 CP4 *", "copy 32 CP5 ", "copy 32x4 CP2 ", "copy 32x4 CP3 ", "copy 32x4 CP4 ", "copy 32x4 CP5 ", "copy 64x4 CP4 ", "copy 64x4 CP4C", "glibcb memcpy ", "bmove512 ", "FC64 ", "libmoto memcpy", "memcpy 750 ", }; /* Byte totals per test. The read and write tables hold 1 * sizeof(double) * N * LOOPS per entry, the cmp and copy tables 2 * sizeof(double) * N * LOOPS. The visible result printf divides bytes_cmp[j] by mintime[j] and scales by 1.0E-06 to fill the rate column. */ static double bytes_read[20] = { 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS }; static double bytes_write[10] = { 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS, 1 * sizeof(double) * N *LOOPS }; static double bytes_cmp[20] = { 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) *
N *LOOPS }; static double bytes_copy[20] = { 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, 2 * sizeof(double) * N *LOOPS, }; /* Timer function; its definition is the last function in this file. */ extern double mysecond(); /* NOTE(review): damaged span. read_64 starts here, but its first loop header stops after j and the text resumes with >5; followed by a loop that loads eight values through sp and stores them through tp; none of sp, tp, i, a..d or a2..d2 is declared in the visible text. The functions that stood in between are missing from this copy - do not read this span as one function. */ int read_64(double *source,double *destination, int size) { int j; double z; size=size/8; for (j=0; j>5; for (i=0; i < size ; ++i) { a=*sp++; b=*sp++; c=*sp++; d=*sp++; a2=*sp++; b2=*sp++; c2=*sp++; d2=*sp++; *tp++=a; *tp++=b; *tp++=c; *tp++=d; *tp++=a2; *tp++=b2; *tp++=c2; *tp++=d2; } } /* bmove512: copies from the buffer at from to the buffer at to, one unsigned long word at a time, in an unrolled do/while loop that runs until f reaches end. length is first rounded down to a multiple of 512 and end is set to from plus that many bytes. NOTE(review): being a do/while, the body runs once even when the rounded length is 0. When m88k, sparc or HAVE_LONG_LONG is defined the words are copied by index, otherwise by post-increment. */ void bmove512(int* to,int* from, unsigned int length) { register unsigned long *f,*t,*end; /* round length down to a multiple of 512 */ length=(length >> 9) <<9; /* NOTE(review): the cast below yields long* while end is unsigned long* */ end = (long*) ((char*) from+length); f= (unsigned long*) from; t= (unsigned long*) to; #if defined(m88k) || defined(sparc) || defined(HAVE_LONG_LONG) do { t[0]=f[0]; t[1]=f[1]; t[2]=f[2]; t[3]=f[3]; t[4]=f[4]; t[5]=f[5]; t[6]=f[6]; t[7]=f[7]; t[8]=f[8]; t[9]=f[9]; t[10]=f[10]; t[11]=f[11]; t[12]=f[12]; t[13]=f[13]; t[14]=f[14]; t[15]=f[15]; t[16]=f[16]; t[17]=f[17]; t[18]=f[18]; t[19]=f[19]; t[20]=f[20]; t[21]=f[21]; t[22]=f[22]; t[23]=f[23]; t[24]=f[24]; t[25]=f[25]; t[26]=f[26]; t[27]=f[27]; t[28]=f[28]; t[29]=f[29]; t[30]=f[30]; t[31]=f[31]; t[32]=f[32]; t[33]=f[33]; t[34]=f[34]; t[35]=f[35]; t[36]=f[36]; t[37]=f[37]; t[38]=f[38]; t[39]=f[39]; t[40]=f[40]; t[41]=f[41]; t[42]=f[42]; t[43]=f[43]; t[44]=f[44]; t[45]=f[45]; t[46]=f[46]; t[47]=f[47]; t[48]=f[48]; t[49]=f[49]; t[50]=f[50]; t[51]=f[51]; t[52]=f[52]; t[53]=f[53]; t[54]=f[54]; t[55]=f[55]; t[56]=f[56]; t[57]=f[57];
t[58]=f[58]; t[59]=f[59]; t[60]=f[60]; t[61]=f[61]; t[62]=f[62]; t[63]=f[63]; /* With HAVE_LONG_LONG a pass ends after these 64 words and both pointers advance by 64; otherwise words 64 to 127 follow and both pointers advance by 128. */ #ifdef HAVE_LONG_LONG t+=64; f+=64; #else t[64]=f[64]; t[65]=f[65]; t[66]=f[66]; t[67]=f[67]; t[68]=f[68]; t[69]=f[69]; t[70]=f[70]; t[71]=f[71]; t[72]=f[72]; t[73]=f[73]; t[74]=f[74]; t[75]=f[75]; t[76]=f[76]; t[77]=f[77]; t[78]=f[78]; t[79]=f[79]; t[80]=f[80]; t[81]=f[81]; t[82]=f[82]; t[83]=f[83]; t[84]=f[84]; t[85]=f[85]; t[86]=f[86]; t[87]=f[87]; t[88]=f[88]; t[89]=f[89]; t[90]=f[90]; t[91]=f[91]; t[92]=f[92]; t[93]=f[93]; t[94]=f[94]; t[95]=f[95]; t[96]=f[96]; t[97]=f[97]; t[98]=f[98]; t[99]=f[99]; t[100]=f[100]; t[101]=f[101]; t[102]=f[102]; t[103]=f[103]; t[104]=f[104]; t[105]=f[105]; t[106]=f[106]; t[107]=f[107]; t[108]=f[108]; t[109]=f[109]; t[110]=f[110]; t[111]=f[111]; t[112]=f[112]; t[113]=f[113]; t[114]=f[114]; t[115]=f[115]; t[116]=f[116]; t[117]=f[117]; t[118]=f[118]; t[119]=f[119]; t[120]=f[120]; t[121]=f[121]; t[122]=f[122]; t[123]=f[123]; t[124]=f[124]; t[125]=f[125]; t[126]=f[126]; t[127]=f[127]; t+=128; f+=128; #endif } while (f < end); /* Fallback branch: each pass is a run of post-increment word moves; the bracketed numbers are a running count of the moves in one pass. */ #else do { *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 8 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 16 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 24 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 32 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 40 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 48 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 56 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 64 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 72 */
*t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 80 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 88 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 96 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 104 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 112 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 120 */ *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; *t++ = *f++; /* 128 */ } while (f < end); #endif return; } /* bmove512 */ /* messen_read: zeroes times[j][k] for 20 tests and NTIMES runs, zeroes avgtime[j] and maxtime[j] and sets mintime[j] to FLT_MAX. NOTE(review): the text then breaks off inside the header of the for (k=0; ... loop and resumes inside a result-printing statement that uses label_cmp and bytes_cmp, so the measuring code of this function and the start of what is presumably a cmp variant are missing from this copy. The row printed there shows the label, 1.0E-06 * bytes / mintime, then avgtime, mintime and maxtime. */ void messen_read(int blocksize, int loops){ register int j, k,l; double scalar, t, times[20][NTIMES]; int z; for (j=0; j<20; j++){ for (k=0 ; k < NTIMES ; k++){ times[j][k]=0; } avgtime[j] = 0, maxtime[j] = 0; mintime[j] = FLT_MAX; } for (k=0; k 0.001) printf("%s%11.4f %11.4f %11.4f %11.4f\n", label_cmp[j], 1.0E-06 * bytes_cmp[j]/mintime[j], avgtime[j], mintime[j], maxtime[j]); } printf(HLINE); if(z==0) printf(HLINE); // use z to prevent aggressive optimizing the tests away } /* messen_copy: starts with the same reset of times, avgtime, maxtime and mintime as messen_read. NOTE(review): the text again breaks off inside the for (k=0; ... header and resumes in the middle of a string literal; what follows prints HLINE and the table header and ends with return 0, so it is presumably the tail of main - TODO confirm against a pristine copy. */ void messen_copy(int blocksize, int loops){ register int j, k,l; double scalar, t, times[20][NTIMES]; int z; for (j=0; j<20; j++){ for (k=0 ; k < NTIMES ; k++){ times[j][k]=0; } avgtime[j] = 0, maxtime[j] = 0; mintime[j] = FLT_MAX; } for (k=0; k B).\n"); printf(HLINE); printf("Function Rate (MB/s) Avg time Min time Max time\n"); for (k=1; k b.\n"); // printf(HLINE); // messen(100,1000000); return 0; } /* checktick: collects M timestamps from mysecond, each taken once at least 1.0E-6 s has passed since the previous one, then returns the minimum over the successive differences, clamped at 0 and starting from 1000000. NOTE(review): the statement that assigns Delta is commented out, so - assuming that line comment originally ended after the nearbyint statement - the MIN/MAX statement reads Delta without it ever being set. */ # define M 20 int checktick() { int i, minDelta, Delta; double t1, t2, timesfound[M]; /* Collect a sequence of M unique time values from the system. */ for (i = 0; i < M; i++) { t1 = mysecond(); while( ((t2=mysecond()) - t1) < 1.0E-6 ) ; timesfound[i] = t1 = t2; } /* * Determine the minimum difference between these M values.
* This result will be our estimate (in microseconds) for the * clock granularity. */ minDelta = 1000000; for (i = 1; i < M; i++) { // Delta = (int) nearbyint( 1.0E6 * (timesfound[i]-timesfound[i-1])); minDelta = MIN(minDelta, MAX(Delta,0)); } return(minDelta); } /* A gettimeofday routine to give access to the wall clock timer on most UNIX-like systems. Returns tv_sec plus tv_usec * 1.e-6 as a double. The return value of gettimeofday is stored in i but never examined. */ #include /* NOTE(review): header name missing in this copy; struct timeval and gettimeofday presumably come from sys/time.h - TODO confirm. */ double mysecond() { struct timeval tp; struct timezone tzp; int i; i = gettimeofday(&tp,&tzp); return ( (double) tp.tv_sec + (double) tp.tv_usec * 1.e-6 ); }