Hi, group,
I am using the following little program to check the BB memory bandwidth. The
numbers I get are about 31MB/s for the C version and 83MB/s for the SIMD version.
This seems to be too slow; I had expected something about 10x faster. Any
suggestions on where to look? x-loader, u-boot, the kernel, or just my compile
flags?
Thanks,
Guo
/*
In omap host environment, compile the code as
arm-none-linux-gnueabi-gcc -O2 -o membench membench.c
*/
#include <malloc.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
/*
 * Print one benchmark result line.
 *
 * label        text printed before "rate(MB/S)" (may be empty).
 * total_bytes  number of bytes copied between the two clock() samples.
 * t1, t2       clock() values taken before and after the timed loops.
 *
 * The rate is bytes copied per second of clock() time, scaled by 1024*1024.
 */
static void report_rate(const char *label, double total_bytes,
                        clock_t t1, clock_t t2)
{
    double tdiff = (double)t2 - (double)t1;
    double rate = total_bytes / (tdiff / CLOCKS_PER_SEC);

    rate /= (1024.0 * 1024.0);
    /* CLOCKS_PER_SEC has type clock_t, so convert it explicitly for %ld. */
    printf("%srate(MB/S) = %.3f, clocks_per_sec %ld\n",
           label, rate, (long)CLOCKS_PER_SEC);
}

/*
 * Memory-copy bandwidth benchmark.
 *
 * Allocates two 16-byte-aligned buffers of bufSize ints, fills the source
 * buffer, then times ITER full copies twice: once with a plain int loop and
 * once with 16-byte GCC vector assignments.  Each figure counts bytes copied;
 * every copied byte is read once and written once.
 *
 * Returns 0 on success, EXIT_FAILURE if a buffer cannot be allocated.
 */
int main(int argc, char** argv)
{
    typedef int v4si __attribute__ ((vector_size(16)));

    const int bufSize = 8*1024*1024;    /* elements per buffer */
    const int ITER = 100;               /* full-buffer copies per measurement */
    const size_t bufBytes = (size_t)bufSize * sizeof(int);
    /* Computed in double: the byte total is close to the 32-bit size_t limit. */
    const double totalBytes = (double)ITER * (double)bufSize * (double)sizeof(int);
    int *pbuf1, *pbuf2;
    v4si *p1, *p2;
    int i, j;
    clock_t t1, t2;

    (void)argc;
    (void)argv;

    pbuf1 = memalign(16, bufBytes);
    pbuf2 = memalign(16, bufBytes);
    if (pbuf1 == NULL || pbuf2 == NULL) {
        fprintf(stderr, "membench: cannot allocate two buffers of %lu bytes\n",
                (unsigned long)bufBytes);
        free(pbuf1);
        free(pbuf2);
        return EXIT_FAILURE;
    }

    for (i = 0; i < bufSize; i++) {
        pbuf2[i] = i;
    }

    /* Scalar version of the memory benchmark */
    t1 = clock();
    for (j = 0; j < ITER; j++) {
        for (i = 0; i < bufSize; i++) {
            pbuf1[i] = pbuf2[i];
        }
        /*
         * Compiler barrier: pbuf1 is never read back, so without this the
         * optimizer could treat the copies as dead stores and drop them.
         */
        __asm__ __volatile__("" : : "r"(pbuf1) : "memory");
    }
    t2 = clock();
    report_rate("", totalBytes, t1, t2);

    /* SIMD version of the memory benchmark */
    t1 = clock();
    for (j = 0; j < ITER; j++) {
        p1 = (v4si*)(pbuf1);
        p2 = (v4si*)(pbuf2);
        /* Each v4si holds four ints, hence bufSize/4 vector copies. */
        for (i = 0; i < bufSize/4; i++) {
            *p1 = *p2;
            p1++;
            p2++;
        }
        /* Same barrier as above, for the same reason. */
        __asm__ __volatile__("" : : "r"(pbuf1) : "memory");
    }
    t2 = clock();
    report_rate("SIMD ", totalBytes, t1, t2);

    free(pbuf1);
    free(pbuf2);
    return 0;
}