[LAD] vectorization

Jens M Andreasen jens.andreasen at comhem.se
Mon May 5 17:18:39 UTC 2008


Jussi!

Could you try this out with your proposed compiler options on your own
hardware?
Admittedly, the recycled PIII here is very unrepresentative, outdated
and old-skool (although it seems to shine when paired up with icc :)

--8<-----------------------------

// include everything just in case we need it ...

#include <unistd.h>
#include <stdio.h>
#include <sched.h>
#include <time.h>
#include <stdlib.h>

#define N 1024

#include <complex.h>


/*
 * Benchmark operands, kept in three different memory layouts so the same
 * complex multiply-accumulate can be timed on each of them.  Every array is
 * requested on a 16-byte boundary (presumably so the vectorizer can use
 * aligned SSE accesses -- confirm against the target).
 */

/* Layout 1: interleaved pairs, [i][0] = real part, [i][1] = imaginary part. */
float ffta[N][2] __attribute__ ((aligned(16)));
float fftb[N][2] __attribute__ ((aligned(16)));
float data[N][2] __attribute__ ((aligned(16)));

/* Layout 2: the compiler's native complex type. */
_Complex float cxA[N] __attribute__ ((aligned(16)));
_Complex float cxB[N] __attribute__ ((aligned(16)));
_Complex float cxD[N] __attribute__ ((aligned(16)));

/* Layout 3: split storage, all real parts followed by all imaginary parts. */
typedef struct
{
   float r[N] __attribute__ ((aligned(16)));   /* real parts      */
   float i[N] __attribute__ ((aligned(16)));   /* imaginary parts */
} cvec_t;

cvec_t cA;
cvec_t cB;
cvec_t cD;

/*
 * Print the CPU time consumed since `start`, in milliseconds, followed by
 * `label`, on stderr.
 *
 * clock() counts in units of 1/CLOCKS_PER_SEC seconds, so the conversion goes
 * through CLOCKS_PER_SEC instead of assuming one tick is a microsecond.  The
 * scaling is done in double so it cannot overflow a narrow clock_t, and the
 * result is cast to long so that it matches the "%ld" conversion (clock_t is
 * not guaranteed to be int).
 */
static void report_elapsed (clock_t start, const char *label)
{
   double ms = (double)(clock() - start) * 1000.0 / (double)CLOCKS_PER_SEC;

   fprintf (stderr, "> clock: %ld ms %s\n", (long)ms, label);
}

/*
 * Time the complex multiply-accumulate  D[i] += A[i] * B[i]  over N elements,
 * repeated n times, for each of the three data layouts declared at file
 * scope: _Complex float arrays, the split real/imaginary cvec_t, and the
 * interleaved float[N][2] arrays.  One timing line per layout is written to
 * stderr.
 *
 * The operand arrays are file-scope objects without initializers, so they
 * start out zeroed; only the elapsed time is of interest here, not the
 * numerical result.
 *
 * Returns 0.
 */
int main(void)
{
   int n = 1000000;   /* number of passes over each array */
   int i,j;
   const char *s;     /* label for the layout being timed */
   clock_t clk;

   s = "(_Complex)";
   clk = clock();

   for (j = 0; j < n; ++j)
   {
      for (i = 0; i < N; ++i)
      {
	 cxD[i] += cxA[i] * cxB[i];
      }
   }

   report_elapsed (clk, s);

   s = "(cvec_t)";
   clk = clock();

   for (j = 0; j < n; ++j)
   {
      for (i = 0; i < N; ++i)
      {
	 /* (a + bi)(c + di) = (ac - bd) + (ad + bc)i */
	 cD.r[i] += cA.r[i] * cB.r[i] - cA.i[i] * cB.i[i];
	 cD.i[i] += cA.r[i] * cB.i[i] + cA.i[i] * cB.r[i];
      }
   }

   report_elapsed (clk, s);

   s = "(original float array[N][2])";
   clk = clock();

   for (j = 0; j < n; ++j)
   {
      for (i = 0; i < N; ++i)
      {
	 data [i][0] += ffta [i][0] * fftb [i][0] - ffta [i][1] * fftb [i][1];
	 data [i][1] += ffta [i][0] * fftb [i][1] + ffta [i][1] * fftb [i][0];
      }
   }

   report_elapsed (clk, s);

   return 0;
}

On Mon, 2008-05-05 at 19:15 +0300, Jussi Laako wrote:
> Jussi Laako wrote:
> > I would propose something like "-march=prescott -O3 -ftree-vectorize" or 
> > "-O3 -sse3 -ftree-vectorize".
> 
> Sorry, typo, "-O3 -msse3 -ftree-vectorize" of course...
> 
> 
> 	- Jussi
-- 




More information about the Linux-audio-dev mailing list