/*
 * The code below performs 1D and 2D FHT (Fast Hartley Transform)
 * The 1D hartley transform code is based on an original piece of code by Ron Mayer
 * The 2D Hartley tranform code, as well as the slow hartley transforms is mine
 *
 * 	Copyright:
 *		This code is free.
		Do whatever you want with it.
 *
 *	Emmanuel Mogenet <mgix@mgix.com>, April 2000
 */

#include <math.h>
#include <stdlib.h>
#include <stdio.h>

#define SQRT2	1.414213562373095048801688724209698078569671875376948073176679f

static void fastHT(
float *fz,
int n
)
{
    if(n==1) return;
    else if(n==2)
    {
	float f0= fz[0];
	float f1= fz[1];
	fz[0]= f0+f1;
	fz[1]= f0-f1;
	return;
    }

    int kb= 0;
    for(int ka=1;ka<n;++ka)
    {
	for(int k=(n>>1); ((kb^=k)&k)==0; k>>=1);
	if(ka>kb)
	{
	    float tmp= fz[ka];
	    fz[ka]= fz[kb];
	    fz[kb]= tmp;
	}
    }

    int k= 0;
    while((1<<k)<n) ++k;
    k&= 1;

    if(k==0)
    {
	float *fi= fz;
	float *fn= fz+n;
	while(fi<fn)
	{
	    float f1= fi[0]-fi[1];
	    float f0= fi[0]+fi[1];
	    float f3= fi[2]-fi[3];
	    float f2= fi[2]+fi[3];
	    fi[2]= f0-f2;
	    fi[0]= f0+f2;
	    fi[3]= f1-f3;
	    fi[1]= f1+f3;
	    fi+= 4;
	}
    }
    else
    {
	float *fi= fz;
	float *fn= fz+n;
	float *gi= fi+1;
	while(fi<fn)
	{
	    float c1= fi[0]-gi[0];
	    float s1= fi[0]+gi[0];
	    float c2= fi[2]-gi[2];
	    float s2= fi[2]+gi[2];
	    float c3= fi[4]-gi[4];
	    float s3= fi[4]+gi[4];
	    float c4= fi[6]-gi[6];
	    float s4= fi[6]+gi[6];

	    float f1= s1-s2;
	    float f0= s1+s2;
	    float g1= c1-c2;
	    float g0= c1+c2;
	    float f3= s3-s4;
	    float f2= s3+s4;
	    float g3= SQRT2*c4;
	    float g2= SQRT2*c3;

	    fi[4]= f0-f2;
	    fi[0]= f0+f2;
	    fi[6]= f1-f3;
	    fi[2]= f1+f3;

	    gi[4]= g0-g2;
	    gi[0]= g0+g2;
	    gi[6]= g1-g3;
	    gi[2]= g1+g3;

	    fi+= 8;
	    gi+= 8;
	}
    }
    if(n<16) return;

    int k1= 0;
    int k2= 0;
    int k3= 0;
    int k4= 0;
    int kx= 0;
    while(k4<n)
    {
	k+= 2;
	k1= (1<<k);
	k2= (k1<<1);
	k4= (k2<<1);
	k3= (k2+k1);
	kx= (k1>>1);

	float *fi= fz;
	float *gi= fi+kx;
	float *fn= fz+n;
	while(fi<fn)
	{
	    float f1= fi[ 0]-fi[k1];
	    float f0= fi[ 0]+fi[k1];
	    float f3= fi[k2]-fi[k3];
	    float f2= fi[k2]+fi[k3];

	    fi[k2]= f0-f2;
	    fi[ 0]= f0+f2;
	    fi[k3]= f1-f3;
	    fi[k1]= f1+f3;

	    float g1= gi[ 0]-gi[k1];
	    float g0= gi[ 0]+gi[k1];
	    float g3= SQRT2*gi[k3];
	    float g2= SQRT2*gi[k2];

	    gi[k2]= g0-g2;
	    gi[ 0]= g0+g2;
	    gi[k3]= g1-g3;
	    gi[k1]= g1+g3;

	    gi+= k4;
	    fi+= k4;
	}

	float theta= (float)(M_PI/4.0/(double)kx);
	for(int i=1; i<kx; ++i)
	{
	    float c1= (float)cos(i*theta);
	    float s1= (float)sin(i*theta);
	    float c2= c1*c1-s1*s1;
	    float s2= 2.0f*(c1*s1);

	    fi= fz+i;
	    gi= fz+k1-i;
	    fn= fz+n;
	    while(fi<fn)
	    {
		float b= s2*fi[k1]-c2*gi[k1];
		float a= c2*fi[k1]+s2*gi[k1];

		float f1= fi[0]-a;
		float f0= fi[0]+a;
		float g1= gi[0]-b;
		float g0= gi[0]+b;

		b= s2*fi[k3]-c2*gi[k3];
		a= c2*fi[k3]+s2*gi[k3];
		float f3= fi[k2]-a;
		float f2= fi[k2]+a;
		float g3= gi[k2]-b;
		float g2= gi[k2]+b;

		b= s1*f2-c1*g3;
		a= c1*f2+s1*g3;

		fi[k2]= f0-a;
		fi[ 0]= f0+a;
		gi[k3]= g1-b;
		gi[k1]= g1+b;

		b= c1*g2-s1*f3;
		a= s1*g2+c1*f3;
		gi[k2]= g0-a;
		gi[ 0]= g0+a;
		fi[k3]= f1-b;
		fi[k1]= f1+b;

		gi+= k4;
		fi+= k4;
	    }
	}
    }
}

static void fastHT2D(
float	*img,
int	nx,
int	ny
)
{
    int	kx=nx/2;
    int	ky=ny/2;

    int ix=1;
    int jx=nx-1;
    while(ix<kx)
    {
	int iy=1;
	int jy=ny-1;
	while(iy<ky)
	{
	    float a= img[ix+iy*nx]/2.0f;
	    float b= img[jx+iy*nx]/2.0f;
	    float c= img[ix+jy*nx]/2.0f;
	    float d= img[jx+jy*nx]/2.0f;

	    img[ix+iy*nx]= ( a+b+c-d);
	    img[ix+jy*nx]= ( a-b+c+d);
	    img[jx+iy*nx]= ( a+b-c+d);
	    img[jx+jy*nx]= (-a+b+c+d);

	    iy++;
	    jy--;
	}
	ix++;
	jx--;
    }

    int x;
    int y;
    for(y=0;y<ny;++y) fastHT(img+y*nx,nx);

    float *tmp=(float*) malloc(ny*sizeof(float));
    for(x=0;x<nx;++x)
    {
	for(y=0;y<ny;++y) tmp[y]= img[x+y*nx];
	fastHT(tmp,ny);
	for(y=0;y<ny;++y) img[x+y*nx]=tmp[y];
    }
    free(tmp);
}

static void slowHT(
float	*fz,
int	n
)
{
    float *copy= (float*) malloc(n*sizeof(float));
    memcpy(copy,fz,n*sizeof(float));
    for(int k=0; k<n; ++k)
    {
	fz[k]= 0.0;
	for(int i=0; i<n; ++i)
	{
	    float angle= 2*M_PI*i*k/(float)n;
	    fz[k]+= copy[i]*(cos(angle)+sin(angle));
	}
    }
    free(copy);
}

static void slowHT2D(
float	*img,
int	nx,
int	ny
)
{
    int sz= nx*ny*sizeof(float);
    float *copy= (float*)malloc(sz);
    memcpy(copy,img,sz);
    for(int q= 0; q<ny; ++q)
    {
	for(int p= 0; p<nx; ++p)
	{
	    img[p+q*nx]= 0.0;
	    for(int j= 0; j<ny; ++j)
	    {
		for(int i= 0; i<nx; ++i)
		{
		    float angle= 2*M_PI*((i*p)/(float)nx + (j*q)/(float)ny);
		    img[p+q*nx]+= copy[i+j*nx]*(cos(angle)+sin(angle));
		}
	    }
	}
    }
    free(copy);
}

#define N 64
#define P 32

main()
{
    int i;
    int n= N;
    int p= P;
    float sig1[N];
    float sig2[N];

    for(i=0; i<n; ++i) sig1[i]= sig2[i]= drand48();

    slowHT(sig1,n);
    fastHT(sig2,n);

    float err= 0.0f;
    for(i=0; i<n; ++i) err+= fabs(sig1[i]-sig2[i]);
    printf("Error on FHT1D %f\n", err);

    float img1[N*P];
    float img2[N*P];
    for(i=0; i<n*p; ++i) img1[i]= img2[i]= drand48();

    slowHT2D(img1,n,p);
    fastHT2D(img2,n,p);

    err= 0.0f;
    for(i=0; i<n; ++i) err+= fabs(img1[i]-img2[i]);
    printf("Error on FHT2D %f\n", err);
}



