• 微信公众号：美女很有趣。 工作之余，放松一下，关注即送10G+美女照片！

# FFT学习笔记

2周前 (04-30) 6次浏览

### 前置知识：

1. 点值表达式
2. 复数
3. 单位根

[begin{aligned}
f(w^{i}_{L})&=sum^{L-1}_{j=0}a_{j} w^{ij}_{L}\
&=a_{0}w^{0}+a_{1}w^{1}_{L}+dots+a_{L-1}w^{L-1}_{L}\
&=(a_{0}w^{0}+a_{2}w^{2}_{L}+dots+a_{L-2}w^{L-2)}_{L})+w^{1}_{L}(a_{1}w^{0}_{L}+a_{3}w^{2}_{L}+dots+a_{L-1}w^{L-2}_{L})\
&=A(w^{2}_{L})+w^{1}_{L}B(w^{2}_{L})\
end{aligned}
]

[begin{aligned}
f(w^{t+frac{L}{2}}_{L})&=A(w^{2t+L}_{L})+w^{t+frac{L}{2}}_{L}B(w^{2t+L}_{L})\
&=A(w^{2t}_{L})-w^{t}_{L}B(w^{2t}_{L})\
f(w^{t}_{L})&=A(w^{2t}_{L})+w^{t}B(w^{2t}_{L})\
end{aligned}
]

DFT过程的实现代码
``````const double Pi=acos(-1.0);
struct complex
{
double x,y;
complex (double xx=0,double yy=0){x=xx,y=yy;}
complex operator+(const complex &b){
complex a;
a.x=x+b.x;
a.y=y+b.y;
return a;
}
complex operator-(const complex &b){
complex a;
a.x=x-b.x;
a.y=y-b.y;
return a;
}
complex operator*(const complex &b){
complex a;
a.x=x*b.x-y*b.y;
a.y=x*b.y+y*b.x;
return a;
}
};
void DFT(int limit,complex *a){
if(limit==1) return;
complex a1[limit>>1],a2[limit>>1];
for(ri i=0;i<limit;i+=2){
a1[i>>1]=a[i],a2[i>>1]=a[i+1];
}
DFT(limit>>1,a1);
DFT(limit>>1,a2);
complex Wn=complex(cos(2.0*Pi/limit),sin(2.0*Pi/limit)),w=complex(1,0);
for(ri i=0;i<limit>>1;++i,w=w*Wn){
a[i]=a1[i]+w*a2[i];
a[i+(limit>>1)]=a1[i]-w*a2[i];
}
}
``````

[begin{aligned}
H_{k}&=sum^{L-1}_{i=0}F_{k}(w_{L}^{-k})^i\
&=sum^{L-1}_{i=0}(sum^{L-1}_{j=0}f_{j}(w^{i}_{L})^{j})(w_{L}^{-k})^i\
&=sum^{L-1}_{i=0}(sum^{L-1}_{j=0}f_{j}(w_{L}^{j})^{i})(w_{L}^{-k})^i\
&=sum^{L-1}_{i=0}sum^{L-1}_{j=0}f_{j}(w_{L}^{j-k})^i\
&=sum^{L-1}_{j=0}f_{j}sum^{L-1}_{i=0}(w_{L}^{j-k})^i\
end{aligned}
]

IDFT过程的实现代码
``````const double Pi=acos(-1.0);
struct complex
{
double x,y;
complex (double xx=0,double yy=0){x=xx,y=yy;}
complex operator+(const complex &b){
complex a;
a.x=x+b.x;
a.y=y+b.y;
return a;
}
complex operator-(const complex &b){
complex a;
a.x=x-b.x;
a.y=y-b.y;
return a;
}
complex operator*(const complex &b){
complex a;
a.x=x*b.x-y*b.y;
a.y=x*b.y+y*b.x;
return a;
}
};
void DFT(int limit,complex *a,int type){
if(limit==1) return;
complex a1[limit>>1],a2[limit>>1];
for(ri i=0;i<limit;i+=2){
a1[i>>1]=a[i],a2[i>>1]=a[i+1];
}
DFT(limit>>1,a1,type);
DFT(limit>>1,a2,type);
complex Wn=complex(cos(2.0*Pi/limit),sin(2.0*type*Pi/limit)),w=complex(1,0);
for(ri i=0;i<limit>>1;++i,w=w*Wn){
a[i]=a1[i]+w*a2[i];
a[i+(limit>>1)]=a1[i]-w*a2[i];
}
}
``````

### 优化

1. 减少乘法次数

复数的乘法运算次数比较多，注意到 (wtimes a_{2}) 这个出现了两次，可以预先记录一下。

2. 递归->迭代

可以发现，递归版效率之所以低，很大原因是一直在开数组，如果能够用迭代实现的话就可以在原数组上进行滚动而不用额外开数组了。

可以发现上面递归的过程实际上是先不断地改变 (a) 数组的位置，直到最后一层为止。然后不停地执行 (a=a_{1}+wtimes a_{2})

[begin{bmatrix}
0,1,2,3,4,5,6,7
end{bmatrix}
\
downarrow\
begin{bmatrix}
0,2,4,6],[1,3,5,7
end{bmatrix}\
downarrow\
begin{bmatrix}
0,4],[2,6],[1,5],[3,7
end{bmatrix}\
downarrow\
begin{bmatrix}
0],[4],[2],[6],[1],[5],[3],[7
end{bmatrix}\
]

这样似乎看不出什么特点，但是转成二进制之后就有：

[ begin{bmatrix}
000,001,010,011,100,101,110,111
end{bmatrix}\
downarrow\
begin{bmatrix}
000,100,010,110,001,101,011,111
end{bmatrix}\
]

刚好每一位的二进制在操作之后都进行了一次翻转。

因此，可以先记录一个 (r) 数组，记录一下下标之间的对应关系。

(r) 数组的求法也比较简单，对于 (r_{i}) ，都是可以通过 (r_{i>>1}) 转移过来的，只需要对 (i) 分奇偶讨论就可以了。

求r数组的过程
``````void rev(int limit,int ws){
//ws=log2(limit)
for(ri i=0;i<limit;i+=2){
r[i]=r[i>>1]>>1;
r[i|1]=(r[i>>1]>>1)|(1<<ws-1);
}
}
``````

然后，就是用三个`for` 来实现递归的过程。

Code
``````void FFT(int limit,complex *a,int type){
for(ri i=0;i<=limit;++i){
if(i<r[i]) swap(a[i],a[r[i]]);
}
for(ri l=1;l<limit;l<<=1){
complex Wn=complex(cos(Pi/l),sin(type*Pi/l));
for(ri i=0;i<limit;i+=(l<<1)){
complex w=complex(1,0);
for(ri j=0;j<len;++j){
complex x=a[i+j],y=w*a[i+j+len];
a[i+j]=x+y;
a[i+j+len]=x-y;
w=w*Wn;
}
}
}
}
``````
3. 预处理单位根

就是把计算单位根的过程提前预处理出来。

但是 (IDFT) 过程的单位根也要预处理吗？

注意到 (w_{L}^{-t}=w_{L}^{L-t})

因此，并不需要在计算的过程中判断当前是在做 (DFT) 还是 (IDFT) ，只需要在最后判断是否需要`reverse` 就可以了。

Code
``````vector<Complex> W[23];
const double Pi=acos(-1.0);
void init(){
int d=log(N)/log(2)+0.5;
for(ri t=1;t<=d;++t){
W[t].resize(1<<t);
W[t][0]=(Complex){1,0},W[t][1]=(Complex){cos(Pi/(1<<t-1)),sin(Pi/(1<<t-1))};
for(ri i=2;i<(1<<t);++i) W[t][i]=W[t][i-1]*W[t][1];
}
}
void DFT(int limit,Complex *f,int flag){
for(ri i=1;i<limit;++i)
if(r[i]<i) swap(f[i],f[r[i]]);
for(ri l=1,t=1;l<limit;++t,l<<=1){
for(ri i=0;i<limit;i+=(l<<1)){
Complex *w=&W[t][0];
for(ri j=0;j<l;++j){
Complex tmp=f[i+j+l]*(*w++);
f[i+j+l]=f[i+j]-tmp;
f[i+j]=f[i+j]+tmp;
}
}
}
if(flag==-1){
reverse(f+1,f+limit);
for(ri i=0;i<limit;++i) f[i].x/=limit,f[i].y/=limit;
}
}
``````

``````#include<cmath>
#include<cstdio>
#include<algorithm>
#include<vector>
using namespace std;
#define il inline
#define ri register int
#define ll long long
#define ui unsigned int
bool f=true;ll x=0;
register char ch=getchar();
while(ch<'0'||ch>'9') {if(ch=='-') f=false;ch=getchar();}
while(ch>='0'&&ch<='9') x=(x<<3)+(x<<1)+(ch^48),ch=getchar();
if(f) return x;
return ~(--x);
}
il void write(const ll &x){if(x>9) write(x/10);putchar(x%10+'0');}
il void print(const ll &x) {x<0?putchar('-'),write(~(x-1)):write(x);putchar(' ');}
il ll max(const ll &a,const ll &b){return a>b?a:b;}
il ll min(const ll &a,const ll &b){return a<b?a:b;}
const int N=1<<21;
int r[N];
void rev(int limit,int ws){
for(ri i=0;i<limit;i+=2){
r[i]=r[i>>1]>>1;
r[i|1]=(r[i>>1]>>1)|(1<<ws-1);
}
}
struct Complex
{
double x,y;
Complex operator+(const Complex &p)const{
return (Complex){x+p.x,y+p.y};
}
Complex operator-(const Complex &p)const{
return (Complex){x-p.x,y-p.y};
}
Complex operator*(const Complex &p)const{
return (Complex){x*p.x-y*p.y,x*p.y+y*p.x};
}
};
vector<Complex> W[23];
const double Pi=acos(-1.0);
void init(){
int d=log(N)/log(2)+0.5;
for(ri t=1;t<=d;++t){
W[t].resize(1<<t);
W[t][0]=(Complex){1,0},W[t][1]=(Complex){cos(Pi/(1<<t-1)),sin(Pi/(1<<t-1))};
for(ri i=2;i<(1<<t);++i) W[t][i]=W[t][i-1]*W[t][1];
}
}
void DFT(int limit,Complex *f,int flag){
for(ri i=1;i<limit;++i)
if(r[i]<i) swap(f[i],f[r[i]]);
for(ri l=1,t=1;l<limit;++t,l<<=1){
for(ri i=0;i<limit;i+=(l<<1)){
Complex *w=&W[t][0];
for(ri j=0;j<l;++j){
Complex tmp=f[i+j+l]*(*w++);
f[i+j+l]=f[i+j]-tmp;
f[i+j]=f[i+j]+tmp;
}
}
}
if(flag==-1){
reverse(f+1,f+limit);
for(ri i=0;i<limit;++i) f[i].x/=(limit<<1),f[i].y/=(limit<<1);
}
}
Complex FFT_f[N];
void FFT(int n,ll *f,int m,ll *g,int lim=0){
if(!lim) lim=n+m;
int limit=1,ws=0;
for(;limit<=n+m;limit<<=1,++ws);
rev(limit,ws);
for(ri i=0;i<limit;++i) FFT_f[i].x=FFT_f[i].y=0;
for(ri i=0;i<=n;++i) FFT_f[i].x=f[i];
for(ri i=0;i<=m;++i) FFT_f[i].y=g[i];
DFT(limit,FFT_f,1);
for(ri i=0;i<limit;++i) FFT_f[i]=FFT_f[i]*FFT_f[i];
DFT(limit,FFT_f,-1);
for(ri i=0;i<=lim;++i) f[i]=FFT_f[i].y+0.5;
}
ll n,m,f[N],g[N];
int main(){
init();
// freopen("rand.in","r",stdin);
// freopen("1.out","w",stdout);
FFT(n,f,m,g);
for(ri i=0;i<=n+m;++i) print(f[i]);
return 0;
}
``````

### 进一步优化

[begin{aligned}
F(x)&=A(x)+iB(x)\
G(x)&=A(x)-iB(x)\
F(w^{t}_{L})&=sum^{L-1}_{j=0}(a_{j}sin(frac{2jpi}{L})-b_{j}cos(frac{2jpi}{L}))+i(a_{j}cos(frac{2jpi}{L})+b_{j}sin(frac{2jpi}{L}))\
G(w^{t}_{L})&=sum^{L-1}_{j=0}(a_{j}sin(frac{2jpi}{L})+b_{j}cos(frac{2jpi}{L}))+i(a_{j}cos(frac{2jpi}{L})-b_{j}sin(frac{2jpi}{L}))\
end{aligned}
]

[begin{aligned}
F(w^{t}_{L})+G(w^{t}_{L})&=sum^{L-1}_{j=0}2a_{j}sin(frac{2jpi}{L})+2ia_{j}cos(frac{2jpi}{L})=2A(w^{t}_{L})\
A(w^{t}_{L})&=frac{F(w_{L}^{t})+G(w^{t}_{L})}{2}\
texttt{同理有}B(w^{t}_{L})&=-ifrac{F(w^{t}_{L})-G(w^{t}_{L})}{2}\
end{aligned}
]

[begin{aligned}
G(w^{t}_{L})&=sum^{L-1}_{j=0}(a_{j}sin(frac{2jpi}{L})+b_{j}cos(frac{2jpi}{L}))+i(a_{j}cos(frac{2jpi}{L})-b_{j}sin(frac{2jpi}{L}))\
&=sum^{L-1}_{j=0}conj(a_{j}sin(frac{2jpi}{L})+b_{j}cos(frac{2jpi}{L}))-i(a_{j}cos(frac{2jpi}{L})-b_{j}sin(frac{2jpi}{L})))\
&=conj(sum^{L-1}_{j=0}a_{j}sin(frac{2jpi}{L})+b_{j}cos(frac{2jpi}{L}))-i(a_{j}cos(frac{2jpi}{L})-b_{j}sin(frac{2jpi}{L})))\
&=conj(sum^{L-1}_{j=0}a_{j}sin(frac{(2L-2j)pi}{L})-b_{j}cos(frac{(2L-2j)pi}{L}))+i(a_{j}cos(frac{(2L-2j)pi}{L})+b_{j}sin(frac{(2L-2j)pi}{L})))\
&=conj(F(w^{L-j}_{L}))\
end{aligned}
]