【OTFFT: High Speed FFT library】

【4基底の Stockham FFT】

 さて、みなさんは4基底の Cooley-Tukey のアルゴリズムをご存知でしょうか。 FFT における小さなサイズのフーリエ変換への分解をサイズ2で行うのではなく、 サイズ4で行うのが4基底のアルゴリズムです。2基底のアルゴリズムより計算量が 少なくなることが知られています。

 ところで、Cooley-Tukey で4基底のアルゴリズムを考えるのは簡単ですが、 あの Stockham の漸化式から4基底を考えるのは多少骨が折れます。実際、 とあるサイトでは Stockham の漸化式から強引に4基底のアルゴリズムを導いて いるせいで余分なステップを踏むアルゴリズムが紹介されていました。

 しかし、Stockham のアルゴリズムが Cooley-Tukey のアルゴリズムの簡単な 変形であることを知っていれば、4基底の Cooley-Tukey のアルゴリズムから、 4基底の Stockham のアルゴリズムを簡単に導くことができます。

 ではまず、4基底の Cooley-Tukey(DIF) のアルゴリズムを示しましょう。今回は 逆変換も Cooley-Tukey のアルゴリズムから簡単に導けることを示すため、 ifft()fft0() で兼用したりせず、Cooley-Tukey の逆フーリエ変換も同時に示します。

リスト8:4基底の Cooley-Tukey(DIF) アルゴリズム
#include <complex>
#include <cmath>

typedef std::complex<double> complex_t;

void fft0(int n, int q, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) {}
    else if (n == 2) {
        const complex_t a = x[q + 0];
        const complex_t b = x[q + 1];
        x[q + 0] = a + b;
        x[q + 1] = a - b;
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), -sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            const complex_t a = x[q + p + n0];
            const complex_t b = x[q + p + n1];
            const complex_t c = x[q + p + n2];
            const complex_t d = x[q + p + n3];
            const complex_t  apc =    a + c;
            const complex_t  amc =    a - c;
            const complex_t  bpd =    b + d;
            const complex_t jbmd = j*(b - d);
            y[q + p + n0] =      apc +  bpd;
            y[q + p + n1] = w1p*(amc - jbmd);
            y[q + p + n2] = w2p*(apc -  bpd);
            y[q + p + n3] = w3p*(amc + jbmd);
        }
        fft0(n/4, q + n0, y, x);
        fft0(n/4, q + n1, y, x);
        fft0(n/4, q + n2, y, x);
        fft0(n/4, q + n3, y, x);
        for (int p = 0; p < n1; p++) {
            x[q + 4*p + 0] = y[q + p + n0];
            x[q + 4*p + 1] = y[q + p + n1];
            x[q + 4*p + 2] = y[q + p + n2];
            x[q + 4*p + 3] = y[q + p + n3];
        }
    }
}

void ifft0(int n, int q, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) {}
    else if (n == 2) {
        const complex_t a = x[q + 0];
        const complex_t b = x[q + 1];
        x[q + 0] = a + b;
        x[q + 1] = a - b;
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            const complex_t a = x[q + p + n0];
            const complex_t b = x[q + p + n1];
            const complex_t c = x[q + p + n2];
            const complex_t d = x[q + p + n3];
            const complex_t  apc =    a + c;
            const complex_t  amc =    a - c;
            const complex_t  bpd =    b + d;
            const complex_t jbmd = j*(b - d);
            y[q + p + n0] =      apc +  bpd;
            y[q + p + n1] = w1p*(amc + jbmd);
            y[q + p + n2] = w2p*(apc -  bpd);
            y[q + p + n3] = w3p*(amc - jbmd);
        }
        ifft0(n/4, q + n0, y, x);
        ifft0(n/4, q + n1, y, x);
        ifft0(n/4, q + n2, y, x);
        ifft0(n/4, q + n3, y, x);
        for (int p = 0; p < n1; p++) {
            x[q + 4*p + 0] = y[q + p + n0];
            x[q + 4*p + 1] = y[q + p + n1];
            x[q + 4*p + 2] = y[q + p + n2];
            x[q + 4*p + 3] = y[q + p + n3];
        }
    }
}

void fft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    fft0(N, 0, x, y);
    delete[] y;
    for (int k = 0; k < N; k++) x[k] /= N;
}

void ifft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    ifft0(N, 0, x, y);
    delete[] y;
}

 出力を自然な順序で並べるために、4の倍数成分、4で割ると1余る成分、 4で割ると2余る成分、4で割ると3余る成分を合成するようにしてあります。 これを基に、セクション1, セクション2の要領で 変形してやると4基底の Stockham(DIF) のアルゴリズムは以下のようになります。

リスト9:4基底の Stockham(DIF) アルゴリズム
#include <complex>
#include <cmath>

typedef std::complex<double> complex_t;

void fft0(int n, int s, bool eo, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) { if (eo) for (int q = 0; q < s; q++) y[q] = x[q]; }
    else if (n == 2) {
        for (int q = 0; q < s; q++) {
            const complex_t a = x[q + 0];
            const complex_t b = x[q + s];
            y[q + 0] = a + b;
            y[q + s] = a - b;
        }
        fft0(1, 2*s, !eo, y, x);
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), -sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            for (int q = 0; q < s; q++) {
                const complex_t a = x[q + s*(p + n0)];
                const complex_t b = x[q + s*(p + n1)];
                const complex_t c = x[q + s*(p + n2)];
                const complex_t d = x[q + s*(p + n3)];
                const complex_t  apc =    a + c;
                const complex_t  amc =    a - c;
                const complex_t  bpd =    b + d;
                const complex_t jbmd = j*(b - d);
                y[q + s*(4*p + 0)] =      apc +  bpd;
                y[q + s*(4*p + 1)] = w1p*(amc - jbmd);
                y[q + s*(4*p + 2)] = w2p*(apc -  bpd);
                y[q + s*(4*p + 3)] = w3p*(amc + jbmd);
            }
        }
        fft0(n/4, 4*s, !eo, y, x);
    }
}

void ifft0(int n, int s, bool eo, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) { if (eo) for (int q = 0; q < s; q++) y[q] = x[q]; }
    else if (n == 2) {
        for (int q = 0; q < s; q++) {
            const complex_t a = x[q + 0];
            const complex_t b = x[q + s];
            y[q + 0] = a + b;
            y[q + s] = a - b;
        }
        ifft0(1, 2*s, !eo, y, x);
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            for (int q = 0; q < s; q++) {
                const complex_t a = x[q + s*(p + n0)];
                const complex_t b = x[q + s*(p + n1)];
                const complex_t c = x[q + s*(p + n2)];
                const complex_t d = x[q + s*(p + n3)];
                const complex_t  apc =    a + c;
                const complex_t  amc =    a - c;
                const complex_t  bpd =    b + d;
                const complex_t jbmd = j*(b - d);
                y[q + s*(4*p + 0)] =      apc +  bpd;
                y[q + s*(4*p + 1)] = w1p*(amc + jbmd);
                y[q + s*(4*p + 2)] = w2p*(apc -  bpd);
                y[q + s*(4*p + 3)] = w3p*(amc - jbmd);
            }
        }
        ifft0(n/4, 4*s, !eo, y, x);
    }
}

void fft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    fft0(N, 1, 0, x, y);
    delete[] y;
    for (int k = 0; k < N; k++) x[k] /= N;
}

void ifft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    ifft0(N, 1, 0, x, y);
    delete[] y;
}