４基底の Stockham FFT

【４基底の Stockham FFT】

　さて、みなさんは４基底の Cooley-Tukey のアルゴリズムをご存知でしょうか。 FFT における小さなサイズのフーリエ変換への分解をサイズ２で行うのではなく、サイズ４で行うのが４基底のアルゴリズムです。２基底のアルゴリズムより計算量が少なくなることが知られています。

　ところで、Cooley-Tukey で４基底のアルゴリズムを考えるのは簡単ですが、あの Stockham の漸化式から４基底を考えるのは多少骨が折れます。実際、とあるサイトでは Stockham の漸化式から強引に４基底のアルゴリズムを導いているせいで余分なステップを踏むアルゴリズムが紹介されていました。

　しかし、Stockham のアルゴリズムが Cooley-Tukey のアルゴリズムの簡単な変形であることを知っていれば、４基底の Cooley-Tukey のアルゴリズムから、４基底の Stockham のアルゴリズムを簡単に導くことができます。

　ではまず、４基底の Cooley-Tukey(DIF) のアルゴリズムを示しましょう。今回は逆変換も Cooley-Tukey のアルゴリズムから簡単に導けることを示すため、 ifft() を fft0() で兼用したりせず、Cooley-Tukey の逆フーリエ変換も同時に示します。

リスト８：４基底の Cooley-Tukey(DIF) アルゴリズム

#include <complex>
#include <cmath>

typedef std::complex<double> complex_t;

void fft0(int n, int q, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) {}
    else if (n == 2) {
        const complex_t a = x[q + 0];
        const complex_t b = x[q + 1];
        x[q + 0] = a + b;
        x[q + 1] = a - b;
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), -sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            const complex_t a = x[q + p + n0];
            const complex_t b = x[q + p + n1];
            const complex_t c = x[q + p + n2];
            const complex_t d = x[q + p + n3];
            const complex_t  apc =    a + c;
            const complex_t  amc =    a - c;
            const complex_t  bpd =    b + d;
            const complex_t jbmd = j*(b - d);
            y[q + p + n0] =      apc +  bpd;
            y[q + p + n1] = w1p*(amc - jbmd);
            y[q + p + n2] = w2p*(apc -  bpd);
            y[q + p + n3] = w3p*(amc + jbmd);
        }
        fft0(n/4, q + n0, y, x);
        fft0(n/4, q + n1, y, x);
        fft0(n/4, q + n2, y, x);
        fft0(n/4, q + n3, y, x);
        for (int p = 0; p < n1; p++) {
            x[q + 4*p + 0] = y[q + p + n0];
            x[q + 4*p + 1] = y[q + p + n1];
            x[q + 4*p + 2] = y[q + p + n2];
            x[q + 4*p + 3] = y[q + p + n3];
        }
    }
}

void ifft0(int n, int q, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) {}
    else if (n == 2) {
        const complex_t a = x[q + 0];
        const complex_t b = x[q + 1];
        x[q + 0] = a + b;
        x[q + 1] = a - b;
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            const complex_t a = x[q + p + n0];
            const complex_t b = x[q + p + n1];
            const complex_t c = x[q + p + n2];
            const complex_t d = x[q + p + n3];
            const complex_t  apc =    a + c;
            const complex_t  amc =    a - c;
            const complex_t  bpd =    b + d;
            const complex_t jbmd = j*(b - d);
            y[q + p + n0] =      apc +  bpd;
            y[q + p + n1] = w1p*(amc + jbmd);
            y[q + p + n2] = w2p*(apc -  bpd);
            y[q + p + n3] = w3p*(amc - jbmd);
        }
        ifft0(n/4, q + n0, y, x);
        ifft0(n/4, q + n1, y, x);
        ifft0(n/4, q + n2, y, x);
        ifft0(n/4, q + n3, y, x);
        for (int p = 0; p < n1; p++) {
            x[q + 4*p + 0] = y[q + p + n0];
            x[q + 4*p + 1] = y[q + p + n1];
            x[q + 4*p + 2] = y[q + p + n2];
            x[q + 4*p + 3] = y[q + p + n3];
        }
    }
}

void fft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    fft0(N, 0, x, y);
    delete[] y;
    for (int k = 0; k < N; k++) x[k] /= N;
}

void ifft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    ifft0(N, 0, x, y);
    delete[] y;
}

　出力を自然な順序で並べるために、４の倍数成分、４で割ると１余る成分、４で割ると２余る成分、４で割ると３余る成分を合成するようにしてあります。これを基に、セクション1, セクション2の要領で変形してやると４基底の Stockham(DIF) のアルゴリズムは以下のようになります。

リスト９：４基底の Stockham(DIF) アルゴリズム

#include <complex>
#include <cmath>

typedef std::complex<double> complex_t;

void fft0(int n, int s, bool eo, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) { if (eo) for (int q = 0; q < s; q++) y[q] = x[q]; }
    else if (n == 2) {
        for (int q = 0; q < s; q++) {
            const complex_t a = x[q + 0];
            const complex_t b = x[q + s];
            y[q + 0] = a + b;
            y[q + s] = a - b;
        }
        fft0(1, 2*s, !eo, y, x);
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), -sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            for (int q = 0; q < s; q++) {
                const complex_t a = x[q + s*(p + n0)];
                const complex_t b = x[q + s*(p + n1)];
                const complex_t c = x[q + s*(p + n2)];
                const complex_t d = x[q + s*(p + n3)];
                const complex_t  apc =    a + c;
                const complex_t  amc =    a - c;
                const complex_t  bpd =    b + d;
                const complex_t jbmd = j*(b - d);
                y[q + s*(4*p + 0)] =      apc +  bpd;
                y[q + s*(4*p + 1)] = w1p*(amc - jbmd);
                y[q + s*(4*p + 2)] = w2p*(apc -  bpd);
                y[q + s*(4*p + 3)] = w3p*(amc + jbmd);
            }
        }
        fft0(n/4, 4*s, !eo, y, x);
    }
}

void ifft0(int n, int s, bool eo, complex_t* x, complex_t* y)
{
    static const complex_t j = complex_t(0, 1);
    const int n0 = 0;
    const int n1 = n/4;
    const int n2 = n/2;
    const int n3 = n1 + n2;
    const double theta0 = 2*M_PI/n;

    if (n == 1) { if (eo) for (int q = 0; q < s; q++) y[q] = x[q]; }
    else if (n == 2) {
        for (int q = 0; q < s; q++) {
            const complex_t a = x[q + 0];
            const complex_t b = x[q + s];
            y[q + 0] = a + b;
            y[q + s] = a - b;
        }
        ifft0(1, 2*s, !eo, y, x);
    }
    else if (n > 2) {
        for (int p = 0; p < n1; p++) {
            const complex_t w1p = complex_t(cos(p*theta0), sin(p*theta0));
            const complex_t w2p = w1p*w1p;
            const complex_t w3p = w1p*w2p;
            for (int q = 0; q < s; q++) {
                const complex_t a = x[q + s*(p + n0)];
                const complex_t b = x[q + s*(p + n1)];
                const complex_t c = x[q + s*(p + n2)];
                const complex_t d = x[q + s*(p + n3)];
                const complex_t  apc =    a + c;
                const complex_t  amc =    a - c;
                const complex_t  bpd =    b + d;
                const complex_t jbmd = j*(b - d);
                y[q + s*(4*p + 0)] =      apc +  bpd;
                y[q + s*(4*p + 1)] = w1p*(amc + jbmd);
                y[q + s*(4*p + 2)] = w2p*(apc -  bpd);
                y[q + s*(4*p + 3)] = w3p*(amc - jbmd);
            }
        }
        ifft0(n/4, 4*s, !eo, y, x);
    }
}

void fft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    fft0(N, 1, 0, x, y);
    delete[] y;
    for (int k = 0; k < N; k++) x[k] /= N;
}

void ifft(int N, complex_t* x)
{
    complex_t* y = new complex_t[N];
    ifft0(N, 1, 0, x, y);
    delete[] y;
}