/*
 * Copyright (c) 2003 Matteo Frigo
 * Copyright (c) 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sat Apr 19 18:05:33 EDT 2003 */

#include "codelet-dft.h"

/* Generated by: /homea/athena/fftw3/genfft/gen_twiddle_c -simd -trivial-stores -compact -variables 4 -n 15 -name t1bv_15 -include t1b.h -sign 1 */

/*
 * This function contains 92 FP additions, 53 FP multiplications,
 * (or, 78 additions, 39 multiplications, 14 fused multiply/add),
 * 48 stack variables, and 30 memory accesses
 */
/*
 * Generator Id's : 
 * $Id: algsimp.ml,v 1.7 2003/03/15 20:29:42 stevenj Exp $
 * $Id: fft.ml,v 1.2 2003/03/15 20:29:42 stevenj Exp $
 * $Id: gen_twiddle_c.ml,v 1.7 2003/04/16 19:51:27 athena Exp $
 */

#include "t1b.h"

/*
 * t1bv_15 -- size-15 twiddle codelet operating on SIMD vectors (produced by
 * gen_twiddle_c with "-simd -n 15 -sign 1", see the generator line above).
 *
 * Parameters:
 *   ri    not referenced anywhere in this body; all data access goes
 *         through ii.
 *   ii    base pointer of the data.  The 15 points are loaded from and
 *         stored back to x[WS(ios, k)], k = 0..14, with x starting at ii.
 *   W     twiddle table.  In every iteration point k (k = 1..14) is passed
 *         through BYTW together with &W[TWVL * 2 * (k - 1)]; point 0 is
 *         used as loaded.  W then advances by TWVL * 28.
 *   ios   stride between the 15 points, applied through WS(ios, k).
 *   m     iteration count: the loop starts at i = m, steps i down by VL
 *         and runs while i > 0.
 *   dist  per-iteration distance: x advances by VL * dist each pass.
 *
 * Returns W as advanced by the loop.
 *
 * Shape of the computation.  The notes below read VFNMS(a, b, c) as
 * c - a*b, VFMA(a, b, c) as c + a*b, VBYI(v) as i times v and BYTW as a
 * twiddle multiplication; those macros live in the SIMD headers, not in
 * this file, so confirm there.
 *   1. The points are taken in five triples (0,5,10), (3,8,13), (12,2,7),
 *      (6,11,1), (9,14,4).  For each triple the code forms the sum and the
 *      difference of the last two members, the full sum, and
 *      "first - 0.5 * sum".
 *   2. The five full sums feed a 5-point butterfly whose results are stored
 *      to points 0, 3, 6, 9, 12.
 *   3. The "first - 0.5 * sum" values and the differences feed two more
 *      5-point butterflies whose results are combined and stored to the
 *      remaining ten points.
 */
static const R *t1bv_15(R *ri, R *ii, const R *W, stride ios, int m, int dist)
{
     /*
      * Named constants, referenced through LDK() below.  Numerically:
      *   KP866025403 = sqrt(3)/2      KP500000000 = 1/2
      *   KP559016994 = sqrt(5)/4      KP250000000 = 1/4
      *   KP951056516 = sin(2*pi/5)    KP587785252 = sin(pi/5)
      * and the remaining four are products with sqrt(3)/2:
      *   KP484122918 = KP866025403 * KP559016994
      *   KP216506350 = KP866025403 * KP250000000
      *   KP823639103 = KP866025403 * KP951056516
      *   KP509036960 = KP866025403 * KP587785252
      */
     DVK(KP509036960, +0.509036960455127183450980863393907648510733164);
     DVK(KP823639103, +0.823639103546331925877420039278190003029660514);
     DVK(KP216506350, +0.216506350946109661690930792688234045867850657);
     DVK(KP250000000, +0.250000000000000000000000000000000000000000000);
     DVK(KP559016994, +0.559016994374947424102293417182819058860154590);
     DVK(KP951056516, +0.951056516295153572116439333379382143405698634);
     DVK(KP587785252, +0.587785252292473129168705954639072768597652438);
     DVK(KP866025403, +0.866025403784438646763723170752936183471402627);
     DVK(KP484122918, +0.484122918275927110647408174972799951354115213);
     DVK(KP500000000, +0.500000000000000000000000000000000000000000000);
     int i;
     R *x;
     /* Every load and store below goes through x; ri is never touched. */
     x = ii;
     BEGIN_SIMD();
     /*
      * Each pass consumes VL from the iteration count, moves x forward by
      * VL * dist and W forward by TWVL * 28 (14 twiddled points, offsets
      * TWVL * 0, 2, ..., 26).
      *
      * In all LD/ST calls the third argument is &(x[0]) for even point
      * indices and &(x[WS(ios, 1)]) for odd ones -- presumably an alignment
      * hint for the SIMD load/store macros; confirm in their definition.
      */
     for (i = m; i > 0; i = i - VL, x = x + (VL * dist), W = W + (TWVL * 28)) {
	  /* Values shared between the input stage and the output stages. */
	  V Ts, TV, T1p, TZ, T10, T1q, T1r, T1s, T1t, T1u, T1v, Tn, Tt, TQ, TR;
	  V TE, TW, TN;
	  {
	       /* Triple (0, 5, 10): point 0 untwiddled, 5 and 10 via BYTW. */
	       V TT, Tr, Tp, Tq, To, TU;
	       TT = LD(&(x[0]), dist, &(x[0]));
	       Tq = LD(&(x[WS(ios, 10)]), dist, &(x[0]));
	       Tr = BYTW(&(W[TWVL * 18]), Tq);
	       To = LD(&(x[WS(ios, 5)]), dist, &(x[WS(ios, 1)]));
	       Tp = BYTW(&(W[TWVL * 8]), To);
	       Ts = VSUB(Tp, Tr);	/* p5 - p10 */
	       TU = VADD(Tp, Tr);	/* p5 + p10 */
	       TV = VFNMS(LDK(KP500000000), TU, TT);	/* p0 - 0.5 * (p5 + p10) */
	       T1p = VADD(TT, TU);	/* p0 + p5 + p10 */
	  }
	  {
	       V Tx, TG, TK, TB, T5, Ty, Tg, TH, Tl, TL, Ta, TC, Tb, Tm;
	       {
		    /* First members of the other four triples: 3, 6, 9, 12. */
		    V Tw, TF, TJ, TA;
		    Tw = LD(&(x[WS(ios, 3)]), dist, &(x[WS(ios, 1)]));
		    Tx = BYTW(&(W[TWVL * 4]), Tw);
		    TF = LD(&(x[WS(ios, 6)]), dist, &(x[0]));
		    TG = BYTW(&(W[TWVL * 10]), TF);
		    TJ = LD(&(x[WS(ios, 9)]), dist, &(x[WS(ios, 1)]));
		    TK = BYTW(&(W[TWVL * 16]), TJ);
		    TA = LD(&(x[WS(ios, 12)]), dist, &(x[0]));
		    TB = BYTW(&(W[TWVL * 22]), TA);
	       }
	       {
		    /* Triple (3, 8, 13): difference and sum of 8 and 13. */
		    V T2, T4, T1, T3;
		    T1 = LD(&(x[WS(ios, 8)]), dist, &(x[0]));
		    T2 = BYTW(&(W[TWVL * 14]), T1);
		    T3 = LD(&(x[WS(ios, 13)]), dist, &(x[WS(ios, 1)]));
		    T4 = BYTW(&(W[TWVL * 24]), T3);
		    T5 = VSUB(T2, T4);
		    Ty = VADD(T2, T4);
	       }
	       {
		    /* Triple (6, 11, 1): difference and sum of 11 and 1. */
		    V Td, Tf, Tc, Te;
		    Tc = LD(&(x[WS(ios, 11)]), dist, &(x[WS(ios, 1)]));
		    Td = BYTW(&(W[TWVL * 20]), Tc);
		    Te = LD(&(x[WS(ios, 1)]), dist, &(x[WS(ios, 1)]));
		    Tf = BYTW(&(W[0]), Te);
		    Tg = VSUB(Td, Tf);
		    TH = VADD(Td, Tf);
	       }
	       {
		    /* Triple (9, 14, 4): difference and sum of 14 and 4. */
		    V Ti, Tk, Th, Tj;
		    Th = LD(&(x[WS(ios, 14)]), dist, &(x[0]));
		    Ti = BYTW(&(W[TWVL * 26]), Th);
		    Tj = LD(&(x[WS(ios, 4)]), dist, &(x[0]));
		    Tk = BYTW(&(W[TWVL * 6]), Tj);
		    Tl = VSUB(Ti, Tk);
		    TL = VADD(Ti, Tk);
	       }
	       {
		    /* Triple (12, 2, 7): difference and sum of 2 and 7. */
		    V T7, T9, T6, T8;
		    T6 = LD(&(x[WS(ios, 2)]), dist, &(x[0]));
		    T7 = BYTW(&(W[TWVL * 2]), T6);
		    T8 = LD(&(x[WS(ios, 7)]), dist, &(x[WS(ios, 1)]));
		    T9 = BYTW(&(W[TWVL * 12]), T8);
		    Ta = VSUB(T7, T9);
		    TC = VADD(T7, T9);
	       }
	       /*
	        * Pair up the triples (3 with 12, 6 with 9):
	        *   TZ, T10    differences of the per-triple differences
	        *   T1q..T1u   full sums of triples 3, 12, 6, 9
	        *   T1s, T1v   pairwise sums of those full sums
	        *   Tb, Tm     sums of the per-triple differences
	        */
	       TZ = VSUB(T5, Ta);
	       T10 = VSUB(Tg, Tl);
	       T1q = VADD(Tx, Ty);
	       T1r = VADD(TB, TC);
	       T1s = VADD(T1q, T1r);
	       T1t = VADD(TG, TH);
	       T1u = VADD(TK, TL);
	       T1v = VADD(T1t, T1u);
	       Tb = VADD(T5, Ta);
	       Tm = VADD(Tg, Tl);
	       Tn = VMUL(LDK(KP484122918), VSUB(Tb, Tm));
	       Tt = VADD(Tb, Tm);
	       {
		    /*
		     * "first - 0.5 * sum" for triples 3, 12, 6, 9, then the
		     * same pairing as above: TQ/TR are the pair sums, TE/TN
		     * the pair differences, TW the total.
		     */
		    V Tz, TD, TI, TM;
		    Tz = VFNMS(LDK(KP500000000), Ty, Tx);
		    TD = VFNMS(LDK(KP500000000), TC, TB);
		    TQ = VADD(Tz, TD);
		    TI = VFNMS(LDK(KP500000000), TH, TG);
		    TM = VFNMS(LDK(KP500000000), TL, TK);
		    TR = VADD(TI, TM);
		    TE = VSUB(Tz, TD);
		    TW = VADD(TQ, TR);
		    TN = VSUB(TI, TM);
	       }
	  }
	  {
	       /*
	        * Points 5 and 10: T1l sums all five "first - 0.5 * sum"
	        * values, T1m is VBYI of sqrt(3)/2 times the sum of all five
	        * per-triple differences.  Point 5 gets T1l - T1m, point 10
	        * gets T1l + T1m.
	        */
	       V T1n, T1o, T1l, T1m;
	       T1l = VADD(TV, TW);
	       T1m = VBYI(VMUL(LDK(KP866025403), VADD(Ts, Tt)));
	       T1n = VSUB(T1l, T1m);
	       T1o = VADD(T1m, T1l);
	       ST(&(x[WS(ios, 5)]), T1n, dist, &(x[WS(ios, 1)]));
	       ST(&(x[WS(ios, 10)]), T1o, dist, &(x[0]));
	  }
	  {
	       /*
	        * Points 0, 3, 6, 9, 12: 5-point butterfly on the full triple
	        * sums T1p, T1q, T1t, T1u, T1r.
	        */
	       V T1C, T1F, T1z, T1J, T1G;
	       {
		    V T1A, T1B, T1y, T1w, T1x;
		    T1A = VSUB(T1q, T1r);
		    T1B = VSUB(T1t, T1u);
		    /* VBYI'd combinations of the two differences. */
		    T1C = VBYI(VFNMS(LDK(KP951056516), T1B, VMUL(LDK(KP587785252), T1A)));
		    T1F = VBYI(VFMA(LDK(KP951056516), T1A, VMUL(LDK(KP587785252), T1B)));
		    /* T1x -/+ T1y: T1p - 0.25 * total, -/+ sqrt(5)/4 * (T1s - T1v). */
		    T1y = VMUL(LDK(KP559016994), VSUB(T1s, T1v));
		    T1w = VADD(T1s, T1v);
		    T1x = VFNMS(LDK(KP250000000), T1w, T1p);
		    T1z = VSUB(T1x, T1y);
		    T1J = VADD(T1p, T1w);	/* sum of all 15 points */
		    T1G = VADD(T1y, T1x);
	       }
	       {
		    V T1I, T1D, T1E, T1H;
		    ST(&(x[0]), T1J, dist, &(x[0]));
		    T1I = VSUB(T1G, T1F);
		    ST(&(x[WS(ios, 9)]), T1I, dist, &(x[WS(ios, 1)]));
		    T1D = VSUB(T1z, T1C);
		    ST(&(x[WS(ios, 3)]), T1D, dist, &(x[WS(ios, 1)]));
		    T1E = VADD(T1C, T1z);
		    ST(&(x[WS(ios, 12)]), T1E, dist, &(x[0]));
		    T1H = VADD(T1F, T1G);
		    ST(&(x[WS(ios, 6)]), T1H, dist, &(x[0]));
	       }
	  }
	  {
	       /*
	        * Remaining ten points.  Two 5-point butterflies are evaluated:
	        * one on the "first - 0.5 * sum" values (TV, TQ, TR, TE, TN)
	        * and one, pre-scaled by sqrt(3)/2 through the product
	        * constants, on the per-triple differences (Ts, Tt, Tn, TZ,
	        * T10).  Their pieces are then combined pairwise.
	        */
	       V TP, T1h, T15, T1b, T12, T1i, T16, T1e;
	       {
		    /* Parts that end up wrapped in VBYI. */
		    V TO, T1a, Tv, T19, Tu;
		    TO = VFMA(LDK(KP951056516), TE, VMUL(LDK(KP587785252), TN));
		    T1a = VFNMS(LDK(KP951056516), TN, VMUL(LDK(KP587785252), TE));
		    Tu = VFNMS(LDK(KP216506350), Tt, VMUL(LDK(KP866025403), Ts));
		    Tv = VADD(Tn, Tu);
		    T19 = VSUB(Tn, Tu);
		    TP = VBYI(VADD(Tv, TO));
		    T1h = VBYI(VSUB(T1a, T19));
		    T15 = VBYI(VSUB(Tv, TO));
		    T1b = VBYI(VADD(T19, T1a));
	       }
	       {
		    /* Parts that are used without VBYI. */
		    V T11, T1c, TY, T1d, TS, TX;
		    T11 = VFMA(LDK(KP823639103), TZ, VMUL(LDK(KP509036960), T10));
		    T1c = VFNMS(LDK(KP823639103), T10, VMUL(LDK(KP509036960), TZ));
		    TS = VMUL(LDK(KP559016994), VSUB(TQ, TR));
		    TX = VFNMS(LDK(KP250000000), TW, TV);
		    TY = VADD(TS, TX);
		    T1d = VSUB(TX, TS);
		    T12 = VSUB(TY, T11);
		    T1i = VSUB(T1d, T1c);
		    T16 = VADD(T11, TY);
		    T1e = VADD(T1c, T1d);
	       }
	       {
		    /* Pairs (1, 14) and (7, 8): same operands, sum vs. difference. */
		    V T13, T1j, T1k, T14;
		    T13 = VADD(TP, T12);
		    ST(&(x[WS(ios, 1)]), T13, dist, &(x[WS(ios, 1)]));
		    T1j = VADD(T1h, T1i);
		    ST(&(x[WS(ios, 7)]), T1j, dist, &(x[WS(ios, 1)]));
		    T1k = VSUB(T1i, T1h);
		    ST(&(x[WS(ios, 8)]), T1k, dist, &(x[0]));
		    T14 = VSUB(T12, TP);
		    ST(&(x[WS(ios, 14)]), T14, dist, &(x[0]));
	       }
	       {
		    /* Pairs (4, 11) and (2, 13): same operands, sum vs. difference. */
		    V T17, T1f, T1g, T18;
		    T17 = VADD(T15, T16);
		    ST(&(x[WS(ios, 4)]), T17, dist, &(x[0]));
		    T1f = VADD(T1b, T1e);
		    ST(&(x[WS(ios, 2)]), T1f, dist, &(x[0]));
		    T1g = VSUB(T1e, T1b);
		    ST(&(x[WS(ios, 13)]), T1g, dist, &(x[WS(ios, 1)]));
		    T18 = VSUB(T16, T15);
		    ST(&(x[WS(ios, 11)]), T18, dist, &(x[WS(ios, 1)]));
	       }
	  }
     }
     END_SIMD();
     /* W has been advanced by TWVL * 28 for every pass executed above. */
     return W;
}

/*
 * Twiddle program for t1bv_15: one VTW() entry for each of the fourteen
 * indices 1..14 (point 0 is used untwiddled by the kernel), closed by a
 * TW_NEXT record that carries VL.
 */
static const tw_instr twinstr[] = {
     VTW(1), VTW(2), VTW(3), VTW(4), VTW(5), VTW(6), VTW(7),
     VTW(8), VTW(9), VTW(10), VTW(11), VTW(12), VTW(13), VTW(14),
     {TW_NEXT, VL, 0}
};

/*
 * Descriptor passed to the planner together with the kernel.  The
 * operation counts repeat the generator summary at the top of this file
 * (78 additions, 39 multiplications, 14 fused multiply/adds).
 * NOTE(review): the field meanings noted below are inferred from the
 * values; the authoritative layout is the ct_desc declaration, which is
 * not visible in this file.
 */
static const ct_desc desc = {
     15,		/* matches the generator's "-n 15" */
     "t1bv_15",		/* same string as the kernel's name */
     twinstr,		/* twiddle program defined above */
     {78, 39, 14, 0},	/* operation counts, see header comment */
     &GENUS,
     0,
     0,
     0
};

/*
 * Registration hook for this codelet: passes the t1bv_15 kernel and its
 * descriptor to X(kdft_dit_register) for the given planner.
 */
void X(codelet_t1bv_15) (planner *p)
{
     X(kdft_dit_register) (p, t1bv_15, &desc);
}
