/*
 * Copyright (c) 2003 Matteo Frigo
 * Copyright (c) 2003 Massachusetts Institute of Technology
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/* This file was automatically generated --- DO NOT EDIT */
/* Generated on Sat Apr 19 18:02:39 EDT 2003 */

#include "codelet-dft.h"

/* Generated by: /homea/athena/fftw3/genfft/gen_notw_noinline_c -simd -trivial-stores -compact -variables 4 -n 64 -name m1fv_64 -include n1f.h */

/*
 * This function contains 456 FP additions, 124 FP multiplications,
 * (or, 404 additions, 72 multiplications, 52 fused multiply/add),
 * 111 stack variables, and 128 memory accesses
 */
/*
 * Generator Id's : 
 * $Id: algsimp.ml,v 1.7 2003/03/15 20:29:42 stevenj Exp $
 * $Id: fft.ml,v 1.2 2003/03/15 20:29:42 stevenj Exp $
 * $Id: gen_notw_noinline_c.ml,v 1.1 2003/04/17 10:44:21 athena Exp $
 */

#include "n1f.h"

/*
 * m1fv_64_0: straight-line size-64 kernel emitted by genfft (see the
 * "Generated by" command line earlier in this file: -n 64 -name m1fv_64).
 *
 * Reads each of the 64 elements xi[WS(is, k)], k = 0..63, exactly once
 * through LD() (passing ivs), combines them with VADD/VSUB/VMUL/VFMA/
 * VFNMS/VBYI and the constants declared at the top of the body, and
 * writes each of the 64 elements xo[WS(os, k)], k = 0..63, exactly once
 * through ST() (passing ovs).  There are no loops or branches.
 *
 * Parameters:
 *   xi   input base pointer; only used as an LD() source.
 *   xo   output base pointer; only used as an ST() destination.
 *   is   input stride, used only via WS(is, k).
 *   os   output stride, used only via WS(os, k).
 *   ivs  forwarded unchanged to every LD().
 *   ovs  forwarded unchanged to every ST().
 *
 * NOTE(review): V, LD, ST, WS, DVK, LDK and the V* operations come from
 * the included headers and are not visible here.  VFMA(a, b, c) is
 * presumably a*b + c, VFNMS(a, b, c) presumably c - a*b, and VBYI(x)
 * presumably x times the imaginary unit -- confirm against the SIMD
 * header.  The last LD/ST argument is &x[0] for even k and &x[WS(s, 1)]
 * for odd k; presumably an alignment hint -- confirm.
 */
static void m1fv_64_0(const R *xi, R *xo, stride is, stride os, int ivs, int ovs)
{
     /*
      * Trigonometric constants (names carry the leading digits):
      *   KP995184726 / KP098017140 = cos / sin (pi/32)
      *   KP881921264 / KP471396736 = cos / sin (5 pi/32)
      *   KP773010453 / KP634393284 = cos / sin (7 pi/32)
      *   KP956940335 / KP290284677 = cos / sin (3 pi/32)
      *   KP831469612 / KP555570233 = cos / sin (3 pi/16)
      *   KP980785280 / KP195090322 = cos / sin (pi/16)
      *   KP923879532 / KP382683432 = cos / sin (pi/8)
      *   KP707106781               = cos (pi/4) = sqrt(1/2)
      */
     DVK(KP995184726, +0.995184726672196886244836953109479921575474869);
     DVK(KP098017140, +0.098017140329560601994195563888641845861136673);
     DVK(KP471396736, +0.471396736825997648556387625905254377657460319);
     DVK(KP881921264, +0.881921264348355029712756863660388349508442621);
     DVK(KP773010453, +0.773010453362736960810906609758469800971041293);
     DVK(KP634393284, +0.634393284163645498215171613225493370675687095);
     DVK(KP290284677, +0.290284677254462367636192375817395274691476278);
     DVK(KP956940335, +0.956940335732208864935797886980269969482849206);
     DVK(KP555570233, +0.555570233019602224742830813948532874374937191);
     DVK(KP831469612, +0.831469612302545237078788377617905756738560812);
     DVK(KP980785280, +0.980785280403230449126182236134239036973933731);
     DVK(KP195090322, +0.195090322016128267848284868477022240927691618);
     DVK(KP382683432, +0.382683432365089771728459984030398866761344562);
     DVK(KP923879532, +0.923879532511286756128183189396788286822416626);
     DVK(KP707106781, +0.707106781186547524400844362104849039284835938);
     {
	  /* Intermediates shared between the four input stages and the
	     output stages that follow. */
	  V Tr, T2P, T71, T7N, T74, T80, T2o, T30, T3r, T4j, T53, T6p, T5Y, T6A, T3S;
	  V T4u, T7o, T7r, TI, T2p, T7Q, T81, TZ, T2q, T3x, T3U, T61, T6q, T3u, T3T;
	  V T5i, T6B, T78, T7b, T7c, T5z, T6t, T7U, T83, T1s, T2S, T1B, T2T, T3F, T4n;
	  V T5u, T6s, T3C, T4m, T7f, T7i, T7j, T5Q, T6w, T7X, T84, T23, T2V, T2c, T2W;
	  V T3M, T4q, T5L, T6v, T3J, T4p;
	  /* Input stage 1: loads the 16 inputs whose index is a multiple
	     of 4 (0, 4, ..., 60). */
	  {
	       V T3, T4T, T2m, T4U, Ta, T5V, T2j, T5U, T4W, T4X, Ti, T4Y, T2h, T4Z, T50;
	       V Tp, T51, T2g;
	       {
		    V T1, T2, T2k, T2l;
		    T1 = LD(&(xi[0]), ivs, &(xi[0]));
		    T2 = LD(&(xi[WS(is, 32)]), ivs, &(xi[0]));
		    T3 = VSUB(T1, T2);
		    T4T = VADD(T1, T2);
		    T2k = LD(&(xi[WS(is, 16)]), ivs, &(xi[0]));
		    T2l = LD(&(xi[WS(is, 48)]), ivs, &(xi[0]));
		    T2m = VSUB(T2k, T2l);
		    T4U = VADD(T2k, T2l);
	       }
	       {
		    V T4, T5, T6, T7, T8, T9;
		    T4 = LD(&(xi[WS(is, 8)]), ivs, &(xi[0]));
		    T5 = LD(&(xi[WS(is, 40)]), ivs, &(xi[0]));
		    T6 = VSUB(T4, T5);
		    T7 = LD(&(xi[WS(is, 56)]), ivs, &(xi[0]));
		    T8 = LD(&(xi[WS(is, 24)]), ivs, &(xi[0]));
		    T9 = VSUB(T7, T8);
		    Ta = VMUL(LDK(KP707106781), VADD(T6, T9));
		    T5V = VADD(T4, T5);
		    T2j = VMUL(LDK(KP707106781), VSUB(T9, T6));
		    T5U = VADD(T7, T8);
	       }
	       {
		    V Te, Th, Tl, To;
		    {
			 V Tc, Td, Tf, Tg;
			 Tc = LD(&(xi[WS(is, 4)]), ivs, &(xi[0]));
			 Td = LD(&(xi[WS(is, 36)]), ivs, &(xi[0]));
			 Te = VSUB(Tc, Td);
			 T4W = VADD(Tc, Td);
			 Tf = LD(&(xi[WS(is, 20)]), ivs, &(xi[0]));
			 Tg = LD(&(xi[WS(is, 52)]), ivs, &(xi[0]));
			 Th = VSUB(Tf, Tg);
			 T4X = VADD(Tf, Tg);
		    }
		    Ti = VFNMS(LDK(KP382683432), Th, VMUL(LDK(KP923879532), Te));
		    T4Y = VSUB(T4W, T4X);
		    T2h = VFMA(LDK(KP382683432), Te, VMUL(LDK(KP923879532), Th));
		    {
			 V Tj, Tk, Tm, Tn;
			 Tj = LD(&(xi[WS(is, 60)]), ivs, &(xi[0]));
			 Tk = LD(&(xi[WS(is, 28)]), ivs, &(xi[0]));
			 Tl = VSUB(Tj, Tk);
			 T4Z = VADD(Tj, Tk);
			 Tm = LD(&(xi[WS(is, 12)]), ivs, &(xi[0]));
			 Tn = LD(&(xi[WS(is, 44)]), ivs, &(xi[0]));
			 To = VSUB(Tm, Tn);
			 T50 = VADD(Tm, Tn);
		    }
		    Tp = VFMA(LDK(KP923879532), Tl, VMUL(LDK(KP382683432), To));
		    T51 = VSUB(T4Z, T50);
		    T2g = VFNMS(LDK(KP923879532), To, VMUL(LDK(KP382683432), Tl));
	       }
	       {
		    V Tb, Tq, T6Z, T70;
		    Tb = VADD(T3, Ta);
		    Tq = VADD(Ti, Tp);
		    Tr = VSUB(Tb, Tq);
		    T2P = VADD(Tb, Tq);
		    T6Z = VADD(T4T, T4U);
		    T70 = VADD(T5V, T5U);
		    T71 = VADD(T6Z, T70);
		    T7N = VSUB(T6Z, T70);
	       }
	       {
		    V T72, T73, T2i, T2n;
		    T72 = VADD(T4W, T4X);
		    T73 = VADD(T4Z, T50);
		    T74 = VADD(T72, T73);
		    T80 = VSUB(T73, T72);
		    T2i = VSUB(T2g, T2h);
		    T2n = VSUB(T2j, T2m);
		    T2o = VSUB(T2i, T2n);
		    T30 = VADD(T2n, T2i);
	       }
	       {
		    V T3p, T3q, T4V, T52;
		    T3p = VSUB(T3, Ta);
		    T3q = VADD(T2h, T2g);
		    T3r = VADD(T3p, T3q);
		    T4j = VSUB(T3p, T3q);
		    T4V = VSUB(T4T, T4U);
		    T52 = VMUL(LDK(KP707106781), VADD(T4Y, T51));
		    T53 = VADD(T4V, T52);
		    T6p = VSUB(T4V, T52);
	       }
	       {
		    V T5W, T5X, T3Q, T3R;
		    T5W = VSUB(T5U, T5V);
		    T5X = VMUL(LDK(KP707106781), VSUB(T51, T4Y));
		    T5Y = VADD(T5W, T5X);
		    T6A = VSUB(T5X, T5W);
		    T3Q = VADD(T2m, T2j);
		    T3R = VSUB(Tp, Ti);
		    T3S = VADD(T3Q, T3R);
		    T4u = VSUB(T3R, T3Q);
	       }
	  }
	  /* Input stage 2: loads the 16 inputs whose index is 2 mod 4
	     (2, 6, ..., 62). */
	  {
	       V Tu, T7m, TG, T5d, TS, T7p, TW, T56, TB, T7n, TD, T5g, TP, T7q, TX;
	       V T59, T5Z, T60;
	       {
		    V Ts, Tt, T5b, TE, TF, T5c;
		    Ts = LD(&(xi[WS(is, 62)]), ivs, &(xi[0]));
		    Tt = LD(&(xi[WS(is, 30)]), ivs, &(xi[0]));
		    T5b = VADD(Ts, Tt);
		    TE = LD(&(xi[WS(is, 14)]), ivs, &(xi[0]));
		    TF = LD(&(xi[WS(is, 46)]), ivs, &(xi[0]));
		    T5c = VADD(TE, TF);
		    Tu = VSUB(Ts, Tt);
		    T7m = VADD(T5b, T5c);
		    TG = VSUB(TE, TF);
		    T5d = VSUB(T5b, T5c);
	       }
	       {
		    V TQ, TR, T55, TU, TV, T54;
		    TQ = LD(&(xi[WS(is, 18)]), ivs, &(xi[0]));
		    TR = LD(&(xi[WS(is, 50)]), ivs, &(xi[0]));
		    T55 = VADD(TQ, TR);
		    TU = LD(&(xi[WS(is, 2)]), ivs, &(xi[0]));
		    TV = LD(&(xi[WS(is, 34)]), ivs, &(xi[0]));
		    T54 = VADD(TU, TV);
		    TS = VSUB(TQ, TR);
		    T7p = VADD(T54, T55);
		    TW = VSUB(TU, TV);
		    T56 = VSUB(T54, T55);
	       }
	       {
		    V Tx, T5f, TA, T5e;
		    {
			 V Tv, Tw, Ty, Tz;
			 Tv = LD(&(xi[WS(is, 6)]), ivs, &(xi[0]));
			 Tw = LD(&(xi[WS(is, 38)]), ivs, &(xi[0]));
			 Tx = VSUB(Tv, Tw);
			 T5f = VADD(Tv, Tw);
			 Ty = LD(&(xi[WS(is, 54)]), ivs, &(xi[0]));
			 Tz = LD(&(xi[WS(is, 22)]), ivs, &(xi[0]));
			 TA = VSUB(Ty, Tz);
			 T5e = VADD(Ty, Tz);
		    }
		    TB = VMUL(LDK(KP707106781), VADD(Tx, TA));
		    T7n = VADD(T5f, T5e);
		    TD = VMUL(LDK(KP707106781), VSUB(TA, Tx));
		    T5g = VSUB(T5e, T5f);
	       }
	       {
		    V TL, T57, TO, T58;
		    {
			 V TJ, TK, TM, TN;
			 TJ = LD(&(xi[WS(is, 58)]), ivs, &(xi[0]));
			 TK = LD(&(xi[WS(is, 26)]), ivs, &(xi[0]));
			 TL = VSUB(TJ, TK);
			 T57 = VADD(TJ, TK);
			 TM = LD(&(xi[WS(is, 10)]), ivs, &(xi[0]));
			 TN = LD(&(xi[WS(is, 42)]), ivs, &(xi[0]));
			 TO = VSUB(TM, TN);
			 T58 = VADD(TM, TN);
		    }
		    TP = VMUL(LDK(KP707106781), VSUB(TL, TO));
		    T7q = VADD(T58, T57);
		    TX = VMUL(LDK(KP707106781), VADD(TO, TL));
		    T59 = VSUB(T57, T58);
	       }
	       T7o = VADD(T7m, T7n);
	       T7r = VADD(T7p, T7q);
	       {
		    V TC, TH, T7O, T7P;
		    TC = VADD(Tu, TB);
		    TH = VSUB(TD, TG);
		    TI = VFMA(LDK(KP195090322), TC, VMUL(LDK(KP980785280), TH));
		    T2p = VFNMS(LDK(KP195090322), TH, VMUL(LDK(KP980785280), TC));
		    T7O = VSUB(T7p, T7q);
		    T7P = VSUB(T7m, T7n);
		    T7Q = VMUL(LDK(KP707106781), VADD(T7O, T7P));
		    T81 = VMUL(LDK(KP707106781), VSUB(T7P, T7O));
	       }
	       {
		    V TT, TY, T3v, T3w;
		    TT = VSUB(TP, TS);
		    TY = VADD(TW, TX);
		    TZ = VFNMS(LDK(KP195090322), TY, VMUL(LDK(KP980785280), TT));
		    T2q = VFMA(LDK(KP980785280), TY, VMUL(LDK(KP195090322), TT));
		    T3v = VSUB(Tu, TB);
		    T3w = VADD(TG, TD);
		    T3x = VFNMS(LDK(KP555570233), T3w, VMUL(LDK(KP831469612), T3v));
		    T3U = VFMA(LDK(KP555570233), T3v, VMUL(LDK(KP831469612), T3w));
	       }
	       T5Z = VFNMS(LDK(KP382683432), T56, VMUL(LDK(KP923879532), T59));
	       T60 = VFMA(LDK(KP382683432), T5d, VMUL(LDK(KP923879532), T5g));
	       T61 = VADD(T5Z, T60);
	       T6q = VSUB(T60, T5Z);
	       {
		    V T3s, T3t, T5a, T5h;
		    T3s = VSUB(TW, TX);
		    T3t = VADD(TS, TP);
		    T3u = VFMA(LDK(KP831469612), T3s, VMUL(LDK(KP555570233), T3t));
		    T3T = VFNMS(LDK(KP555570233), T3s, VMUL(LDK(KP831469612), T3t));
		    T5a = VFMA(LDK(KP923879532), T56, VMUL(LDK(KP382683432), T59));
		    T5h = VFNMS(LDK(KP382683432), T5g, VMUL(LDK(KP923879532), T5d));
		    T5i = VADD(T5a, T5h);
		    T6B = VSUB(T5h, T5a);
	       }
	  }
	  /* Input stage 3: loads the 16 inputs whose index is 1 mod 4
	     (1, 5, ..., 61). */
	  {
	       V T5l, T5k, T1q, T76, T1v, T5v, T5w, T1n, T77, T1w, T18, T7a, T1z, T5s, T1f;
	       V T79, T1y, T5p, T1j, T1m, T5x, T5y;
	       {
		    V T1o, T1p, T1t, T1u;
		    T1o = LD(&(xi[WS(is, 17)]), ivs, &(xi[WS(is, 1)]));
		    T1p = LD(&(xi[WS(is, 49)]), ivs, &(xi[WS(is, 1)]));
		    T5l = VADD(T1o, T1p);
		    T1t = LD(&(xi[WS(is, 1)]), ivs, &(xi[WS(is, 1)]));
		    T1u = LD(&(xi[WS(is, 33)]), ivs, &(xi[WS(is, 1)]));
		    T5k = VADD(T1t, T1u);
		    T1q = VSUB(T1o, T1p);
		    T76 = VADD(T5k, T5l);
		    T1v = VSUB(T1t, T1u);
	       }
	       {
		    V T1h, T1i, T1k, T1l;
		    T1h = LD(&(xi[WS(is, 57)]), ivs, &(xi[WS(is, 1)]));
		    T1i = LD(&(xi[WS(is, 25)]), ivs, &(xi[WS(is, 1)]));
		    T1j = VSUB(T1h, T1i);
		    T5v = VADD(T1h, T1i);
		    T1k = LD(&(xi[WS(is, 9)]), ivs, &(xi[WS(is, 1)]));
		    T1l = LD(&(xi[WS(is, 41)]), ivs, &(xi[WS(is, 1)]));
		    T1m = VSUB(T1k, T1l);
		    T5w = VADD(T1k, T1l);
	       }
	       T1n = VMUL(LDK(KP707106781), VSUB(T1j, T1m));
	       T77 = VADD(T5w, T5v);
	       T1w = VMUL(LDK(KP707106781), VADD(T1m, T1j));
	       {
		    V T14, T5q, T17, T5r;
		    {
			 V T12, T13, T15, T16;
			 T12 = LD(&(xi[WS(is, 61)]), ivs, &(xi[WS(is, 1)]));
			 T13 = LD(&(xi[WS(is, 29)]), ivs, &(xi[WS(is, 1)]));
			 T14 = VSUB(T12, T13);
			 T5q = VADD(T12, T13);
			 T15 = LD(&(xi[WS(is, 13)]), ivs, &(xi[WS(is, 1)]));
			 T16 = LD(&(xi[WS(is, 45)]), ivs, &(xi[WS(is, 1)]));
			 T17 = VSUB(T15, T16);
			 T5r = VADD(T15, T16);
		    }
		    T18 = VFNMS(LDK(KP923879532), T17, VMUL(LDK(KP382683432), T14));
		    T7a = VADD(T5q, T5r);
		    T1z = VFMA(LDK(KP923879532), T14, VMUL(LDK(KP382683432), T17));
		    T5s = VSUB(T5q, T5r);
	       }
	       {
		    V T1b, T5n, T1e, T5o;
		    {
			 V T19, T1a, T1c, T1d;
			 T19 = LD(&(xi[WS(is, 5)]), ivs, &(xi[WS(is, 1)]));
			 T1a = LD(&(xi[WS(is, 37)]), ivs, &(xi[WS(is, 1)]));
			 T1b = VSUB(T19, T1a);
			 T5n = VADD(T19, T1a);
			 T1c = LD(&(xi[WS(is, 21)]), ivs, &(xi[WS(is, 1)]));
			 T1d = LD(&(xi[WS(is, 53)]), ivs, &(xi[WS(is, 1)]));
			 T1e = VSUB(T1c, T1d);
			 T5o = VADD(T1c, T1d);
		    }
		    T1f = VFMA(LDK(KP382683432), T1b, VMUL(LDK(KP923879532), T1e));
		    T79 = VADD(T5n, T5o);
		    T1y = VFNMS(LDK(KP382683432), T1e, VMUL(LDK(KP923879532), T1b));
		    T5p = VSUB(T5n, T5o);
	       }
	       T78 = VADD(T76, T77);
	       T7b = VADD(T79, T7a);
	       T7c = VSUB(T78, T7b);
	       T5x = VSUB(T5v, T5w);
	       T5y = VMUL(LDK(KP707106781), VSUB(T5s, T5p));
	       T5z = VADD(T5x, T5y);
	       T6t = VSUB(T5y, T5x);
	       {
		    V T7S, T7T, T1g, T1r;
		    T7S = VSUB(T76, T77);
		    T7T = VSUB(T7a, T79);
		    T7U = VFMA(LDK(KP923879532), T7S, VMUL(LDK(KP382683432), T7T));
		    T83 = VFNMS(LDK(KP382683432), T7S, VMUL(LDK(KP923879532), T7T));
		    T1g = VSUB(T18, T1f);
		    T1r = VSUB(T1n, T1q);
		    T1s = VSUB(T1g, T1r);
		    T2S = VADD(T1r, T1g);
	       }
	       {
		    V T1x, T1A, T3D, T3E;
		    T1x = VADD(T1v, T1w);
		    T1A = VADD(T1y, T1z);
		    T1B = VSUB(T1x, T1A);
		    T2T = VADD(T1x, T1A);
		    T3D = VADD(T1q, T1n);
		    T3E = VSUB(T1z, T1y);
		    T3F = VADD(T3D, T3E);
		    T4n = VSUB(T3E, T3D);
	       }
	       {
		    V T5m, T5t, T3A, T3B;
		    T5m = VSUB(T5k, T5l);
		    T5t = VMUL(LDK(KP707106781), VADD(T5p, T5s));
		    T5u = VADD(T5m, T5t);
		    T6s = VSUB(T5m, T5t);
		    T3A = VSUB(T1v, T1w);
		    T3B = VADD(T1f, T18);
		    T3C = VADD(T3A, T3B);
		    T4m = VSUB(T3A, T3B);
	       }
	  }
	  /* Input stage 4: loads the 16 inputs whose index is 3 mod 4
	     (3, 7, ..., 63). */
	  {
	       V T5B, T5C, T1F, T7d, T2a, T5N, T5M, T1M, T7e, T27, T1U, T7g, T25, T5G, T21;
	       V T7h, T24, T5J, T1I, T1L, T5O, T5P;
	       {
		    V T1D, T1E, T28, T29;
		    T1D = LD(&(xi[WS(is, 63)]), ivs, &(xi[WS(is, 1)]));
		    T1E = LD(&(xi[WS(is, 31)]), ivs, &(xi[WS(is, 1)]));
		    T5B = VADD(T1D, T1E);
		    T28 = LD(&(xi[WS(is, 15)]), ivs, &(xi[WS(is, 1)]));
		    T29 = LD(&(xi[WS(is, 47)]), ivs, &(xi[WS(is, 1)]));
		    T5C = VADD(T28, T29);
		    T1F = VSUB(T1D, T1E);
		    T7d = VADD(T5B, T5C);
		    T2a = VSUB(T28, T29);
	       }
	       {
		    V T1G, T1H, T1J, T1K;
		    T1G = LD(&(xi[WS(is, 7)]), ivs, &(xi[WS(is, 1)]));
		    T1H = LD(&(xi[WS(is, 39)]), ivs, &(xi[WS(is, 1)]));
		    T1I = VSUB(T1G, T1H);
		    T5N = VADD(T1G, T1H);
		    T1J = LD(&(xi[WS(is, 55)]), ivs, &(xi[WS(is, 1)]));
		    T1K = LD(&(xi[WS(is, 23)]), ivs, &(xi[WS(is, 1)]));
		    T1L = VSUB(T1J, T1K);
		    T5M = VADD(T1J, T1K);
	       }
	       T1M = VMUL(LDK(KP707106781), VADD(T1I, T1L));
	       T7e = VADD(T5N, T5M);
	       T27 = VMUL(LDK(KP707106781), VSUB(T1L, T1I));
	       {
		    V T1Q, T5E, T1T, T5F;
		    {
			 V T1O, T1P, T1R, T1S;
			 T1O = LD(&(xi[WS(is, 3)]), ivs, &(xi[WS(is, 1)]));
			 T1P = LD(&(xi[WS(is, 35)]), ivs, &(xi[WS(is, 1)]));
			 T1Q = VSUB(T1O, T1P);
			 T5E = VADD(T1O, T1P);
			 T1R = LD(&(xi[WS(is, 19)]), ivs, &(xi[WS(is, 1)]));
			 T1S = LD(&(xi[WS(is, 51)]), ivs, &(xi[WS(is, 1)]));
			 T1T = VSUB(T1R, T1S);
			 T5F = VADD(T1R, T1S);
		    }
		    T1U = VFNMS(LDK(KP382683432), T1T, VMUL(LDK(KP923879532), T1Q));
		    T7g = VADD(T5E, T5F);
		    T25 = VFMA(LDK(KP382683432), T1Q, VMUL(LDK(KP923879532), T1T));
		    T5G = VSUB(T5E, T5F);
	       }
	       {
		    V T1X, T5H, T20, T5I;
		    {
			 V T1V, T1W, T1Y, T1Z;
			 T1V = LD(&(xi[WS(is, 59)]), ivs, &(xi[WS(is, 1)]));
			 T1W = LD(&(xi[WS(is, 27)]), ivs, &(xi[WS(is, 1)]));
			 T1X = VSUB(T1V, T1W);
			 T5H = VADD(T1V, T1W);
			 T1Y = LD(&(xi[WS(is, 11)]), ivs, &(xi[WS(is, 1)]));
			 T1Z = LD(&(xi[WS(is, 43)]), ivs, &(xi[WS(is, 1)]));
			 T20 = VSUB(T1Y, T1Z);
			 T5I = VADD(T1Y, T1Z);
		    }
		    T21 = VFMA(LDK(KP923879532), T1X, VMUL(LDK(KP382683432), T20));
		    T7h = VADD(T5H, T5I);
		    T24 = VFNMS(LDK(KP923879532), T20, VMUL(LDK(KP382683432), T1X));
		    T5J = VSUB(T5H, T5I);
	       }
	       T7f = VADD(T7d, T7e);
	       T7i = VADD(T7g, T7h);
	       T7j = VSUB(T7f, T7i);
	       T5O = VSUB(T5M, T5N);
	       T5P = VMUL(LDK(KP707106781), VSUB(T5J, T5G));
	       T5Q = VADD(T5O, T5P);
	       T6w = VSUB(T5P, T5O);
	       {
		    V T7V, T7W, T1N, T22;
		    T7V = VSUB(T7d, T7e);
		    T7W = VSUB(T7h, T7g);
		    T7X = VFNMS(LDK(KP382683432), T7W, VMUL(LDK(KP923879532), T7V));
		    T84 = VFMA(LDK(KP382683432), T7V, VMUL(LDK(KP923879532), T7W));
		    T1N = VADD(T1F, T1M);
		    T22 = VADD(T1U, T21);
		    T23 = VSUB(T1N, T22);
		    T2V = VADD(T1N, T22);
	       }
	       {
		    V T26, T2b, T3K, T3L;
		    T26 = VSUB(T24, T25);
		    T2b = VSUB(T27, T2a);
		    T2c = VSUB(T26, T2b);
		    T2W = VADD(T2b, T26);
		    T3K = VADD(T2a, T27);
		    T3L = VSUB(T21, T1U);
		    T3M = VADD(T3K, T3L);
		    T4q = VSUB(T3L, T3K);
	       }
	       {
		    V T5D, T5K, T3H, T3I;
		    T5D = VSUB(T5B, T5C);
		    T5K = VMUL(LDK(KP707106781), VADD(T5G, T5J));
		    T5L = VADD(T5D, T5K);
		    T6v = VSUB(T5D, T5K);
		    T3H = VSUB(T1F, T1M);
		    T3I = VADD(T25, T24);
		    T3J = VADD(T3H, T3I);
		    T4p = VSUB(T3H, T3I);
	       }
	  }
	  /* Output stage: stores outputs 8, 24, 40, 56. */
	  {
	       V T7l, T7x, T7u, T7y;
	       {
		    V T75, T7k, T7s, T7t;
		    T75 = VSUB(T71, T74);
		    T7k = VMUL(LDK(KP707106781), VADD(T7c, T7j));
		    T7l = VADD(T75, T7k);
		    T7x = VSUB(T75, T7k);
		    T7s = VSUB(T7o, T7r);
		    T7t = VMUL(LDK(KP707106781), VSUB(T7j, T7c));
		    T7u = VBYI(VADD(T7s, T7t));
		    T7y = VBYI(VSUB(T7t, T7s));
	       }
	       {
		    V T7v, T7A, T7w, T7z;
		    T7v = VSUB(T7l, T7u);
		    ST(&(xo[WS(os, 56)]), T7v, ovs, &(xo[0]));
		    T7A = VADD(T7x, T7y);
		    ST(&(xo[WS(os, 24)]), T7A, ovs, &(xo[0]));
		    T7w = VADD(T7l, T7u);
		    ST(&(xo[WS(os, 8)]), T7w, ovs, &(xo[0]));
		    T7z = VSUB(T7x, T7y);
		    ST(&(xo[WS(os, 40)]), T7z, ovs, &(xo[0]));
	       }
	  }
	  /* Output stage: stores outputs 4, 28, 36, 60. */
	  {
	       V T7Z, T89, T86, T8a;
	       {
		    V T7R, T7Y, T82, T85;
		    T7R = VADD(T7N, T7Q);
		    T7Y = VADD(T7U, T7X);
		    T7Z = VADD(T7R, T7Y);
		    T89 = VSUB(T7R, T7Y);
		    T82 = VADD(T80, T81);
		    T85 = VADD(T83, T84);
		    T86 = VBYI(VADD(T82, T85));
		    T8a = VBYI(VSUB(T85, T82));
	       }
	       {
		    V T87, T8c, T88, T8b;
		    T87 = VSUB(T7Z, T86);
		    ST(&(xo[WS(os, 60)]), T87, ovs, &(xo[0]));
		    T8c = VADD(T89, T8a);
		    ST(&(xo[WS(os, 28)]), T8c, ovs, &(xo[0]));
		    T88 = VADD(T7Z, T86);
		    ST(&(xo[WS(os, 4)]), T88, ovs, &(xo[0]));
		    T8b = VSUB(T89, T8a);
		    ST(&(xo[WS(os, 36)]), T8b, ovs, &(xo[0]));
	       }
	  }
	  /* Output stage: stores outputs 12, 20, 44, 52. */
	  {
	       V T8f, T8l, T8i, T8m;
	       {
		    V T8d, T8e, T8g, T8h;
		    T8d = VSUB(T7X, T7U);
		    T8e = VSUB(T81, T80);
		    T8f = VBYI(VSUB(T8d, T8e));
		    T8l = VBYI(VADD(T8e, T8d));
		    T8g = VSUB(T7N, T7Q);
		    T8h = VSUB(T84, T83);
		    T8i = VSUB(T8g, T8h);
		    T8m = VADD(T8g, T8h);
	       }
	       {
		    V T8j, T8o, T8k, T8n;
		    T8j = VADD(T8f, T8i);
		    ST(&(xo[WS(os, 20)]), T8j, ovs, &(xo[0]));
		    T8o = VSUB(T8m, T8l);
		    ST(&(xo[WS(os, 52)]), T8o, ovs, &(xo[0]));
		    T8k = VSUB(T8i, T8f);
		    ST(&(xo[WS(os, 44)]), T8k, ovs, &(xo[0]));
		    T8n = VADD(T8l, T8m);
		    ST(&(xo[WS(os, 12)]), T8n, ovs, &(xo[0]));
	       }
	  }
	  /* Output stage: stores outputs 0, 16, 32, 48. */
	  {
	       V T7D, T7J, T7G, T7K;
	       {
		    V T7B, T7C, T7E, T7F;
		    T7B = VADD(T71, T74);
		    T7C = VADD(T7r, T7o);
		    T7D = VADD(T7B, T7C);
		    T7J = VSUB(T7B, T7C);
		    T7E = VADD(T78, T7b);
		    T7F = VADD(T7f, T7i);
		    T7G = VADD(T7E, T7F);
		    T7K = VBYI(VSUB(T7F, T7E));
	       }
	       {
		    V T7H, T7M, T7I, T7L;
		    T7H = VSUB(T7D, T7G);
		    ST(&(xo[WS(os, 32)]), T7H, ovs, &(xo[0]));
		    T7M = VADD(T7J, T7K);
		    ST(&(xo[WS(os, 16)]), T7M, ovs, &(xo[0]));
		    T7I = VADD(T7D, T7G);
		    ST(&(xo[0]), T7I, ovs, &(xo[0]));
		    T7L = VSUB(T7J, T7K);
		    ST(&(xo[WS(os, 48)]), T7L, ovs, &(xo[0]));
	       }
	  }
	  /* Output stage: stores outputs 2, 14, 18, 30, 34, 46, 50, 62. */
	  {
	       V T5T, T6l, T69, T6f, T66, T6m, T6a, T6i;
	       {
		    V T5j, T6e, T5S, T6d, T5A, T5R;
		    T5j = VADD(T53, T5i);
		    T6e = VSUB(T61, T5Y);
		    T5A = VFMA(LDK(KP980785280), T5u, VMUL(LDK(KP195090322), T5z));
		    T5R = VFNMS(LDK(KP195090322), T5Q, VMUL(LDK(KP980785280), T5L));
		    T5S = VADD(T5A, T5R);
		    T6d = VSUB(T5R, T5A);
		    T5T = VADD(T5j, T5S);
		    T6l = VBYI(VADD(T6e, T6d));
		    T69 = VSUB(T5j, T5S);
		    T6f = VBYI(VSUB(T6d, T6e));
	       }
	       {
		    V T62, T6g, T65, T6h, T63, T64;
		    T62 = VADD(T5Y, T61);
		    T6g = VSUB(T53, T5i);
		    T63 = VFNMS(LDK(KP195090322), T5u, VMUL(LDK(KP980785280), T5z));
		    T64 = VFMA(LDK(KP195090322), T5L, VMUL(LDK(KP980785280), T5Q));
		    T65 = VADD(T63, T64);
		    T6h = VSUB(T64, T63);
		    T66 = VBYI(VADD(T62, T65));
		    T6m = VADD(T6g, T6h);
		    T6a = VBYI(VSUB(T65, T62));
		    T6i = VSUB(T6g, T6h);
	       }
	       {
		    V T67, T6n, T6o, T68;
		    T67 = VSUB(T5T, T66);
		    ST(&(xo[WS(os, 62)]), T67, ovs, &(xo[0]));
		    T6n = VADD(T6l, T6m);
		    ST(&(xo[WS(os, 14)]), T6n, ovs, &(xo[0]));
		    T6o = VSUB(T6m, T6l);
		    ST(&(xo[WS(os, 50)]), T6o, ovs, &(xo[0]));
		    T68 = VADD(T5T, T66);
		    ST(&(xo[WS(os, 2)]), T68, ovs, &(xo[0]));
	       }
	       {
		    V T6b, T6j, T6k, T6c;
		    T6b = VSUB(T69, T6a);
		    ST(&(xo[WS(os, 34)]), T6b, ovs, &(xo[0]));
		    T6j = VADD(T6f, T6i);
		    ST(&(xo[WS(os, 18)]), T6j, ovs, &(xo[0]));
		    T6k = VSUB(T6i, T6f);
		    ST(&(xo[WS(os, 46)]), T6k, ovs, &(xo[0]));
		    T6c = VADD(T69, T6a);
		    ST(&(xo[WS(os, 30)]), T6c, ovs, &(xo[0]));
	       }
	  }
	  /* Output stage: stores outputs 6, 10, 22, 26, 38, 42, 54, 58. */
	  {
	       V T6z, T6V, T6J, T6P, T6G, T6W, T6K, T6S;
	       {
		    V T6r, T6O, T6y, T6N, T6u, T6x;
		    T6r = VADD(T6p, T6q);
		    T6O = VSUB(T6B, T6A);
		    T6u = VFMA(LDK(KP831469612), T6s, VMUL(LDK(KP555570233), T6t));
		    T6x = VFNMS(LDK(KP555570233), T6w, VMUL(LDK(KP831469612), T6v));
		    T6y = VADD(T6u, T6x);
		    T6N = VSUB(T6x, T6u);
		    T6z = VADD(T6r, T6y);
		    T6V = VBYI(VADD(T6O, T6N));
		    T6J = VSUB(T6r, T6y);
		    T6P = VBYI(VSUB(T6N, T6O));
	       }
	       {
		    V T6C, T6Q, T6F, T6R, T6D, T6E;
		    T6C = VADD(T6A, T6B);
		    T6Q = VSUB(T6p, T6q);
		    T6D = VFNMS(LDK(KP555570233), T6s, VMUL(LDK(KP831469612), T6t));
		    T6E = VFMA(LDK(KP555570233), T6v, VMUL(LDK(KP831469612), T6w));
		    T6F = VADD(T6D, T6E);
		    T6R = VSUB(T6E, T6D);
		    T6G = VBYI(VADD(T6C, T6F));
		    T6W = VADD(T6Q, T6R);
		    T6K = VBYI(VSUB(T6F, T6C));
		    T6S = VSUB(T6Q, T6R);
	       }
	       {
		    V T6H, T6X, T6Y, T6I;
		    T6H = VSUB(T6z, T6G);
		    ST(&(xo[WS(os, 58)]), T6H, ovs, &(xo[0]));
		    T6X = VADD(T6V, T6W);
		    ST(&(xo[WS(os, 10)]), T6X, ovs, &(xo[0]));
		    T6Y = VSUB(T6W, T6V);
		    ST(&(xo[WS(os, 54)]), T6Y, ovs, &(xo[0]));
		    T6I = VADD(T6z, T6G);
		    ST(&(xo[WS(os, 6)]), T6I, ovs, &(xo[0]));
	       }
	       {
		    V T6L, T6T, T6U, T6M;
		    T6L = VSUB(T6J, T6K);
		    ST(&(xo[WS(os, 38)]), T6L, ovs, &(xo[0]));
		    T6T = VADD(T6P, T6S);
		    ST(&(xo[WS(os, 22)]), T6T, ovs, &(xo[0]));
		    T6U = VSUB(T6S, T6P);
		    ST(&(xo[WS(os, 42)]), T6U, ovs, &(xo[0]));
		    T6M = VADD(T6J, T6K);
		    ST(&(xo[WS(os, 26)]), T6M, ovs, &(xo[0]));
	       }
	  }
	  /* Output stage: stores outputs 3, 13, 19, 29, 35, 45, 51, 61. */
	  {
	       V T3P, T4f, T4g, T40, T43, T49, T4c, T44;
	       {
		    V T3z, T4a, T3W, T48, T3O, T47, T3Z, T4b, T3y, T3V;
		    T3y = VADD(T3u, T3x);
		    T3z = VADD(T3r, T3y);
		    T4a = VSUB(T3r, T3y);
		    T3V = VADD(T3T, T3U);
		    T3W = VADD(T3S, T3V);
		    T48 = VSUB(T3V, T3S);
		    {
			 V T3G, T3N, T3X, T3Y;
			 T3G = VFMA(LDK(KP956940335), T3C, VMUL(LDK(KP290284677), T3F));
			 T3N = VFNMS(LDK(KP290284677), T3M, VMUL(LDK(KP956940335), T3J));
			 T3O = VADD(T3G, T3N);
			 T47 = VSUB(T3N, T3G);
			 T3X = VFNMS(LDK(KP290284677), T3C, VMUL(LDK(KP956940335), T3F));
			 T3Y = VFMA(LDK(KP290284677), T3J, VMUL(LDK(KP956940335), T3M));
			 T3Z = VADD(T3X, T3Y);
			 T4b = VSUB(T3Y, T3X);
		    }
		    T3P = VADD(T3z, T3O);
		    T4f = VBYI(VADD(T48, T47));
		    T4g = VADD(T4a, T4b);
		    T40 = VBYI(VADD(T3W, T3Z));
		    T43 = VSUB(T3z, T3O);
		    T49 = VBYI(VSUB(T47, T48));
		    T4c = VSUB(T4a, T4b);
		    T44 = VBYI(VSUB(T3Z, T3W));
	       }
	       {
		    V T41, T4h, T4i, T42;
		    T41 = VSUB(T3P, T40);
		    ST(&(xo[WS(os, 61)]), T41, ovs, &(xo[WS(os, 1)]));
		    T4h = VADD(T4f, T4g);
		    ST(&(xo[WS(os, 13)]), T4h, ovs, &(xo[WS(os, 1)]));
		    T4i = VSUB(T4g, T4f);
		    ST(&(xo[WS(os, 51)]), T4i, ovs, &(xo[WS(os, 1)]));
		    T42 = VADD(T3P, T40);
		    ST(&(xo[WS(os, 3)]), T42, ovs, &(xo[WS(os, 1)]));
	       }
	       {
		    V T45, T4d, T4e, T46;
		    T45 = VSUB(T43, T44);
		    ST(&(xo[WS(os, 35)]), T45, ovs, &(xo[WS(os, 1)]));
		    T4d = VADD(T49, T4c);
		    ST(&(xo[WS(os, 19)]), T4d, ovs, &(xo[WS(os, 1)]));
		    T4e = VSUB(T4c, T49);
		    ST(&(xo[WS(os, 45)]), T4e, ovs, &(xo[WS(os, 1)]));
		    T46 = VADD(T43, T44);
		    ST(&(xo[WS(os, 29)]), T46, ovs, &(xo[WS(os, 1)]));
	       }
	  }
	  /* Output stage: stores outputs 7, 9, 23, 25, 39, 41, 55, 57. */
	  {
	       V T2f, T2L, T2M, T2w, T2z, T2F, T2I, T2A;
	       {
		    V T11, T2G, T2s, T2E, T2e, T2D, T2v, T2H, T10, T2r;
		    T10 = VSUB(TI, TZ);
		    T11 = VADD(Tr, T10);
		    T2G = VSUB(Tr, T10);
		    T2r = VSUB(T2p, T2q);
		    T2s = VADD(T2o, T2r);
		    T2E = VSUB(T2r, T2o);
		    {
			 V T1C, T2d, T2t, T2u;
			 T1C = VFMA(LDK(KP634393284), T1s, VMUL(LDK(KP773010453), T1B));
			 T2d = VFNMS(LDK(KP634393284), T2c, VMUL(LDK(KP773010453), T23));
			 T2e = VADD(T1C, T2d);
			 T2D = VSUB(T2d, T1C);
			 T2t = VFNMS(LDK(KP634393284), T1B, VMUL(LDK(KP773010453), T1s));
			 T2u = VFMA(LDK(KP773010453), T2c, VMUL(LDK(KP634393284), T23));
			 T2v = VADD(T2t, T2u);
			 T2H = VSUB(T2u, T2t);
		    }
		    T2f = VADD(T11, T2e);
		    T2L = VBYI(VADD(T2E, T2D));
		    T2M = VADD(T2G, T2H);
		    T2w = VBYI(VADD(T2s, T2v));
		    T2z = VSUB(T11, T2e);
		    T2F = VBYI(VSUB(T2D, T2E));
		    T2I = VSUB(T2G, T2H);
		    T2A = VBYI(VSUB(T2v, T2s));
	       }
	       {
		    V T2x, T2N, T2O, T2y;
		    T2x = VSUB(T2f, T2w);
		    ST(&(xo[WS(os, 57)]), T2x, ovs, &(xo[WS(os, 1)]));
		    T2N = VADD(T2L, T2M);
		    ST(&(xo[WS(os, 9)]), T2N, ovs, &(xo[WS(os, 1)]));
		    T2O = VSUB(T2M, T2L);
		    ST(&(xo[WS(os, 55)]), T2O, ovs, &(xo[WS(os, 1)]));
		    T2y = VADD(T2f, T2w);
		    ST(&(xo[WS(os, 7)]), T2y, ovs, &(xo[WS(os, 1)]));
	       }
	       {
		    V T2B, T2J, T2K, T2C;
		    T2B = VSUB(T2z, T2A);
		    ST(&(xo[WS(os, 39)]), T2B, ovs, &(xo[WS(os, 1)]));
		    T2J = VADD(T2F, T2I);
		    ST(&(xo[WS(os, 23)]), T2J, ovs, &(xo[WS(os, 1)]));
		    T2K = VSUB(T2I, T2F);
		    ST(&(xo[WS(os, 41)]), T2K, ovs, &(xo[WS(os, 1)]));
		    T2C = VADD(T2z, T2A);
		    ST(&(xo[WS(os, 25)]), T2C, ovs, &(xo[WS(os, 1)]));
	       }
	  }
	  /* Output stage: stores outputs 5, 11, 21, 27, 37, 43, 53, 59. */
	  {
	       V T4t, T4P, T4Q, T4A, T4D, T4J, T4M, T4E;
	       {
		    V T4l, T4K, T4w, T4I, T4s, T4H, T4z, T4L, T4k, T4v;
		    T4k = VSUB(T3U, T3T);
		    T4l = VADD(T4j, T4k);
		    T4K = VSUB(T4j, T4k);
		    T4v = VSUB(T3x, T3u);
		    T4w = VADD(T4u, T4v);
		    T4I = VSUB(T4v, T4u);
		    {
			 V T4o, T4r, T4x, T4y;
			 T4o = VFMA(LDK(KP881921264), T4m, VMUL(LDK(KP471396736), T4n));
			 T4r = VFNMS(LDK(KP471396736), T4q, VMUL(LDK(KP881921264), T4p));
			 T4s = VADD(T4o, T4r);
			 T4H = VSUB(T4r, T4o);
			 T4x = VFNMS(LDK(KP471396736), T4m, VMUL(LDK(KP881921264), T4n));
			 T4y = VFMA(LDK(KP471396736), T4p, VMUL(LDK(KP881921264), T4q));
			 T4z = VADD(T4x, T4y);
			 T4L = VSUB(T4y, T4x);
		    }
		    T4t = VADD(T4l, T4s);
		    T4P = VBYI(VADD(T4I, T4H));
		    T4Q = VADD(T4K, T4L);
		    T4A = VBYI(VADD(T4w, T4z));
		    T4D = VSUB(T4l, T4s);
		    T4J = VBYI(VSUB(T4H, T4I));
		    T4M = VSUB(T4K, T4L);
		    T4E = VBYI(VSUB(T4z, T4w));
	       }
	       {
		    V T4B, T4R, T4S, T4C;
		    T4B = VSUB(T4t, T4A);
		    ST(&(xo[WS(os, 59)]), T4B, ovs, &(xo[WS(os, 1)]));
		    T4R = VADD(T4P, T4Q);
		    ST(&(xo[WS(os, 11)]), T4R, ovs, &(xo[WS(os, 1)]));
		    T4S = VSUB(T4Q, T4P);
		    ST(&(xo[WS(os, 53)]), T4S, ovs, &(xo[WS(os, 1)]));
		    T4C = VADD(T4t, T4A);
		    ST(&(xo[WS(os, 5)]), T4C, ovs, &(xo[WS(os, 1)]));
	       }
	       {
		    V T4F, T4N, T4O, T4G;
		    T4F = VSUB(T4D, T4E);
		    ST(&(xo[WS(os, 37)]), T4F, ovs, &(xo[WS(os, 1)]));
		    T4N = VADD(T4J, T4M);
		    ST(&(xo[WS(os, 21)]), T4N, ovs, &(xo[WS(os, 1)]));
		    T4O = VSUB(T4M, T4J);
		    ST(&(xo[WS(os, 43)]), T4O, ovs, &(xo[WS(os, 1)]));
		    T4G = VADD(T4D, T4E);
		    ST(&(xo[WS(os, 27)]), T4G, ovs, &(xo[WS(os, 1)]));
	       }
	  }
	  /* Output stage: stores outputs 1, 15, 17, 31, 33, 47, 49, 63. */
	  {
	       V T2Z, T3l, T3m, T36, T39, T3f, T3i, T3a;
	       {
		    V T2R, T3g, T32, T3e, T2Y, T3d, T35, T3h, T2Q, T31;
		    T2Q = VADD(T2q, T2p);
		    T2R = VADD(T2P, T2Q);
		    T3g = VSUB(T2P, T2Q);
		    T31 = VADD(TZ, TI);
		    T32 = VADD(T30, T31);
		    T3e = VSUB(T31, T30);
		    {
			 V T2U, T2X, T33, T34;
			 T2U = VFMA(LDK(KP098017140), T2S, VMUL(LDK(KP995184726), T2T));
			 T2X = VFNMS(LDK(KP098017140), T2W, VMUL(LDK(KP995184726), T2V));
			 T2Y = VADD(T2U, T2X);
			 T3d = VSUB(T2X, T2U);
			 T33 = VFNMS(LDK(KP098017140), T2T, VMUL(LDK(KP995184726), T2S));
			 T34 = VFMA(LDK(KP995184726), T2W, VMUL(LDK(KP098017140), T2V));
			 T35 = VADD(T33, T34);
			 T3h = VSUB(T34, T33);
		    }
		    T2Z = VADD(T2R, T2Y);
		    T3l = VBYI(VADD(T3e, T3d));
		    T3m = VADD(T3g, T3h);
		    T36 = VBYI(VADD(T32, T35));
		    T39 = VSUB(T2R, T2Y);
		    T3f = VBYI(VSUB(T3d, T3e));
		    T3i = VSUB(T3g, T3h);
		    T3a = VBYI(VSUB(T35, T32));
	       }
	       {
		    V T37, T3n, T3o, T38;
		    T37 = VSUB(T2Z, T36);
		    ST(&(xo[WS(os, 63)]), T37, ovs, &(xo[WS(os, 1)]));
		    T3n = VADD(T3l, T3m);
		    ST(&(xo[WS(os, 15)]), T3n, ovs, &(xo[WS(os, 1)]));
		    T3o = VSUB(T3m, T3l);
		    ST(&(xo[WS(os, 49)]), T3o, ovs, &(xo[WS(os, 1)]));
		    T38 = VADD(T2Z, T36);
		    ST(&(xo[WS(os, 1)]), T38, ovs, &(xo[WS(os, 1)]));
	       }
	       {
		    V T3b, T3j, T3k, T3c;
		    T3b = VSUB(T39, T3a);
		    ST(&(xo[WS(os, 33)]), T3b, ovs, &(xo[WS(os, 1)]));
		    T3j = VADD(T3f, T3i);
		    ST(&(xo[WS(os, 17)]), T3j, ovs, &(xo[WS(os, 1)]));
		    T3k = VSUB(T3i, T3f);
		    ST(&(xo[WS(os, 47)]), T3k, ovs, &(xo[WS(os, 1)]));
		    T3c = VADD(T39, T3a);
		    ST(&(xo[WS(os, 31)]), T3c, ovs, &(xo[WS(os, 1)]));
	       }
	  }
     }
}

/*
 * Batch driver for the size-64 kernel: calls m1fv_64_0 once for every VL
 * transforms requested, advancing the input cursor by VL * ivs and the
 * output cursor by VL * ovs between calls.  The calls are bracketed by
 * BEGIN_SIMD() / END_SIMD().
 *
 * Only ri and ro are handed to the kernel; ii and io are part of the
 * signature but are not referenced in this function.
 */
static void m1fv_64(const R *ri, const R *ii, R *ro, R *io, stride is, stride os, int v, int ivs, int ovs)
{
     const R *src = ri;
     R *dst = ro;
     int done;
     BEGIN_SIMD();
     for (done = 0; done < v; done += VL, src += VL * ivs, dst += VL * ovs)
	  m1fv_64_0(src, dst, is, os, ivs, ovs);
     END_SIMD();
}

/*
 * Descriptor passed to the planner together with m1fv_64.  The leading 64
 * and the name string mirror the generator options (-n 64 -name m1fv_64);
 * the counts 404 / 72 / 52 match the additions, multiplications and fused
 * multiply/adds reported in the operation-count comment near the top of
 * this file.
 * NOTE(review): the meaning of the fourth count and of the four trailing
 * zeros is defined by kdft_desc, which is not visible here -- confirm.
 */
static const kdft_desc desc = {
     64,
     "m1fv_64",
     {404, 72, 52, 0},
     &GENUS,
     0, 0, 0, 0
};
/* Registration entry point: hands m1fv_64 and its descriptor to
   X(kdft_register) for planner p. */
void X(codelet_m1fv_64)(planner *p)
{
     X(kdft_register)(p, m1fv_64, &desc);
}
