/* 
 *    rematrix.c
 *
 *	Copyright (C) Aaron Holtzman - July 1999
 *
 *  This file is part of ac3dec, a free Dolby AC-3 stream decoder.
 *	
 *  ac3dec is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2, or (at your option)
 *  any later version.
 *   
 *  ac3dec is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *   
 *  You should have received a copy of the GNU General Public License
 *  along with GNU Make; see the file COPYING.  If not, write to
 *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
 *
 *  Modified for using AMD's 3DNow! - 3DNowEx(DSP)! SIMD operations 
 *  by Nick Kurshev <nickols_k@mail.ru>
 *
 */

/*
 * rematrix - convert rematrixed bands of the first two channels back
 * into sum/difference form, using AMD 3DNow! for the bulk of the work.
 *
 * For every band i whose audblk->rematflg[i] is set, each coefficient
 * pair (samples[0][j], samples[1][j]) in the band is replaced by
 * (samples[0][j] + samples[1][j], samples[0][j] - samples[1][j]).
 *
 * audblk:  supplies cplinu, cplbegf and rematflg[]; only read here.
 * samples: coefficient arrays; rows 0 and 1 are modified in place.
 *
 * The number of bands examined (2, 3 or 4) is chosen from cplinu and
 * cplbegf, and each band's upper bound is clamped to 12*cplbegf + 36.
 * Bands that end up empty after the clamp are left untouched.
 */
void rematrix (audblk_t *audblk, stream_samples_t samples)
{
	uint32_t num_bands;
	uint32_t start;
	uint32_t end;
	uint32_t i,j;

	if (!audblk->cplinu || audblk->cplbegf > 2)
		num_bands = 4;
	else if (audblk->cplbegf > 0)
		num_bands = 3;
	else
		num_bands = 2;

	for (i=0; i < num_bands; i++) {
		if (!audblk->rematflg[i])
			continue;

		start = rematrix_band[i].start;
		end = min (rematrix_band[i].end ,12 * audblk->cplbegf + 36);

		j = start;
		/*
		 * Head: process one coefficient with scalar code so that the
		 * 64-bit accesses below start on an 8-byte boundary of
		 * samples[0].  Only the low address bits are inspected, so the
		 * cast to unsigned long is sufficient.  The j < end guard keeps
		 * an empty band (end <= start) from being modified.
		 * NOTE(review): assumes samples[1][j] shares the alignment of
		 * samples[0][j] - confirm against stream_samples_t's layout.
		 */
		if (j < end && ((unsigned long)(&(samples[0][j]))&7)!=0)
		{
			float left,right;
			left  = samples[0][j] + samples[1][j];
			right = samples[0][j] - samples[1][j];
			samples[0][j] = left;
			samples[1][j] = right;
			j++;
		}

		/* Clear MMX state before issuing 3DNow! instructions. */
		__asm __volatile("femms":::"memory");
		/*
		 * Body: two coefficients per iteration.  The bound is j + 1 < end
		 * rather than a rounded-down even "end", because j may be odd
		 * after the alignment step; rounding "end" down would then let
		 * the last iteration write one coefficient past the band.
		 */
		for (;j + 1 < end; j+=2) {
			__asm __volatile(
				"movq	%2,	%%mm0\n\t"
				"movq	%3,	%%mm1\n\t"
				"movq	%%mm0,	%%mm2\n\t"
				"pfadd	%%mm1,	%%mm0\n\t"
				"pfsub	%%mm1,	%%mm2\n\t"
				"movq	%%mm0,	%0\n\t"
				"movq	%%mm2,	%1"
				:"=m"(samples[0][j]), "=m"(samples[1][j])
				:"m"(samples[0][j]), "m"(samples[1][j])
				:"memory");
		}
		/* Restore the FPU state before the scalar float code below. */
		__asm __volatile("femms":::"memory");

		/* Tail: at most one coefficient is left over. */
		if (j < end) {
			float left,right;
			left  = samples[0][j] + samples[1][j];
			right = samples[0][j] - samples[1][j];
			samples[0][j] = left;
			samples[1][j] = right;
		}
	}
}
