/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/********************** MMX accelerated quantisation *************************/

/*
 * quantise - MMX multiply of one coefficient block by a multiplier table.
 *
 *   block   : destination; 0x80 bytes are written
 *   cache   : source coefficients; 0x80 bytes are read
 *   qmatrix : per-coefficient multipliers; 0x80 bytes are read
 *
 * The instructions used (psraw/pmulhw/psubsw) operate on packed signed
 * 16-bit words, so 64 words are processed, four quadwords (16 words) per
 * group.  For every word i the code computes
 *
 *     block[i] = sat16( ((cache[i] * qmatrix[i]) >> 16) - (cache[i] >> 15) )
 *
 * i.e. the high half of the signed 32-bit product, plus 1 whenever the
 * source coefficient is negative (cache[i] >> 15 is 0 or -1).
 *
 * NOTE(review): assumes dct_t is a signed 16-bit type and that qmatrix
 * holds reciprocals scaled by 2^16 - confirm against the callers.
 *
 * No emms is issued here, so the MMX state is left dirty on return; the
 * caller has to clear it before any x87 floating point code runs.
 *
 * Operand map inside the asm: %0 = cache, %1 = qmatrix, %2 = block.
 * The pointers are input-only (the asm never changes them); the data
 * accesses are covered by the "memory" clobber and every MMX register the
 * code overwrites is listed as clobbered.
 */
static inline void quantise(dct_t *block, dct_t *cache, dct_t *qmatrix)
{
  __asm__ __volatile__ (
                /* ---- words 0..15 (bytes 0x00-0x1f) ---- */
                "movq 0x00(%0), %%mm0\n"       /* load coefficients */
                "movq 0x08(%0), %%mm1\n"
                "movq 0x10(%0), %%mm2\n"
                "movq 0x18(%0), %%mm3\n"
                "movq %%mm0, %%mm4\n"          /* copy for the sign mask */
                "movq %%mm1, %%mm5\n"
                "movq %%mm2, %%mm6\n"
                "movq %%mm3, %%mm7\n"
                "psraw $0x0f, %%mm4\n"         /* 0 or -1 per word */
                "psraw $0x0f, %%mm5\n"
                "psraw $0x0f, %%mm6\n"
                "psraw $0x0f, %%mm7\n"
                "pmulhw 0x00(%1), %%mm0\n"     /* high 16 bits of product */
                "pmulhw 0x08(%1), %%mm1\n"
                "pmulhw 0x10(%1), %%mm2\n"
                "pmulhw 0x18(%1), %%mm3\n"
                "psubsw %%mm4, %%mm0\n"        /* +1 where input was < 0 */
                "psubsw %%mm5, %%mm1\n"
                "psubsw %%mm6, %%mm2\n"
                "psubsw %%mm7, %%mm3\n"
                "movq %%mm0, 0x00(%2)\n"       /* store results */
                "movq %%mm1, 0x08(%2)\n"
                "movq %%mm2, 0x10(%2)\n"
                "movq %%mm3, 0x18(%2)\n"
                /* ---- words 16..31 (bytes 0x20-0x3f) ---- */
                "movq 0x20(%0), %%mm0\n"
                "movq 0x28(%0), %%mm1\n"
                "movq 0x30(%0), %%mm2\n"
                "movq 0x38(%0), %%mm3\n"
                "movq %%mm0, %%mm4\n"
                "movq %%mm1, %%mm5\n"
                "movq %%mm2, %%mm6\n"
                "movq %%mm3, %%mm7\n"
                "psraw $0x0f, %%mm4\n"
                "psraw $0x0f, %%mm5\n"
                "psraw $0x0f, %%mm6\n"
                "psraw $0x0f, %%mm7\n"
                "pmulhw 0x20(%1), %%mm0\n"
                "pmulhw 0x28(%1), %%mm1\n"
                "pmulhw 0x30(%1), %%mm2\n"
                "pmulhw 0x38(%1), %%mm3\n"
                "psubsw %%mm4, %%mm0\n"
                "psubsw %%mm5, %%mm1\n"
                "psubsw %%mm6, %%mm2\n"
                "psubsw %%mm7, %%mm3\n"
                "movq %%mm0, 0x20(%2)\n"
                "movq %%mm1, 0x28(%2)\n"
                "movq %%mm2, 0x30(%2)\n"
                "movq %%mm3, 0x38(%2)\n"
                /* ---- words 32..47 (bytes 0x40-0x5f) ---- */
                "movq 0x40(%0), %%mm0\n"
                "movq 0x48(%0), %%mm1\n"
                "movq 0x50(%0), %%mm2\n"
                "movq 0x58(%0), %%mm3\n"
                "movq %%mm0, %%mm4\n"
                "movq %%mm1, %%mm5\n"
                "movq %%mm2, %%mm6\n"
                "movq %%mm3, %%mm7\n"
                "psraw $0x0f, %%mm4\n"
                "psraw $0x0f, %%mm5\n"
                "psraw $0x0f, %%mm6\n"
                "psraw $0x0f, %%mm7\n"
                "pmulhw 0x40(%1), %%mm0\n"
                "pmulhw 0x48(%1), %%mm1\n"
                "pmulhw 0x50(%1), %%mm2\n"
                "pmulhw 0x58(%1), %%mm3\n"
                "psubsw %%mm4, %%mm0\n"
                "psubsw %%mm5, %%mm1\n"
                "psubsw %%mm6, %%mm2\n"
                "psubsw %%mm7, %%mm3\n"
                "movq %%mm0, 0x40(%2)\n"
                "movq %%mm1, 0x48(%2)\n"
                "movq %%mm2, 0x50(%2)\n"
                "movq %%mm3, 0x58(%2)\n"
                /* ---- words 48..63 (bytes 0x60-0x7f) ---- */
                "movq 0x60(%0), %%mm0\n"
                "movq 0x68(%0), %%mm1\n"
                "movq 0x70(%0), %%mm2\n"
                "movq 0x78(%0), %%mm3\n"
                "movq %%mm0, %%mm4\n"
                "movq %%mm1, %%mm5\n"
                "movq %%mm2, %%mm6\n"
                "movq %%mm3, %%mm7\n"
                "psraw $0x0f, %%mm4\n"
                "psraw $0x0f, %%mm5\n"
                "psraw $0x0f, %%mm6\n"
                "psraw $0x0f, %%mm7\n"
                "pmulhw 0x60(%1), %%mm0\n"
                "pmulhw 0x68(%1), %%mm1\n"
                "pmulhw 0x70(%1), %%mm2\n"
                "pmulhw 0x78(%1), %%mm3\n"
                "psubsw %%mm4, %%mm0\n"
                "psubsw %%mm5, %%mm1\n"
                "psubsw %%mm6, %%mm2\n"
                "psubsw %%mm7, %%mm3\n"
                "movq %%mm0, 0x60(%2)\n"
                "movq %%mm1, 0x68(%2)\n"
                "movq %%mm2, 0x70(%2)\n"
                "movq %%mm3, 0x78(%2)\n"
                : /* no outputs: the pointer registers are never modified */
                : "r"(cache), "r"(qmatrix), "r"(block)
                : "memory",
                  "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7");
}

