/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/****** compute mean absolute difference of pixels in block ******************/
#ifndef __MAD_MMX_H__
#define __MAD_MMX_H__

#ifdef HAS_MMX
#include "mean_mmx.h"

/*  mad_withoutmask                                                          */
/*                                                                           */
/*  Description:                                                             */
/*    Accumulate the absolute differences between each pixel of an 8x8       */
/*    block and the block mean obtained from mean_withoutmask().             */
/*    The result is the sum over the 64 pixels; no division is done here.    */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the input 8x8 block                              */
/*    unsigned long pitch: number of bytes to the next line                  */
/*    unsigned long *mad: receives the accumulated absolute difference       */
/*                                                                           */
/*  Return value:                                                            */
/*    unsigned short: the value returned by mean_withoutmask()               */


static unsigned short inline mad_withoutmask(unsigned char *input,
					     unsigned long pitch,
					     unsigned long *mad)
{
  unsigned long m;
  unsigned short c;
  int dummy;
  unsigned long retval;

  /* m is filled in by mean_withoutmask() (declared in mean_mmx.h, not      */
  /* visible here; presumably the block mean). Only its low byte is used.   */
  c = mean_withoutmask(input, pitch, &m);

  /* One row of the block:                                                   */
  /*   - load 8 pixels from (%0) and advance %0 by the pitch (%2)            */
  /*   - mm0 = saturate(pixel - mean), mm1 = saturate(mean - pixel);         */
  /*     OR-ing the two unsigned saturated differences gives |pixel - mean|  */
  /*   - widen the 8 bytes to 16-bit words against mm7 (zero) and add them   */
  /*     to the four 16-bit partial sums held in mm6                         */
#define MAD_STEP_NOMASK()			\
    "movq (%0), %%mm0\n"			\
    "movq %%mm5, %%mm1\n"			\
    "movq %%mm0, %%mm2\n"			\
    "addl %2, %0\n"				\
    "psubusb %%mm1, %%mm0\n"			\
    "psubusb %%mm2, %%mm1\n"			\
    "por %%mm1, %%mm0\n"			\
    "movq %%mm0, %%mm2\n"			\
    "punpcklbw %%mm7, %%mm0\n"			\
    "punpckhbw %%mm7, %%mm2\n"			\
    "paddw %%mm0, %%mm6\n"			\
    "paddw %%mm2, %%mm6\n"

  /* Operands: %0 = row pointer (starts at input), %1 = m, %2 = pitch,       */
  /* %3 = result (starts at 0).                                              */
  /* NOTE(review): "addl" and the int-typed operand that carries the pointer */
  /* mean this asm is written for 32-bit x86 only.                           */
  /* NOTE(review): mm0-mm2 and mm5-mm7 are written but not listed as         */
  /* clobbers, and no emms is issued here; presumably the caller takes care  */
  /* of the MMX/FPU state -- confirm.                                        */
  /* mm7 = 0 (zero source for unpacking), mm6 = 0 (accumulator) */
  asm volatile ("pxor %%mm7, %%mm7\n"
		"pxor %%mm6, %%mm6\n" 
		/* replicate the low byte of m into all 8 bytes of mm5 */
		"movd %1, %%mm5\n"
		"punpcklbw %%mm5, %%mm5\n"
		"punpcklwd %%mm5, %%mm5\n"
		"punpckldq %%mm5, %%mm5\n"
		/* one step per row, 8 rows */
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		MAD_STEP_NOMASK()
		/* fold the four 16-bit partial sums of mm6 into its low word */
		"movq %%mm6, %%mm7\n"
		"psrlq $0x20, %%mm7\n"
		"paddw %%mm7, %%mm6\n"
		"movq %%mm6, %%mm7\n"
		"psrlq $0x10, %%mm7\n"
		"paddw %%mm7, %%mm6\n"
		"movd %%mm6, %3\n"
		: "=r"(dummy), "=r"(m), "=r"(pitch), "=r"(retval)
		: "0"(input), "1"(m), "2"(pitch), "3"(0)
		: "memory");

  /* Only the low 16 bits hold the complete sum; the upper half of the      */
  /* moved dword still contains a partial lane sum and is discarded.        */
  *mad = retval&65535;

  return(c);
}

/*  mad_withmask                                                             */
/*                                                                           */
/*  Description:                                                             */
/*    Accumulate the absolute differences between the pixels of an 8x8       */
/*    block and the block mean obtained from mean_withmask(), counting       */
/*    only the pixels selected by a binary mask.                             */
/*    The result is a sum; no division is done here.                         */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the input 8x8 block                              */
/*    unsigned char *mask: the input 8x8 mask                                */
/*    unsigned long pitch: number of bytes to the next line (both buffers)   */
/*    unsigned long *mad: receives the accumulated absolute difference       */
/*                                                                           */
/*  Return value:                                                            */
/*    unsigned short: the value returned by mean_withmask()                  */

static unsigned short inline mad_withmask(unsigned char *input,
					  unsigned char *mask,
					  unsigned long pitch,
					  unsigned long *mad)
{
  unsigned long m;
  unsigned short c;
  int dummy;
  unsigned long retval;

  /* m is filled in by mean_withmask() (declared in mean_mmx.h, not visible */
  /* here; presumably the mean of the masked pixels). Only its low byte is  */
  /* used below.                                                            */
  c = mean_withmask(input, mask, pitch, &m);

  /* One row of the block:                                                   */
  /*   - load 8 pixels from (%0) and 8 mask bytes from (%4), then advance    */
  /*     both pointers by the pitch (%2)                                     */
  /*   - pcmpgtb against mm7 (zero) turns each mask byte into 0xFF when it   */
  /*     is greater than zero, 0x00 otherwise                                */
  /*   - mm0 = saturate(pixel - mean), mm1 = saturate(mean - pixel);         */
  /*     OR-ing them gives |pixel - mean|, and pand with the expanded mask   */
  /*     zeroes the pixels that are not selected                             */
  /*   - widen the 8 bytes to 16-bit words and add them to the four 16-bit   */
  /*     partial sums held in mm6                                            */
  /* NOTE(review): pcmpgtb is a signed compare, so mask bytes 0x80..0xFF are */
  /* treated as "not selected" -- confirm the value used for set pixels.     */
#define MAD_STEP_MASK() 			\
    "movq (%0), %%mm0\n"  			\
    "movq %%mm5, %%mm1\n"			\
    "movq (%4), %%mm4\n"			\
    "pcmpgtb %%mm7, %%mm4\n"			\
    "movq %%mm0, %%mm2\n"			\
    "addl %2, %0\n"				\
    "addl %2, %4\n"				\
    "psubusb %%mm1, %%mm0\n"			\
    "psubusb %%mm2, %%mm1\n"			\
    "por %%mm1, %%mm0\n"			\
    "pand %%mm4,%%mm0\n"			\
    "movq %%mm0, %%mm2\n"			\
    "punpcklbw %%mm7, %%mm0\n"			\
    "punpckhbw %%mm7, %%mm2\n"			\
    "paddw %%mm0, %%mm6\n"			\
    "paddw %%mm2, %%mm6\n"			\
    
  /* Operands: %0 = pixel row pointer (starts at input), %1 = m,             */
  /* %2 = pitch, %3 = result (starts at 0), %4 = mask row pointer.           */
  /* NOTE(review): "addl" and the int-typed operand that carries the input   */
  /* pointer mean this asm is written for 32-bit x86 only.                   */
  /* NOTE(review): mm0-mm2 and mm4-mm7 are written but not listed as         */
  /* clobbers, and no emms is issued here; presumably the caller takes care  */
  /* of the MMX/FPU state -- confirm.                                        */
  /* mm7 = 0 (zero for compare and unpacking), mm6 = 0 (accumulator) */
  asm volatile ("pxor %%mm7, %%mm7\n"
		"pxor %%mm6, %%mm6\n" 
		/* replicate the low byte of m into all 8 bytes of mm5 */
		"movd %1, %%mm5\n"
		"punpcklbw %%mm5, %%mm5\n"
		"punpcklwd %%mm5, %%mm5\n"
		"punpckldq %%mm5, %%mm5\n"
		/* one step per row, 8 rows */
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		MAD_STEP_MASK()
		/* fold the four 16-bit partial sums of mm6 into its low word */
		"movq %%mm6, %%mm7\n"
		"psrlq $0x20, %%mm7\n"
		"paddw %%mm7, %%mm6\n"
		"movq %%mm6, %%mm7\n"
		"psrlq $0x10, %%mm7\n"
		"paddw %%mm7, %%mm6\n"
		"movd %%mm6, %3\n"
		: "=r"(dummy), "=r"(m), "=r"(pitch), "=r"(retval), "=r"(mask)
		: "0"(input), "1"(m), "2"(pitch), "3"(0), "4"(mask)
		: "memory");
  
  /* Only the low 16 bits hold the complete sum; the upper half of the      */
  /* moved dword still contains a partial lane sum and is discarded.        */
  *mad = retval & 65535;
  
  return(c);
}
#endif
#endif
