/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*********************** fetch data from current picture *********************/

/*  prefetch_Y_withoutmask                                                   */
/*                                                                           */
/*  Description:                                                             */
/*    Get an 8x8 block of data from the source image Y plane                 */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the source image, plane Y                        */
/*    dct_t *output: the output 8x8 block                                    */
/*    int pitch: the number of bytes to the next line in the input plane     */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline prefetch_Y_withoutmask(unsigned char *input,
					  dct_t *output,
					  int pitch)
{
  /* Copies an 8x8 window of bytes into 64 consecutive dct_t values.
     Source rows start `pitch` bytes apart; the destination is filled
     row after row with each byte converted to dct_t unchanged. */
  const unsigned char *row = input;
  int x, y;

  for(y = 0; y < 8; y++, row += pitch, output += 8) {
    for(x = 0; x < 8; x++) {
      output[x] = (dct_t) row[x];
    }
  }
}

/*  prefetch_C_withoutmask                                                   */
/*                                                                           */
/*  Description:                                                             */
/*    Get an 8x8 block of data from the source image C plane,                */
/*    subtracting 128 to translate the value in [-128,+127]                  */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the source image, plane C                        */
/*    dct_t *output: the output 8x8 block                                    */
/*    int pitch: the number of bytes to the next line in the input plane     */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline prefetch_C_withoutmask(unsigned char *input,
					  dct_t *output,
					  int pitch)
{
  /* Copies an 8x8 window of bytes into 64 consecutive dct_t values,
     shifting every sample down by 128 so that the byte range 0..255
     maps onto -128..+127.  Source rows start `pitch` bytes apart. */
  const unsigned char *row = input;
  int x, y;

  for(y = 0; y < 8; y++, row += pitch, output += 8) {
    for(x = 0; x < 8; x++) {
      output[x] = (dct_t) row[x] - 128.0;
    }
  }
}

/*  prefetch_Y_withmask                                                      */
/*                                                                           */
/*  Description:                                                             */
/*    Get an 8x8 block of data from the source image Y plane                 */
/*    and perform LPE padding of arbitrary shape border block                */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the source image, plane Y                        */
/*    dct_t *output: the output 8x8 block                                    */
/*    unsigned char *mask: binary mask of the block                          */
/*    int pitch: the number of bytes to the next line in the input plane     */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline prefetch_Y_withmask(unsigned char *input,
				       dct_t *output,
				       unsigned char *mask,
				       int pitch)
{
  /* Three passes over the 8x8 block.  `input` and `mask` are walked with
     the same `pitch`; a sample is "visible" when its mask byte is
     non-zero.
       1. compute the integer mean of the visible samples,
       2. copy visible samples to `output`, writing the mean elsewhere,
       3. replace every non-visible sample by the average of its
          horizontal/vertical neighbours that lie inside the block.
     Pass 3 works in place and in raster order, so a replaced sample is
     seen by the samples processed after it. */
  unsigned char *src, *vis;
  unsigned int total, count;
  dct_t *row;
  int x, y;

  /* pass 1: mean of the visible samples */
  total = 0;
  count = 0;
  src = input;
  vis = mask;
  for(y = 0; y < 8; y++) {
    for(x = 0; x < 8; x++) {
      if(vis[x]) {
        total += src[x];
        count++;
      }
    }
    src += pitch;
    vis += pitch;
  }

  /* truncating integer mean; stays 0 when no sample is visible */
  if(count != 0) total /= count;

  /* pass 2: fill the block */
  src = input;
  vis = mask;
  row = output;
  for(y = 0; y < 8; y++) {
    for(x = 0; x < 8; x++) {
      if(vis[x])
        row[x] = (dct_t) src[x];
      else
        row[x] = (dct_t) total;
    }
    src += pitch;
    vis += pitch;
    row += 8;
  }

  /* pass 3: neighbour averaging of the non-visible samples.
     row[-8] / row[8] are the samples directly above / below row[0];
     row[-1] is the last sample of the previous row and row[15] the
     last sample of the next row, i.e. above / below row[7]. */

  /* first row: no neighbour above */
  vis = mask;
  row = output;
  if(!vis[0]) {
    row[0] = (row[1] + row[8]) / 2;
  }
  for(x = 1; x < 7; x++) {
    if(!vis[x]) {
      row[x] = (row[x-1] + row[x+1] + row[x+8]) / 3;
    }
  }
  if(!vis[7]) {
    row[7] = (row[6] + row[15]) / 2;
  }

  /* rows 1..6: neighbours above and below are both available */
  for(y = 1; y < 7; y++) {
    vis += pitch;
    row += 8;
    if(!vis[0]) {
      row[0] = (row[1] + row[-8] + row[8]) / 3;
    }
    for(x = 1; x < 7; x++) {
      if(!vis[x]) {
        row[x] = (row[x-1] + row[x+1] + row[x-8] + row[x+8]) / 4;
      }
    }
    if(!vis[7]) {
      row[7] = (row[6] + row[-1] + row[15]) / 3;
    }
  }

  /* last row: no neighbour below */
  vis += pitch;
  row += 8;
  if(!vis[0]) {
    row[0] = (row[1] + row[-8]) / 2;
  }
  for(x = 1; x < 7; x++) {
    if(!vis[x]) {
      row[x] = (row[x-1] + row[x+1] + row[x-8]) / 3;
    }
  }
  if(!vis[7]) {
    row[7] = (row[6] + row[-1]) / 2;
  }
}

/*  prefetch_C_withmask                                                      */
/*                                                                           */
/*  Description:                                                             */
/*    Get an 8x8 block of data from the source image C plane,                */
/*    subtracting 128 to translate the value in [-128,+127]                  */
/*    and perform LPE padding of arbitrary shape border block                */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the source image, plane C                        */
/*    dct_t *output: the output 8x8 block                                    */
/*    unsigned char *mask: binary mask of the block                          */
/*    int pitch: the number of bytes to the next line in the input plane     */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline prefetch_C_withmask(unsigned char *input,
				       dct_t *output,
				       unsigned char *mask,
				       int pitch)
{
  /* Fetches an 8x8 chroma block, offset by -128, and pads the samples
     that the mask marks as not visible.

     Arguments:
       input  - first sample of the 8x8 window; rows are `pitch` bytes apart
       output - receives 64 consecutive dct_t values, row after row
       mask   - addressed at twice the chroma resolution: sample (x, y) is
                covered by mask bytes 2x and 2x+1 on two mask rows that are
                2*pitch bytes apart, and successive chroma rows advance the
                mask by 4*pitch bytes
       pitch  - distance in bytes between two rows of `input`

     A sample is visible when any of its four mask bytes is non-zero.
     That test is evaluated once per sample and cached in `visible`, so
     the three passes below share a single result.  Offsets are computed
     with multiplication rather than `<<` because left-shifting a negative
     int is undefined behaviour. */
  unsigned char visible[64];
  const unsigned char *src, *top, *bottom, *vis;
  unsigned int sum, count;
  dct_t *row;
  int x, y;

  /* pass 1: build the visibility table and sum the visible samples */
  sum = 0;
  count = 0;
  src = input;
  top = mask;
  for(y = 0; y < 8; y++) {
    bottom = top + pitch * 2;
    for(x = 0; x < 8; x++) {
      unsigned char seen = (top[2*x] | top[2*x+1] |
                            bottom[2*x] | bottom[2*x+1]) != 0;

      visible[8*y+x] = seen;
      if(seen) {
        sum += src[x];
        count++;
      }
    }
    src += pitch;
    top += pitch * 4;
  }

  /* truncating integer mean; 128 (zero after the offset) when the
     whole block is masked out */
  if(count != 0)
    sum /= count;
  else
    sum = 128;

  /* pass 2: fill the block, visible samples from the source and the
     mean everywhere else, both shifted by -128 */
  src = input;
  vis = visible;
  row = output;
  for(y = 0; y < 8; y++) {
    for(x = 0; x < 8; x++) {
      if(vis[x])
        row[x] = (dct_t) src[x] - 128.0;
      else
        row[x] = (dct_t) sum - 128.0;
    }
    src += pitch;
    vis += 8;
    row += 8;
  }

  /* pass 3: replace every non-visible sample by the average of its
     horizontal/vertical neighbours inside the block.  Done in place and
     in raster order, so earlier replacements feed later ones.
     row[-8] / row[8] are the samples above / below row[0]; row[-1] and
     row[15] are the samples above / below row[7]. */

  /* first row: no neighbour above */
  vis = visible;
  row = output;
  if(!vis[0]) {
    row[0] = (row[1] + row[8]) / 2;
  }
  for(x = 1; x < 7; x++) {
    if(!vis[x]) {
      row[x] = (row[x-1] + row[x+1] + row[x+8]) / 3;
    }
  }
  if(!vis[7]) {
    row[7] = (row[6] + row[15]) / 2;
  }

  /* rows 1..6: neighbours above and below are both available */
  for(y = 1; y < 7; y++) {
    vis += 8;
    row += 8;
    if(!vis[0]) {
      row[0] = (row[1] + row[-8] + row[8]) / 3;
    }
    for(x = 1; x < 7; x++) {
      if(!vis[x]) {
        row[x] = (row[x-1] + row[x+1] + row[x-8] + row[x+8]) / 4;
      }
    }
    if(!vis[7]) {
      row[7] = (row[6] + row[-1] + row[15]) / 3;
    }
  }

  /* last row: no neighbour below */
  vis += 8;
  row += 8;
  if(!vis[0]) {
    row[0] = (row[1] + row[-8]) / 2;
  }
  for(x = 1; x < 7; x++) {
    if(!vis[x]) {
      row[x] = (row[x-1] + row[x+1] + row[x-8]) / 3;
    }
  }
  if(!vis[7]) {
    row[7] = (row[6] + row[-1]) / 2;
  }
}

/*  diff                                                                     */
/*                                                                           */
/*  Description:                                                             */
/*    Compute difference 8x8 block between the reference image and the       */
/*    source image.                                                          */
/*                                                                           */
/*  Arguments:                                                               */
/*    unsigned char *input: the source image                                 */
/*    unsigned char *ref: the reference image                                */
/*    dct_t *output: the output 8x8 block                                    */
/*    int pitch: the number of bytes to the next line in the input plane     */
/*                                                                           */
/*  Return value:                                                            */
/*    None.                                                                  */

static void inline diff(unsigned char *input,
		        unsigned char *ref,
			dct_t *output,
			int pitch)
{
  /* Writes, for every sample of an 8x8 window, the source value minus
     the reference value into 64 consecutive dct_t entries.  Both
     windows are walked with the same `pitch` between rows; each byte
     is converted to dct_t before the subtraction. */
  const unsigned char *cur = input;
  const unsigned char *prev = ref;
  int x, y;

  for(y = 0; y < 8; y++, cur += pitch, prev += pitch, output += 8) {
    for(x = 0; x < 8; x++) {
      output[x] = (dct_t) cur[x] - (dct_t) prev[x];
    }
  }
}
