/*
    libfame - Fast Assembly MPEG Encoder Library
    Copyright (C) 2000-2001 Vivien Chappelier

    This library is free software; you can redistribute it and/or
    modify it under the terms of the GNU Library General Public
    License as published by the Free Software Foundation; either
    version 2 of the License, or (at your option) any later version.

    This library is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    Library General Public License for more details.

    You should have received a copy of the GNU Library General Public
    License along with this library; if not, write to the Free
    Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "fame.h"
#include "fame_syntax.h"
#include "fame_bitbuffer.h"
#include "fame_syntax_mpeg1.h"
#include "table_zigzag_mpeg1.h"
#include "table_rlehuff_mpeg1.h"
#include "table_dc_mpeg1.h"
#include "table_cbp_mpeg1.h"
#include "table_quant_mpeg1.h"
#include "table_clip_mpeg1.h"
#include "table_mv.h"

/* The header codes.
   Each one is written to the bitstream as a 32-bit value, i.e. the
   0x000001 start code prefix followed by the code byte.
   SLICE_BASE_CODE is the code of the first slice; the vertical position
   of the slice is added to it when a slice header is written. */
#define SEQUENCE_START_CODE 0x1b3
#define SEQUENCE_END_CODE   0x1b9
#define GOP_START_CODE      0x1b8
#define PICT_START_CODE     0x100
#define SLICE_BASE_CODE     0x101

/* The frame type values, written in the 3-bit picture coding type field
   of the picture header. Only I_FRAME and P_FRAME are emitted by this
   file; B_FRAME is defined but not used here. */
#define I_FRAME 1
#define P_FRAME 2
#define B_FRAME 3

/* Forward declarations of the MPEG-1 implementations of the syntax
   methods. They are installed into the syntax object by the constructor
   and defined further down in this file. */
static void mpeg1_init(fame_syntax_t *syntax, 
		       int mb_width,
		       int mb_height,
		       unsigned int search_range,
 		       unsigned char **intra_default_matrix,
		       unsigned char **inter_default_matrix,
		       fame_mismatch_t *mismatch_type,
		       unsigned int flags);
static void mpeg1_close(fame_syntax_t *syntax);
static void mpeg1_use(fame_syntax_t *syntax,
		      unsigned char *buffer,
		      int size);
static int  mpeg1_flush(fame_syntax_t *syntax);
static void mpeg1_start_sequence(fame_syntax_t *syntax,
				 int width,
				 int height,
				 int fps_num,
				 int fps_den,
				 int size,
				 int bitrate);
static void mpeg1_start_GOP(fame_syntax_t *syntax,
			    int frame);
static void mpeg1_start_picture(fame_syntax_t *syntax,
				char frame_type,
				int frame_number,
				fame_box_t *box);
static void mpeg1_start_slice(fame_syntax_t *syntax,
			      int vpos,
			      int length,
			      unsigned char qscale,
			      unsigned char *intra_y_scale,
			      unsigned char *intra_c_scale);
static void mpeg1_end_picture(fame_syntax_t *syntax);
static void mpeg1_end_sequence(fame_syntax_t *syntax);
static void mpeg1_predict_vector(fame_syntax_t *syntax,
				 int mb_x,
				 int mb_y,
				 int k,
				 fame_motion_vector_t *mv);
static void mpeg1_compute_chrominance_vectors(fame_syntax_t *syntax,
					      fame_motion_vector_t *vectors);
static void mpeg1_write_intra_mb(fame_syntax_t *syntax,
				 int mb_x,
				 int mb_y,
				 short *blocks[6],
				 unsigned char *bab,
				 fame_bab_t bab_type,
				 unsigned char pattern);
static void mpeg1_write_inter_mb(fame_syntax_t *syntax,
				 int mb_x,
				 int mb_y,
				 short *blocks[6],
				 unsigned char *bab,
				 fame_bab_t bab_type,
				 unsigned char pattern,
				 fame_motion_vector_t *forward,
				 fame_motion_vector_t *backward,
				 fame_motion_coding_t motion_coding);

/* Constructor of the MPEG-1 syntax object: names the object and plugs the
   mpeg1_* functions of this file into the generic syntax method slots. */
FAME_CONSTRUCTOR(fame_syntax_mpeg1_t)
{
  /* object identification */
  FAME_OBJECT(this)->name = "MPEG-1 bitstream syntax";

  /* life cycle and output buffer handling */
  FAME_SYNTAX(this)->init = mpeg1_init;
  FAME_SYNTAX(this)->close = mpeg1_close;
  FAME_SYNTAX(this)->use = mpeg1_use;
  FAME_SYNTAX(this)->flush = mpeg1_flush;

  /* headers and trailers */
  FAME_SYNTAX(this)->start_sequence = mpeg1_start_sequence;
  FAME_SYNTAX(this)->end_sequence = mpeg1_end_sequence;
  FAME_SYNTAX(this)->start_GOP = mpeg1_start_GOP;
  FAME_SYNTAX(this)->start_picture = mpeg1_start_picture;
  FAME_SYNTAX(this)->end_picture = mpeg1_end_picture;
  FAME_SYNTAX(this)->start_slice = mpeg1_start_slice;

  /* motion vector helpers */
  FAME_SYNTAX(this)->predict_vector = mpeg1_predict_vector;
  FAME_SYNTAX(this)->compute_chrominance_vectors = mpeg1_compute_chrominance_vectors;

  /* macroblock writers */
  FAME_SYNTAX(this)->write_intra_mb = mpeg1_write_intra_mb;
  FAME_SYNTAX(this)->write_inter_mb = mpeg1_write_inter_mb;

  return this;
}

/* Fill the run/level lookup table.
   vlc points to 511*64 entries laid out level-major: 64 consecutive
   entries (one per run 0..63) for each level from -255 to 255.
   A (run, level) pair that has an entry in huff_table/huff_bits gets
   that code; every other pair gets an escape sequence built from the
   run (0x40 + run) followed by the level code from rlehuff_table. */
static inline void mpeg1_init_vlc_table(fame_vlc_t *vlc)
{
  fame_vlc_t *entry = vlc;
  int level;
  int run;

  for(level = -255; level <= 255; level++) {
    for(run = 0; run < 64; run++, entry++) {

      /* the run test must come first: rlehuff_max_level is only
         consulted for runs below HUFFMAXRUN */
      if(run >= HUFFMAXRUN ||
         level >=  rlehuff_max_level[run] ||
         level <= -rlehuff_max_level[run])
      {
        /* escape code and encode number of zeroes */
        entry->code = ((0x40+run) << rlehuff_table[level+255].length) |
                      rlehuff_table[level+255].code;
        entry->length = rlehuff_table[level+255].length+12;
        continue;
      }

      /* encode value and number of zeroes */
      entry->code = huff_table[run][level+rlehuff_max_level[run]-1];
      entry->length = huff_bits[run][level+rlehuff_max_level[run]-1];
    }
  }
}

/* Pad the bitstream with zero bits: writes bitbuffer_padding(buffer)
   zero bits, or nothing when no padding is reported. */
static inline void mpeg1_next_start_code(fame_bitbuffer_t *buffer)
{
  /* nothing to pad */
  if(bitbuffer_padding(buffer) == 0)
    return;

  bitbuffer_write(buffer, 0x00, bitbuffer_padding(buffer));
}

/* Initialize the MPEG-1 syntax object.

   syntax        : the syntax object to set up
   mb_width      : picture width in macroblocks (stored)
   mb_height     : picture height in macroblocks (stored)
   search_range  : motion search range, used to choose the forward f_code
                   (<8 -> 1, <16 -> 2, ... <256 -> 6, otherwise 7)
   intra_matrix  : in/out; always replaced by the default MPEG-1 intra
                   quantisation matrix (a warning is printed when a custom
                   matrix was supplied, since it cannot be written yet)
   inter_matrix  : same as intra_matrix, for the non-intra matrix
   mismatch_type : out; set to fame_mismatch_local
   flags         : unused

   The frame rate defaults to 25/1 until mpeg1_start_sequence is called.
   The run/level VLC table is allocated here and released by mpeg1_close.
   If the allocation fails an error is reported and the process is
   aborted: there is no error return, and every later call (including
   mpeg1_close) relies on the table being present. */
static void mpeg1_init(fame_syntax_t *syntax,
                       int mb_width,
                       int mb_height,
                       unsigned int search_range,
                       unsigned char **intra_matrix,
                       unsigned char **inter_matrix,
                       fame_mismatch_t *mismatch_type,
                       unsigned int flags)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_vlc_t *vlc_base;
  int f_code;

  /* smallest f_code whose threshold (8, 16, ... 256) exceeds the search
     range, saturating at 7 */
  f_code = 1;
  while(f_code < 7 && search_range >= (4u << f_code))
    f_code++;
  syntax_mpeg1->f_code = f_code;

  syntax_mpeg1->mb_width = mb_width;
  syntax_mpeg1->mb_height = mb_height;

  syntax_mpeg1->fps_num = 25;
  syntax_mpeg1->fps_den =  1;

  /* initialize vlc_table: 64 runs for each of the 511 levels */
  vlc_base = malloc(64*511*sizeof(*vlc_base));
  if(vlc_base == NULL) {
    FAME_ERROR("Out of memory allocating the MPEG-1 VLC table (%lu bytes)\n",
               (unsigned long) (64*511*sizeof(*vlc_base)));
    abort();
  }
  mpeg1_init_vlc_table(vlc_base);
  /* center vlc_table so that it can be indexed with signed levels */
  syntax_mpeg1->vlc_table = vlc_base + 64*255;

  /* TODO: flag to write matrix to bitstream */
  /* TEMP: use default matrix instead */
  if(*intra_matrix) {
    FAME_WARNING("Custom quantisation matrix not supported, using default.\n");
  }
  *intra_matrix = mpeg1_intra_quantisation_table;

  /* TODO: flag to write matrix to bitstream */
  /* TEMP: use default matrix instead */
  if(*inter_matrix) {
    FAME_WARNING("Custom quantisation matrix not supported, using default.\n");
  }
  *inter_matrix = mpeg1_inter_quantisation_table;

  /* MPEG-1 uses local mismatch control */
  *mismatch_type = fame_mismatch_local;
}

/* Release the VLC table allocated by mpeg1_init.
   The stored pointer is centered, so it is moved back to the start of
   the allocation before being freed. The field is left holding that
   (now freed) start address. */
static void mpeg1_close(fame_syntax_t *syntax)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_vlc_t *table_start;

  /* uncenter vlc_table */
  table_start = syntax_mpeg1->vlc_table - 64*255;
  syntax_mpeg1->vlc_table = table_start;

  free(table_start);
}

/* Direct the bitstream output to a caller supplied memory area.
   buffer : start of the output area
   size   : size passed on to bitbuffer_init together with buffer */
static void mpeg1_use(fame_syntax_t *syntax,
                      unsigned char *buffer,
                      int size)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_bitbuffer_t *bitbuffer = &syntax_mpeg1->buffer;

  bitbuffer_init(bitbuffer, buffer, size);
}

/* Flush the bit buffer of the syntax object and hand back whatever
   bitbuffer_flush reports for it. */
static int mpeg1_flush(fame_syntax_t *syntax)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_bitbuffer_t *bitbuffer = &syntax_mpeg1->buffer;

  return bitbuffer_flush(bitbuffer);
}

/* Write the sequence header (96 bits).

   width, height    : picture size, written in 12 bits each
   fps_num, fps_den : frame rate as a fraction; it must be one of the
                      eight rates MPEG-1 can signal, otherwise a warning
                      is printed and 25 fps is used instead
   size             : VBV buffer size; written in units of 2048
                      (presumably bytes, i.e. 16 kbit units - TODO confirm
                      against the caller)
   bitrate          : written divided by 50 in 18 bits (presumably bytes
                      per second converted to 400 bit/s units - TODO
                      confirm against the caller)

   The frame rate that is actually signalled is remembered in the syntax
   object for the GOP time code. */
static void mpeg1_start_sequence(fame_syntax_t *syntax,
                                 int width,
                                 int height,
                                 int fps_num,
                                 int fps_den,
                                 int size,
                                 int bitrate)
{
  /* supported frame rates; the frame rate code is the index plus one */
  static const int supported_rates[8][2] = {
    { 24000, 1001 },  /* 1: 24000/1001 fps */
    {    24,    1 },  /* 2: 24         fps */
    {    25,    1 },  /* 3: 25         fps */
    { 30000, 1001 },  /* 4: 30000/1001 fps */
    {    30,    1 },  /* 5: 30         fps */
    {    50,    1 },  /* 6: 50         fps */
    { 60000, 1001 },  /* 7: 60000/1001 fps */
    {    60,    1 }   /* 8: 60         fps */
  };
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  int frame_rate = 0;
  int vbv_units;
  int idx;

  for(idx = 0; idx < 8; idx++) {
    if(fps_num == supported_rates[idx][0] &&
       fps_den == supported_rates[idx][1])
      frame_rate = idx + 1;
  }
  if(!frame_rate) {
    FAME_WARNING("MPEG-1 doesn't support frame rate %d/%d! "
		 "Using 25 fps instead.\n", fps_num, fps_den);
    fps_num = 25;
    fps_den =  1;
    frame_rate = 3; /* 25 fps */
  }

  syntax_mpeg1->fps_num = fps_num;
  syntax_mpeg1->fps_den = fps_den;

  /* The VBV buffer size field is only 10 bits wide. Every other field of
     this header is limited to its width before being written; do the
     same here so an oversized (or negative) value cannot spill into the
     neighbouring fields. Saturating keeps the closest representable
     value instead of wrapping a large buffer to a small one. */
  vbv_units = size/2048;
  if(vbv_units < 0)
    vbv_units = 0;
  if(vbv_units > 0x3ff)
    vbv_units = 0x3ff;

  /* sequence start code                  */
  bitbuffer_write(&syntax_mpeg1->buffer, SEQUENCE_START_CODE, 32);
  /* picture size x                       */ 
  bitbuffer_write(&syntax_mpeg1->buffer, width & 0xfff, 12);
  /* picture size y                       */
  bitbuffer_write(&syntax_mpeg1->buffer, height & 0xfff, 12);
  /* pixel aspect ratio                   */
  bitbuffer_write(&syntax_mpeg1->buffer, 1,   4);
  /* frames per second                    */
  bitbuffer_write(&syntax_mpeg1->buffer, frame_rate,   4);
  /* bitrate                              */
  bitbuffer_write(&syntax_mpeg1->buffer, (bitrate/50) & 0x3ffff, 18);
  /* marker                               */
  bitbuffer_write(&syntax_mpeg1->buffer, 1, 1);
  /* VBV buffer size                      */
  bitbuffer_write(&syntax_mpeg1->buffer, vbv_units, 10);
  /* constrained mode                     */
  bitbuffer_write(&syntax_mpeg1->buffer, 0, 1);
  /* load intra matrix                    */
  bitbuffer_write(&syntax_mpeg1->buffer, 0, 1);
  /* load non-intra matrix                */
  bitbuffer_write(&syntax_mpeg1->buffer, 0, 1);
}

/* Write a group of pictures header (64 bits).

   frame : number of the first frame of the GOP, counted from the start
           of the sequence; it is converted to an
           hours:minutes:seconds:pictures time code using the frame rate
           stored in the syntax object.

   The GOP is always flagged as closed, without broken link and without
   drop frame. */
static void mpeg1_start_GOP(fame_syntax_t *syntax, int frame)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  int fps_num, fps_den;
  int pictures_per_second;
  int elapsed_seconds;

  fps_num = syntax_mpeg1->fps_num;
  fps_den = syntax_mpeg1->fps_den;

  /* Frame rate rounded up to an integer: 25 for 25/1, 30 for 30000/1001.
     (fps_num/fps_den + 1 is only right for the fractional rates and gave
     26 for 25 fps, letting the picture count run past the second.) */
  pictures_per_second = (fps_num + fps_den - 1) / fps_den;

  /* floor(frame * fps_den / fps_num), computed on the quotient and the
     remainder separately so that frame * fps_den cannot overflow an int
     on long sequences */
  elapsed_seconds = (frame / fps_num) * fps_den +
                    ((frame % fps_num) * fps_den) / fps_num;

  /* gop start code                       */
  bitbuffer_write(&syntax_mpeg1->buffer, GOP_START_CODE, 32);
  /* drop frame flag                      */
  bitbuffer_write(&syntax_mpeg1->buffer, 0, 1);
  /* timecount hours (wraps after a day)  */
  bitbuffer_write(&syntax_mpeg1->buffer,
		  ((elapsed_seconds/3600)%24) & 0x1f, 5);
  /* timecount minutes                    */
  bitbuffer_write(&syntax_mpeg1->buffer, 
		  ((elapsed_seconds/60)%60) & 0x3f, 6); 
  /* marker                               */
  bitbuffer_write(&syntax_mpeg1->buffer, 1, 1);
  /* timecount seconds                    */
  bitbuffer_write(&syntax_mpeg1->buffer,
		  (elapsed_seconds%60) & 0x3f, 6);
  /* timecount frames                     */
  bitbuffer_write(&syntax_mpeg1->buffer,
		  (frame%pictures_per_second) & 0x3f, 6);
  /* closed GOP                           */
  bitbuffer_write(&syntax_mpeg1->buffer, 1, 1);
  /* broken link                          */
  bitbuffer_write(&syntax_mpeg1->buffer, 0, 1);
  /* fill out to 8 bytes                  */
  bitbuffer_write(&syntax_mpeg1->buffer, 0, 5);
}

/* Write a picture header.

   frame_type   : 'I' or 'P'; anything else is reported as an error and
                  nothing is written
   frame_number : written modulo 1024 in the 10-bit frame number field
   box          : unused

   The picture type is remembered in the syntax object for the
   macroblock writers. */
static void mpeg1_start_picture(fame_syntax_t *syntax, char frame_type, int frame_number, fame_box_t *box)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_bitbuffer_t *buffer = &syntax_mpeg1->buffer;
  int coding_type;
  int predicted;

  if(frame_type == 'I') {
    syntax_mpeg1->frame_type = frame_type_I;
    coding_type = I_FRAME;
    predicted = 0;
  } else if(frame_type == 'P') {
    syntax_mpeg1->frame_type = frame_type_P;
    coding_type = P_FRAME;
    predicted = 1;
  } else {
    FAME_ERROR("Unsupported picture coding type %c", frame_type);
    return;
  }

  /* picture start code                   */
  bitbuffer_write(buffer, PICT_START_CODE, 32);
  /* frame number                         */
  bitbuffer_write(buffer, frame_number & 0x3ff, 10);
  /* frame type                           */
  bitbuffer_write(buffer, coding_type, 3);
  /* vbv delay: variable bitrate          */
  bitbuffer_write(buffer, 0xFFFF, 16);

  if(predicted) {
    /* half-pel forward vector            */
    bitbuffer_write(buffer, 0, 1);
    /* forward vector range               */
    bitbuffer_write(buffer, syntax_mpeg1->f_code & 0x7, 3);
    /* end marker                         */
    bitbuffer_write(buffer, 0, 1);
    /* zero fill; the header is then 72 bits long */
    bitbuffer_write(buffer, 0, 6);
  } else {
    /* end marker                         */
    bitbuffer_write(buffer, 0, 1);
    /* fill out to 8 bytes                */
    bitbuffer_write(buffer, 0, 2);
  }
}

/* Write a slice header and reset the per-slice coding state.

   vpos          : vertical position of the slice, added to
                   SLICE_BASE_CODE to form the start code
   length        : slice length, remembered for the skip decision of the
                   inter macroblock writer
   qscale        : quantizer scale, written in 5 bits
   intra_y_scale : out; always set to 8
   intra_c_scale : out; always set to 8 */
static void mpeg1_start_slice(fame_syntax_t *syntax,
                              int vpos,
                              int length,
                              unsigned char qscale,
                              unsigned char *intra_y_scale,
                              unsigned char *intra_c_scale)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_bitbuffer_t *buffer = &syntax_mpeg1->buffer;

  /* MPEG-1 uses linear quantization */
  *intra_c_scale = 8;
  *intra_y_scale = 8;

  /* per-slice bookkeeping */
  syntax_mpeg1->slice_length = length;
  syntax_mpeg1->prev_mb_addr = -1;
  syntax_mpeg1->previous_coding = motion_intra;

  /* reset the predictors to their original values */
  syntax_mpeg1->y_dc_pred = 128;
  syntax_mpeg1->cb_dc_pred = 0;
  syntax_mpeg1->cr_dc_pred = 0;
  syntax_mpeg1->mv_pred.dy = 0;
  syntax_mpeg1->mv_pred.dx = 0;

  /* new slice starting at vpos      */
  bitbuffer_write(buffer, SLICE_BASE_CODE + vpos, 32);
  /* quantization scale              */
  bitbuffer_write(buffer, qscale & 31, 5);
  /* end marker                      */
  bitbuffer_write(buffer, 0, 1);
}

/* Finish a picture: zero-pad the bitstream so that the next start code
   begins on a byte boundary. */
static void mpeg1_end_picture(fame_syntax_t *syntax)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_bitbuffer_t *bitbuffer = &syntax_mpeg1->buffer;

  /* align to byte */
  mpeg1_next_start_code(bitbuffer);
}

/* Finish the sequence by writing the 32-bit sequence end code. */
static void mpeg1_end_sequence(fame_syntax_t *syntax)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_bitbuffer_t *bitbuffer = &syntax_mpeg1->buffer;

  /* end sequence code */
  bitbuffer_write(bitbuffer, SEQUENCE_END_CODE, 32);
}

/* Store the current motion vector predictor into mv[k].
   Only the dx and dy components of mv[k] are written; mb_x and mb_y are
   not used since the predictor is tracked in the syntax object. */
static void mpeg1_predict_vector(fame_syntax_t *syntax,
                                 int mb_x,
                                 int mb_y,
                                 int k,
                                 fame_motion_vector_t *mv)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  fame_motion_vector_t *predicted = mv + k;

  predicted->dx = syntax_mpeg1->mv_pred.dx;
  predicted->dy = syntax_mpeg1->mv_pred.dy;
}

/* Write one intra coded 8x8 block.

   block : the 64 coefficients of the block; left unchanged on return
   table : DC code table, indexed with the DC difference plus 255
   pred  : in/out DC predictor; the DC coefficient is coded as the
           clipped difference to *pred, which is then advanced by that
           difference

   The AC coefficients are visited in zigzag order and each non-zero one
   is written with the code found in the centered run/level table
   (64 entries per level, indexed by the run of zeroes before it).
   The block is closed with the 2-bit end of block code. */
static void mpeg1_block_intra(fame_syntax_t *syntax, short *block, fame_vlc_t const *table, short *pred)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  short v;

  /* encode DC coefficient */
  v = block[0] - *pred;

  v = mpeg1_table_clip[2048+v];
  *pred += v;
  bitbuffer_write(&syntax_mpeg1->buffer,
		  table[v+255].code,
		  table[v+255].length);

  /* encode AC coefficients */
#if defined(HAS_BSWAP)
  {
    unsigned long dummy1, dummy2, dummy3, dummy4;
    short saved_dc;

    /* Note:
        movsx mpeg1_table_clip+4096(, %%eax ,2), %%eax
       has been replaced by
        movw mpeg1_table_clip+4096(, %%eax ,2), %%ax
        movsx %%ax, %%eax
       because the first instruction failed on a PIII!! (wrong sign extension)
       whereas it worked well on my P75 :)
    */

    /* The loop below runs over the zigzag table until it reads a
       non-zero coefficient at block index 0, so block[0] is used as a
       sentinel. Remember the real DC value to put it back afterwards:
       restoring it from v would leave the clipped DC difference in the
       block, unlike the portable path which never modifies the block. */
    saved_dc = block[0];
    block[0] = 1; /* non-zero value to stop the rle loop */
    __asm__ __volatile__ ("pushl %%ebx\n"             /* save ebx            */
			  "pushl %%ebp\n"             /* save frame pointer  */
			  "movl %%esi, %%ebp\n"       /* ebp = vlc_table     */
			  "xorl %%ecx, %%ecx\n"       /* ecx = 0             */
			  "movl $" ASMSYM "mpeg1_zigzag_table+1, %%esi\n" /*esi = zigzag*/
			  "lea 1(%%esi), %%ebx\n"     /* ebx = esi + 1       */
			  "neg %%ebx\n"               /* ebx = -(esi+1)      */
			  ".p2align 4,,7\n"           /* align for jump      */
			  "0: xorl %%eax, %%eax\n"    /* eax = 0             */
			  "1: movb (%%esi), %%cl\n"   /* ecx = index in block*/
			  "incl %%esi\n"              /* (faster than lodsb) */
			  "addw (%%edx, %%ecx, 2), %%ax\n" /* eax = unzig    */
			  "jz 1b\n"                   /* coeff == 0 then loop*/
			  "orl %%ecx, %%ecx\n"        /* index == 0 then quit*/
			  "jz 2f\n"                   /* (faster than jcxz)  */
			  "movsx %%ax, %%eax\n"       /* extend sign         */
			  "movw " ASMSYM "mpeg1_table_clip+4096(, %%eax ,2), %%ax\n" /*clip*/
			  "movsx %%ax, %%eax\n"       /* extend sign         */
			  "addl %%esi, %%ebx\n"       /* ebx = run           */
			  "shll $7, %%eax\n"          /* eax *= 128(indexing)*/
			  "lea (%%eax, %%ebx, 2), %%eax\n" /*eax = 2 * offset*/
			  "lea (%%ebp, %%eax, 4), %%ebx\n" /* ebx = &vlc     */
			  "movl (%%ebx), %%eax\n"     /* eax = code          */
			  "movl 4(%%ebx), %%ecx\n"    /* ecx = length        */
			  "addl 8(%%edi), %%ecx\n"    /* ecx = shift + length*/
			  "xorl %%ebx, %%ebx\n"       /* ebx = 0             */
			  "shrd %%cl, %%eax, %%ebx\n" /* adjust code to fit  */
			  "movl %%ecx, 8(%%edi)\n"    /* shift += length     */
			  "shr %%cl, %%eax\n"         /* adjust code to fit  */
			  "bswap %%eax\n"      /* reverse byte order of code */
			  "movl 4(%%edi), %%ecx\n"    /* ecx = data          */
			  "bswap %%ebx\n"      /* reverse byte order of code */
			  "or %%eax, (%%ecx)\n"       /* put first 32 bits   */
			  "movl 8(%%edi), %%eax\n"    /* eax = shift + length*/
			  "shrl $5, %%eax\n"          /* get dword increment */
			  "andl $31, 8(%%edi)\n"      /* mask shift          */
			  "lea   (%%ecx, %%eax, 4), %%ecx\n"/* data+=(ecx>32)*/
			  "orl %%ebx, (%%ecx)\n"      /* put last 32 bits    */
			  "movl %%ecx, 4(%%edi)\n"    /* save data           */
			  "xorl %%ecx, %%ecx\n"       /* ecx = 0             */
			  "lea 1(%%esi), %%ebx\n"     /* ebx = esi + 1 (last)*/
			  "neg %%ebx\n"               /* ebx = -(esi + 1)    */
			  "jmp 0b\n"                  /* loop                */
			  "2: popl %%ebp\n"           /* restore frame pointer*/
			  "popl %%ebx\n"              /* reload ebx          */
			  : "=c"(dummy1),
			    "=a"(dummy2),
			    "=d"(block),
			    "=D"(dummy3),
			    "=S"(dummy4)
			  : "d"(block),
			    "D"(&syntax_mpeg1->buffer),
                            "S"(syntax_mpeg1->vlc_table)
			  : "memory");
    block[0] = saved_dc; /* put the real DC coefficient back */
  }
#else
  {
    short i;
    unsigned long last;
    fame_vlc_t const *vlc;

    last = 1;
    for(i = 1; i < 64; i++)
    {
      v = block[mpeg1_zigzag_table[i]];

      /* count zeroes */
      if(v != 0) {
	/* write code: 64 entries per level, then the run of zeroes.
	   The clipped level may be negative, so it is multiplied rather
	   than shifted (left-shifting a negative value is undefined). */
	vlc = syntax_mpeg1->vlc_table + mpeg1_table_clip[2048+v] * 64 + i - last;
	bitbuffer_write(&syntax_mpeg1->buffer, vlc->code, vlc->length);

	/* reset zeroes count */
	last = i+1;
      }
    }
  }
#endif /* HAS_BSWAP */

  /* mark end of block */
  bitbuffer_write(&syntax_mpeg1->buffer, 2, 2);
}

/* Write one intra coded macroblock.

   mb_x, mb_y : macroblock position, turned into an address using the
                picture width in macroblocks
   blocks     : the six coefficient blocks; 0..3 are coded with the
                luminance DC table and predictor, 4 and 5 with the
                chrominance DC table and the cb / cr predictors
   bab, bab_type, pattern : unused

   The macroblock address is written as an increment to the previously
   written macroblock, and the motion vector predictor is reset. */
static void mpeg1_write_intra_mb(fame_syntax_t *syntax,
                                 int mb_x,
                                 int mb_y,
                                 short *blocks[6],
                                 unsigned char *bab,
                                 fame_bab_t bab_type,
                                 unsigned char pattern)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  int address;
  int incr;
  int luma;

  address = mb_y * syntax_mpeg1->mb_width + mb_x;
  incr = address - syntax_mpeg1->prev_mb_addr;
  syntax_mpeg1->prev_mb_addr = address;

  /* one address escape for every 33 macroblocks jumped over */
  for(; incr > 33; incr -= 33) {
    bitbuffer_write(&syntax_mpeg1->buffer, mb_addr_inc[33].code, mb_addr_inc[33].length);
  }

  /* address increment */
  bitbuffer_write(&syntax_mpeg1->buffer, mb_addr_inc[incr-1].code, mb_addr_inc[incr-1].length);

  /* macroblock type: the intra code depends on the picture type */
  if(syntax_mpeg1->frame_type == frame_type_I) {
    bitbuffer_write(&syntax_mpeg1->buffer, 1, 1);
  } else if(syntax_mpeg1->frame_type == frame_type_P) {
    bitbuffer_write(&syntax_mpeg1->buffer, 3, 5);
  }

  /* the four luminance blocks share one DC predictor */
  for(luma = 0; luma < 4; luma++)
    mpeg1_block_intra(syntax, blocks[luma], encode_ydc_table, &syntax_mpeg1->y_dc_pred);

  /* each chrominance block has its own DC predictor */
  mpeg1_block_intra(syntax, blocks[4], encode_cdc_table, &syntax_mpeg1->cb_dc_pred);
  mpeg1_block_intra(syntax, blocks[5], encode_cdc_table, &syntax_mpeg1->cr_dc_pred);

  /* reset the motion predictors */
  syntax_mpeg1->mv_pred.dy = 0;
  syntax_mpeg1->mv_pred.dx = 0;
}

/* Write one non-intra coded 8x8 block.

   block : the 64 coefficients of the block, read only

   A first coefficient that clips to +1 or -1 is written with the short
   2-bit code ('10' / '11'). All remaining coefficients are visited in
   zigzag order and each non-zero one is written with the code found in
   the centered run/level table (64 entries per level, indexed by the
   run of zeroes before it). The block is closed with the 2-bit end of
   block code. */
static void mpeg1_block_inter(fame_syntax_t *syntax, short *block)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  short i, v;
  unsigned long last;
  fame_vlc_t const *vlc;

  /* TODO: optimized loop if HAS_BSWAP */
  /* encode DC coefficient */
  v = mpeg1_table_clip[2048+block[0]];
  if(v == 1) {
    bitbuffer_write(&syntax_mpeg1->buffer, 2, 2);
    i = 1;
  } else if(v == -1) {
    bitbuffer_write(&syntax_mpeg1->buffer, 3, 2);
    i = 1;
  } else {
    /* no short code: the first coefficient goes through the table */
    i = 0;
  }

  /* encode AC coefficients */
  last = i;
  for(; i < 64; i++)
  {
    v = block[mpeg1_zigzag_table[i]];

    /* count zeroes */
    if(v != 0) {
      /* write code. The clipped level may be negative, so it is
         multiplied rather than shifted (left-shifting a negative value
         is undefined behaviour). */
      vlc = syntax_mpeg1->vlc_table + mpeg1_table_clip[2048+v] * 64 + i - last;
      bitbuffer_write(&syntax_mpeg1->buffer, vlc->code, vlc->length);

      /* reset zeroes count */
      last = i+1;
    }
  }

  /* mark end of block */
  bitbuffer_write(&syntax_mpeg1->buffer, 2, 2);
}

/* Write one motion vector component.

   delta : difference between the vector component and its predictor

   The difference is first brought into the range allowed by the current
   f_code, [-range, range-1] with range = 8 << f_code, by adding or
   subtracting 2*range. It is then split into a motion code, looked up in
   mb_motion_table (index 32 is the zero code), and, when f_code is
   greater than 1, a residual of f_code-1 bits.

   The wrap has to happen before the sign is examined: a wrapped
   difference changes sign, and coding it with the formula of its
   original sign produces a wrong code/residual pair whenever
   f_code > 1. */
static inline void mpeg1_write_vector(fame_syntax_t *syntax, short delta)
{
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  int r_size;     /* number of residual bits */
  int range;
  int wrapped;
  int magnitude;
  int code;
  int residual;

  r_size = syntax_mpeg1->f_code - 1;
  range = 8 << syntax_mpeg1->f_code;

  /* modular wrap into [-range, range-1] */
  wrapped = delta;
  if(wrapped >= range)
    wrapped -= 2 * range;
  else if(wrapped < -range)
    wrapped += 2 * range;

  if(wrapped == 0) {
    bitbuffer_write(&syntax_mpeg1->buffer,
		    mb_motion_table[32].code,
		    mb_motion_table[32].length);
    return;
  }

  /* |wrapped| - 1 = (|code| - 1) * 2^r_size + residual */
  magnitude = (wrapped < 0 ? -wrapped : wrapped) - 1;
  residual = magnitude & ((1 << r_size) - 1);
  code = (magnitude >> r_size) + 1;
  if(wrapped < 0)
    code = -code;

  /* the table is centered on index 32 */
  code += 32;
  bitbuffer_write(&syntax_mpeg1->buffer,
		  mb_motion_table[code].code,
		  mb_motion_table[code].length);
  if(r_size)
    bitbuffer_write(&syntax_mpeg1->buffer, residual, r_size);
}

/* Write one inter coded macroblock of a P picture, or skip it.

   mb_x, mb_y : macroblock position, turned into an address using the
                picture width in macroblocks
   blocks     : the six coefficient blocks; a NULL entry or an all-zero
                block is left out of the coded block pattern
   forward    : forward motion vector(s); only forward[0] is used. NULL
                is treated as the zero vector (the pointer used to be
                checked when looking for motion but dereferenced
                unconditionally afterwards).
   backward   : only looked at to decide whether the macroblock can be
                skipped (TODO: support B pictures)
   bab, bab_type, pattern, motion_coding : unused

   A macroblock with no coded block and no motion is skipped, i.e.
   nothing is written and the DC and motion predictors are reset, unless
   it is the first macroblock written in the slice or its address is
   slice_length - 1. Every macroblock that is written carries a forward
   vector, even a zero one. */
static void mpeg1_write_inter_mb(fame_syntax_t *syntax,
                                 int mb_x,
                                 int mb_y,
                                 short *blocks[6],
                                 unsigned char *bab,
                                 fame_bab_t bab_type,
                                 unsigned char pattern,
                                 fame_motion_vector_t *forward,
                                 fame_motion_vector_t *backward,
                                 fame_motion_coding_t motion_coding)
{
  static const fame_motion_vector_t no_motion = { 0 };
  fame_syntax_mpeg1_t *syntax_mpeg1 = FAME_SYNTAX_MPEG1(syntax);
  const fame_motion_vector_t *fwd;
  int incr;
  int has_motion;
  int nonzero;
  int cbp;
  int i, j;

  incr = mb_y * syntax_mpeg1->mb_width + mb_x - syntax_mpeg1->prev_mb_addr;

  /* a missing forward vector means no motion */
  fwd = (forward != NULL) ? forward : &no_motion;

  has_motion = (fwd->dx != 0 || fwd->dy != 0);
  if(backward && (backward[0].dx != 0 || backward[0].dy != 0))
    has_motion = 1;

  /* make cbp code: bit 0x20 is block 0, bit 0x01 is block 5 */
  cbp = 0;
  for(j = 0; j < 6; j++) {
    /* check for not coded blocks */
    nonzero = 0;
    if(blocks[j] != NULL)
      for(i = 0; i < 64; i++)
	nonzero |= blocks[j][i];
    if(nonzero)
      cbp |= 0x20 >> j;
  }

  if(!cbp && !has_motion &&
     syntax_mpeg1->prev_mb_addr != -1 &&
     (syntax_mpeg1->prev_mb_addr + incr) != (syntax_mpeg1->slice_length - 1)) {
    /* reset the DC predictors */
    syntax_mpeg1->y_dc_pred = 128;
    syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 0;
    /* reset the motion predictors */
    syntax_mpeg1->mv_pred.dx = syntax_mpeg1->mv_pred.dy = 0;
    /* skip macroblock */
    return;
  }

  syntax_mpeg1->prev_mb_addr += incr;

  while(incr > 33) {
    /* address escape */
    bitbuffer_write(&syntax_mpeg1->buffer, mb_addr_inc[33].code, mb_addr_inc[33].length);
    incr -= 33;
  }
  /* address increment */
  bitbuffer_write(&syntax_mpeg1->buffer, mb_addr_inc[incr-1].code, mb_addr_inc[incr-1].length);

  /* macroblock type: coding of the vector is always forced, so only the
     two "motion" types are ever used */
  if(syntax_mpeg1->frame_type == frame_type_P) {
    if(cbp) {
      bitbuffer_write(&syntax_mpeg1->buffer, 1, 1); /* motion,pattern */
    } else {
      bitbuffer_write(&syntax_mpeg1->buffer, 1, 3); /* motion,no pattern */
    }
  }

  /* motion vectors, as differences to the predictor */
  mpeg1_write_vector(syntax, fwd->dx - syntax_mpeg1->mv_pred.dx);
  mpeg1_write_vector(syntax, fwd->dy - syntax_mpeg1->mv_pred.dy);

  /* update motion predictors */
  syntax_mpeg1->mv_pred.dx = fwd->dx;
  syntax_mpeg1->mv_pred.dy = fwd->dy;

  /* code block pattern */
  if(cbp)
    bitbuffer_write(&syntax_mpeg1->buffer, mb_pattern_table[cbp].code, mb_pattern_table[cbp].length);

  /* code only useful blocks according to pattern value */
  for(j = 0; j < 6; j++) {
    if(cbp & (0x20 >> j))
      mpeg1_block_inter(syntax, blocks[j]);
  }

  /* reset the predictors to their original values */
  syntax_mpeg1->y_dc_pred = 128;
  syntax_mpeg1->cr_dc_pred = syntax_mpeg1->cb_dc_pred = 0;
}

/* Derive the chrominance vectors from the luminance vectors.
   vectors[0..3] are read; vectors[4] and vectors[5] both receive the sum
   of the four luminance vectors divided by 8, rounded towards zero.
   Only the dx and dy components are touched. */
static void mpeg1_compute_chrominance_vectors(fame_syntax_t *syntax,
                                              fame_motion_vector_t *vectors)
{
  int sum_x = 0;
  int sum_y = 0;
  int k;

  for(k = 0; k < 4; k++) {
    sum_x += vectors[k].dx;
    sum_y += vectors[k].dy;
  }

  /* integer division truncates towards zero */
  vectors[4].dx = sum_x / 8;
  vectors[4].dy = sum_y / 8;

  vectors[5].dx = vectors[4].dx;
  vectors[5].dy = vectors[4].dy;
}
