
/* Little cms */
/* Copyright (C) 1998-2000 Marti Maria */

/* THIS SOFTWARE IS PROVIDED "AS-IS" AND WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS, IMPLIED OR OTHERWISE, INCLUDING WITHOUT LIMITATION, ANY */
/* WARRANTY OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. */

/* IN NO EVENT SHALL MARTI MARIA BE LIABLE FOR ANY SPECIAL, INCIDENTAL, */
/* INDIRECT OR CONSEQUENTIAL DAMAGES OF ANY KIND, */
/* OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, */
/* WHETHER OR NOT ADVISED OF THE POSSIBILITY OF DAMAGE, AND ON ANY THEORY OF */
/* LIABILITY, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE */
/* OF THIS SOFTWARE. */


/* This library is free software; you can redistribute it and/or */
/* modify it under the terms of the GNU Lesser General Public */
/* License as published by the Free Software Foundation; either */
/* version 2 of the License, or (at your option) any later version. */

/* This library is distributed in the hope that it will be useful, */
/* but WITHOUT ANY WARRANTY; without even the implied warranty of */
/* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU */
/* Lesser General Public License for more details. */

/* You should have received a copy of the GNU Lesser General Public */
/* License along with this library; if not, write to the Free Software */
/* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */

/* Interpolation */

#include "lcms.h"

void cmsCalcL16Params(int nSamples, LPL16PARAMS p)
{
       p -> nSamples = nSamples;
       p -> Domain   = (WORD) (nSamples - 1);
       p -> nInputs = p -> nOutputs = 1;

}


void cmsCalcCLUT16Params(int nSamples, int InputChan, int OutputChan, LPL16PARAMS p)
{
       int clutPoints;

       cmsCalcL16Params(nSamples, p);
       p -> nInputs  = InputChan;
       p -> nOutputs = OutputChan;

       clutPoints = p -> Domain + 1;

       p -> a1 = p -> nOutputs;           /* Z */
       p -> a2 = p -> a1 * clutPoints;    /* Y */
       p -> a3 = p -> a2 * clutPoints;    /* X */
       p -> a4 = p -> a3 * clutPoints;    /* Used only in 4 inputs LUT */
}



#ifdef USE_FLOAT


/* Floating-point version */

WORD cmsLinearInterpLUT16(WORD Value, WORD LutTable[], LPL16PARAMS p)
{
       double y1, y0;
       double y;
       double val2, rest;
       int cell0, cell1;

       /* if last value... */

       if (Value == 0xffff) return LutTable[p -> Domain];

       val2 = p -> Domain * ((double) Value / 65535.0);

       cell0 = (int) floor(val2);
       cell1 = cell0 + 1;

       /* Rest is 16 LSB bits */

       rest = val2 - cell0;

       y0 = LutTable[cell0] ;
       y1 = LutTable[cell1] ;

       y = y0 + (y1 - y0) * rest;


       return (WORD) floor(y+.5);
}

#endif



/* Linear interpolation (Fixed-point optimized, but C source) */



#ifdef USE_C

WORD cmsLinearInterpLUT16(WORD Value1, WORD LutTable[], LPL16PARAMS p)
{
       WORD y1, y0;
       WORD y;
       int dif, a1;
       int cell0, rest;
       int val3, Value;

       /* if last value... */


       Value = Value1;
       if (Value == 0xffff) return LutTable[p -> Domain];

       val3 = p -> Domain * Value;
       val3 = ToFixedDomain(val3);              /* To fixed 15.16 */

       cell0 = FIXED_TO_INT(val3);             /* Cell is 16 MSB bits */
       rest  = FIXED_REST_TO_INT(val3);        /* Rest is 16 LSB bits */

       y0 = LutTable[cell0] ;
       y1 = LutTable[cell0+1] ;

       dif = (int) y1 - y0;        /* dif is in domain -ffff ... ffff */

       if (dif >= 0)
       {
       a1 = ToFixedDomain(dif * rest);
       a1 += 0x8000;
       }
       else
       {
              a1 = ToFixedDomain((- dif) * rest);
              a1 -= 0x8000;
              a1 = -a1;
       }

       y = y0 + FIXED_TO_INT(a1);

       return (WORD) y;
}

#endif

/* Linear interpolation (asm by hand optimized) */

#ifdef USE_ASSEMBLER

#ifndef __BORLANDC__
#pragma warning(disable : 4033)
#endif

WORD cmsLinearInterpLUT16(WORD Value, WORD LutTable[], LPL16PARAMS p)
{
       int xDomain = p -> Domain;


       if (Value == 0xffff) return LutTable[p -> Domain];
       else
       ASM {
              movzx     eax, word ptr ss:Value
              mov       edx, ss:xDomain
              mul       edx                         /* val3 = p -> Domain * Value; */
              shld      edx, eax, 16                /* Convert it to fixed 15.16 */
              shl       eax, 16                     /* * 65536 / 65535 */
              mov       ebx, 0x0000ffff
              div       ebx
              mov       ecx, eax
              sar       ecx, 16                        /* ecx = cell0 */
              mov       edx, eax                       /* rest = (val2 & 0xFFFFU) */
              and       edx, 0x0000ffff                /* edx = rest */
              mov       ebx, ss:LutTable
              lea       eax, dword ptr [ebx+2*ecx]     /* Ptr to LUT */
              movzx     ebx, word  ptr [eax]           /* EBX = y0 */
              movzx     eax, word  ptr [eax+2]         /* EAX = y1 */
              sub       eax, ebx                       /* EAX = y1-y0 */
              js        IsNegative
              mul       edx                            /* EAX = EAX * rest */
              shld      edx, eax, 16                   /* Pass it to fixed */
              sal       eax, 16                        /* * 65536 / 65535 */
              mov       ecx, 0x0000ffff
              div       ecx
              add       eax, 0x8000                    /* Rounding */
              sar       eax, 16
              add       eax, ebx                       /* Done! */
              }

              RET((WORD) _EAX);

       IsNegative:

              ASM {
              neg       eax
              mul       edx                            /* EAX = EAX * rest */
              shld      edx, eax, 16                   /* Pass it to fixed */
              sal       eax, 16                        /* * 65536 / 65535 */
              mov       ecx, 0x0000ffff
              div       ecx
              sub       eax, 0x8000
              neg       eax
              sar       eax, 16
              add       eax, ebx                       /* Done! */
              }

              RET((WORD) _EAX);
}

#ifndef __BORLANDC__
#pragma warning(default : 4033)
#endif

#endif

Fixed32 cmsLinearInterpFixed(WORD Value1, WORD LutTable[], LPL16PARAMS p)
{
       Fixed32 y1, y0;
       int cell0;
       int val3, Value;

       /* if last value... */


       Value = Value1;
       if (Value == 0xffffU) return LutTable[p -> Domain];

       val3 = p -> Domain * Value;
       val3 = ToFixedDomain(val3);              /* To fixed 15.16 */

       cell0 = FIXED_TO_INT(val3);             /* Cell is 16 MSB bits */

       y0 = LutTable[cell0] ;
       y1 = LutTable[cell0+1] ;


       return y0 + FixedMul((y1 - y0), (val3 & 0xFFFFL));
}


/* Reverse Lineal interpolation (16 bits) */
/* Im using a sort of binary search here, this is not a time-critical function */

WORD cmsReverseLinearInterpLUT16(WORD Value, WORD LutTable[], LPL16PARAMS p)
{
        register int l = 1;
        register int r = 0x10000;
        register int x = 0, res;       /* 'int' Give spacing for negative values */

        while (r > l)
        {
                x = (l+r)/2;
                res = (int) cmsLinearInterpLUT16((WORD) (x-1), LutTable, p);
                if (res == Value) return (WORD) x;
                if (res > Value) r = x - 1;
                else l = x + 1;
        }

        return (WORD) x ;
}




/* Trilinear interpolation (16 bits) - float version */

#ifdef USE_FLOAT
void cmsTrilinearInterp16(WORD Input[], WORD Output[],
                            WORD LutTable[], LPL16PARAMS p)

{
#   define LERP(a,l,h)  (double) ((l)+(((h)-(l))*(a)))
#   define DENS(X, Y, Z)    (double) (LutTable[TotalOut*((Z)+clutPoints*((Y)+clutPoints*(X)))+OutChan])



    double     px, py, pz;
    int        x0, y0, z0,
               x1, y1, z1;
               int clutPoints, TotalOut, OutChan;
    double     fx, fy, fz,
               d000, d001, d010, d011,
               d100, d101, d110, d111,
               dx00, dx01, dx10, dx11,
               dxy0, dxy1, dxyz;


    clutPoints = p -> Domain + 1;
    TotalOut   = p -> nOutputs;

    px = ((double) Input[0] * (p->Domain)) / 65535.0;
    py = ((double) Input[1] * (p->Domain)) / 65535.0;
    pz = ((double) Input[2] * (p->Domain)) / 65535.0;

    x0 = (int) floor(px); fx = px - x0;
    y0 = (int) floor(py); fy = py - y0;
    z0 = (int) floor(pz); fz = pz - z0;

    x1 = x0 + (Input[0] != 0xFFFFU ? 1 : 0);
    y1 = y0 + (Input[1] != 0xFFFFU ? 1 : 0);
    z1 = z0 + (Input[2] != 0xFFFFU ? 1 : 0);


    for (OutChan = 0; OutChan < TotalOut; OutChan++)
    {

        d000 = DENS(x0, y0, z0);
        d001 = DENS(x0, y0, z1);
        d010 = DENS(x0, y1, z0);
        d011 = DENS(x0, y1, z1);

        d100 = DENS(x1, y0, z0);
        d101 = DENS(x1, y0, z1);
        d110 = DENS(x1, y1, z0);
        d111 = DENS(x1, y1, z1);


    dx00 = LERP(fx, d000, d100);
    dx01 = LERP(fx, d001, d101);
    dx10 = LERP(fx, d010, d110);
    dx11 = LERP(fx, d011, d111);

    dxy0 = LERP(fy, dx00, dx10);
    dxy1 = LERP(fy, dx01, dx11);

    dxyz = LERP(fz, dxy0, dxy1);

    Output[OutChan] = (WORD) floor(dxyz + .5);
    }


#   undef LERP
#   undef DENS
}


#endif


#ifdef USE_C

/* Trilinear interpolation (16 bits) - optimized version */

void cmsTrilinearInterp16(WORD Input[], WORD Output[],
                            WORD LutTable[], LPL16PARAMS p)

{
#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
#define LERP(a,l,h)     (WORD) (l+ ROUND_FIXED_TO_INT(((h-l)*a)))


           int        OutChan, TotalOut;
           Fixed32    fx, fy, fz;
  register WORD       rx, ry, rz;
           int        x0, y0, z0, x1, y1, z1;
  register int        X0, X1, Y0, Y1, Z0, Z1;
           int        d000, d001, d010, d011,
                      d100, d101, d110, d111,
                      dx00, dx01, dx10, dx11,
                      dxy0, dxy1, dxyz;


    TotalOut   = p -> nOutputs;

    fx = ToFixedDomain((int) Input[0] * p -> Domain);
    x0  = FIXED_TO_INT(fx);
    rx  = (WORD) FIXED_REST_TO_INT(fx);    /* Rest in 0..1.0 domain */


    fy = ToFixedDomain((int) Input[1] * p -> Domain);
    y0  = FIXED_TO_INT(fy);
    ry  = (WORD) FIXED_REST_TO_INT(fy);

    fz = ToFixedDomain((int) Input[2] * p -> Domain);
    z0 = FIXED_TO_INT(fz);
    rz = (WORD) FIXED_REST_TO_INT(fz);

    x1 = x0 + (Input[0] != 0xFFFFU ? 1 : 0);
    y1 = y0 + (Input[1] != 0xFFFFU ? 1 : 0);
    z1 = z0 + (Input[2] != 0xFFFFU ? 1 : 0);

    Z0 = p -> a1 * z0;
    Z1 = p -> a1 * z1;
    Y0 = p -> a2 * y0;
    Y1 = p -> a2 * y1;
    X0 = p -> a3 * x0;
    X1 = p -> a3 * x1;


    for (OutChan = 0; OutChan < TotalOut; OutChan++)
    {

        d000 = DENS(X0, Y0, Z0);
        d001 = DENS(X0, Y0, Z1);
        d010 = DENS(X0, Y1, Z0);
        d011 = DENS(X0, Y1, Z1);

        d100 = DENS(X1, Y0, Z0);
        d101 = DENS(X1, Y0, Z1);
        d110 = DENS(X1, Y1, Z0);
        d111 = DENS(X1, Y1, Z1);


        dx00 = LERP(rx, d000, d100);
        dx01 = LERP(rx, d001, d101);
        dx10 = LERP(rx, d010, d110);
        dx11 = LERP(rx, d011, d111);

        dxy0 = LERP(ry, dx00, dx10);
        dxy1 = LERP(ry, dx01, dx11);

        dxyz = LERP(rz, dxy0, dxy1);

        Output[OutChan] = dxyz;
    }


#   undef LERP
#   undef DENS
}

#endif

#ifdef USE_ASSEMBLER



#define         d000        0
#define         d001        1
#define         d010        2
#define         d011        3
#define         d100        4
#define         d101        5
#define         d110        6
#define         d111        7

#define         dx00        8
#define         dx01        9
#define         dx10        10
#define         dx11        11

#define         dxy0        12
#define         dxy1        13

#define         dxyz        14

#define         rx          15
#define         ry          16
#define         rz          17


#define         X0          18
#define         X1          19
#define         Y0          20
#define         Y1          21
#define         Z0          22
#define         Z1          23

#define         x0          24
#define         y0          25
#define         z0          26

static Fixed32  Cube[27];

void cmsTrilinearInterp16(WORD Input[], WORD Output[],
                            WORD LutTable[], LPL16PARAMS p)
{

       ASM {

          lea     esi, Cube                      /* esi = Variables */
          mov     edi, ss:p


          mov       eax, ss:Input
          movzx     eax, word ptr [eax]          /* x0 */
          mov       dword ptr [esi+4*x0], eax
          movzx     edx, word ptr [edi+12]       /* .Domain */
          imul      edx
          shld      edx, eax, 16
          sal       eax, 16
          mov       ebx, 0x0000ffff
          idiv      ebx
          mov       ecx, eax
          sar       ecx, 16                        /* ecx = cell0 */
          mov       edx, eax                       /* rest = (val2 & 0xFFFFU) */
          and       edx, 0x0000ffff                /* edx = rest */
          mov       dword ptr [esi + 4*rx], edx    /* rest to rx */

          /* Using a1, a2, a3, calculate cube in 3D grid */

          mov     eax, [edi].a3
          push    eax
          imul    eax, ecx                        /* ecx = x0 */
          mov     dword ptr [esi+4*X0], eax
          cmp     dword ptr [esi+4*x0], 0x0000FFFF
          jz      NoInc1
          inc     ecx
          }
NoInc1:
        ASM {

          pop     eax
          imul    eax, ecx
          mov     dword ptr [esi + 4*X1], eax


          mov       eax, ss:Input
          movzx     eax, word ptr [eax+ 2]             /* y0 */
          mov       dword ptr [esi+4*y0], eax
          movzx     edx, word ptr [edi+12]              /* .Domain */
          imul      edx
          shld      edx, eax, 16
          sal       eax, 16
          mov       ebx, 0x0000ffff
          idiv      ebx
          mov       ecx, eax
          sar       ecx, 16                        /* ecx = cell1 */
          mov       edx, eax
          and       edx, 0x0000ffff                /* edx = rest */
          mov       dword ptr [esi + 4*ry], edx    /* rest to ry */


          mov     eax, [edi].a2
          push    eax
          /* mov     ecx, ss:y0 */
          imul    eax, ecx
          mov     dword ptr [esi + 4*Y0], eax
          cmp     dword ptr [esi+4*y0], 0x0000FFFF
          jz      NoInc2
          inc     ecx
          }
NoInc2:
       ASM {
          pop     eax
          imul    eax, ecx
          mov     dword ptr [esi + 4*Y1], eax


          mov       eax, ss:Input
          movzx     eax, word ptr [eax+ 4]             /* z0 */
          mov       dword ptr [esi+4*z0], eax
          movzx     edx, word ptr [edi+12]              /* .Domain */
          imul      edx
          shld      edx, eax, 16
          sal       eax, 16
          mov       ebx, 0x0000ffff
          idiv      ebx
          mov       ecx, eax
          sar       ecx, 16                        /* ecx = cell1 */
          mov       edx, eax
          and       edx, 0x0000ffff                /* edx = rest */
          mov       dword ptr [esi + 4*rz], edx    /* rest to ry */

          mov     eax, [edi].a1
          push    eax
          /* mov     ecx, ss:z0 */
          imul    eax, ecx
          mov     dword ptr [esi + 4*Z0], eax
          cmp     dword ptr [esi+4*z0], 0x0000FFFF
          jz      NoInc3
          inc     ecx
          }
NoInc3:
       ASM {
          pop     eax
          imul    eax, ecx
          mov     dword ptr [esi + 4*Z1], eax
          xor     ebx, ebx                       /* ebx = actual Output */
          mov     edi, ss:LutTable               /* edi = LUT */
          jmp     Label01
          }
Label00:

       ASM {

           mov    eax, dword ptr [esi + 4*X0]
           add    eax, dword ptr [esi + 4*Y0]
           add    eax, ebx
           push   eax
           add    eax, dword ptr [esi + 4*Z0]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d000], eax

           pop    eax
           add    eax, dword ptr [esi + 4*Z1]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d001], eax


           mov    eax, dword ptr [esi + 4*X0]
           add    eax, dword ptr [esi + 4*Y1]
           add    eax, ebx
           push   eax
           add    eax, dword ptr [esi + 4*Z0]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d010], eax

           pop    eax
           add    eax, dword ptr [esi + 4*Z1]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d011], eax


           mov    eax, dword ptr [esi + 4*X1]
           add    eax, dword ptr [esi + 4*Y0]
           add    eax, ebx
           push   eax
           add    eax, dword ptr [esi + 4*Z0]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d100], eax

           pop    eax
           add    eax, dword ptr [esi + 4*Z1]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d101], eax


           mov    eax, dword ptr [esi + 4*X1]
           add    eax, dword ptr [esi + 4*Y1]
           add    eax, ebx
           push   eax
           add    eax, dword ptr [esi + 4*Z0]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d110], eax

           pop    eax
           add    eax, dword ptr [esi + 4*Z1]
           movzx  eax, word ptr [edi+2*eax]
           sal    eax, 16
           mov    dword ptr [esi + 4*d111], eax



           /* Cube[dx00] = FixedLERP2(rx, d000, d100); */

           mov    eax, dword ptr [esi+4*d100]
           mov    edx, dword ptr [esi+4*d000]
           push   edx
           mov    ecx, dword ptr [esi+4*rx]
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           mov    dword ptr[esi+4*dx00], eax

           /* Cube[dx01] = FixedLERP2(rx, d001, d101); */

           mov    eax, dword ptr [esi+4*d101]
           mov    edx, dword ptr [esi+4*d001]
           push   edx
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           mov    dword ptr[esi+4*dx01], eax

           /* Cube[dx10] = FixedLERP2(rx, d010, d110); */

           mov    eax, dword ptr [esi+4*d110]
           mov    edx, dword ptr [esi+4*d010]
           push   edx
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           mov    dword ptr[esi+4*dx10], eax

           /* Cube[dx11] = FixedLERP2(rx, d011, d111); */

           mov    eax, dword ptr [esi+4*d111]
           mov    edx, dword ptr [esi+4*d011]
           push   edx
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           mov    dword ptr[esi+4*dx11], eax


           /* Cube[dxy0] = FixedLERP2(ry, dx00, dx10); */

           mov    ecx, dword ptr[esi+4*ry]

           mov    eax, dword ptr [esi+4*dx10]
           mov    edx, dword ptr [esi+4*dx00]
           push   edx
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           mov    dword ptr[esi+4*dxy0], eax


           /* Cube[dxy1] = FixedLERP2(ry, dx01, dx11); */

           mov    eax, dword ptr [esi+4*dx11]
           mov    edx, dword ptr [esi+4*dx01]
           push   edx
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           mov    dword ptr[esi+4*dxy1], eax


           /* Cube[dxyz] = FixedLERP2(rz, dxy0, dxy1); */

           mov    ecx, dword ptr [esi+4*rz]
           mov    eax, dword ptr [esi+4*dxy1]
           mov    edx, dword ptr [esi+4*dxy0]
           push   edx
           sub    eax, edx
           imul   ecx
           shrd   eax, edx, 16
           pop    edx
           add    eax, edx
           /* mov    dword ptr[esi+4*dxyz], eax */

           add    eax, 0x8000
           sar    eax, 16
           mov    edx, ss:Output
           mov    word ptr [edx + 2*ebx], ax

           inc    ebx
           }
Label01:

       ASM {
           mov    edx, ss:p
           cmp    ebx, [edx].nOutputs
           jl     Label00
           }

}


#undef         d000
#undef         d001
#undef         d010
#undef         d011
#undef         d100
#undef         d101
#undef         d110
#undef         d111

#undef         dx00
#undef         dx01
#undef         dx10
#undef         dx11

#undef         dxy0
#undef         dxy1

#undef         dxyz

#undef         rx
#undef         ry
#undef         rz


#undef         X0
#undef         X1
#undef         Y0
#undef         Y1
#undef         Z0
#undef         Z1

#undef         x0
#undef         y0
#undef         z0

#endif




#ifdef USE_FLOAT

#define DENS(X, Y, Z)    (double) (LutTable[TotalOut*((Z)+clutPoints*((Y)+clutPoints*(X)))+OutChan])


/* Tetrahedral interpolation, using Sakamoto algorithm. This was under */
/* patent, but the patent expired. */

void cmsTetrahedralInterp16(WORD Input[],
                            WORD Output[],
                            WORD LutTable[],
                            LPL16PARAMS p)
{
    double     px, py, pz;
    int        x0, y0, z0,
               x1, y1, z1;
    double     fx, fy, fz;
    double     c1, c2, c3;
    int        clutPoints, OutChan, TotalOut;


    clutPoints = p -> Domain + 1;
    TotalOut   = p -> nOutputs;


    px = ((double) Input[0] * p->Domain) / 65535.0;
    py = ((double) Input[1] * p->Domain) / 65535.0;
    pz = ((double) Input[2] * p->Domain) / 65535.0;

    x0 = (int) floor(px); fx = (px - (double) x0);
    y0 = (int) floor(py); fy = (py - (double) y0);
    z0 = (int) floor(pz); fz = (pz - (double) z0);


    x1 = x0 + (Input[0] != 0xFFFFU ? 1 : 0);
    y1 = y0 + (Input[1] != 0xFFFFU ? 1 : 0);
    z1 = z0 + (Input[2] != 0xFFFFU ? 1 : 0);


    for (OutChan=0; OutChan < TotalOut; OutChan++)
    {

       /* These are the 6 Tetrahedral */

       if (fx >= fy && fy >= fz)
       {
              c1 = DENS(x1, y0, z0) - DENS(x0, y0, z0);
              c2 = DENS(x1, y1, z0) - DENS(x1, y0, z0);
              c3 = DENS(x1, y1, z1) - DENS(x1, y1, z0);
       }
       else
       if (fx >= fz && fz >= fy)
       {
              c1 = DENS(x1, y0, z0) - DENS(x0, y0, z0);
              c2 = DENS(x1, y1, z1) - DENS(x1, y0, z1);
              c3 = DENS(x1, y0, z1) - DENS(x1, y0, z0);
       }
       else
       if (fz >= fx && fx >= fy)
       {
              c1 = DENS(x1, y0, z1) - DENS(x0, y0, z1);
              c2 = DENS(x1, y1, z1) - DENS(x1, y0, z1);
              c3 = DENS(x0, y0, z1) - DENS(x0, y0, z0);
       }
       else
       if (fy >= fx && fx >= fz)
       {
              c1 = DENS(x1, y1, z0) - DENS(x0, y1, z0);
              c2 = DENS(x0, y1, z0) - DENS(x0, y0, z0);
              c3 = DENS(x1, y1, z1) - DENS(x1, y1, z0);

       }
       else
       if (fy >= fz && fz >= fx)
       {
              c1 = DENS(x1, y1, z1) - DENS(x0, y1, z1);
              c2 = DENS(x0, y1, z0) - DENS(x0, y0, z0);
              c3 = DENS(x0, y1, z1) - DENS(x0, y1, z0);
       }
       else
       if (fz >= fy && fy >= fx)
       {
              c1 = DENS(x1, y1, z1) - DENS(x0, y1, z1);
              c2 = DENS(x0, y1, z1) - DENS(x0, y0, z1);
              c3 = DENS(x0, y0, z1) - DENS(x0, y0, z0);
       }
       else
              assert(FALSE);


       Output[OutChan] = (WORD) floor((double) DENS(x0,y0,z0) + c1 * fx + c2 * fy + c3 * fz + .5);
       }

}

#else




/* p(x,y,z) = p000 + c1*Dx/(x1-x0) + c2*Dy/(y1-y0) + c3*Dz/(z1-z0) */



#define CELLX(n)     (x##n)
#define CELLY(n)     (y##n)
#define CELLZ(n)     (z##n)

#define DENS(i,j,k) ((Fixed32) LutTable[(i)+(j)+(k)+OutChan] << 16)

#define EVAL  Rest = FixedMul(c1,rx)+FixedMul(c2,ry)+FixedMul(c3,rz);\
              Output[OutChan] = (WORD) FIXED_TO_INT(((int) DENS(X0,Y0,Z0) + FixedDiv(Rest, 0xFFFFU)))


void cmsTetrahedralInterp16(WORD Input[],
                            WORD Output[],
                            WORD LutTable[],
                            LPL16PARAMS p)
{

       Fixed32    fx, fy, fz;
       Fixed32    rx, ry, rz;
       int        x0, y0, z0;
       int        x1, y1, z1;
       Fixed32    c1, c2, c3, Rest;
       int        OutChan;
register   Fixed32    X0, X1, Y0, Y1, Z0, Z1;
       int TotalOut = p -> nOutputs;



    fx = ToFixedDomain((int) Input[0] * p -> Domain);
    x0  = FIXED_TO_INT(fx);
    rx  = FIXED_REST_TO_INT(fx);    /* Rest in 0..1.0 domain */


    fy = ToFixedDomain((int) Input[1] * p -> Domain);
    y0  = FIXED_TO_INT(fy);
    ry  = FIXED_REST_TO_INT(fy);

    fz = ToFixedDomain((int) Input[2] * p -> Domain);
    z0 = FIXED_TO_INT(fz);
    rz = FIXED_REST_TO_INT(fz);

    x1 = x0 + (Input[0] != 0xFFFFU ? 1 : 0);
    y1 = y0 + (Input[1] != 0xFFFFU ? 1 : 0);
    z1 = z0 + (Input[2] != 0xFFFFU ? 1 : 0);


    Z0 = p -> a1 * z0;
    Z1 = p -> a1 * z1;
    Y0 = p -> a2 * y0;
    Y1 = p -> a2 * y1;
    X0 = p -> a3 * x0;
    X1 = p -> a3 * x1;


       /* These are the 6 Tetrahedral */

       if (rx >= ry && ry >= rz)
       {
              for (OutChan=0; OutChan < TotalOut; OutChan++)
              {
              c1 = DENS(X1, Y0, Z0) - DENS(X0, Y0, Z0);
              c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
              c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
              EVAL;
              }

       }
       else
       if (rx >= rz && rz >= ry)
       {
              for (OutChan=0; OutChan < TotalOut; OutChan++)
              {

              c1 = DENS(X1, Y0, Z0) - DENS(X0, Y0, Z0);
              c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
              c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
              EVAL;
              }

       }
       else
       if (rz >= rx && rx >= ry)
       {
              for (OutChan=0; OutChan < TotalOut; OutChan++)
              {
              c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
              c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
              c3 = DENS(X0, Y0, Z1) - DENS(X0, Y0, Z0);
              EVAL;
              }

       }
       else
       if (ry >= rx && rx >= rz)
       {
              for (OutChan=0; OutChan < TotalOut; OutChan++)
              {
              c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
              c2 = DENS(X0, Y1, Z0) - DENS(X0, Y0, Z0);
              c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
              EVAL;
              }

       }
       else
       if (ry >= rz && rz >= rx)
       {
              for (OutChan=0; OutChan < TotalOut; OutChan++)
              {
              c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
              c2 = DENS(X0, Y1, Z0) - DENS(X0, Y0, Z0);
              c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
              EVAL;
              }
       }
       else
       if (rz >= ry && ry >= rx)
       {
              for (OutChan=0; OutChan < TotalOut; OutChan++)
              {
              c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
              c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
              c3 = DENS(X0, Y0, Z1) - DENS(X0, Y0, Z0);
              EVAL;
              }
       }
       else
              assert(FALSE);


}

#endif


