/**************************************************************************
 *{@C
 *      Copyright:      1988-2025 Paul Obermeier (obermeier@poSoft.de)
 *
 *      Module:         ImageProcessing
 *      Filename:       IP_WarpKeypoint.c
 *
 *      Author:         Paul Obermeier
 *
 *      Description:    Functions for keypoint based image
 *                      warping and blending.
 *
 *      Additional documentation:
 *                      None.
 *
 *      Exported functions:
 *                      IP_WarpKeypoint
 *                      IP_BlendKeypoint
 *
 **************************************************************************/

#include <stdio.h>
#include <math.h>

#include "UT_Compat.h"
#include "UT_Macros.h"
#include "UT_Error.h"
#include "UT_Memory.h"
#include "UT_Math.h"
#include "UT_Parallel.h"

#include "FF_Image.h"

#include "IP_Image.h"
#include "IP_ImagePrivate.h"

/***************************************************************************
 *
 *      Macros and type definitions
 *
 ***************************************************************************/

/* Regular grid of warp data, interpolated from a set of arbitrarily
distributed key points. Every grid node carries a pixel displacement
(x and y component) and a sampling area size. The three data arrays are
indexed as "row * numcol + col". */

typedef struct {
    Int32   numrow;         /* Grid height: number of rows */
    Int32   numcol;         /* Grid width: number of columns */
    Float32 *xdata;         /* Per-node displacement, x component */
    Float32 *ydata;         /* Per-node displacement, y component */
    Float32 *sdata;         /* Per-node sampling area size */
} xyinterp;


/* Regular grid of blending factors, interpolated from a set of arbitrarily
distributed key points. The grid stores the factors relative to "foffset";
the sampling code adds "foffset" back in. The data array is indexed as
"row * numcol + col". */

typedef struct {
    Int32   numrow;         /* Grid height: number of rows */
    Int32   numcol;         /* Grid width: number of columns */
    Float32 foffset;        /* Constant added to every sampled value */
    Float32 *fdata;         /* Per-node blending factor, minus "foffset" */
} finterp;


/* Parameter block handed to the worker functions "conc_make_xyitp_1"
and "conc_make_xyitp_2". */

typedef struct {
    Float32         mu;             /* Grid column index -> u coordinate */
    Float32         mv;             /* Grid row index -> v coordinate */
    Float32         r;              /* r <= 0.0: use UT_MathHardy1,
                                       otherwise use UT_MathHardy2 */
    const Float32   *xdest;         /* Key point positions, x component */
    const Float32   *ydest;         /* Key point positions, y component */
    Float64         R2;             /* Set by UT_MathHardyCoeff1/2 and
                                       passed on to UT_MathHardy1/2 */
    Float64         *a;             /* Coefficients, "nkey" entries, set by
                                       UT_MathHardyCoeff1/2 */
    Int32           nkey;           /* Number of key points */
    xyinterp        *xyitp;         /* Grid to be filled (output) */
} xyiparms;


/* Parameter block handed to the worker function "conc_make_fitp". */

typedef struct {
    Float32         mu;             /* Grid column index -> u coordinate */
    Float32         mv;             /* Grid row index -> v coordinate */
    Float32         r;              /* r <= 0.0: use UT_MathHardy1,
                                       otherwise use UT_MathHardy2 */
    const Float32   *xdest;         /* Key point positions, x component */
    const Float32   *ydest;         /* Key point positions, y component */
    Float64         R2;             /* Set by UT_MathHardyCoeff1/2 and
                                       passed on to UT_MathHardy1/2 */
    Float64         *a;             /* Coefficients, "nkey" entries, set by
                                       UT_MathHardyCoeff1/2 */
    Int32           nkey;           /* Number of key points */
    finterp         *fitp;          /* Grid to be filled (output) */
} fiparms;


/* Parameter block handed to the worker function "conc_warp". */

typedef struct {
    IP_ImageId      srcimg;         /* Image the pixels are taken from */
    IP_ImageId      destimg;        /* Image the pixels are written to */
    IP_FillModeType oor;            /* Border treatment mode, passed on
                                       to IP_SamplePyramid */
    pyramid         *srcpyr;        /* Pyramid generated from "srcimg" */
    xyinterp        xyitp;          /* Displacement / sampling size grid */
    Float32         mx;             /* Destination column -> grid x position */
    Float32         my;             /* Destination row -> grid y position */
    Float32         msize;          /* Factor applied to the sampling size */
} warpparms;


/* Parameter block handed to the worker functions "conc_blend_1"
and "conc_blend_2". */

typedef struct {
    IP_ImageId      srcimg;         /* Image the pixels are taken from */
    IP_ImageId      destimg;        /* Image the pixels are written to */
    IP_FillModeType oor;            /* Border treatment mode, passed on
                                       to IP_SamplePyramid */
    pyramid         *srcpyr;        /* Pyramid generated from "srcimg" */
    xyinterp        xyitp;          /* Displacement / sampling size grid */
    finterp         fitp;           /* Blending factor grid */
    Float32         mx;             /* Destination column -> grid x position */
    Float32         my;             /* Destination row -> grid y position */
    Float32         msize;          /* Factor applied to the sampling size */
} blendparms;


/***************************************************************************
 *
 *      Image warping and blending functions
 *
 ***************************************************************************/

/* Sample the warp grid "xyitp" at position (xin, yin) using bilinear
interpolation between the four surrounding grid nodes. Both input
coordinates are clipped to the range from 0.0 to 1.0 first.
On return, (xout, yout) holds the clipped input position plus the
interpolated displacement, and "size" holds the interpolated sampling
area size. */

static void sample_xyinterp
        (Float32 xin, Float32 yin,
         const xyinterp *xyitp,
         Float32 *xout, Float32 *yout, Float32 *size)
{
    const Int32   ncol = xyitp->numcol;
    const Int32   nrow = xyitp->numrow;
    const Float32 *plane;
    Int32         c0, c1, r0, r1, i00, i01, i10, i11;
    Float32       px, py, gx, gy, fx, fy, wx, wy;

    /* Horizontal cell and weights: "fx" weighs column c1, "wx" column c0 */

    px = UT_MAX (0.0, UT_MIN (xin, 1.0));
    gx = px * (ncol - 1);
    c0 = (Int32)gx;
    c1 = UT_MIN (c0 + 1, ncol - 1);
    fx = gx - c0;
    wx = 1.0 - fx;

    /* Vertical cell and weights: "fy" weighs row r1, "wy" row r0 */

    py = UT_MAX (0.0, UT_MIN (yin, 1.0));
    gy = py * (nrow - 1);
    r0 = (Int32)gy;
    r1 = UT_MIN (r0 + 1, nrow - 1);
    fy = gy - r0;
    wy = 1.0 - fy;

    /* Array offsets of the four nodes surrounding the sample position */

    i00 = r0 * ncol + c0;
    i01 = r0 * ncol + c1;
    i10 = r1 * ncol + c0;
    i11 = r1 * ncol + c1;

    plane = xyitp->xdata;
    *xout = px + wx * (wy * plane[i00] + fy * plane[i10]) +
                 fx * (wy * plane[i01] + fy * plane[i11]);

    plane = xyitp->ydata;
    *yout = py + wx * (wy * plane[i00] + fy * plane[i10]) +
                 fx * (wy * plane[i01] + fy * plane[i11]);

    plane = xyitp->sdata;
    *size = wx * (wy * plane[i00] + fy * plane[i10]) +
            fx * (wy * plane[i01] + fy * plane[i11]);
}

/* Sample the blending factor grid "fitp" at position (xin, yin) using
bilinear interpolation between the four surrounding grid nodes. Both input
coordinates are clipped to the range from 0.0 to 1.0 first. The grid offset
"foffset" is added to the interpolated value, and the sum, limited to the
range from 0.0 to 1.0, is returned in "fout". */

static void sample_finterp
        (Float32 xin, Float32 yin,
         const finterp *fitp,
         Float32 *fout)
{
    const Int32   ncol = fitp->numcol;
    const Int32   nrow = fitp->numrow;
    const Float32 *plane = fitp->fdata;
    Int32         c0, c1, r0, r1;
    Float32       px, py, gx, gy, fx, fy, wx, wy, mix;

    /* Horizontal cell and weights: "fx" weighs column c1, "wx" column c0 */

    px = UT_MAX (0.0, UT_MIN (xin, 1.0));
    gx = px * (ncol - 1);
    c0 = (Int32)gx;
    c1 = UT_MIN (c0 + 1, ncol - 1);
    fx = gx - c0;
    wx = 1.0 - fx;

    /* Vertical cell and weights: "fy" weighs row r1, "wy" row r0 */

    py = UT_MAX (0.0, UT_MIN (yin, 1.0));
    gy = py * (nrow - 1);
    r0 = (Int32)gy;
    r1 = UT_MIN (r0 + 1, nrow - 1);
    fy = gy - r0;
    wy = 1.0 - fy;

    mix = fitp->foffset +
        wx * (wy * plane[r0 * ncol + c0] + fy * plane[r1 * ncol + c0]) +
        fx * (wy * plane[r0 * ncol + c1] + fy * plane[r1 * ncol + c1]);
    *fout = UT_MAX (0.0, UT_MIN (mix, 1.0));
}

/* Worker for function "make_xyinterp", first pass. "ptr" points to an
"xyiparms" structure. The grid rows are split into "n_conc" bands (a value
of 0 is treated as 1); this call handles band number "i_conc".
For every node of the band, the value returned by UT_MathHardy1/2 is stored
in "xdata", and "sdata" is initialized from the (dx, dy) pair delivered by
UT_MathHardyGrad1/2. */

static void conc_make_xyitp_1 (void *ptr, Int32 n_conc, Int32 i_conc)
{
    const xyiparms *parms = (const xyiparms *) ptr;
    xyinterp       *grid = parms->xyitp;
    Int32          first, limit, row, col, node;
    Float32        u, v, dx, dy;

    /* Determine the band of rows assigned to this call */

    if (n_conc == 0) {
        n_conc = 1;
    }
    first = (grid->numrow * i_conc) / n_conc;
    limit = (grid->numrow * (i_conc + 1)) / n_conc;

    /* Evaluate the interpolating function at every node of the band */

    for (row = first; row < limit; ++row) {
        v = row * parms->mv;
        for (col = 0; col < grid->numcol; ++col) {
            node = row * grid->numcol + col;
            u = col * parms->mu;
            if (parms->r <= 0.0) {
                grid->xdata[node] =
                    UT_MathHardy1 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v);
                UT_MathHardyGrad1 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v, &dx, &dy);
            } else {
                grid->xdata[node] =
                    UT_MathHardy2 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v);
                UT_MathHardyGrad2 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v, &dx, &dy);
            }
            grid->sdata[node] = UT_ABS (1.0 + dx) + UT_ABS (dy);
        }
    }
}

/* Worker for function "make_xyinterp", second pass. "ptr" points to an
"xyiparms" structure. The grid rows are split into "n_conc" bands (a value
of 0 is treated as 1); this call handles band number "i_conc".
For every node of the band, the value returned by UT_MathHardy1/2 is stored
in "ydata". "sdata" is raised to the value derived from the (dx, dy) pair
delivered by UT_MathHardyGrad1/2 if that value exceeds the one already
stored, so "sdata" must have been initialized before this pass runs. */

static void conc_make_xyitp_2 (void *ptr, Int32 n_conc, Int32 i_conc)
{
    const xyiparms *parms = (const xyiparms *) ptr;
    xyinterp       *grid = parms->xyitp;
    Int32          first, limit, row, col, node;
    Float32        u, v, dx, dy, extent;

    /* Determine the band of rows assigned to this call */

    if (n_conc == 0) {
        n_conc = 1;
    }
    first = (grid->numrow * i_conc) / n_conc;
    limit = (grid->numrow * (i_conc + 1)) / n_conc;

    /* Evaluate the interpolating function at every node of the band */

    for (row = first; row < limit; ++row) {
        v = row * parms->mv;
        for (col = 0; col < grid->numcol; ++col) {
            node = row * grid->numcol + col;
            u = col * parms->mu;
            if (parms->r <= 0.0) {
                grid->ydata[node] =
                    UT_MathHardy1 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v);
                UT_MathHardyGrad1 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v, &dx, &dy);
            } else {
                grid->ydata[node] =
                    UT_MathHardy2 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v);
                UT_MathHardyGrad2 (parms->nkey, parms->xdest, parms->ydest,
                                   parms->R2, parms->a, u, v, &dx, &dy);
            }
            extent = UT_ABS (dx) + UT_ABS (1.0 + dy);
            grid->sdata[node] = UT_MAX (grid->sdata[node], extent);
        }
    }
}

/* Worker for function "make_finterp". "ptr" points to a "fiparms"
structure. The grid rows are split into "n_conc" bands (a value of 0 is
treated as 1); this call handles band number "i_conc". For every node of
the band, the value returned by UT_MathHardy1/2 is stored in "fdata". */

static void conc_make_fitp (void *ptr, Int32 n_conc, Int32 i_conc)
{
    const fiparms *parms = (const fiparms *) ptr;
    finterp       *grid = parms->fitp;
    Int32         first, limit, row, col;
    Float32       *node;

    /* Determine the band of rows assigned to this call */

    if (n_conc == 0) {
        n_conc = 1;
    }
    first = (grid->numrow * i_conc) / n_conc;
    limit = (grid->numrow * (i_conc + 1)) / n_conc;

    /* Evaluate the interpolating function at every node of the band */

    for (row = first; row < limit; ++row) {
        for (col = 0; col < grid->numcol; ++col) {
            node = &grid->fdata[row * grid->numcol + col];
            if (parms->r <= 0.0) {
                *node = UT_MathHardy1 (parms->nkey,
                                       parms->xdest, parms->ydest,
                                       parms->R2, parms->a,
                                       col * parms->mu, row * parms->mv);
            } else {
                *node = UT_MathHardy2 (parms->nkey,
                                       parms->xdest, parms->ydest,
                                       parms->R2, parms->a,
                                       col * parms->mu, row * parms->mv);
            }
        }
    }
}

/* Worker for function "warp". "ptr" points to a "warpparms" structure.
The rows of the destination image are split into "n_conc" bands (a value
of 0 is treated as 1); this call handles band number "i_conc".
For every destination pixel of the band, the warp grid is sampled to find
the source position and sampling size, the source pyramid is sampled
there, and the result is written to all channels present in the
destination image. */

static void conc_warp (void *ptr, Int32 n_conc, Int32 i_conc)
{
    warpparms       *parms = (warpparms *) ptr;
    IP_ImageId      src = parms->srcimg;
    IP_ImageId      dst = parms->destimg;
    const xyinterp  *grid = &parms->xyitp;
    Float32         sx, sy, area, chan[FF_NumImgChanTypes];
    Int32           c, x, y, first, limit;

    /* Determine the band of rows assigned to this call */

    if (n_conc == 0) {
        n_conc = 1;
    }
    first = (dst->pimg.desc.height * i_conc) / n_conc;
    limit = (dst->pimg.desc.height * (i_conc + 1)) / n_conc;

    for (c = 0; c < FF_NumImgChanTypes; ++c) {
        chan[c] = 0.0;
    }

    /* Process the band pixel by pixel */

    for (y = first; y < limit; ++y) {
        for (x = 0; x < dst->pimg.desc.width; ++x) {

            /* The grid delivers positions in the range from 0.0 to 1.0;
               convert them to source pixel coordinates */

            sample_xyinterp (x * parms->mx, y * parms->my, grid,
                             &sx, &sy, &area);
            sx *= src->pimg.desc.width - 1;
            sy *= src->pimg.desc.height - 1;
            area *= parms->msize;
            IP_SamplePyramid (parms->srcpyr, sx, sy, area, chan, parms->oor);

            for (c = 0; c < FF_NumImgChanTypes; ++c) {
                switch (dst->pimg.channel[c]) {
                    case FF_ImgFmtTypeUByte:
                        *FF_ImgUBytePixel (&dst->pimg, c, x, y) =
                            (UInt8) (255.0 * chan[c]);
                        break;
                    case FF_ImgFmtTypeFloat:
                        *FF_ImgFloatPixel (&dst->pimg, c, x, y) = chan[c];
                        break;
                    case FF_ImgFmtTypeNone:
                        break;
                    default:
                        UT_ErrFatal ("conc_warp", "invalid channel type");
                }
            }
        }
    }
}


/* Worker for function "blend", first pass. "ptr" points to a "blendparms"
structure. The rows of the destination image are split into "n_conc" bands
(a value of 0 is treated as 1); this call handles band number "i_conc".
For every destination pixel of the band, the warped source image is
sampled, the sample is multiplied by the blending factor found in the
blending factor grid, and the product overwrites the destination pixel. */

static void conc_blend_1 (void *ptr, Int32 n_conc, Int32 i_conc)
{
    blendparms      *parms = (blendparms *) ptr;
    IP_ImageId      src = parms->srcimg;
    IP_ImageId      dst = parms->destimg;
    const xyinterp  *grid = &parms->xyitp;
    const finterp   *mixgrid = &parms->fitp;
    Float32         gx, gy, sx, sy, area, weight,
                    chan[FF_NumImgChanTypes];
    Int32           c, x, y, first, limit;

    /* Determine the band of rows assigned to this call */

    if (n_conc == 0) {
        n_conc = 1;
    }
    first = (dst->pimg.desc.height * i_conc) / n_conc;
    limit = (dst->pimg.desc.height * (i_conc + 1)) / n_conc;

    for (c = 0; c < FF_NumImgChanTypes; ++c) {
        chan[c] = 0.0;
    }

    /* Process the band pixel by pixel */

    for (y = first; y < limit; ++y) {
        for (x = 0; x < dst->pimg.desc.width; ++x) {
            gx = x * parms->mx;
            gy = y * parms->my;
            sample_finterp (gx, gy, mixgrid, &weight);

            /* The grid delivers positions in the range from 0.0 to 1.0;
               convert them to source pixel coordinates */

            sample_xyinterp (gx, gy, grid, &sx, &sy, &area);
            sx *= src->pimg.desc.width - 1;
            sy *= src->pimg.desc.height - 1;
            area *= parms->msize;
            IP_SamplePyramid (parms->srcpyr, sx, sy, area, chan, parms->oor);

            for (c = 0; c < FF_NumImgChanTypes; ++c) {
                switch (dst->pimg.channel[c]) {
                    case FF_ImgFmtTypeUByte:
                        *FF_ImgUBytePixel (&dst->pimg, c, x, y) =
                            (UInt8) (255.0 * chan[c] * weight);
                        break;
                    case FF_ImgFmtTypeFloat:
                        *FF_ImgFloatPixel (&dst->pimg, c, x, y) =
                            chan[c] * weight;
                        break;
                    case FF_ImgFmtTypeNone:
                        break;
                    default:
                        UT_ErrFatal ("conc_blend_1", "invalid channel type");
                }
            }
        }
    }
}

/* Worker for function "blend", second pass. "ptr" points to a "blendparms"
structure. The rows of the destination image are split into "n_conc" bands
(a value of 0 is treated as 1); this call handles band number "i_conc".
For every destination pixel of the band, the warped source image is
sampled, the sample is multiplied by one minus the blending factor found
in the blending factor grid, and the product is added to the value already
stored in the destination pixel. */

static void conc_blend_2 (void *ptr, Int32 n_conc, Int32 i_conc)
{
    blendparms      *parms = (blendparms *) ptr;
    IP_ImageId      src = parms->srcimg;
    IP_ImageId      dst = parms->destimg;
    const xyinterp  *grid = &parms->xyitp;
    const finterp   *mixgrid = &parms->fitp;
    Float32         gx, gy, sx, sy, area, weight,
                    chan[FF_NumImgChanTypes];
    Int32           c, x, y, first, limit;

    /* Determine the band of rows assigned to this call */

    if (n_conc == 0) {
        n_conc = 1;
    }
    first = (dst->pimg.desc.height * i_conc) / n_conc;
    limit = (dst->pimg.desc.height * (i_conc + 1)) / n_conc;

    for (c = 0; c < FF_NumImgChanTypes; ++c) {
        chan[c] = 0.0;
    }

    /* Process the band pixel by pixel */

    for (y = first; y < limit; ++y) {
        for (x = 0; x < dst->pimg.desc.width; ++x) {
            gx = x * parms->mx;
            gy = y * parms->my;

            /* This pass contributes the complement of the blending factor */

            sample_finterp (gx, gy, mixgrid, &weight);
            weight = 1.0 - weight;

            /* The grid delivers positions in the range from 0.0 to 1.0;
               convert them to source pixel coordinates */

            sample_xyinterp (gx, gy, grid, &sx, &sy, &area);
            sx *= src->pimg.desc.width - 1;
            sy *= src->pimg.desc.height - 1;
            area *= parms->msize;
            IP_SamplePyramid (parms->srcpyr, sx, sy, area, chan, parms->oor);

            for (c = 0; c < FF_NumImgChanTypes; ++c) {
                switch (dst->pimg.channel[c]) {
                    case FF_ImgFmtTypeUByte:
                        *FF_ImgUBytePixel (&dst->pimg, c, x, y) +=
                            (UInt8) (255.0 * chan[c] * weight);
                        break;
                    case FF_ImgFmtTypeFloat:
                        *FF_ImgFloatPixel (&dst->pimg, c, x, y) +=
                            chan[c] * weight;
                        break;
                    case FF_ImgFmtTypeNone:
                        break;
                    default:
                        UT_ErrFatal ("conc_blend_2", "invalid channel type");
                }
            }
        }
    }
}

/* Build the warp grid "xyitp" from a set of "nkey" arbitrarily distributed
key points. The grid has one node every "istep" pixels of a "width" by
"height" image, but never fewer than two rows and two columns.
The grid is filled in two passes: the first one interpolates the x
displacements (xsrc - xdest), the second one the y displacements
(ysrc - ydest); both passes contribute to the sampling area sizes.
"r" selects the interpolation functions: UT_MathHardyCoeff1 for
r <= 0.0, UT_MathHardyCoeff2 otherwise.
Returns UT_False if an allocation, a coefficient computation or a threaded
pass fails. In that case, memory obtained here is not released by this
function. */

static UT_Bool make_xyinterp
        (Int32 istep, Int32 width, Int32 height,
         Int32 nkey,
         const Float32 xsrc[], const Float32 ysrc[],
         const Float32 xdest[], const Float32 ydest[],
         Float32 r,
         xyinterp *xyitp)
{
    xyiparms parms;
    Float64  *delta;
    Int32    k, nnode;

    /* Grid dimensions and node spacing */

    xyitp->numcol = UT_MAX (width / istep, 2);
    xyitp->numrow = UT_MAX (height / istep, 2);
    parms.mu = 1.0 / (Float32) (xyitp->numcol - 1);
    parms.mv = 1.0 / (Float32) (xyitp->numrow - 1);
    nnode = xyitp->numcol * xyitp->numrow;

    /* Working storage and grid data arrays */

    parms.a = UT_MemTempArray (nkey, Float64);
    if (!parms.a) {
        return UT_False;
    }
    delta = UT_MemTempArray (nkey, Float64);
    if (!delta) {
        return UT_False;
    }
    xyitp->xdata = UT_MemTempArray (nnode, Float32);
    if (!xyitp->xdata) {
        return UT_False;
    }
    xyitp->ydata = UT_MemTempArray (nnode, Float32);
    if (!xyitp->ydata) {
        return UT_False;
    }
    xyitp->sdata = UT_MemTempArray (nnode, Float32);
    if (!xyitp->sdata) {
        return UT_False;
    }

    parms.xyitp = xyitp;
    parms.nkey = nkey;
    parms.xdest = xdest;
    parms.ydest = ydest;
    parms.r = r;

    /* First pass: displacement in x direction */

    for (k = 0; k < nkey; ++k) {
        delta[k] = xsrc[k] - xdest[k];
    }
    if (r <= 0.0) {
        if (!UT_MathHardyCoeff1 (nkey, xdest, ydest, delta,
                                 &parms.R2, parms.a)) {
            return UT_False;
        }
    } else if (!UT_MathHardyCoeff2 (nkey, xdest, ydest, delta,
                                    &parms.R2, parms.a, r)) {
        return UT_False;
    }
    if (!UT_THREAD_EXEC (conc_make_xyitp_1, &parms, IP_StateNumThreads, THREAD_STACKSIZE)) {
        return UT_False;
    }

    /* Second pass: displacement in y direction */

    for (k = 0; k < nkey; ++k) {
        delta[k] = ysrc[k] - ydest[k];
    }
    if (r <= 0.0) {
        if (!UT_MathHardyCoeff1 (nkey, xdest, ydest, delta,
                                 &parms.R2, parms.a)) {
            return UT_False;
        }
    } else if (!UT_MathHardyCoeff2 (nkey, xdest, ydest, delta,
                                    &parms.R2, parms.a, r)) {
        return UT_False;
    }
    if (!UT_THREAD_EXEC (conc_make_xyitp_2, &parms, IP_StateNumThreads, THREAD_STACKSIZE)) {
        return UT_False;
    }

    /* The grid data stays allocated; only the working storage is freed */

    UT_MemFree (parms.a);
    UT_MemFree (delta);
    return UT_True;
}

/* Build the blending factor grid "fitp" from a set of "nkey" arbitrarily
distributed key points, located at (xdest, ydest) and carrying the
weighting factors "f". The grid has one node every "istep" pixels of a
"width" by "height" image, but never fewer than two rows and two columns.
The mean of all factors is stored in "fitp->foffset"; only the deviations
from the mean are interpolated.
"r" selects the interpolation functions: UT_MathHardyCoeff1 for
r <= 0.0, UT_MathHardyCoeff2 otherwise.
Returns UT_False if an allocation, the coefficient computation or the
threaded pass fails. In that case, memory obtained here is not released by
this function. */

static UT_Bool make_finterp
        (Int32 istep, Int32 width, Int32 height,
         Int32 nkey,
         const Float32 f[],
         const Float32 xdest[], const Float32 ydest[],
         Float32 r,
         finterp *fitp)
{
    fiparms parms;
    Float64 *delta;
    Float32 mean;
    Int32   k, nnode;

    /* Grid dimensions and node spacing */

    fitp->numcol = UT_MAX (width / istep, 2);
    fitp->numrow = UT_MAX (height / istep, 2);
    parms.mu = 1.0 / (Float32) (fitp->numcol - 1);
    parms.mv = 1.0 / (Float32) (fitp->numrow - 1);
    nnode = fitp->numcol * fitp->numrow;

    /* Working storage and grid data array */

    parms.a = UT_MemTempArray (nkey, Float64);
    if (!parms.a) {
        return UT_False;
    }
    delta = UT_MemTempArray (nkey, Float64);
    if (!delta) {
        return UT_False;
    }
    fitp->fdata = UT_MemTempArray (nnode, Float32);
    if (!fitp->fdata) {
        return UT_False;
    }

    parms.fitp = fitp;
    parms.nkey = nkey;
    parms.xdest = xdest;
    parms.ydest = ydest;
    parms.r = r;

    /* Split the factors into their mean and the deviations from it */

    mean = 0.0;
    for (k = 0; k < nkey; ++k) {
        mean += f[k];
    }
    mean /= nkey;
    fitp->foffset = mean;
    for (k = 0; k < nkey; ++k) {
        delta[k] = f[k] - mean;
    }

    /* Interpolate the deviations over the grid */

    if (r <= 0.0) {
        if (!UT_MathHardyCoeff1 (nkey, xdest, ydest, delta,
                                 &parms.R2, parms.a)) {
            return UT_False;
        }
    } else if (!UT_MathHardyCoeff2 (nkey, xdest, ydest, delta,
                                    &parms.R2, parms.a, r)) {
        return UT_False;
    }
    if (!UT_THREAD_EXEC (conc_make_fitp, &parms, IP_StateNumThreads, THREAD_STACKSIZE)) {
        return UT_False;
    }

    /* The grid data stays allocated; only the working storage is freed */

    UT_MemFree (parms.a);
    UT_MemFree (delta);
    return UT_True;
}

/* Perform free-form deformations on an image. See the comments on function
IP_WarpKeypoint for an explanation of the parameters.

A pyramid is generated from "srcimg", the warp grid is built from the key
points, and the destination image is then filled by the threaded worker
"conc_warp". Returns UT_False if any of these steps fails.

The scaling factors divide by the destination width and height minus one.
For a destination image that is only one pixel wide or high, this divisor
would be zero and the workers would be handed infinite or NaN sampling
coordinates. The divisor is therefore limited to a minimum of 1, which makes
a one pixel wide (high) destination sample the left (lower) edge of the
warp grid. Destination images with at least two pixels per direction are
processed exactly as before. */

static UT_Bool warp
        (IP_ImageId srcimg, IP_ImageId destimg,
         Int32 nkey,
         const Float32 xsrc[], const Float32 ysrc[],
         const Float32 xdest[], const Float32 ydest[],
         Int32 istep,
         Float32 r,
         IP_FillModeType oor)
{
    warpparms parms;
    Int32     dest_xspan, dest_yspan;
    Float32   xratio, yratio;

    if (!(parms.srcpyr = IP_GenPyramid (srcimg)) ||
        !make_xyinterp (istep,
                        srcimg->pimg.desc.width, srcimg->pimg.desc.height,
                        nkey, xsrc, ysrc, xdest, ydest, r, &parms.xyitp)) {
        return UT_False;
    }

    /* Distance between the first and the last destination pixel, but never
       less than 1, so that the divisions below are always well-defined */

    dest_xspan = UT_MAX (destimg->pimg.desc.width - 1, 1);
    dest_yspan = UT_MAX (destimg->pimg.desc.height - 1, 1);

    /* The sampling area grows with the larger of the two source to
       destination size ratios */

    xratio =
        (Float32) (srcimg->pimg.desc.width - 1) /
        (Float32) dest_xspan;
    yratio =
        (Float32) (srcimg->pimg.desc.height - 1) /
        (Float32) dest_yspan;
    parms.msize = UT_MAX (xratio, yratio);

    /* Factors mapping destination pixel indices to the range 0.0 to 1.0 */

    parms.mx = 1.0 / (Float32) dest_xspan;
    parms.my = 1.0 / (Float32) dest_yspan;
    parms.srcimg = srcimg;
    parms.destimg = destimg;
    parms.oor = oor;
    return UT_THREAD_EXEC (conc_warp, &parms, IP_StateNumThreads, THREAD_STACKSIZE);
}

/* Blend two images. See the comments on function IP_BlendKeypoint
for an explanation of the parameters.

The key point positions in the destination image are computed as the
f-weighted mix of the positions in the two source images. The blending
factor grid is built from these positions. Then each source image is
warped towards the destination positions in turn: the first pass
("conc_blend_1") overwrites the destination image, the second pass
("conc_blend_2") adds to it. The pyramid and warp grid of the first pass
are allocated after UT_MemRemember and handed back with UT_MemRestore
before the second pass. Returns UT_False if any step fails.

The scaling factors divide by the destination width and height minus one.
For a destination image that is only one pixel wide or high, this divisor
would be zero and the workers would be handed infinite or NaN sampling
coordinates. The divisor is therefore limited to a minimum of 1, which makes
a one pixel wide (high) destination sample the left (lower) edge of the
grids. Destination images with at least two pixels per direction are
processed exactly as before. */

static UT_Bool blend
        (IP_ImageId srcimg1, IP_ImageId srcimg2, IP_ImageId destimg,
         Int32 nkey,
         const Float32 xsrc1[], const Float32 ysrc1[],
         const Float32 xsrc2[], const Float32 ysrc2[],
         const Float32 f[],
         Int32 istep,
         Float32 r,
         IP_FillModeType oor)
{
    UT_MemState memstate;
    blendparms  parms;
    Int32       i, dest_xspan, dest_yspan;
    Float32     xratio, yratio;
    Float32     *xdest, *ydest;

    /* Key point positions in the destination image */

    if (!(xdest = UT_MemTempArray (nkey, Float32)) ||
        !(ydest = UT_MemTempArray (nkey, Float32))) {
        return UT_False;
    }
    for (i = 0; i < nkey; ++i) {
        xdest[i] = f[i] * xsrc1[i] + (1.0 - f[i]) * xsrc2[i];
        ydest[i] = f[i] * ysrc1[i] + (1.0 - f[i]) * ysrc2[i];
    }
    if (!make_finterp (istep,
                       srcimg1->pimg.desc.width, srcimg1->pimg.desc.height,
                       nkey, f, xdest, ydest, r, &parms.fitp)) {
        return UT_False;
    }

    /* Distance between the first and the last destination pixel, but never
       less than 1, so that the divisions below are always well-defined */

    dest_xspan = UT_MAX (destimg->pimg.desc.width - 1, 1);
    dest_yspan = UT_MAX (destimg->pimg.desc.height - 1, 1);

    /* First pass: warp image 1, weighted by the blending factor */

    memstate = UT_MemRemember ();
    if (!(parms.srcpyr = IP_GenPyramid (srcimg1))) {
        return UT_False;
    }
    if (!make_xyinterp (istep,
                        srcimg1->pimg.desc.width, srcimg1->pimg.desc.height,
                        nkey, xsrc1, ysrc1, xdest, ydest, r, &parms.xyitp)) {
        return UT_False;
    }
    xratio =
        (Float32) (srcimg1->pimg.desc.width - 1) /
        (Float32) dest_xspan;
    yratio =
        (Float32) (srcimg1->pimg.desc.height - 1) /
        (Float32) dest_yspan;
    parms.msize = UT_MAX (xratio, yratio);
    parms.mx = 1.0 / (Float32) dest_xspan;
    parms.my = 1.0 / (Float32) dest_yspan;
    parms.srcimg = srcimg1;
    parms.destimg = destimg;
    parms.oor = oor;
    if (!UT_THREAD_EXEC (conc_blend_1, &parms, IP_StateNumThreads, THREAD_STACKSIZE)) {
        return UT_False;
    }
    UT_MemRestore (memstate);

    /* Second pass: warp image 2, weighted by one minus the blending
       factor, and add it to the result of the first pass */

    if (!(parms.srcpyr = IP_GenPyramid (srcimg2))) {
        return UT_False;
    }
    if (!make_xyinterp (istep,
                        srcimg2->pimg.desc.width, srcimg2->pimg.desc.height,
                        nkey, xsrc2, ysrc2, xdest, ydest, r, &parms.xyitp)) {
        return UT_False;
    }
    xratio =
        (Float32) (srcimg2->pimg.desc.width - 1) /
        (Float32) dest_xspan;
    yratio =
        (Float32) (srcimg2->pimg.desc.height - 1) /
        (Float32) dest_yspan;
    parms.msize = UT_MAX (xratio, yratio);
    parms.mx = 1.0 / (Float32) dest_xspan;
    parms.my = 1.0 / (Float32) dest_yspan;
    parms.srcimg = srcimg2;
    if (!UT_THREAD_EXEC (conc_blend_2, &parms, IP_StateNumThreads, THREAD_STACKSIZE)) {
        return UT_False;
    }
    return UT_True;
}

/***************************************************************************
 *[@e
 *      Name:           IP_WarpKeypoint
 *
 *      Usage:          Perform free-form deformations on an image.
 *
 *      Synopsis:       UT_Bool IP_WarpKeypoint(
 *                              IP_ImageId srcImg,
 *                              IP_ImageId destImg,
 *                              Int32 numKeys,
 *                              const Float32 xsrcList[],
 *                              const Float32 ysrcList[],
 *                              const Float32 xdestList[],
 *                              const Float32 ydestList[],
 *                              Int32 interpStep,
 *                              Float32 smoothness,
 *                              IP_FillModeType fillMode)
 *
 *      Description:    Image "srcImg" is deformed smoothly. The result is
 *                      stored in image "destImg". The deformation is defined
 *                      by a set of "numKeys" key points:
 *
 *                      The position of key point number "i" in "srcImg" is
 *                      (xsrcList[i], ysrcList[i]); (xdestList[i], ydestList[i])
 *                      defines the position of the key point in "destImg".
 *                      The pixels from "srcImg" are displaced so that the
 *                      pixel at position (xsrcList[i], ysrcList[i]) arrives at
 *                      position (xdestList[i], ydestList[i]) in "destImg". The
 *                      displacement of pixels whose positions do not coincide
 *                      with some key point is smoothly interpolated from the
 *                      key points.
 *
 *                      Interpolating smoothly between the key points can
 *                      be quite time-consuming. In order to save some time,
 *                      the interpolating function is sampled only every
 *                      "interpStep" pixels; in-between values are interpolated
 *                      linearly.
 *                      If "interpStep" is 1, the interpolation function is sampled
 *                      for every pixel and the resulting image looks as smooth
 *                      as possible. Setting "interpStep" to a value greater than 1
 *                      saves time, but the result looks less smooth.
 *
 *                      "smoothness" selects the smoothness of the function used to
 *                      interpolate between the key points:
 *                      If "smoothness" is less than or equal to 0.0, interpolation is
 *                      very smooth, but changing one key point has a strong
 *                      effect on the whole image.
 *                      If "smoothness" is greater than 0.0, a different interpolation
 *                      algorithm is used. The influence of key points is much
 *                      more limited to the area around the key points.
 *                      "smoothness" should be somewhere around 1.0;
 *                      interpolation becomes smoother when "smoothness"
 *                      increases.
 *
 *                      "fillMode" selects how colors are assigned to output image
 *                      pixels which are not covered by pixels in the input
 *                      image. The following values for "fillMode" are accepted:
 *
 *                          IP_FillModeFill     Pixels not covered by the input
 *                                              image are filled with the
 *                                              current drawing color.
 *
 *                          IP_FillModeWrap     The input image is repeated in
 *                                              horizontal and vertical
 *                                              direction so that the output
 *                                              image is tiled with copies of
 *                                              the input image.
 *
 *                          IP_FillModeClip     Input pixel coordinates are
 *                                              clipped so that regions outside
 *                                              the input image are filled with
 *                                              the color of the nearest input
 *                                              image pixel.
 *
 *                      Notes:
 *                      - "xsrcList" and "ysrcList" should be in the range from 0.0 to
 *                        1.0. The lower left corner of "srcImg" and "destImg"
 *                        is at position (0.0, 0.0); the upper right corner is
 *                        at position (1.0, 1.0).
 *
 *      States:         State settings influencing functionality:
 *                      Draw mask:    No
 *                      Draw mode:    No
 *                      Draw color:   No
 *                      Threading:    Yes
 *                      UByte format: All
 *                      Float format: All
 *
 *      Return value:   UT_True if successful, else UT_False.
 *
 *      See also:       IP_BlendKeypoint
 *                      IP_WarpFunct
 *                      IP_BlendFunct
 *                      IP_SetNumThreads
 *
 ***************************************************************************/

UT_Bool IP_WarpKeypoint
        (IP_ImageId srcImg, IP_ImageId destImg,
         Int32 numKeys,
         const Float32 xsrcList[], const Float32 ysrcList[],
         const Float32 xdestList[], const Float32 ydestList[],
         Int32 interpStep, Float32 smoothness, IP_FillModeType fillMode)
{
    UT_MemState mark;
    UT_Bool     ok;

    /* Reject invalid parameters before any work is done */

    if (numKeys < 1) {
        UT_ErrSetNum (UT_ErrParamInvalid, str_inotless, str_nkey, 1);
        return UT_False;
    }
    if (interpStep < 1) {
        UT_ErrSetNum (UT_ErrParamInvalid, str_inotless, str_istep, 1);
        return UT_False;
    }
    if (fillMode < 0 || fillMode >= IP_NumFillModeTypes) {
        UT_ErrSetNum (UT_ErrParamInvalid, str_fillmode, fillMode);
        return UT_False;
    }

    /* The memory state is recorded before and restored after the actual
       warping, whether it succeeds or not (presumably this releases the
       temporary memory obtained by "warp") */

    mark = UT_MemRemember ();
    ok = warp (srcImg, destImg, numKeys,
               xsrcList, ysrcList, xdestList, ydestList,
               interpStep, smoothness, fillMode);
    UT_MemRestore (mark);
    return ok;
}

/***************************************************************************
 *[@e
 *      Name:           IP_BlendKeypoint
 *
 *      Usage:          Blend two images, producing an in-between image.
 *
 *      Synopsis:       UT_Bool IP_BlendKeypoint(
 *                              IP_ImageId srcImg1,
 *                              IP_ImageId srcImg2,
 *                              IP_ImageId destImg,
 *                              Int32 numKeys,
 *                              const Float32 xsrc1List[],
 *                              const Float32 ysrc1List[],
 *                              const Float32 xsrc2List[],
 *                              const Float32 ysrc2List[],
 *                              const Float32 mixList[],
 *                              Int32 interpStep,
 *                              Float32 smoothness,
 *                              IP_FillModeType fillMode)
 *
 *      Description:    IP_BlendKeypoint interpolates between two source
 *                      images, "srcImg1" and "srcImg2", by displacing pixels
 *                      and blending the pixel colors. The result is stored
 *                      in image "destImg". The interpolation is defined by a
 *                      set of "numKeys" key points:
 *
 *                      The position of key point number "i" in "srcImg1" is
 *                      (xsrc1List[i], ysrc1List[i]); the key point's position in
 *                      "srcImg2" is (xsrc2List[i], ysrc2List[i]). The key point's
 *                      position in "destImg" is (xdest[i], ydest[i]), where
 *
 *                          xdest[i] = xsrc1List[i] * mixList[i] + xsrc2List[i] * (1-mixList[i]);
 *                          ydest[i] = ysrc1List[i] * mixList[i] + ysrc2List[i] * (1-mixList[i]);
 *
 *                      The color of the pixel at position (xdest[i], ydest[i])
 *                      in "destImg" is mixList[i] times the color of the pixel at
 *                      position (xsrc1List[i], ysrc1List[i]) in "srcImg1" plus
 *                      (1-mixList[i]) times the color of the pixel at position
 *                      (xsrc2List[i], ysrc2List[i]) in "srcImg2".
 *
 *                      For pixels in "destImg" that do not coincide with
 *                      any key point, the positions in "srcImg1" and
 *                      "srcImg2" from which the pixel colors are taken are
 *                      smoothly interpolated from the key points.
 *
 *                      Interpolating smoothly between the key points can
 *                      be quite time-consuming. In order to save some time,
 *                      the interpolating function is sampled only every
 *                      "interpStep" pixels; in-between values are interpolated
 *                      linearly.
 *                      If "interpStep" is 1, the interpolation function is sampled
 *                      for every pixel and the resulting image looks as smooth
 *                      as possible. Setting "interpStep" to a value greater than 1
 *                      saves time, but the result looks less smooth.
 *
 *                      "smoothness" selects the smoothness of the function used to
 *                      interpolate between the key points:
 *                      If "smoothness" is less than or equal to 0.0, interpolation is
 *                      very smooth, but changing one key point has a strong
 *                      effect on the whole image.
 *                      If "smoothness" is greater than 0.0, a different interpolation
 *                      algorithm is used. The influence of key points is much
 *                      more limited to the area around the key points. "smoothness"
 *                      should be somewhere around 1.0; interpolation becomes
 *                      smoother when "smoothness" increases.
 *
 *                      "fillMode" selects how colors are assigned to output image
 *                      pixels which are not covered by pixels in the input
 *                      images. The following values for "fillMode" are accepted:
 *
 *                          IP_FillModeFill     Pixels not covered by the input
 *                                              images are filled with the
 *                                              current drawing color.
 *
 *                          IP_FillModeWrap     The input images are repeated
 *                                              in horizontal and vertical
 *                                              direction so that the output
 *                                              image is tiled with copies of
 *                                              the input images.
 *
 *                          IP_FillModeClip     Input pixel coordinates are
 *                                              clipped so that regions outside
 *                                              the input images are filled
 *                                              with the color of the nearest
 *                                              input image pixel.
 *
 *                      Notes:
 *                      - "xsrc1List", "ysrc1List", "xsrc2List" and "ysrc2List"
 *                        should be in the range from 0.0 to 1.0.
 *                        The lower left corner of "srcImg1", "srcImg2" and
 *                        "destImg" is at position (0.0, 0.0).
 *                        The upper right corner is at position (1.0, 1.0).
 *
 *      States:         State settings influencing functionality:
 *                      Draw mask:    No
 *                      Draw mode:    No
 *                      Draw color:   Yes (only with IP_FillModeFill)
 *                      Threading:    Yes
 *                      UByte format: All
 *                      Float format: All
 *
 *      Return value:   UT_True if successful, else UT_False.
 *
 *      See also:       IP_WarpKeypoint
 *                      IP_WarpFunct
 *                      IP_BlendFunct
 *                      IP_SetNumThreads
 *
 ***************************************************************************/

UT_Bool IP_BlendKeypoint
        (IP_ImageId srcImg1,
         IP_ImageId srcImg2,
         IP_ImageId destImg,
         Int32 numKeys,
         const Float32 xsrc1List[],
         const Float32 ysrc1List[],
         const Float32 xsrc2List[],
         const Float32 ysrc2List[],
         const Float32 mixList[],
         Int32 interpStep,
         Float32 smoothness,
         IP_FillModeType fillMode)
{
    UT_Bool     ok;
    UT_MemState savedMem;

    /* Check the scalar parameters one after another. The first violation
       is reported as UT_ErrParamInvalid and the call returns UT_False
       without touching the images. */

    /* At least one key point is required. */
    if (numKeys <= 0) {
        UT_ErrSetNum (UT_ErrParamInvalid, str_inotless, str_nkey, 1);
        return UT_False;
    }

    /* The sampling step of the interpolation function must be positive. */
    if (interpStep <= 0) {
        UT_ErrSetNum (UT_ErrParamInvalid, str_inotless, str_istep, 1);
        return UT_False;
    }

    /* The fill mode must lie in the range [0, IP_NumFillModeTypes). */
    if (!(fillMode >= 0 && fillMode < IP_NumFillModeTypes)) {
        UT_ErrSetNum (UT_ErrParamInvalid, str_fillmode, fillMode);
        return UT_False;
    }

    /* Bracket the actual work with UT_MemRemember / UT_MemRestore.
       NOTE(review): presumably this releases the scratch memory allocated
       inside blend(), on success as well as on failure -- confirm against
       the UT_Memory documentation. */
    savedMem = UT_MemRemember ();
    ok = blend (srcImg1, srcImg2, destImg,
                numKeys,
                xsrc1List, ysrc1List,
                xsrc2List, ysrc2List,
                mixList,
                interpStep, smoothness, fillMode);
    UT_MemRestore (savedMem);

    return ok;
}
