#	$Id: math.pl,v 1.7 1999/01/20 07:44:59 ryu Exp $

#	Copyright (C) 1999 Robert K. Yu
#	email: robert@yu.org

#	This file is part of Autochar.

#	Autochar is free software; you can redistribute it and/or modify
#	it under the terms of the GNU General Public License as published by
#	the Free Software Foundation; either version 2, or (at your option)
#	any later version.

#	Autochar is distributed in the hope that it will be useful,
#	but WITHOUT ANY WARRANTY; without even the implied warranty of
#	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#	GNU General Public License for more details.

#	You should have received a copy of the GNU General Public License
#	along with Autochar; see the file COPYING.  If not, write to the
#	Free Software Foundation, Inc., 59 Temple Place - Suite 330,
#	Boston, MA 02111-1307, USA.

#   svlinreg --
#
#	Perform single variable linear regression.  Given lists @X and @Y,
#	calculate a, b, and r2 for the specified type of fit:
#		lin: y = a + b * x
#		exp: y = a * exp (b * x)
#		pow: y = a * x^b
#		log: y = a + b * ln(x)
#	Input is a concatenated list of data points ($type, \@X, \@Y);
#	Output is a list (a, b, r2);
#
sub svlinreg {

    local ($type, *X, *Y) = @_;

    my($x, $y, $sumx, $sumy, $sumxy, $sumx2, $sumy2, $N);
    my($a, $b, $r2);
    my($i);

    # init
    $sumx = 0.0;
    $sumy = 0.0;
    $sumxy = 0.0;
    $sumx2 = 0.0;
    $sumy2 = 0.0;

    $N = $#X;

    for ($i = 0; $i < $N; $i++) {
	if ($type eq 'lin') {
	    $x = $X[$i]; 
	    $y = $Y[$i]; 
	} elsif ($type eq 'exp') {
	    $x = $X[$i]; 
	    $y = log($Y[$i]); 
	} elsif ($type eq 'log') {
	    $x = log($X[$i]); 
	    $y = $Y[$i]; 
	} elsif ($type eq 'pow') {
	    $x = log($X[$i]); 
	    $y = log($Y[$i]); 
	} else {
	    printf STDERR "ERROR: unknown regression type '$type'.\n";
	    die;
	}

	$sumx += $x;
	$sumy += $y;
	$sumxy += ($x * $y);
	$sumx2 += ($x**2);
	$sumy2 += ($y**2);
    }

    # calculate a, b, r2
    $a = ($sumy*$sumx2 - $sumx*$sumxy) / ($N*$sumx2 - $sumx**2);
    $b = ($N*$sumxy - $sumx*$sumy) / ($N*$sumx2 - $sumx**2);
    $r2 = ($a*$sumy + $b*$sumxy - ($sumy**2)/$N) / ($sumy2 - ($sumy**2)/$N);

    return ($a, $b, $r2);
}


#
#    mvlinreg --
#
#	If the m multivariable dataset are give as:
#	
#	Independent variables:
#	    x1: x11, x12, ..., x1N
#	    x2: x21, x22, ..., x2N
#	    ...
#	    xM: xM1, xM2, ..., xMN
#
#	and the dependent variable:
#	    y: y1, y2, ..., yN
#
#	Then the generalized multivariable linear regression is:
#	    y = c0 + c1 x1 + c2 x2 + ... + cN xN
#
#	The inputs to this function are:
#	    ndata	= number of data points (N)
#	    nvar	= number of multivariables (M)
#	    X		= pointer to 2-dimensional array X[i][j],
#			  where (0 <= i < N) and (0 <= j < M).
#	    Y		= pointer to dependent variable Y[i], where
#			  (0 <= i < N).
#	Example:
#	    @X = (
#		[ 11, 21, 31, 41 ],	# datapoint 0
#		[ 12, 22, 32, 42 ],	# datapoint 1
#		[ 13, 23, 33, 43 ],	# datapoint 2
#		[ 14, 24, 34, 44 ],	# datapoint 3
#	    );
#
#	    @Y = (0, 1, 2, 3);
#
sub mvlinreg {

    local($ndata, $nvar, *X, *Y) = @_;
    local (@x, @y, @a, @b, @c);
    local ($i, $j, $k, $l);

    # place X[0:N-1][0:M-1] into x[1:N][1:M]
    for $i (1 .. $ndata) {
	for $j (1 .. $nvar) {
	    $x[$i][$j] = $X[$i-1][$j-1];
	} # j
    } # i
    for $i (1 .. $ndata)  {
	$x[$i][0]= 1.0;
    } # i

    # place Y[0:N-1] into y[1:N]
    for $i (1 .. $ndata) {
	$y[$i] = $Y[$i-1];
    } # i

    for $j (1 .. $nvar+1) {
	for $k (1 .. $nvar+1) {
	    $a[$j][$k] = 0.0;
	} # k
        $b[$j] = 0.0;
    } # j

    for $j (1 .. $nvar+1) {
	for $k ($j .. $nvar+1) {
	    for $i (1 .. $ndata) {
		$a[$j][$k] = $a[$j][$k] + ($x[$i][$j-1] * $x[$i][$k-1]);
	    } # i
	} # k
    } # j

    for $j (2 .. $nvar+1) {
	for $k (1 .. $j-1) {
	    $a[$j][$k] = $a[$k][$j];
	} # k
    } # j

    for $j (1 .. $nvar+1) {
	for $i (1 .. $ndata) {
	    $b[$j] = $b[$j] + ($x[$i][$j-1] * $y[$i]);
	} # i
    } # j

    &solve_mvlinreg( \@a, \@b, \@c, $nvar+1);

    return(@c);
}

sub solve_mvlinreg(a,b,c,n) {
    local(*a, *b, *c, $n) = @_;
    my ($i, $j, $k, $l);
    my ($ihold, $big, $hold, $fac, $sum);

    for $i (1 .. $n-1) {
	$big = abs($a[$i][$i]);
        $ihold = $i;
        for $j ($i+1 .. $n) {
	    if (abs($a[$j][$i]) > $big) {
		$big = abs($a[$j][$i]);
		$ihold = $j;
	    }
	} # j

	if ($ihold != $i) {
	    for $j ($i .. $n) {
		$hold = $a[$i][$j];
		$a[$i][$j] = $a[$ihold][$j];
		$a[$ihold][$j] = $hold;
	    } # j

	    $hold = $b[$i];
	    $b[$i] = $b[$ihold];
	    $b[$ihold] = $hold;
	}

	for $j ($i+1 .. $n) {
	    $fac = $a[$j][$i]/$a[$i][$i];
	    for $l (1 .. $n) {
		$a[$j][$l] = $a[$j][$l] - ($a[$i][$l] * $fac);
	    }
	    $b[$j] = $b[$j] - ($b[$i] * $fac);
	} # j 
    } # i

    for ($i=$n; $i>=1; $i--) {
	$sum = 0;
        for $l ($i+1 .. $n) {
	    $sum = $sum + ($a[$i][$l] * $c[$l]);
	} # l
        $c[$i] = ($b[$i]-$sum) / $a[$i][$i];
    } # i
}

#   is_even:
#	Returns 1 if scalar value is even.  0 otherwise.
#
sub is_even {
    my($i) = @_;
    return (($i % 2) == 0);
}


#   is_odd:
#	Returns 1 if scalar value is odd.  0 otherwise.
#
sub is_odd {
    my($i) = @_;
    return (($i % 2) == 1);
}


1;