/**************************************************************************
**
** Copyright (C) 1993 David E. Steward & Zbigniew Leyk, all rights reserved.
**
** Meschach Library
**
** This Meschach Library is provided "as is" without any express
** or implied warranty of any kind with respect to this software.
** In particular the authors shall not be liable for any direct,
** indirect, special, incidental or consequential damages arising
** in any way from use of the software.
**
** Everyone is granted permission to copy, modify and redistribute this
** Meschach Library, provided:
** 1. All copies contain this copyright notice.
** 2. All modified copies shall carry a notice stating who
** made the last modification and the date of such modification.
** 3. No charge is made for this software or works derived from it.
** This clause shall not be construed as constraining other software
** distributed on the same medium as this software, nor is a
** distribution fee considered a charge.
**
***************************************************************************/
/*
Band matrix factorisation routines
*/
/* bdfactor.c 18/11/93 */
static char rcsid[] = "$Id: ";
#include <stdio.h>
#include "matrix2.h"
#include <math.h>
/* generate band matrix
for a matrix with n columns,
lb subdiagonals and ub superdiagonals;
Way of saving a band of a matrix:
first we save subdiagonals (from 0 to lb-1);
then main diagonal (in the lb row)
and then superdiagonals (from lb+1 to lb+ub)
in such a way that the elements which were previously
in one column are now also in one column
*/
BAND *bd_get(lb,ub,n)
int lb, ub, n;
{
BAND *A;
if (lb < 0 || ub < 0 || n <= 0)
error(E_NEG,"bd_get");
if ((A = NEW(BAND)) == (BAND *)NULL)
error(E_MEM,"bd_get");
else if (mem_info_is_on()) {
mem_bytes(TYPE_BAND,0,sizeof(BAND));
mem_numvar(TYPE_BAND,1);
}
lb = A->lb = min(n-1,lb);
ub = A->ub = min(n-1,ub);
A->mat = m_get(lb+ub+1,n);
return A;
}
int bd_free(A)
BAND *A;
{
if ( A == (BAND *)NULL || A->lb < 0 || A->ub < 0 )
/* don't trust it */
return (-1);
if (A->mat) m_free(A->mat);
if (mem_info_is_on()) {
mem_bytes(TYPE_BAND,sizeof(BAND),0);
mem_numvar(TYPE_BAND,-1);
}
free((char *)A);
return 0;
}
/* resize band matrix */
BAND *bd_resize(A,new_lb,new_ub,new_n)
BAND *A;
int new_lb,new_ub,new_n;
{
int lb,ub,i,j,l,shift,umin;
Real **Av;
if (new_lb < 0 || new_ub < 0 || new_n <= 0)
error(E_NEG,"bd_resize");
if ( ! A )
return bd_get(new_lb,new_ub,new_n);
if ( A->lb+A->ub+1 > A->mat->m )
error(E_INTERN,"bd_resize");
if ( A->lb == new_lb && A->ub == new_ub && A->mat->n == new_n )
return A;
lb = A->lb;
ub = A->ub;
Av = A->mat->me;
umin = min(ub,new_ub);
/* ensure that unused triangles at edges are zero'd */
for ( i = 0; i < lb; i++ )
for ( j = A->mat->n - lb + i; j < A->mat->n; j++ )
Av[i][j] = 0.0;
for ( i = lb+1,l=1; l <= umin; i++,l++ )
for ( j = 0; j < l; j++ )
Av[i][j] = 0.0;
new_lb = A->lb = min(new_lb,new_n-1);
new_ub = A->ub = min(new_ub,new_n-1);
A->mat = m_resize(A->mat,new_lb+new_ub+1,new_n);
Av = A->mat->me;
/* if new_lb != lb then move the rows to get the main diag
in the new_lb row */
if (new_lb > lb) {
shift = new_lb-lb;
for (i=lb+umin, l=i+shift; i >= 0; i--,l--)
MEM_COPY(Av[i],Av[l],new_n*sizeof(Real));
for (l=shift-1; l >= 0; l--)
__zero__(Av[l],new_n);
}
else if (new_lb < lb) {
shift = lb - new_lb;
for (i=shift, l=0; i <= lb+umin; i++,l++)
MEM_COPY(Av[i],Av[l],new_n*sizeof(Real));
for (i=lb+umin+1; i <= new_lb+new_ub; i++)
__zero__(Av[i],new_n);
}
return A;
}
BAND *bd_copy(A,B)
BAND *A,*B;
{
int lb,ub,i,j,n;
if ( !A )
error(E_NULL,"bd_copy");
if (A == B) return B;
n = A->mat->n;
if ( !B )
B = bd_get(A->lb,A->ub,n);
else if (B->lb != A->lb || B->ub != A->ub || B->mat->n != n )
B = bd_resize(B,A->lb,A->ub,n);
if (A->mat == B->mat) return B;
ub = B->ub = A->ub;
lb = B->lb = A->lb;
for ( i=0, j=n-lb; i <= lb; i++, j++ )
MEM_COPY(A->mat->me[i],B->mat->me[i],j*sizeof(Real));
for ( i=lb+1, j=1; i <= lb+ub; i++, j++ )
MEM_COPY(A->mat->me[i]+j,B->mat->me[i]+j,(n - j)*sizeof(Real));
return B;
}
/* copy band matrix to a square matrix */
MAT *band2mat(bA,A)
BAND *bA;
MAT *A;
{
int i,j,l,n,n1;
int lb, ub;
Real **bmat;
if ( !bA || !A)
error(E_NULL,"band2mat");
if ( bA->mat == A )
error(E_INSITU,"band2mat");
ub = bA->ub;
lb = bA->lb;
n = bA->mat->n;
n1 = n-1;
bmat = bA->mat->me;
A = m_resize(A,n,n);
m_zero(A);
for (j=0; j < n; j++)
for (i=min(n1,j+lb),l=lb+j-i; i >= max(0,j-ub); i--,l++)
A->me[i][j] = bmat[l][j];
return A;
}
/* copy a square matrix to a band matrix with
lb subdiagonals and ub superdiagonals */
BAND *mat2band(A,lb,ub,bA)
BAND *bA;
MAT *A;
int lb, ub;
{
int i, j, l, n1;
Real **bmat;
if (! A || ! bA)
error(E_NULL,"mat2band");
if (ub < 0 || lb < 0)
error(E_SIZES,"mat2band");
if (bA->mat == A)
error(E_INSITU,"mat2band");
n1 = A->n-1;
lb = min(n1,lb);
ub = min(n1,ub);
bA = bd_resize(bA,lb,ub,n1+1);
bmat = bA->mat->me;
for (j=0; j <= n1; j++)
for (i=min(n1,j+lb),l=lb+j-i; i >= max(0,j-ub); i--,l++)
bmat[l][j] = A->me[i][j];
return bA;
}
/* transposition of matrix in;
out - matrix after transposition;
can be done in situ
*/
BAND *bd_transp(in,out)
BAND *in, *out;
{
int i, j, jj, l, k, lb, ub, lub, n, n1;
int in_situ;
Real **in_v, **out_v;
if ( in == (BAND *)NULL || in->mat == (MAT *)NULL )
error(E_NULL,"bd_transp");
lb = in->lb;
ub = in->ub;
lub = lb+ub;
n = in->mat->n;
n1 = n-1;
in_situ = ( in == out );
if ( ! in_situ )
out = bd_resize(out,ub,lb,n);
else
{ /* only need to swap lb and ub fields */
out->lb = ub;
out->ub = lb;
}
in_v = in->mat->me;
if (! in_situ) {
int sh_in,sh_out;
out_v = out->mat->me;
for (i=0, l=lub, k=lb-i; i <= lub; i++,l--,k--) {
sh_in = max(-k,0);
sh_out = max(k,0);
MEM_COPY(&(in_v[i][sh_in]),&(out_v[l][sh_out]),
(n-sh_in-sh_out)*sizeof(Real));
/**********************************
for (j=n1-sh_out, jj=n1-sh_in; j >= sh_in; j--,jj--) {
out_v[l][jj] = in_v[i][j];
}
**********************************/
}
}
else if (ub == lb) {
Real tmp;
for (i=0, l=lub, k=lb-i; i < lb; i++,l--,k--) {
for (j=n1-k, jj=n1; j >= 0; j--,jj--) {
tmp = in_v[l][jj];
in_v[l][jj] = in_v[i][j];
in_v[i][j] = tmp;
}
}
}
else if (ub > lb) { /* hence i-ub <= 0 & l-lb >= 0 */
int p,pp,lbi;
for (i=0, l=lub; i < (lub+1)/2; i++,l--) {
lbi = lb-i;
for (j=l-lb, jj=0, p=max(-lbi,0), pp = max(l-ub,0); j <= n1;
j++,jj++,p++,pp++) {
in_v[l][pp] = in_v[i][p];
in_v[i][jj] = in_v[l][j];
}
for ( ; p <= n1-max(lbi,0); p++,pp++)
in_v[l][pp] = in_v[i][p];
}
if (lub%2 == 0) { /* shift only */
i = lub/2;
for (j=max(i-lb,0), jj=0; jj <= n1-ub+i; j++,jj++)
in_v[i][jj] = in_v[i][j];
}
}
else { /* ub < lb, hence ub-l <= 0 & lb-i >= 0 */
int p,pp,ubi;
for (i=0, l=lub; i < (lub+1)/2; i++,l--) {
ubi = i-ub;
for (j=n1-max(lb-l,0), jj=n1-max(-ubi,0), p=n1-lb+i, pp=n1;
p >= 0; j--, jj--, pp--, p--) {
in_v[i][jj] = in_v[l][j];
in_v[l][pp] = in_v[i][p];
}
for ( ; jj >= max(ubi,0); j--, jj--)
in_v[i][jj] = in_v[l][j];
}
if (lub%2 == 0) { /* shift only */
i = lub/2;
for (j=n1-lb+i, jj=n1-max(ub-i,0); j >= 0; j--, jj--)
in_v[i][jj] = in_v[i][j];
}
}
return out;
}
/* bdLUfactor -- gaussian elimination with partial pivoting
-- on entry, the matrix A in band storage with elements
in rows 0 to lb+ub;
The jth column of A is stored in the jth column of
band A (bA) as follows:
bA->mat->me[lb+j-i][j] = A->me[i][j] for
max(0,j-lb) <= i <= min(A->n-1,j+ub);
-- on exit: U is stored as an upper triangular matrix
with lb+ub superdiagonals in rows lb to 2*lb+ub,
and the matrix L is stored in rows 0 to lb-1.
Matrix U is permuted, whereas L is not permuted !!!
Therefore we save some memory.
*/
BAND *bdLUfactor(bA,pivot)
BAND *bA;
PERM *pivot;
{
int i, j, k, l, n, n1, lb, ub, lub, k_end, k_lub;
int i_max, shift;
Real **bA_v;
Real max1, temp;
if ( bA==(BAND *)NULL || pivot==(PERM *)NULL )
error(E_NULL,"bdLUfactor");
lb = bA->lb;
ub = bA->ub;
lub = lb+ub;
n = bA->mat->n;
n1 = n-1;
lub = lb+ub;
if ( pivot->size != n )
error(E_SIZES,"bdLUfactor");
/* initialise pivot with identity permutation */
for ( i=0; i < n; i++ )
pivot->pe[i] = i;
/* extend band matrix */
/* extended part is filled with zeros */
bA = bd_resize(bA,lb,min(n1,lub),n);
bA_v = bA->mat->me;
/* main loop */
for ( k=0; k < n1; k++ )
{
k_end = max(0,lb+k-n1);
k_lub = min(k+lub,n1);
/* find the best pivot row */
max1 = 0.0;
i_max = -1;
for ( i=lb; i >= k_end; i-- ) {
temp = fabs(bA_v[i][k]);
if ( temp > max1 )
{ max1 = temp; i_max = i; }
}
/* if no pivot then ignore column k... */
if ( i_max == -1 )
continue;
/* do we pivot ? */
if ( i_max != lb ) /* yes we do... */
{
/* save transposition using non-shifted indices */
shift = lb-i_max;
px_transp(pivot,k+shift,k);
for ( i=lb, j=k; j <= k_lub; i++,j++ )
{
temp = bA_v[i][j];
bA_v[i][j] = bA_v[i-shift][j];
bA_v[i-shift][j] = temp;
}
}
/* row operations */
for ( i=lb-1; i >= k_end; i-- ) {
temp = bA_v[i][k] /= bA_v[lb][k];
shift = lb-i;
for ( j=k+1,l=i+1; j <= k_lub; l++,j++ )
bA_v[l][j] -= temp*bA_v[l+shift][j];
}
}
return bA;
}
/* bdLUsolve -- given an LU factorisation in bA, solve bA*x=b */
/* pivot is changed upon return */
VEC *bdLUsolve(bA,pivot,b,x)
BAND *bA;
PERM *pivot;
VEC *b,*x;
{
int i,j,l,n,n1,pi,lb,ub,jmin, maxj;
Real c;
Real **bA_v;
if ( bA==(BAND *)NULL || b==(VEC *)NULL || pivot==(PERM *)NULL )
error(E_NULL,"bdLUsolve");
if ( bA->mat->n != b->dim || bA->mat->n != pivot->size)
error(E_SIZES,"bdLUsolve");
lb = bA->lb;
ub = bA->ub;
n = b->dim;
n1 = n-1;
bA_v = bA->mat->me;
x = v_resize(x,b->dim);
px_vec(pivot,b,x);
/* solve Lx = b; implicit diagonal = 1
L is not permuted, therefore it must be permuted now
*/
px_inv(pivot,pivot);
for (j=0; j < n; j++) {
jmin = j+1;
c = x->ve[j];
maxj = max(0,j+lb-n1);
for (i=jmin,l=lb-1; l >= maxj; i++,l--) {
if ( (pi = pivot->pe[i]) < jmin)
pi = pivot->pe[i] = pivot->pe[pi];
x->ve[pi] -= bA_v[l][j]*c;
}
}
/* solve Ux = b; explicit diagonal */
x->ve[n1] /= bA_v[lb][n1];
for (i=n-2; i >= 0; i--) {
c = x->ve[i];
for (j=min(n1,i+ub), l=lb+j-i; j > i; j--,l--)
c -= bA_v[l][j]*x->ve[j];
x->ve[i] = c/bA_v[lb][i];
}
return (x);
}
/* LDLfactor -- L.D.L' factorisation of A in-situ;
A is a band matrix
it works using only lower bandwidth & main diagonal
so it is possible to set A->ub = 0
*/
BAND *bdLDLfactor(A)
BAND *A;
{
int i,j,k,n,n1,lb,ki,jk,ji,lbkm,lbkp;
Real **Av;
Real c, cc;
if ( ! A )
error(E_NULL,"bdLDLfactor");
if (A->lb == 0) return A;
lb = A->lb;
n = A->mat->n;
n1 = n-1;
Av = A->mat->me;
for (k=0; k < n; k++) {
lbkm = lb-k;
lbkp = lb+k;
/* matrix D */
c = Av[lb][k];
for (j=max(0,-lbkm), jk=lbkm+j; j < k; j++, jk++) {
cc = Av[jk][j];
c -= Av[lb][j]*cc*cc;
}
if (c == 0.0)
error(E_SING,"bdLDLfactor");
Av[lb][k] = c;
/* matrix L */
for (i=min(n1,lbkp), ki=lbkp-i; i > k; i--,ki++) {
c = Av[ki][k];
for (j=max(0,i-lb), ji=lb+j-i, jk=lbkm+j; j < k;
j++, ji++, jk++)
c -= Av[lb][j]*Av[ji][j]*Av[jk][j];
Av[ki][k] = c/Av[lb][k];
}
}
return A;
}
/* solve A*x = b, where A is factorized by
Choleski LDL^T factorization */
VEC *bdLDLsolve(A,b,x)
BAND *A;
VEC *b, *x;
{
int i,j,l,n,n1,lb,ilb;
Real **Av, *Avlb;
Real c;
if ( ! A || ! b )
error(E_NULL,"bdLDLsolve");
if ( A->mat->n != b->dim )
error(E_SIZES,"bdLDLsolve");
n = A->mat->n;
n1 = n-1;
x = v_resize(x,n);
lb = A->lb;
Av = A->mat->me;
Avlb = Av[lb];
/* solve L*y = b */
x->ve[0] = b->ve[0];
for (i=1; i < n; i++) {
ilb = i-lb;
c = b->ve[i];
for (j=max(0,ilb), l=j-ilb; j < i; j++,l++)
c -= Av[l][j]*x->ve[j];
x->ve[i] = c;
}
/* solve D*z = y */
for (i=0; i < n; i++)
x->ve[i] /= Avlb[i];
/* solve L^T*x = z */
for (i=n-2; i >= 0; i--) {
ilb = i+lb;
c = x->ve[i];
for (j=min(n1,ilb), l=ilb-j; j > i; j--,l++)
c -= Av[l][i]*x->ve[j];
x->ve[i] = c;
}
return x;
}
/* ******************************************************
This function is a contribution from Ruediger Franke.
His e-mail addres is: Ruediger.Franke@rz.tu-ilmenau.de
******************************************************
*/
/* bd_mv_mlt --
* computes out = A * x
* may not work in situ (x != out)
*/
VEC *bd_mv_mlt(A, x, out)
BAND *A;
VEC *x, *out;
{
int i, j, j_end, k;
int start_idx, end_idx;
int n, m, lb, ub;
Real **A_me;
Real *x_ve;
Real sum;
if (!A || !x)
error(E_NULL,"bd_mv_mlt");
if (x->dim != A->mat->n)
error(E_SIZES,"bd_mv_mlt");
if (!out || out->dim != A->mat->n)
out = v_resize(out, A->mat->n);
if (out == x)
error(E_INSITU,"bd_mv_mlt");
n = A->mat->n;
m = A->mat->m;
lb = A->lb;
ub = A->ub;
A_me = A->mat->me;
start_idx = lb;
end_idx = m + n-1 - ub;
for (i=0; i<n; i++, start_idx--, end_idx--) {
j = max(0, start_idx);
k = max(0, -start_idx);
j_end = min(m, end_idx);
x_ve = x->ve + k;
sum = 0.0;
for (; j < j_end; j++, k++)
sum += A_me[j][k] * *x_ve++;
out->ve[i] = sum;
}
return out;
}