Shapeworks Studio  2.1
Shape analysis software suite
List of all members | Public Types | Protected Member Functions | Friends
Eigen::internal::SparseLUImpl< Scalar, Index > Class Template Reference

#include <SparseLUImpl.h>

Public Types

typedef Matrix< Scalar, Dynamic, 1 > ScalarVector
 
typedef Matrix< Index, Dynamic, 1 > IndexVector
 
typedef ScalarVector::RealScalar RealScalar
 
typedef Ref< Matrix< Scalar, Dynamic, 1 > > BlockScalarVector
 
typedef Ref< Matrix< Index, Dynamic, 1 > > BlockIndexVector
 
typedef LU_GlobalLU_t< IndexVector, ScalarVectorGlobalLU_t
 
typedef SparseMatrix< Scalar, ColMajor, Index > MatrixType
 

Protected Member Functions

template<typename VectorType >
Index expand (VectorType &vec, Index &length, Index nbElts, Index keep_prev, Index &num_expansions)
 
Index memInit (Index m, Index n, Index annz, Index lwork, Index fillratio, Index panel_size, GlobalLU_t &glu)
 Allocate various working space for the numerical factorization phase. More...
 
template<typename VectorType >
Index memXpand (VectorType &vec, Index &maxlen, Index nbElts, MemType memtype, Index &num_expansions)
 Expand the existing storage. More...
 
void heap_relax_snode (const Index n, IndexVector &et, const Index relax_columns, IndexVector &descendants, IndexVector &relax_end)
 Identify the initial relaxed supernodes. More...
 
void relax_snode (const Index n, IndexVector &et, const Index relax_columns, IndexVector &descendants, IndexVector &relax_end)
 Identify the initial relaxed supernodes. More...
 
Index snode_dfs (const Index jcol, const Index kcol, const MatrixType &mat, IndexVector &xprune, IndexVector &marker, GlobalLU_t &glu)
 
Index snode_bmod (const Index jcol, const Index fsupc, ScalarVector &dense, GlobalLU_t &glu)
 
Index pivotL (const Index jcol, const RealScalar &diagpivotthresh, IndexVector &perm_r, IndexVector &iperm_c, Index &pivrow, GlobalLU_t &glu)
 Performs the numerical pivotin on the current column of L, and the CDIV operation. More...
 
template<typename Traits >
void dfs_kernel (const Index jj, IndexVector &perm_r, Index &nseg, IndexVector &panel_lsub, IndexVector &segrep, Ref< IndexVector > repfnz_col, IndexVector &xprune, Ref< IndexVector > marker, IndexVector &parent, IndexVector &xplore, GlobalLU_t &glu, Index &nextl_col, Index krow, Traits &traits)
 
void panel_dfs (const Index m, const Index w, const Index jcol, MatrixType &A, IndexVector &perm_r, Index &nseg, ScalarVector &dense, IndexVector &panel_lsub, IndexVector &segrep, IndexVector &repfnz, IndexVector &xprune, IndexVector &marker, IndexVector &parent, IndexVector &xplore, GlobalLU_t &glu)
 Performs a symbolic factorization on a panel of columns [jcol, jcol+w) More...
 
void panel_bmod (const Index m, const Index w, const Index jcol, const Index nseg, ScalarVector &dense, ScalarVector &tempv, IndexVector &segrep, IndexVector &repfnz, GlobalLU_t &glu)
 Performs numeric block updates (sup-panel) in topological order. More...
 
Index column_dfs (const Index m, const Index jcol, IndexVector &perm_r, Index maxsuper, Index &nseg, BlockIndexVector lsub_col, IndexVector &segrep, BlockIndexVector repfnz, IndexVector &xprune, IndexVector &marker, IndexVector &parent, IndexVector &xplore, GlobalLU_t &glu)
 Performs a symbolic factorization on column jcol and decide the supernode boundary. More...
 
Index column_bmod (const Index jcol, const Index nseg, BlockScalarVector dense, ScalarVector &tempv, BlockIndexVector segrep, BlockIndexVector repfnz, Index fpanelc, GlobalLU_t &glu)
 Performs numeric block updates (sup-col) in topological order. More...
 
Index copy_to_ucol (const Index jcol, const Index nseg, IndexVector &segrep, BlockIndexVector repfnz, IndexVector &perm_r, BlockScalarVector dense, GlobalLU_t &glu)
 Performs numeric block updates (sup-col) in topological order. More...
 
void pruneL (const Index jcol, const IndexVector &perm_r, const Index pivrow, const Index nseg, const IndexVector &segrep, BlockIndexVector repfnz, IndexVector &xprune, GlobalLU_t &glu)
 Prunes the L-structure. More...
 
void countnz (const Index n, Index &nnzL, Index &nnzU, GlobalLU_t &glu)
 Count Nonzero elements in the factors.
 
void fixupL (const Index n, const IndexVector &perm_r, GlobalLU_t &glu)
 Fix up the data storage lsub for L-subscripts. More...
 

Friends

template<typename , typename >
struct column_dfs_traits
 

Detailed Description

template<typename Scalar, typename Index>
class Eigen::internal::SparseLUImpl< Scalar, Index >

Base class for sparseLU

Definition at line 20 of file SparseLUImpl.h.

Member Function Documentation

template<typename Scalar , typename Index>
Index SparseLUImpl< Scalar, Index >::column_bmod ( const Index  jcol,
const Index  nseg,
BlockScalarVector  dense,
ScalarVector tempv,
BlockIndexVector  segrep,
BlockIndexVector  repfnz,
Index  fpanelc,
GlobalLU_t glu 
)
protected

Performs numeric block updates (sup-col) in topological order.

Parameters
jcolcurrent column to update
nsegNumber of segments in the U part
denseStore the full representation of the column
tempvworking array
segrepsegment representative ...
repfnz??? First nonzero column in each row ??? ...
fpanelcFirst column in the current panel
gluGlobal LU data.
Returns
0 - successful return > 0 - number of bytes allocated when run out of space

Definition at line 53 of file SparseLU_column_bmod.h.

54 {
55  Index jsupno, k, ksub, krep, ksupno;
56  Index lptr, nrow, isub, irow, nextlu, new_next, ufirst;
57  Index fsupc, nsupc, nsupr, luptr, kfnz, no_zeros;
58  /* krep = representative of current k-th supernode
59  * fsupc = first supernodal column
60  * nsupc = number of columns in a supernode
61  * nsupr = number of rows in a supernode
62  * luptr = location of supernodal LU-block in storage
63  * kfnz = first nonz in the k-th supernodal segment
64  * no_zeros = no lf leading zeros in a supernodal U-segment
65  */
66 
67  jsupno = glu.supno(jcol);
68  // For each nonzero supernode segment of U[*,j] in topological order
69  k = nseg - 1;
70  Index d_fsupc; // distance between the first column of the current panel and the
71  // first column of the current snode
72  Index fst_col; // First column within small LU update
73  Index segsize;
74  for (ksub = 0; ksub < nseg; ksub++)
75  {
76  krep = segrep(k); k--;
77  ksupno = glu.supno(krep);
78  if (jsupno != ksupno )
79  {
80  // outside the rectangular supernode
81  fsupc = glu.xsup(ksupno);
82  fst_col = (std::max)(fsupc, fpanelc);
83 
84  // Distance from the current supernode to the current panel;
85  // d_fsupc = 0 if fsupc > fpanelc
86  d_fsupc = fst_col - fsupc;
87 
88  luptr = glu.xlusup(fst_col) + d_fsupc;
89  lptr = glu.xlsub(fsupc) + d_fsupc;
90 
91  kfnz = repfnz(krep);
92  kfnz = (std::max)(kfnz, fpanelc);
93 
94  segsize = krep - kfnz + 1;
95  nsupc = krep - fst_col + 1;
96  nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
97  nrow = nsupr - d_fsupc - nsupc;
98  Index lda = glu.xlusup(fst_col+1) - glu.xlusup(fst_col);
99 
100 
101  // Perform a triangular solver and block update,
102  // then scatter the result of sup-col update to dense
103  no_zeros = kfnz - fst_col;
104  if(segsize==1)
105  LU_kernel_bmod<1>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
106  else
107  LU_kernel_bmod<Dynamic>::run(segsize, dense, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
108  } // end if jsupno
109  } // end for each segment
110 
111  // Process the supernodal portion of L\U[*,j]
112  nextlu = glu.xlusup(jcol);
113  fsupc = glu.xsup(jsupno);
114 
115  // copy the SPA dense into L\U[*,j]
116  Index mem;
117  new_next = nextlu + glu.xlsub(fsupc + 1) - glu.xlsub(fsupc);
118  Index offset = internal::first_multiple<Index>(new_next, internal::packet_traits<Scalar>::size) - new_next;
119  if(offset)
120  new_next += offset;
121  while (new_next > glu.nzlumax )
122  {
123  mem = memXpand<ScalarVector>(glu.lusup, glu.nzlumax, nextlu, LUSUP, glu.num_expansions);
124  if (mem) return mem;
125  }
126 
127  for (isub = glu.xlsub(fsupc); isub < glu.xlsub(fsupc+1); isub++)
128  {
129  irow = glu.lsub(isub);
130  glu.lusup(nextlu) = dense(irow);
131  dense(irow) = Scalar(0.0);
132  ++nextlu;
133  }
134 
135  if(offset)
136  {
137  glu.lusup.segment(nextlu,offset).setZero();
138  nextlu += offset;
139  }
140  glu.xlusup(jcol + 1) = nextlu; // close L\U(*,jcol);
141 
142  /* For more updates within the panel (also within the current supernode),
143  * should start from the first column of the panel, or the first column
144  * of the supernode, whichever is bigger. There are two cases:
145  * 1) fsupc < fpanelc, then fst_col <-- fpanelc
146  * 2) fsupc >= fpanelc, then fst_col <-- fsupc
147  */
148  fst_col = (std::max)(fsupc, fpanelc);
149 
150  if (fst_col < jcol)
151  {
152  // Distance between the current supernode and the current panel
153  // d_fsupc = 0 if fsupc >= fpanelc
154  d_fsupc = fst_col - fsupc;
155 
156  lptr = glu.xlsub(fsupc) + d_fsupc;
157  luptr = glu.xlusup(fst_col) + d_fsupc;
158  nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc); // leading dimension
159  nsupc = jcol - fst_col; // excluding jcol
160  nrow = nsupr - d_fsupc - nsupc;
161 
162  // points to the beginning of jcol in snode L\U(jsupno)
163  ufirst = glu.xlusup(jcol) + d_fsupc;
164  Index lda = glu.xlusup(jcol+1) - glu.xlusup(jcol);
165  Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A( &(glu.lusup.data()[luptr]), nsupc, nsupc, OuterStride<>(lda) );
166  VectorBlock<ScalarVector> u(glu.lusup, ufirst, nsupc);
167  u = A.template triangularView<UnitLower>().solve(u);
168 
169  new (&A) Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > ( &(glu.lusup.data()[luptr+nsupc]), nrow, nsupc, OuterStride<>(lda) );
170  VectorBlock<ScalarVector> l(glu.lusup, ufirst+nsupc, nrow);
171  l.noalias() -= A * u;
172 
173  } // End if fst_col
174  return 0;
175 }
template<typename Scalar , typename Index>
Index SparseLUImpl< Scalar, Index >::column_dfs ( const Index  m,
const Index  jcol,
IndexVector perm_r,
Index  maxsuper,
Index &  nseg,
BlockIndexVector  lsub_col,
IndexVector segrep,
BlockIndexVector  repfnz,
IndexVector xprune,
IndexVector marker,
IndexVector parent,
IndexVector xplore,
GlobalLU_t glu 
)
protected

Performs a symbolic factorization on column jcol and decide the supernode boundary.

A supernode representative is the last column of a supernode. The nonzeros in U[*,j] are segments that end at supernodes representatives. The routine returns a list of the supernodal representatives in topological order of the dfs that generates them. The location of the first nonzero in each supernodal segment (supernodal entry location) is also returned.

Parameters
mnumber of rows in the matrix
jcolCurrent column
perm_rRow permutation
maxsuperMaximum number of column allowed in a supernode
[in,out]nsegNumber of segments in current U[*,j] - new segments appended
lsub_coldefines the rhs vector to start the dfs
[in,out]segrepSegment representatives - new segments appended
repfnzFirst nonzero location in each row
xprune
markermarker[i] == jj, if i was visited during dfs of current column jj;
parent
xploreworking array
gluglobal LU data
Returns
0 success > 0 number of bytes allocated when run out of space

Definition at line 93 of file SparseLU_column_dfs.h.

94 {
95 
96  Index jsuper = glu.supno(jcol);
97  Index nextl = glu.xlsub(jcol);
98  VectorBlock<IndexVector> marker2(marker, 2*m, m);
99 
100 
101  column_dfs_traits<IndexVector, ScalarVector> traits(jcol, jsuper, glu, *this);
102 
103  // For each nonzero in A(*,jcol) do dfs
104  for (Index k = 0; ((k < m) ? lsub_col[k] != emptyIdxLU : false) ; k++)
105  {
106  Index krow = lsub_col(k);
107  lsub_col(k) = emptyIdxLU;
108  Index kmark = marker2(krow);
109 
110  // krow was visited before, go to the next nonz;
111  if (kmark == jcol) continue;
112 
113  dfs_kernel(jcol, perm_r, nseg, glu.lsub, segrep, repfnz, xprune, marker2, parent,
114  xplore, glu, nextl, krow, traits);
115  } // for each nonzero ...
116 
117  Index fsupc, jptr, jm1ptr, ito, ifrom, istop;
118  Index nsuper = glu.supno(jcol);
119  Index jcolp1 = jcol + 1;
120  Index jcolm1 = jcol - 1;
121 
122  // check to see if j belongs in the same supernode as j-1
123  if ( jcol == 0 )
124  { // Do nothing for column 0
125  nsuper = glu.supno(0) = 0 ;
126  }
127  else
128  {
129  fsupc = glu.xsup(nsuper);
130  jptr = glu.xlsub(jcol); // Not yet compressed
131  jm1ptr = glu.xlsub(jcolm1);
132 
133  // Use supernodes of type T2 : see SuperLU paper
134  if ( (nextl-jptr != jptr-jm1ptr-1) ) jsuper = emptyIdxLU;
135 
136  // Make sure the number of columns in a supernode doesn't
137  // exceed threshold
138  if ( (jcol - fsupc) >= maxsuper) jsuper = emptyIdxLU;
139 
140  /* If jcol starts a new supernode, reclaim storage space in
141  * glu.lsub from previous supernode. Note we only store
142  * the subscript set of the first and last columns of
143  * a supernode. (first for num values, last for pruning)
144  */
145  if (jsuper == emptyIdxLU)
146  { // starts a new supernode
147  if ( (fsupc < jcolm1-1) )
148  { // >= 3 columns in nsuper
149  ito = glu.xlsub(fsupc+1);
150  glu.xlsub(jcolm1) = ito;
151  istop = ito + jptr - jm1ptr;
152  xprune(jcolm1) = istop; // intialize xprune(jcol-1)
153  glu.xlsub(jcol) = istop;
154 
155  for (ifrom = jm1ptr; ifrom < nextl; ++ifrom, ++ito)
156  glu.lsub(ito) = glu.lsub(ifrom);
157  nextl = ito; // = istop + length(jcol)
158  }
159  nsuper++;
160  glu.supno(jcol) = nsuper;
161  } // if a new supernode
162  } // end else: jcol > 0
163 
164  // Tidy up the pointers before exit
165  glu.xsup(nsuper+1) = jcolp1;
166  glu.supno(jcolp1) = nsuper;
167  xprune(jcol) = nextl; // Intialize upper bound for pruning
168  glu.xlsub(jcolp1) = nextl;
169 
170  return 0;
171 }
template<typename Scalar , typename Index>
Index SparseLUImpl< Scalar, Index >::copy_to_ucol ( const Index  jcol,
const Index  nseg,
IndexVector segrep,
BlockIndexVector  repfnz,
IndexVector perm_r,
BlockScalarVector  dense,
GlobalLU_t glu 
)
protected

Performs numeric block updates (sup-col) in topological order.

Parameters
jcolcurrent column to update
nsegNumber of segments in the U part
segrepsegment representative ...
repfnzFirst nonzero column in each row ...
perm_rRow permutation
denseStore the full representation of the column
gluGlobal LU data.
Returns
0 - successful return > 0 - number of bytes allocated when run out of space

Definition at line 50 of file SparseLU_copy_to_ucol.h.

51 {
52  Index ksub, krep, ksupno;
53 
54  Index jsupno = glu.supno(jcol);
55 
56  // For each nonzero supernode segment of U[*,j] in topological order
57  Index k = nseg - 1, i;
58  Index nextu = glu.xusub(jcol);
59  Index kfnz, isub, segsize;
60  Index new_next,irow;
61  Index fsupc, mem;
62  for (ksub = 0; ksub < nseg; ksub++)
63  {
64  krep = segrep(k); k--;
65  ksupno = glu.supno(krep);
66  if (jsupno != ksupno ) // should go into ucol();
67  {
68  kfnz = repfnz(krep);
69  if (kfnz != emptyIdxLU)
70  { // Nonzero U-segment
71  fsupc = glu.xsup(ksupno);
72  isub = glu.xlsub(fsupc) + kfnz - fsupc;
73  segsize = krep - kfnz + 1;
74  new_next = nextu + segsize;
75  while (new_next > glu.nzumax)
76  {
77  mem = memXpand<ScalarVector>(glu.ucol, glu.nzumax, nextu, UCOL, glu.num_expansions);
78  if (mem) return mem;
79  mem = memXpand<IndexVector>(glu.usub, glu.nzumax, nextu, USUB, glu.num_expansions);
80  if (mem) return mem;
81 
82  }
83 
84  for (i = 0; i < segsize; i++)
85  {
86  irow = glu.lsub(isub);
87  glu.usub(nextu) = perm_r(irow); // Unlike the L part, the U part is stored in its final order
88  glu.ucol(nextu) = dense(irow);
89  dense(irow) = Scalar(0.0);
90  nextu++;
91  isub++;
92  }
93 
94  } // end nonzero U-segment
95 
96  } // end if jsupno
97 
98  } // end for each segment
99  glu.xusub(jcol + 1) = nextu; // close U(*,jcol)
100  return 0;
101 }
template<typename Scalar , typename Index>
template<typename VectorType >
Index SparseLUImpl< Scalar, Index >::expand ( VectorType &  vec,
Index &  length,
Index  nbElts,
Index  keep_prev,
Index &  num_expansions 
)
protected

Expand the existing storage to accomodate more fill-ins

Parameters
vecValid pointer to the vector to allocate or expand
[in,out]lengthAt input, contain the current length of the vector that is to be increased. At output, length of the newly allocated vector
[in]nbEltsCurrent number of elements in the factors
keep_prev1: use length and do not expand the vector; 0: compute new_len and expand
[in,out]num_expansionsNumber of times the memory has been expanded

Definition at line 64 of file SparseLU_Memory.h.

65 {
66 
67  float alpha = 1.5; // Ratio of the memory increase
68  Index new_len; // New size of the allocated memory
69 
70  if(num_expansions == 0 || keep_prev)
71  new_len = length ; // First time allocate requested
72  else
73  new_len = Index(alpha * length);
74 
75  VectorType old_vec; // Temporary vector to hold the previous values
76  if (nbElts > 0 )
77  old_vec = vec.segment(0,nbElts);
78 
79  //Allocate or expand the current vector
80  try
81  {
82  vec.resize(new_len);
83  }
84  catch(std::bad_alloc& )
85  {
86  if ( !num_expansions )
87  {
88  // First time to allocate from LUMemInit()
89  throw; // Pass the exception to LUMemInit() which has a try... catch block
90  }
91  if (keep_prev)
92  {
93  // In this case, the memory length should not not be reduced
94  return new_len;
95  }
96  else
97  {
98  // Reduce the size and increase again
99  Index tries = 0; // Number of attempts
100  do
101  {
102  alpha = (alpha + 1)/2;
103  new_len = Index(alpha * length);
104  try
105  {
106  vec.resize(new_len);
107  }
108  catch(std::bad_alloc& )
109  {
110  tries += 1;
111  if ( tries > 10) return new_len;
112  }
113  } while (!vec.size());
114  }
115  }
116  //Copy the previous values to the newly allocated space
117  if (nbElts > 0)
118  vec.segment(0, nbElts) = old_vec;
119 
120 
121  length = new_len;
122  if(num_expansions) ++num_expansions;
123  return 0;
124 }
template<typename Scalar , typename Index>
void SparseLUImpl< Scalar, Index >::fixupL ( const Index  n,
const IndexVector perm_r,
GlobalLU_t glu 
)
protected

Fix up the data storage lsub for L-subscripts.

It removes the subscripts sets for structural pruning, and applies permutation to the remaining subscripts

Definition at line 52 of file SparseLU_Utils.h.

53 {
54  Index fsupc, i, j, k, jstart;
55 
56  Index nextl = 0;
57  Index nsuper = (glu.supno)(n);
58 
59  // For each supernode
60  for (i = 0; i <= nsuper; i++)
61  {
62  fsupc = glu.xsup(i);
63  jstart = glu.xlsub(fsupc);
64  glu.xlsub(fsupc) = nextl;
65  for (j = jstart; j < glu.xlsub(fsupc + 1); j++)
66  {
67  glu.lsub(nextl) = perm_r(glu.lsub(j)); // Now indexed into P*A
68  nextl++;
69  }
70  for (k = fsupc+1; k < glu.xsup(i+1); k++)
71  glu.xlsub(k) = nextl; // other columns in supernode i
72  }
73 
74  glu.xlsub(n) = nextl;
75 }
template<typename Scalar , typename Index>
void SparseLUImpl< Scalar, Index >::heap_relax_snode ( const Index  n,
IndexVector et,
const Index  relax_columns,
IndexVector descendants,
IndexVector relax_end 
)
protected

Identify the initial relaxed supernodes.

This routine applied to a symmetric elimination tree. It assumes that the matrix has been reordered according to the postorder of the etree

Parameters
nThe number of columns
etelimination tree
relax_columnsMaximum number of columns allowed in a relaxed snode
descendantsNumber of descendants of each node in the etree
relax_endlast column in a supernode

Definition at line 46 of file SparseLU_heap_relax_snode.h.

47 {
48 
49  // The etree may not be postordered, but its heap ordered
50  IndexVector post;
51  internal::treePostorder(n, et, post); // Post order etree
52  IndexVector inv_post(n+1);
53  Index i;
54  for (i = 0; i < n+1; ++i) inv_post(post(i)) = i; // inv_post = post.inverse()???
55 
56  // Renumber etree in postorder
57  IndexVector iwork(n);
58  IndexVector et_save(n+1);
59  for (i = 0; i < n; ++i)
60  {
61  iwork(post(i)) = post(et(i));
62  }
63  et_save = et; // Save the original etree
64  et = iwork;
65 
66  // compute the number of descendants of each node in the etree
67  relax_end.setConstant(emptyIdxLU);
68  Index j, parent;
69  descendants.setZero();
70  for (j = 0; j < n; j++)
71  {
72  parent = et(j);
73  if (parent != n) // not the dummy root
74  descendants(parent) += descendants(j) + 1;
75  }
76  // Identify the relaxed supernodes by postorder traversal of the etree
77  Index snode_start; // beginning of a snode
78  Index k;
79  Index nsuper_et_post = 0; // Number of relaxed snodes in postordered etree
80  Index nsuper_et = 0; // Number of relaxed snodes in the original etree
81  Index l;
82  for (j = 0; j < n; )
83  {
84  parent = et(j);
85  snode_start = j;
86  while ( parent != n && descendants(parent) < relax_columns )
87  {
88  j = parent;
89  parent = et(j);
90  }
91  // Found a supernode in postordered etree, j is the last column
92  ++nsuper_et_post;
93  k = n;
94  for (i = snode_start; i <= j; ++i)
95  k = (std::min)(k, inv_post(i));
96  l = inv_post(j);
97  if ( (l - k) == (j - snode_start) ) // Same number of columns in the snode
98  {
99  // This is also a supernode in the original etree
100  relax_end(k) = l; // Record last column
101  ++nsuper_et;
102  }
103  else
104  {
105  for (i = snode_start; i <= j; ++i)
106  {
107  l = inv_post(i);
108  if (descendants(i) == 0)
109  {
110  relax_end(l) = l;
111  ++nsuper_et;
112  }
113  }
114  }
115  j++;
116  // Search for a new leaf
117  while (descendants(j) != 0 && j < n) j++;
118  } // End postorder traversal of the etree
119 
120  // Recover the original etree
121  et = et_save;
122 }
void treePostorder(Index n, IndexVector &parent, IndexVector &post)
Post order a tree.
template<typename Scalar , typename Index>
Index SparseLUImpl< Scalar, Index >::memInit ( Index  m,
Index  n,
Index  annz,
Index  lwork,
Index  fillratio,
Index  panel_size,
GlobalLU_t glu 
)
protected

Allocate various working space for the numerical factorization phase.

Parameters
mnumber of rows of the input matrix
nnumber of columns
annznumber of initial nonzeros in the matrix
lworkif lwork=-1, this routine returns an estimated size of the required memory
glupersistent data to facilitate multiple factors : will be deleted later ??
fillratioestimated ratio of fill in the factors
panel_sizeSize of a panel
Returns
an estimated size of the required memory if lwork = -1; otherwise, return the size of actually allocated memory when allocation failed, and 0 on success
Note
Unlike SuperLU, this routine does not support successive factorization with the same pattern and the same row permutation

Definition at line 139 of file SparseLU_Memory.h.

140 {
141  Index& num_expansions = glu.num_expansions; //No memory expansions so far
142  num_expansions = 0;
143  glu.nzumax = glu.nzlumax = (std::max)(fillratio * annz, m*n); // estimated number of nonzeros in U
144  glu.nzlmax = (std::max)(Index(4), fillratio) * annz / 4; // estimated nnz in L factor
145 
146  // Return the estimated size to the user if necessary
147  Index tempSpace;
148  tempSpace = (2*panel_size + 4 + LUNoMarker) * m * sizeof(Index) + (panel_size + 1) * m * sizeof(Scalar);
149  if (lwork == emptyIdxLU)
150  {
151  Index estimated_size;
152  estimated_size = (5 * n + 5) * sizeof(Index) + tempSpace
153  + (glu.nzlmax + glu.nzumax) * sizeof(Index) + (glu.nzlumax+glu.nzumax) * sizeof(Scalar) + n;
154  return estimated_size;
155  }
156 
157  // Setup the required space
158 
159  // First allocate Integer pointers for L\U factors
160  glu.xsup.resize(n+1);
161  glu.supno.resize(n+1);
162  glu.xlsub.resize(n+1);
163  glu.xlusup.resize(n+1);
164  glu.xusub.resize(n+1);
165 
166  // Reserve memory for L/U factors
167  do
168  {
169  try
170  {
171  expand<ScalarVector>(glu.lusup, glu.nzlumax, 0, 0, num_expansions);
172  expand<ScalarVector>(glu.ucol,glu.nzumax, 0, 0, num_expansions);
173  expand<IndexVector>(glu.lsub,glu.nzlmax, 0, 0, num_expansions);
174  expand<IndexVector>(glu.usub,glu.nzumax, 0, 1, num_expansions);
175  }
176  catch(std::bad_alloc& )
177  {
178  //Reduce the estimated size and retry
179  glu.nzlumax /= 2;
180  glu.nzumax /= 2;
181  glu.nzlmax /= 2;
182  if (glu.nzlumax < annz ) return glu.nzlumax;
183  }
184 
185  } while (!glu.lusup.size() || !glu.ucol.size() || !glu.lsub.size() || !glu.usub.size());
186 
187 
188 
189  ++num_expansions;
190  return 0;
191 
192 } // end LuMemInit
template<typename Scalar , typename Index>
template<typename VectorType >
Index SparseLUImpl< Scalar, Index >::memXpand ( VectorType &  vec,
Index &  maxlen,
Index  nbElts,
MemType  memtype,
Index &  num_expansions 
)
protected

Expand the existing storage.

Parameters
vecvector to expand
[in,out]maxlenOn input, previous size of vec (Number of elements to copy ). on output, new size
nbEltscurrent number of elements in the vector.
memtypeType of the element to expand
num_expansionsNumber of expansions
Returns
0 on success, > 0 size of the memory allocated so far

Definition at line 205 of file SparseLU_Memory.h.

206 {
207  Index failed_size;
208  if (memtype == USUB)
209  failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 1, num_expansions);
210  else
211  failed_size = this->expand<VectorType>(vec, maxlen, nbElts, 0, num_expansions);
212 
213  if (failed_size)
214  return failed_size;
215 
216  return 0 ;
217 }
template<typename Scalar , typename Index>
void SparseLUImpl< Scalar, Index >::panel_bmod ( const Index  m,
const Index  w,
const Index  jcol,
const Index  nseg,
ScalarVector dense,
ScalarVector tempv,
IndexVector segrep,
IndexVector repfnz,
GlobalLU_t glu 
)
protected

Performs numeric block updates (sup-panel) in topological order.

Before entering this routine, the original nonzeros in the panel were already copied i nto the spa[m,w]

Parameters
mnumber of rows in the matrix
wPanel size
jcolStarting column of the panel
nsegNumber of segments in the U part
denseStore the full representation of the panel
tempvworking array
segrepsegment representative... first row in the segment
repfnzFirst nonzero rows
gluGlobal LU data.

Definition at line 56 of file SparseLU_panel_bmod.h.

59 {
60 
61  Index ksub,jj,nextl_col;
62  Index fsupc, nsupc, nsupr, nrow;
63  Index krep, kfnz;
64  Index lptr; // points to the row subscripts of a supernode
65  Index luptr; // ...
66  Index segsize,no_zeros ;
67  // For each nonz supernode segment of U[*,j] in topological order
68  Index k = nseg - 1;
69  const Index PacketSize = internal::packet_traits<Scalar>::size;
70 
71  for (ksub = 0; ksub < nseg; ksub++)
72  { // For each updating supernode
73  /* krep = representative of current k-th supernode
74  * fsupc = first supernodal column
75  * nsupc = number of columns in a supernode
76  * nsupr = number of rows in a supernode
77  */
78  krep = segrep(k); k--;
79  fsupc = glu.xsup(glu.supno(krep));
80  nsupc = krep - fsupc + 1;
81  nsupr = glu.xlsub(fsupc+1) - glu.xlsub(fsupc);
82  nrow = nsupr - nsupc;
83  lptr = glu.xlsub(fsupc);
84 
85  // loop over the panel columns to detect the actual number of columns and rows
86  Index u_rows = 0;
87  Index u_cols = 0;
88  for (jj = jcol; jj < jcol + w; jj++)
89  {
90  nextl_col = (jj-jcol) * m;
91  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
92 
93  kfnz = repfnz_col(krep);
94  if ( kfnz == emptyIdxLU )
95  continue; // skip any zero segment
96 
97  segsize = krep - kfnz + 1;
98  u_cols++;
99  u_rows = (std::max)(segsize,u_rows);
100  }
101 
102  if(nsupc >= 2)
103  {
104  Index ldu = internal::first_multiple<Index>(u_rows, PacketSize);
105  Map<Matrix<Scalar,Dynamic,Dynamic>, Aligned, OuterStride<> > U(tempv.data(), u_rows, u_cols, OuterStride<>(ldu));
106 
107  // gather U
108  Index u_col = 0;
109  for (jj = jcol; jj < jcol + w; jj++)
110  {
111  nextl_col = (jj-jcol) * m;
112  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
113  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
114 
115  kfnz = repfnz_col(krep);
116  if ( kfnz == emptyIdxLU )
117  continue; // skip any zero segment
118 
119  segsize = krep - kfnz + 1;
120  luptr = glu.xlusup(fsupc);
121  no_zeros = kfnz - fsupc;
122 
123  Index isub = lptr + no_zeros;
124  Index off = u_rows-segsize;
125  for (Index i = 0; i < off; i++) U(i,u_col) = 0;
126  for (Index i = 0; i < segsize; i++)
127  {
128  Index irow = glu.lsub(isub);
129  U(i+off,u_col) = dense_col(irow);
130  ++isub;
131  }
132  u_col++;
133  }
134  // solve U = A^-1 U
135  luptr = glu.xlusup(fsupc);
136  Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc);
137  no_zeros = (krep - u_rows + 1) - fsupc;
138  luptr += lda * no_zeros + no_zeros;
139  Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > A(glu.lusup.data()+luptr, u_rows, u_rows, OuterStride<>(lda) );
140  U = A.template triangularView<UnitLower>().solve(U);
141 
142  // update
143  luptr += u_rows;
144  Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > B(glu.lusup.data()+luptr, nrow, u_rows, OuterStride<>(lda) );
145  eigen_assert(tempv.size()>w*ldu + nrow*w + 1);
146 
147  Index ldl = internal::first_multiple<Index>(nrow, PacketSize);
148  Index offset = (PacketSize-internal::first_aligned(B.data(), PacketSize)) % PacketSize;
149  Map<Matrix<Scalar,Dynamic,Dynamic>, 0, OuterStride<> > L(tempv.data()+w*ldu+offset, nrow, u_cols, OuterStride<>(ldl));
150 
151  L.setZero();
152  internal::sparselu_gemm<Scalar>(L.rows(), L.cols(), B.cols(), B.data(), B.outerStride(), U.data(), U.outerStride(), L.data(), L.outerStride());
153 
154  // scatter U and L
155  u_col = 0;
156  for (jj = jcol; jj < jcol + w; jj++)
157  {
158  nextl_col = (jj-jcol) * m;
159  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
160  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
161 
162  kfnz = repfnz_col(krep);
163  if ( kfnz == emptyIdxLU )
164  continue; // skip any zero segment
165 
166  segsize = krep - kfnz + 1;
167  no_zeros = kfnz - fsupc;
168  Index isub = lptr + no_zeros;
169 
170  Index off = u_rows-segsize;
171  for (Index i = 0; i < segsize; i++)
172  {
173  Index irow = glu.lsub(isub++);
174  dense_col(irow) = U.coeff(i+off,u_col);
175  U.coeffRef(i+off,u_col) = 0;
176  }
177 
178  // Scatter l into SPA dense[]
179  for (Index i = 0; i < nrow; i++)
180  {
181  Index irow = glu.lsub(isub++);
182  dense_col(irow) -= L.coeff(i,u_col);
183  L.coeffRef(i,u_col) = 0;
184  }
185  u_col++;
186  }
187  }
188  else // level 2 only
189  {
190  // Sequence through each column in the panel
191  for (jj = jcol; jj < jcol + w; jj++)
192  {
193  nextl_col = (jj-jcol) * m;
194  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero column index for each row
195  VectorBlock<ScalarVector> dense_col(dense, nextl_col, m); // Scatter/gather entire matrix column from/to here
196 
197  kfnz = repfnz_col(krep);
198  if ( kfnz == emptyIdxLU )
199  continue; // skip any zero segment
200 
201  segsize = krep - kfnz + 1;
202  luptr = glu.xlusup(fsupc);
203 
204  Index lda = glu.xlusup(fsupc+1)-glu.xlusup(fsupc);// nsupr
205 
206  // Perform a trianglar solve and block update,
207  // then scatter the result of sup-col update to dense[]
208  no_zeros = kfnz - fsupc;
209  if(segsize==1) LU_kernel_bmod<1>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
210  else if(segsize==2) LU_kernel_bmod<2>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
211  else if(segsize==3) LU_kernel_bmod<3>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
212  else LU_kernel_bmod<Dynamic>::run(segsize, dense_col, tempv, glu.lusup, luptr, lda, nrow, glu.lsub, lptr, no_zeros);
213  } // End for each column in the panel
214  }
215 
216  } // End for each updating supernode
217 } // end panel bmod
template<typename Scalar , typename Index>
void SparseLUImpl< Scalar, Index >::panel_dfs ( const Index  m,
const Index  w,
const Index  jcol,
MatrixType A,
IndexVector perm_r,
Index &  nseg,
ScalarVector dense,
IndexVector panel_lsub,
IndexVector segrep,
IndexVector repfnz,
IndexVector xprune,
IndexVector marker,
IndexVector parent,
IndexVector xplore,
GlobalLU_t glu 
)
protected

Performs a symbolic factorization on a panel of columns [jcol, jcol+w)

A supernode representative is the last column of a supernode. The nonzeros in U[*,j] are segments that end at supernodes representatives

The routine returns a list of the supernodal representatives in topological order of the dfs that generates them. This list is a superset of the topological order of each individual column within the panel. The location of the first nonzero in each supernodal segment (supernodal entry location) is also returned. Each column has a separate list for this purpose.

Two markers arrays are used for dfs : marker[i] == jj, if i was visited during dfs of current column jj; marker1[i] >= jcol, if i was visited by earlier columns in this panel;

Parameters
[in]mnumber of rows in the matrix
[in]wPanel size
[in]jcolStarting column of the panel
[in]AInput matrix in column-major storage
[in]perm_rRow permutation
[out]nsegNumber of U segments
[out]denseAccumulate the column vectors of the panel
[out]panel_lsubSubscripts of the row in the panel
[out]segrepSegment representative i.e first nonzero row of each segment
[out]repfnzFirst nonzero location in each row
[out]xpruneThe pruned elimination tree
[out]markerwork vector
parentThe elimination tree
xplorework vector
gluThe global data structure

Definition at line 219 of file SparseLU_panel_dfs.h.

220 {
221  Index nextl_col; // Next available position in panel_lsub[*,jj]
222 
223  // Initialize pointers
224  VectorBlock<IndexVector> marker1(marker, m, m);
225  nseg = 0;
226 
227  panel_dfs_traits<IndexVector> traits(jcol, marker1.data());
228 
229  // For each column in the panel
230  for (Index jj = jcol; jj < jcol + w; jj++)
231  {
232  nextl_col = (jj - jcol) * m;
233 
234  VectorBlock<IndexVector> repfnz_col(repfnz, nextl_col, m); // First nonzero location in each row
235  VectorBlock<ScalarVector> dense_col(dense,nextl_col, m); // Accumulate a column vector here
236 
237 
238  // For each nnz in A[*, jj] do depth first search
239  for (typename MatrixType::InnerIterator it(A, jj); it; ++it)
240  {
241  Index krow = it.row();
242  dense_col(krow) = it.value();
243 
244  Index kmark = marker(krow);
245  if (kmark == jj)
246  continue; // krow visited before, go to the next nonzero
247 
248  dfs_kernel(jj, perm_r, nseg, panel_lsub, segrep, repfnz_col, xprune, marker, parent,
249  xplore, glu, nextl_col, krow, traits);
250  }// end for nonzeros in column jj
251 
252  } // end for column jj
253 }
template<typename Scalar , typename Index>
Index SparseLUImpl< Scalar, Index >::pivotL ( const Index  jcol,
const RealScalar &  diagpivotthresh,
IndexVector perm_r,
IndexVector iperm_c,
Index &  pivrow,
GlobalLU_t glu 
)
protected

Performs the numerical pivotin on the current column of L, and the CDIV operation.

Pivot policy : (1) Compute thresh = u * max_(i>=j) abs(A_ij); (2) IF user specifies pivot row k and abs(A_kj) >= thresh THEN pivot row = k; ELSE IF abs(A_jj) >= thresh THEN pivot row = j; ELSE pivot row = m;

Note: If you absolutely want to use a given pivot order, then set u=0.0.

Parameters
jcolThe current column of L
diagpivotthreshdiagonal pivoting threshold
[in,out]perm_rRow permutation (threshold pivoting)
[in]iperm_ccolumn permutation - used to finf diagonal of Pc*A*Pc'
[out]pivrowThe pivot row
gluGlobal LU data
Returns
0 if success, i > 0 if U(i,i) is exactly zero

Definition at line 60 of file SparseLU_pivotL.h.

61 {
62 
63  Index fsupc = (glu.xsup)((glu.supno)(jcol)); // First column in the supernode containing the column jcol
64  Index nsupc = jcol - fsupc; // Number of columns in the supernode portion, excluding jcol; nsupc >=0
65  Index lptr = glu.xlsub(fsupc); // pointer to the starting location of the row subscripts for this supernode portion
66  Index nsupr = glu.xlsub(fsupc+1) - lptr; // Number of rows in the supernode
67  Index lda = glu.xlusup(fsupc+1) - glu.xlusup(fsupc); // leading dimension
68  Scalar* lu_sup_ptr = &(glu.lusup.data()[glu.xlusup(fsupc)]); // Start of the current supernode
69  Scalar* lu_col_ptr = &(glu.lusup.data()[glu.xlusup(jcol)]); // Start of jcol in the supernode
70  Index* lsub_ptr = &(glu.lsub.data()[lptr]); // Start of row indices of the supernode
71 
72  // Determine the largest abs numerical value for partial pivoting
73  Index diagind = iperm_c(jcol); // diagonal index
74  RealScalar pivmax = 0.0;
75  Index pivptr = nsupc;
76  Index diag = emptyIdxLU;
77  RealScalar rtemp;
78  Index isub, icol, itemp, k;
79  for (isub = nsupc; isub < nsupr; ++isub) {
80  rtemp = std::abs(lu_col_ptr[isub]);
81  if (rtemp > pivmax) {
82  pivmax = rtemp;
83  pivptr = isub;
84  }
85  if (lsub_ptr[isub] == diagind) diag = isub;
86  }
87 
88  // Test for singularity
89  if ( pivmax == 0.0 ) {
90  pivrow = lsub_ptr[pivptr];
91  perm_r(pivrow) = jcol;
92  return (jcol+1);
93  }
94 
95  RealScalar thresh = diagpivotthresh * pivmax;
96 
97  // Choose appropriate pivotal element
98 
99  {
100  // Test if the diagonal element can be used as a pivot (given the threshold value)
101  if (diag >= 0 )
102  {
103  // Diagonal element exists
104  rtemp = std::abs(lu_col_ptr[diag]);
105  if (rtemp != 0.0 && rtemp >= thresh) pivptr = diag;
106  }
107  pivrow = lsub_ptr[pivptr];
108  }
109 
110  // Record pivot row
111  perm_r(pivrow) = jcol;
112  // Interchange row subscripts
113  if (pivptr != nsupc )
114  {
115  std::swap( lsub_ptr[pivptr], lsub_ptr[nsupc] );
116  // Interchange numerical values as well, for the two rows in the whole snode
117  // such that L is indexed the same way as A
118  for (icol = 0; icol <= nsupc; icol++)
119  {
120  itemp = pivptr + icol * lda;
121  std::swap(lu_sup_ptr[itemp], lu_sup_ptr[nsupc + icol * lda]);
122  }
123  }
124  // cdiv operations
125  Scalar temp = Scalar(1.0) / lu_col_ptr[nsupc];
126  for (k = nsupc+1; k < nsupr; k++)
127  lu_col_ptr[k] *= temp;
128  return 0;
129 }
template<typename Scalar , typename Index>
void SparseLUImpl< Scalar, Index >::pruneL ( const Index  jcol,
const IndexVector perm_r,
const Index  pivrow,
const Index  nseg,
const IndexVector segrep,
BlockIndexVector  repfnz,
IndexVector xprune,
GlobalLU_t glu 
)
protected

Prunes the L-structure.

It prunes the L-structure of supernodes whose L-structure contains the current pivot row "pivrow"

Parameters
jcolThe current column of L
[in]perm_rRow permutation
[out]pivrowThe pivot row
nsegNumber of segments
segrep
repfnz
[out]xprune
gluGlobal LU data

Definition at line 53 of file SparseLU_pruneL.h.

54 {
55  // For each supernode-rep irep in U(*,j]
56  Index jsupno = glu.supno(jcol);
57  Index i,irep,irep1;
58  bool movnum, do_prune = false;
59  Index kmin = 0, kmax = 0, minloc, maxloc,krow;
60  for (i = 0; i < nseg; i++)
61  {
62  irep = segrep(i);
63  irep1 = irep + 1;
64  do_prune = false;
65 
66  // Don't prune with a zero U-segment
67  if (repfnz(irep) == emptyIdxLU) continue;
68 
69  // If a snode overlaps with the next panel, then the U-segment
70  // is fragmented into two parts -- irep and irep1. We should let
71  // pruning occur at the rep-column in irep1s snode.
72  if (glu.supno(irep) == glu.supno(irep1) ) continue; // don't prune
73 
74  // If it has not been pruned & it has a nonz in row L(pivrow,i)
75  if (glu.supno(irep) != jsupno )
76  {
77  if ( xprune (irep) >= glu.xlsub(irep1) )
78  {
79  kmin = glu.xlsub(irep);
80  kmax = glu.xlsub(irep1) - 1;
81  for (krow = kmin; krow <= kmax; krow++)
82  {
83  if (glu.lsub(krow) == pivrow)
84  {
85  do_prune = true;
86  break;
87  }
88  }
89  }
90 
91  if (do_prune)
92  {
93  // do a quicksort-type partition
94  // movnum=true means that the num values have to be exchanged
95  movnum = false;
96  if (irep == glu.xsup(glu.supno(irep)) ) // Snode of size 1
97  movnum = true;
98 
99  while (kmin <= kmax)
100  {
101  if (perm_r(glu.lsub(kmax)) == emptyIdxLU)
102  kmax--;
103  else if ( perm_r(glu.lsub(kmin)) != emptyIdxLU)
104  kmin++;
105  else
106  {
107  // kmin below pivrow (not yet pivoted), and kmax
108  // above pivrow: interchange the two suscripts
109  std::swap(glu.lsub(kmin), glu.lsub(kmax));
110 
111  // If the supernode has only one column, then we
112  // only keep one set of subscripts. For any subscript
113  // intercnahge performed, similar interchange must be
114  // done on the numerical values.
115  if (movnum)
116  {
117  minloc = glu.xlusup(irep) + ( kmin - glu.xlsub(irep) );
118  maxloc = glu.xlusup(irep) + ( kmax - glu.xlsub(irep) );
119  std::swap(glu.lusup(minloc), glu.lusup(maxloc));
120  }
121  kmin++;
122  kmax--;
123  }
124  } // end while
125 
126  xprune(irep) = kmin; //Pruning
127  } // end if do_prune
128  } // end pruning
129  } // End for each U-segment
130 }
template<typename Scalar , typename Index>
void SparseLUImpl< Scalar, Index >::relax_snode ( const Index  n,
IndexVector et,
const Index  relax_columns,
IndexVector descendants,
IndexVector relax_end 
)
protected

Identify the initial relaxed supernodes.

This routine is applied to a column elimination tree. It assumes that the matrix has been reordered according to the postorder of the etree

Parameters
nthe number of columns
etelimination tree
relax_columnsMaximum number of columns allowed in a relaxed snode
descendantsNumber of descendants of each node in the etree
relax_endlast column in a supernode

Definition at line 47 of file SparseLU_relax_snode.h.

48 {
49 
50  // compute the number of descendants of each node in the etree
51  Index j, parent;
52  relax_end.setConstant(emptyIdxLU);
53  descendants.setZero();
54  for (j = 0; j < n; j++)
55  {
56  parent = et(j);
57  if (parent != n) // not the dummy root
58  descendants(parent) += descendants(j) + 1;
59  }
60  // Identify the relaxed supernodes by postorder traversal of the etree
61  Index snode_start; // beginning of a snode
62  for (j = 0; j < n; )
63  {
64  parent = et(j);
65  snode_start = j;
66  while ( parent != n && descendants(parent) < relax_columns )
67  {
68  j = parent;
69  parent = et(j);
70  }
71  // Found a supernode in postordered etree, j is the last column
72  relax_end(snode_start) = j; // Record last column
73  j++;
74  // Search for a new leaf
75  while (descendants(j) != 0 && j < n) j++;
76  } // End postorder traversal of the etree
77 
78 }

The documentation for this class was generated from the following files: