ergo
template_lapack_laneg.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LANEG_HEADER
38 #define TEMPLATE_LAPACK_LANEG_HEADER
39 
40 
41 #include "template_lapack_isnan.h"
42 
43 
44 template<class Treal>
45 int template_lapack_laneg(integer *n, Treal *d__, Treal *lld, Treal *
46  sigma, Treal *pivmin, integer *r__)
47 {
48  /* System generated locals */
49  integer ret_val, i__1, i__2, i__3, i__4;
50 
51  /* Local variables */
52  integer j;
53  Treal p, t;
54  integer bj;
55  Treal tmp;
56  integer neg1, neg2;
57  Treal bsav, gamma, dplus;
58  integer negcnt;
59  logical sawnan;
60  Treal dminus;
61 
62 
63 /* -- LAPACK auxiliary routine (version 3.2) -- */
64 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
65 /* November 2006 */
66 
67 /* .. Scalar Arguments .. */
68 /* .. */
69 /* .. Array Arguments .. */
70 /* .. */
71 
72 /* Purpose */
73 /* ======= */
74 
75 /* DLANEG computes the Sturm count, the number of negative pivots */
76 /* encountered while factoring tridiagonal T - sigma I = L D L^T. */
77 /* This implementation works directly on the factors without forming */
78 /* the tridiagonal matrix T. The Sturm count is also the number of */
79 /* eigenvalues of T less than sigma. */
80 
81 /* This routine is called from DLARRB. */
82 
83 /* The current routine does not use the PIVMIN parameter but rather */
84 /* requires IEEE-754 propagation of Infinities and NaNs. This */
85 /* routine also has no input range restrictions but does require */
86 /* default exception handling such that x/0 produces Inf when x is */
87 /* non-zero, and Inf/Inf produces NaN. For more information, see: */
88 
89 /* Marques, Riedy, and Voemel, "Benefits of IEEE-754 Features in */
90 /* Modern Symmetric Tridiagonal Eigensolvers," SIAM Journal on */
91 /* Scientific Computing, v28, n5, 2006. DOI 10.1137/050641624 */
92 /* (Tech report version in LAWN 172 with the same title.) */
93 
94 /* Arguments */
95 /* ========= */
96 
97 /* N (input) INTEGER */
98 /* The order of the matrix. */
99 
100 /* D (input) DOUBLE PRECISION array, dimension (N) */
101 /* The N diagonal elements of the diagonal matrix D. */
102 
103 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
104 /* The (N-1) elements L(i)*L(i)*D(i). */
105 
106 /* SIGMA (input) DOUBLE PRECISION */
107 /* Shift amount in T - sigma I = L D L^T. */
108 
109 /* PIVMIN (input) DOUBLE PRECISION */
110 /* The minimum pivot in the Sturm sequence. May be used */
111 /* when zero pivots are encountered on non-IEEE-754 */
112 /* architectures. */
113 
114 /* R (input) INTEGER */
115 /* The twist index for the twisted factorization that is used */
116 /* for the negcount. */
117 
118 /* Further Details */
119 /* =============== */
120 
121 /* Based on contributions by */
122 /* Osni Marques, LBNL/NERSC, USA */
123 /* Christof Voemel, University of California, Berkeley, USA */
124 /* Jason Riedy, University of California, Berkeley, USA */
125 
126 /* ===================================================================== */
127 
128 /* .. Parameters .. */
129 /* Some architectures propagate Infinities and NaNs very slowly, so */
130 /* the code computes counts in BLKLEN chunks. Then a NaN can */
131 /* propagate at most BLKLEN columns before being detected. This is */
132 /* not a general tuning parameter; it needs only to be just large */
133 /* enough that the overhead is tiny in common cases. */
134 /* .. */
135 /* .. Local Scalars .. */
136 /* .. */
137 /* .. Intrinsic Functions .. */
138 /* .. */
139 /* .. External Functions .. */
140 /* .. */
141 /* .. Executable Statements .. */
142  /* Parameter adjustments */
143  --lld;
144  --d__;
145 
146  /* Function Body */
147  negcnt = 0;
148 /* I) upper part: L D L^T - SIGMA I = L+ D+ L+^T */
149  t = -(*sigma);
150  i__1 = *r__ - 1;
151  for (bj = 1; bj <= i__1; bj += 128) {
152  neg1 = 0;
153  bsav = t;
154 /* Computing MIN */
155  i__3 = bj + 127, i__4 = *r__ - 1;
156  i__2 = minMACRO(i__3,i__4);
157  for (j = bj; j <= i__2; ++j) {
158  dplus = d__[j] + t;
159  if (dplus < 0.) {
160  ++neg1;
161  }
162  tmp = t / dplus;
163  t = tmp * lld[j] - *sigma;
164 /* L21: */
165  }
166  sawnan = template_lapack_isnan(&t);
167 /* Run a slower version of the above loop if a NaN is detected. */
168 /* A NaN should occur only with a zero pivot after an infinite */
169 /* pivot. In that case, substituting 1 for T/DPLUS is the */
170 /* correct limit. */
171  if (sawnan) {
172  neg1 = 0;
173  t = bsav;
174 /* Computing MIN */
175  i__3 = bj + 127, i__4 = *r__ - 1;
176  i__2 = minMACRO(i__3,i__4);
177  for (j = bj; j <= i__2; ++j) {
178  dplus = d__[j] + t;
179  if (dplus < 0.) {
180  ++neg1;
181  }
182  tmp = t / dplus;
183  if (template_lapack_isnan(&tmp)) {
184  tmp = 1.;
185  }
186  t = tmp * lld[j] - *sigma;
187 /* L22: */
188  }
189  }
190  negcnt += neg1;
191 /* L210: */
192  }
193 
194 /* II) lower part: L D L^T - SIGMA I = U- D- U-^T */
195  p = d__[*n] - *sigma;
196  i__1 = *r__;
197  for (bj = *n - 1; bj >= i__1; bj += -128) {
198  neg2 = 0;
199  bsav = p;
200 /* Computing MAX */
201  i__3 = bj - 127;
202  i__2 = maxMACRO(i__3,*r__);
203  for (j = bj; j >= i__2; --j) {
204  dminus = lld[j] + p;
205  if (dminus < 0.) {
206  ++neg2;
207  }
208  tmp = p / dminus;
209  p = tmp * d__[j] - *sigma;
210 /* L23: */
211  }
212  sawnan = template_lapack_isnan(&p);
213 /* As above, run a slower version that substitutes 1 for Inf/Inf. */
214 
215  if (sawnan) {
216  neg2 = 0;
217  p = bsav;
218 /* Computing MAX */
219  i__3 = bj - 127;
220  i__2 = maxMACRO(i__3,*r__);
221  for (j = bj; j >= i__2; --j) {
222  dminus = lld[j] + p;
223  if (dminus < 0.) {
224  ++neg2;
225  }
226  tmp = p / dminus;
227  if (template_lapack_isnan(&tmp)) {
228  tmp = 1.;
229  }
230  p = tmp * d__[j] - *sigma;
231 /* L24: */
232  }
233  }
234  negcnt += neg2;
235 /* L230: */
236  }
237 
238 /* III) Twist index */
239 /* T was shifted by SIGMA initially. */
240  gamma = t + *sigma + p;
241  if (gamma < 0.) {
242  ++negcnt;
243  }
244  ret_val = negcnt;
245  return ret_val;
246 } /* dlaneg_ */
247 
248 #endif
minMACRO
#define minMACRO(a, b)
Definition: template_blas_common.h:46
logical
bool logical
Definition: template_blas_common.h:41
template_lapack_isnan
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:45
integer
int integer
Definition: template_blas_common.h:40
template_lapack_isnan.h
template_lapack_laneg
int template_lapack_laneg(integer *n, Treal *d__, Treal *lld, Treal *sigma, Treal *pivmin, integer *r__)
Definition: template_lapack_laneg.h:45
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45