ergo
template_lapack_larrf.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LARRF_HEADER
38 #define TEMPLATE_LAPACK_LARRF_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_larrf(integer *n, Treal *d__, Treal *l,
43  Treal *ld, integer *clstrt, integer *clend, Treal *w,
44  Treal *wgap, Treal *werr, Treal *spdiam, Treal *
45  clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma,
46  Treal *dplus, Treal *lplus, Treal *work, integer *info)
47 {
48  /* System generated locals */
49  integer i__1;
50  Treal d__1, d__2, d__3;
51 
52  /* Local variables */
53  integer i__;
54  Treal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2,
55  znm2, growthbound, fail, fact, oldp;
56  integer indx;
57  Treal prod;
58  integer ktry;
59  Treal fail2, avgap, ldmax, rdmax;
60  integer shift;
61  logical dorrr1;
62  Treal ldelta;
63  logical nofail;
64  Treal mingap, lsigma, rdelta;
65  logical forcer;
66  Treal rsigma, clwdth;
67  logical sawnan1, sawnan2, tryrrr1;
68 
69 
70 /* -- LAPACK auxiliary routine (version 3.2) -- */
71 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
72 /* November 2006 */
73 /* * */
74 /* .. Scalar Arguments .. */
75 /* .. */
76 /* .. Array Arguments .. */
77 /* .. */
78 
79 /* Purpose */
80 /* ======= */
81 
82 /* Given the initial representation L D L^T and its cluster of close */
83 /* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
84 /* W( CLEND ), DLARRF finds a new relatively robust representation */
85 /* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
86 /* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
87 
88 /* Arguments */
89 /* ========= */
90 
91 /* N (input) INTEGER */
92 /* The order of the matrix (subblock, if the matrix splitted). */
93 
94 /* D (input) DOUBLE PRECISION array, dimension (N) */
95 /* The N diagonal elements of the diagonal matrix D. */
96 
97 /* L (input) DOUBLE PRECISION array, dimension (N-1) */
98 /* The (N-1) subdiagonal elements of the unit bidiagonal */
99 /* matrix L. */
100 
101 /* LD (input) DOUBLE PRECISION array, dimension (N-1) */
102 /* The (N-1) elements L(i)*D(i). */
103 
104 /* CLSTRT (input) INTEGER */
105 /* The index of the first eigenvalue in the cluster. */
106 
107 /* CLEND (input) INTEGER */
108 /* The index of the last eigenvalue in the cluster. */
109 
110 /* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
111 /* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
112 /* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
113 /* close eigenalues. */
114 
115 /* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
116 /* The separation from the right neighbor eigenvalue in W. */
117 
118 /* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
119 /* WERR contain the semiwidth of the uncertainty */
120 /* interval of the corresponding eigenvalue APPROXIMATION in W */
121 
122 /* SPDIAM (input) estimate of the spectral diameter obtained from the */
123 /* Gerschgorin intervals */
124 
125 /* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
126 /* Set by the calling routine to protect against shifts too close */
127 /* to eigenvalues outside the cluster. */
128 
129 /* PIVMIN (input) DOUBLE PRECISION */
130 /* The minimum pivot allowed in the Sturm sequence. */
131 
132 /* SIGMA (output) DOUBLE PRECISION */
133 /* The shift used to form L(+) D(+) L(+)^T. */
134 
135 /* DPLUS (output) DOUBLE PRECISION array, dimension (N) */
136 /* The N diagonal elements of the diagonal matrix D(+). */
137 
138 /* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */
139 /* The first (N-1) elements of LPLUS contain the subdiagonal */
140 /* elements of the unit bidiagonal matrix L(+). */
141 
142 /* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */
143 /* Workspace. */
144 
145 /* Further Details */
146 /* =============== */
147 
148 /* Based on contributions by */
149 /* Beresford Parlett, University of California, Berkeley, USA */
150 /* Jim Demmel, University of California, Berkeley, USA */
151 /* Inderjit Dhillon, University of Texas, Austin, USA */
152 /* Osni Marques, LBNL/NERSC, USA */
153 /* Christof Voemel, University of California, Berkeley, USA */
154 
155 /* ===================================================================== */
156 
157 /* .. Parameters .. */
158 /* .. */
159 /* .. Local Scalars .. */
160 /* .. */
161 /* .. External Functions .. */
162 /* .. */
163 /* .. External Subroutines .. */
164 /* .. */
165 /* .. Intrinsic Functions .. */
166 /* .. */
167 /* .. Executable Statements .. */
168 
169 /* Table of constant values */
170 
171  integer c__1 = 1;
172 
173 
174 
175  /* Parameter adjustments */
176  --work;
177  --lplus;
178  --dplus;
179  --werr;
180  --wgap;
181  --w;
182  --ld;
183  --l;
184  --d__;
185 
186  /* Initialization added by Elias to get rid of compiler warnings. */
187  indx = 0;
188  /* Function Body */
189  *info = 0;
190  fact = 2.;
191  eps = template_lapack_lamch("Precision", (Treal)0);
192  shift = 0;
193  forcer = FALSE_;
194 /* Note that we cannot guarantee that for any of the shifts tried, */
195 /* the factorization has a small or even moderate element growth. */
196 /* There could be Ritz values at both ends of the cluster and despite */
197 /* backing off, there are examples where all factorizations tried */
198 /* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
199 /* element growth. */
200 /* For this reason, we should use PIVMIN in this subroutine so that at */
201 /* least the L D L^T factorization exists. It can be checked afterwards */
202 /* whether the element growth caused bad residuals/orthogonality. */
203 /* Decide whether the code should accept the best among all */
204 /* representations despite large element growth or signal INFO=1 */
205  nofail = TRUE_;
206 
207 /* Compute the average gap length of the cluster */
208  clwdth = (d__1 = w[*clend] - w[*clstrt], absMACRO(d__1)) + werr[*clend] + werr[
209  *clstrt];
210  avgap = clwdth / (Treal) (*clend - *clstrt);
211  mingap = minMACRO(*clgapl,*clgapr);
212 /* Initial values for shifts to both ends of cluster */
213 /* Computing MIN */
214  d__1 = w[*clstrt], d__2 = w[*clend];
215  lsigma = minMACRO(d__1,d__2) - werr[*clstrt];
216 /* Computing MAX */
217  d__1 = w[*clstrt], d__2 = w[*clend];
218  rsigma = maxMACRO(d__1,d__2) + werr[*clend];
219 /* Use a small fudge to make sure that we really shift to the outside */
220  lsigma -= absMACRO(lsigma) * 4. * eps;
221  rsigma += absMACRO(rsigma) * 4. * eps;
222 /* Compute upper bounds for how much to back off the initial shifts */
223  ldmax = mingap * .25 + *pivmin * 2.;
224  rdmax = mingap * .25 + *pivmin * 2.;
225 /* Computing MAX */
226  d__1 = avgap, d__2 = wgap[*clstrt];
227  ldelta = maxMACRO(d__1,d__2) / fact;
228 /* Computing MAX */
229  d__1 = avgap, d__2 = wgap[*clend - 1];
230  rdelta = maxMACRO(d__1,d__2) / fact;
231 
232 /* Initialize the record of the best representation found */
233 
234  s = template_lapack_lamch("S", (Treal)0);
235  smlgrowth = 1. / s;
236  fail = (Treal) (*n - 1) * mingap / (*spdiam * eps);
237  fail2 = (Treal) (*n - 1) * mingap / (*spdiam * template_blas_sqrt(eps));
238  bestshift = lsigma;
239 
240 /* while (KTRY <= KTRYMAX) */
241  ktry = 0;
242  growthbound = *spdiam * 8.;
243 L5:
244  sawnan1 = FALSE_;
245  sawnan2 = FALSE_;
246 /* Ensure that we do not back off too much of the initial shifts */
247  ldelta = minMACRO(ldmax,ldelta);
248  rdelta = minMACRO(rdmax,rdelta);
249 /* Compute the element growth when shifting to both ends of the cluster */
250 /* accept the shift if there is no element growth at one of the two ends */
251 /* Left end */
252  s = -lsigma;
253  dplus[1] = d__[1] + s;
254  if (absMACRO(dplus[1]) < *pivmin) {
255  dplus[1] = -(*pivmin);
256 /* Need to set SAWNAN1 because refined RRR test should not be used */
257 /* in this case */
258  sawnan1 = TRUE_;
259  }
260  max1 = absMACRO(dplus[1]);
261  i__1 = *n - 1;
262  for (i__ = 1; i__ <= i__1; ++i__) {
263  lplus[i__] = ld[i__] / dplus[i__];
264  s = s * lplus[i__] * l[i__] - lsigma;
265  dplus[i__ + 1] = d__[i__ + 1] + s;
266  if ((d__1 = dplus[i__ + 1], absMACRO(d__1)) < *pivmin) {
267  dplus[i__ + 1] = -(*pivmin);
268 /* Need to set SAWNAN1 because refined RRR test should not be used */
269 /* in this case */
270  sawnan1 = TRUE_;
271  }
272 /* Computing MAX */
273  d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], absMACRO(d__1));
274  max1 = maxMACRO(d__2,d__3);
275 /* L6: */
276  }
277  sawnan1 = sawnan1 || template_lapack_isnan(&max1);
278  if (forcer || ( max1 <= growthbound && ! sawnan1 ) ) {
279  *sigma = lsigma;
280  shift = 1;
281  goto L100;
282  }
283 /* Right end */
284  s = -rsigma;
285  work[1] = d__[1] + s;
286  if (absMACRO(work[1]) < *pivmin) {
287  work[1] = -(*pivmin);
288 /* Need to set SAWNAN2 because refined RRR test should not be used */
289 /* in this case */
290  sawnan2 = TRUE_;
291  }
292  max2 = absMACRO(work[1]);
293  i__1 = *n - 1;
294  for (i__ = 1; i__ <= i__1; ++i__) {
295  work[*n + i__] = ld[i__] / work[i__];
296  s = s * work[*n + i__] * l[i__] - rsigma;
297  work[i__ + 1] = d__[i__ + 1] + s;
298  if ((d__1 = work[i__ + 1], absMACRO(d__1)) < *pivmin) {
299  work[i__ + 1] = -(*pivmin);
300 /* Need to set SAWNAN2 because refined RRR test should not be used */
301 /* in this case */
302  sawnan2 = TRUE_;
303  }
304 /* Computing MAX */
305  d__2 = max2, d__3 = (d__1 = work[i__ + 1], absMACRO(d__1));
306  max2 = maxMACRO(d__2,d__3);
307 /* L7: */
308  }
309  sawnan2 = sawnan2 || template_lapack_isnan(&max2);
310  if (forcer || ( max2 <= growthbound && ! sawnan2 ) ) {
311  *sigma = rsigma;
312  shift = 2;
313  goto L100;
314  }
315 /* If we are at this point, both shifts led to too much element growth */
316 /* Record the better of the two shifts (provided it didn't lead to NaN) */
317  if (sawnan1 && sawnan2) {
318 /* both MAX1 and MAX2 are NaN */
319  goto L50;
320  } else {
321  if (! sawnan1) {
322  indx = 1;
323  if (max1 <= smlgrowth) {
324  smlgrowth = max1;
325  bestshift = lsigma;
326  }
327  }
328  if (! sawnan2) {
329  if (sawnan1 || max2 <= max1) {
330  indx = 2;
331  }
332  if (max2 <= smlgrowth) {
333  smlgrowth = max2;
334  bestshift = rsigma;
335  }
336  }
337  }
338 /* If we are here, both the left and the right shift led to */
339 /* element growth. If the element growth is moderate, then */
340 /* we may still accept the representation, if it passes a */
341 /* refined test for RRR. This test supposes that no NaN occurred. */
342 /* Moreover, we use the refined RRR test only for isolated clusters. */
343  if (clwdth < mingap / 128. && minMACRO(max1,max2) < fail2 && ! sawnan1 && !
344  sawnan2) {
345  dorrr1 = TRUE_;
346  } else {
347  dorrr1 = FALSE_;
348  }
349  tryrrr1 = TRUE_;
350  if (tryrrr1 && dorrr1) {
351  if (indx == 1) {
352  tmp = (d__1 = dplus[*n], absMACRO(d__1));
353  znm2 = 1.;
354  prod = 1.;
355  oldp = 1.;
356  for (i__ = *n - 1; i__ >= 1; --i__) {
357  if (prod <= eps) {
358  prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
359  work[*n + i__]) * oldp;
360  } else {
361  prod *= (d__1 = work[*n + i__], absMACRO(d__1));
362  }
363  oldp = prod;
364 /* Computing 2nd power */
365  d__1 = prod;
366  znm2 += d__1 * d__1;
367 /* Computing MAX */
368  d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, absMACRO(d__1));
369  tmp = maxMACRO(d__2,d__3);
370 /* L15: */
371  }
372  rrr1 = tmp / (*spdiam * template_blas_sqrt(znm2));
373  if (rrr1 <= 8.) {
374  *sigma = lsigma;
375  shift = 1;
376  goto L100;
377  }
378  } else if (indx == 2) {
379  tmp = (d__1 = work[*n], absMACRO(d__1));
380  znm2 = 1.;
381  prod = 1.;
382  oldp = 1.;
383  for (i__ = *n - 1; i__ >= 1; --i__) {
384  if (prod <= eps) {
385  prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] *
386  lplus[i__]) * oldp;
387  } else {
388  prod *= (d__1 = lplus[i__], absMACRO(d__1));
389  }
390  oldp = prod;
391 /* Computing 2nd power */
392  d__1 = prod;
393  znm2 += d__1 * d__1;
394 /* Computing MAX */
395  d__2 = tmp, d__3 = (d__1 = work[i__] * prod, absMACRO(d__1));
396  tmp = maxMACRO(d__2,d__3);
397 /* L16: */
398  }
399  rrr2 = tmp / (*spdiam * template_blas_sqrt(znm2));
400  if (rrr2 <= 8.) {
401  *sigma = rsigma;
402  shift = 2;
403  goto L100;
404  }
405  }
406  }
407 L50:
408  if (ktry < 1) {
409 /* If we are here, both shifts failed also the RRR test. */
410 /* Back off to the outside */
411 /* Computing MAX */
412  d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
413  lsigma = maxMACRO(d__1,d__2);
414 /* Computing MIN */
415  d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
416  rsigma = minMACRO(d__1,d__2);
417  ldelta *= 2.;
418  rdelta *= 2.;
419  ++ktry;
420  goto L5;
421  } else {
422 /* None of the representations investigated satisfied our */
423 /* criteria. Take the best one we found. */
424  if (smlgrowth < fail || nofail) {
425  lsigma = bestshift;
426  rsigma = bestshift;
427  forcer = TRUE_;
428  goto L5;
429  } else {
430  *info = 1;
431  return 0;
432  }
433  }
434 L100:
435  if (shift == 1) {
436  } else if (shift == 2) {
437 /* store new L and D back into DPLUS, LPLUS */
438  template_blas_copy(n, &work[1], &c__1, &dplus[1], &c__1);
439  i__1 = *n - 1;
440  template_blas_copy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
441  }
442  return 0;
443 
444 /* End of DLARRF */
445 
446 } /* dlarrf_ */
447 
448 #endif
template_blas_sqrt
Treal template_blas_sqrt(Treal x)
template_lapack_lamch
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
minMACRO
#define minMACRO(a, b)
Definition: template_blas_common.h:46
template_lapack_larrf
int template_lapack_larrf(integer *n, Treal *d__, Treal *l, Treal *ld, integer *clstrt, integer *clend, Treal *w, Treal *wgap, Treal *werr, Treal *spdiam, Treal *clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, Treal *dplus, Treal *lplus, Treal *work, integer *info)
Definition: template_lapack_larrf.h:42
absMACRO
#define absMACRO(x)
Definition: template_blas_common.h:47
logical
bool logical
Definition: template_blas_common.h:41
template_lapack_isnan
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:45
template_blas_copy
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:42
TRUE_
#define TRUE_
Definition: template_lapack_common.h:42
integer
int integer
Definition: template_blas_common.h:40
FALSE_
#define FALSE_
Definition: template_lapack_common.h:43
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45