ergo
template_lapack_larrv.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LARRV_HEADER
38 #define TEMPLATE_LAPACK_LARRV_HEADER
39 
40 
41 #include "template_lapack_lar1v.h"
42 
43 
44 template<class Treal>
45 int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu,
46  Treal *d__, Treal *l, Treal *pivmin, integer *isplit,
47  integer *m, integer *dol, integer *dou, Treal *minrgp,
48  Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr,
49  Treal *wgap, integer *iblock, integer *indexw, Treal *gers,
50  Treal *z__, const integer *ldz, integer *isuppz, Treal *work,
51  integer *iwork, integer *info)
52 {
53  /* System generated locals */
54  integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
55  Treal d__1, d__2;
56  logical L__1;
57 
58 
59  /* Local variables */
60  integer minwsize, i__, j, k, p, q, miniwsize, ii;
61  Treal gl;
62  integer im, in;
63  Treal gu, gap, eps, tau, tol, tmp;
64  integer zto;
65  Treal ztz;
66  integer iend, jblk;
67  Treal lgap;
68  integer done;
69  Treal rgap, left;
70  integer wend, iter;
71  Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
72  integer itmp1;
73  integer indld;
74  Treal fudge;
75  integer idone;
76  Treal sigma;
77  integer iinfo, iindr;
78  Treal resid;
79  logical eskip;
80  Treal right;
81  integer nclus, zfrom;
82  Treal rqtol;
83  integer iindc1, iindc2;
84  logical stp2ii;
85  Treal lambda;
86  integer ibegin, indeig;
87  logical needbs;
88  integer indlld;
89  Treal sgndef, mingma;
90  integer oldien, oldncl, wbegin;
91  Treal spdiam;
92  integer negcnt;
93  integer oldcls;
94  Treal savgap;
95  integer ndepth;
96  Treal ssigma;
97  logical usedbs;
98  integer iindwk, offset;
99  Treal gaptol;
100  integer newcls, oldfst, indwrk, windex, oldlst;
101  logical usedrq;
102  integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
103  Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
104  integer newsiz, zusedu, zusedw;
105  Treal nrminv, rqcorr;
106  logical tryrqc;
107  integer isupmx;
108 
109 
110 /* -- LAPACK auxiliary routine (version 3.2) -- */
111 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
112 /* November 2006 */
113 
114 /* .. Scalar Arguments .. */
115 /* .. */
116 /* .. Array Arguments .. */
117 /* .. */
118 
119 /* Purpose */
120 /* ======= */
121 
122 /* DLARRV computes the eigenvectors of the tridiagonal matrix */
123 /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
124 /* The input eigenvalues should have been computed by DLARRE. */
125 
126 /* Arguments */
127 /* ========= */
128 
129 /* N (input) INTEGER */
130 /* The order of the matrix. N >= 0. */
131 
132 /* VL (input) DOUBLE PRECISION */
133 /* VU (input) DOUBLE PRECISION */
134 /* Lower and upper bounds of the interval that contains the desired */
135 /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */
136 /* end of the extremal eigenvalues in the desired RANGE. */
137 
138 /* D (input/output) DOUBLE PRECISION array, dimension (N) */
139 /* On entry, the N diagonal elements of the diagonal matrix D. */
140 /* On exit, D may be overwritten. */
141 
142 /* L (input/output) DOUBLE PRECISION array, dimension (N) */
143 /* On entry, the (N-1) subdiagonal elements of the unit */
144 /* bidiagonal matrix L are in elements 1 to N-1 of L */
145 /* (if the matrix is not splitted.) At the end of each block */
146 /* is stored the corresponding shift as given by DLARRE. */
147 /* On exit, L is overwritten. */
148 
149 /* PIVMIN (in) DOUBLE PRECISION */
150 /* The minimum pivot allowed in the Sturm sequence. */
151 
152 /* ISPLIT (input) INTEGER array, dimension (N) */
153 /* The splitting points, at which T breaks up into blocks. */
154 /* The first block consists of rows/columns 1 to */
155 /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
156 /* through ISPLIT( 2 ), etc. */
157 
158 /* M (input) INTEGER */
159 /* The total number of input eigenvalues. 0 <= M <= N. */
160 
161 /* DOL (input) INTEGER */
162 /* DOU (input) INTEGER */
163 /* If the user wants to compute only selected eigenvectors from all */
164 /* the eigenvalues supplied, he can specify an index range DOL:DOU. */
165 /* Or else the setting DOL=1, DOU=M should be applied. */
166 /* Note that DOL and DOU refer to the order in which the eigenvalues */
167 /* are stored in W. */
168 /* If the user wants to compute only selected eigenpairs, then */
169 /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
170 /* computed eigenvectors. All other columns of Z are set to zero. */
171 
172 /* MINRGP (input) DOUBLE PRECISION */
173 
174 /* RTOL1 (input) DOUBLE PRECISION */
175 /* RTOL2 (input) DOUBLE PRECISION */
176 /* Parameters for bisection. */
177 /* An interval [LEFT,RIGHT] has converged if */
178 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
179 
180 /* W (input/output) DOUBLE PRECISION array, dimension (N) */
181 /* The first M elements of W contain the APPROXIMATE eigenvalues for */
182 /* which eigenvectors are to be computed. The eigenvalues */
183 /* should be grouped by split-off block and ordered from */
184 /* smallest to largest within the block ( The output array */
185 /* W from DLARRE is expected here ). Furthermore, they are with */
186 /* respect to the shift of the corresponding root representation */
187 /* for their block. On exit, W holds the eigenvalues of the */
188 /* UNshifted matrix. */
189 
190 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */
191 /* The first M elements contain the semiwidth of the uncertainty */
192 /* interval of the corresponding eigenvalue in W */
193 
194 /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */
195 /* The separation from the right neighbor eigenvalue in W. */
196 
197 /* IBLOCK (input) INTEGER array, dimension (N) */
198 /* The indices of the blocks (submatrices) associated with the */
199 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
200 /* W(i) belongs to the first block from the top, =2 if W(i) */
201 /* belongs to the second block, etc. */
202 
203 /* INDEXW (input) INTEGER array, dimension (N) */
204 /* The indices of the eigenvalues within each block (submatrix); */
205 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
206 /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
207 
208 /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */
209 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */
210 /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
211 /* be computed from the original UNshifted matrix. */
212 
213 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
214 /* If INFO = 0, the first M columns of Z contain the */
215 /* orthonormal eigenvectors of the matrix T */
216 /* corresponding to the input eigenvalues, with the i-th */
217 /* column of Z holding the eigenvector associated with W(i). */
218 /* Note: the user must ensure that at least max(1,M) columns are */
219 /* supplied in the array Z. */
220 
221 /* LDZ (input) INTEGER */
222 /* The leading dimension of the array Z. LDZ >= 1, and if */
223 /* JOBZ = 'V', LDZ >= max(1,N). */
224 
225 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */
226 /* The support of the eigenvectors in Z, i.e., the indices */
227 /* indicating the nonzero elements in Z. The I-th eigenvector */
228 /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */
229 /* ISUPPZ( 2*I ). */
230 
231 /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */
232 
233 /* IWORK (workspace) INTEGER array, dimension (7*N) */
234 
235 /* INFO (output) INTEGER */
236 /* = 0: successful exit */
237 
238 /* > 0: A problem occured in DLARRV. */
239 /* < 0: One of the called subroutines signaled an internal problem. */
240 /* Needs inspection of the corresponding parameter IINFO */
241 /* for further information. */
242 
243 /* =-1: Problem in DLARRB when refining a child's eigenvalues. */
244 /* =-2: Problem in DLARRF when computing the RRR of a child. */
245 /* When a child is inside a tight cluster, it can be difficult */
246 /* to find an RRR. A partial remedy from the user's point of */
247 /* view is to make the parameter MINRGP smaller and recompile. */
248 /* However, as the orthogonality of the computed vectors is */
249 /* proportional to 1/MINRGP, the user should be aware that */
250 /* he might be trading in precision when he decreases MINRGP. */
251 /* =-3: Problem in DLARRB when refining a single eigenvalue */
252 /* after the Rayleigh correction was rejected. */
253 /* = 5: The Rayleigh Quotient Iteration failed to converge to */
254 /* full accuracy in MAXITR steps. */
255 
256 /* Further Details */
257 /* =============== */
258 
259 /* Based on contributions by */
260 /* Beresford Parlett, University of California, Berkeley, USA */
261 /* Jim Demmel, University of California, Berkeley, USA */
262 /* Inderjit Dhillon, University of Texas, Austin, USA */
263 /* Osni Marques, LBNL/NERSC, USA */
264 /* Christof Voemel, University of California, Berkeley, USA */
265 
266 /* ===================================================================== */
267 
268 /* .. Parameters .. */
269 /* .. */
270 /* .. Local Scalars .. */
271 /* .. */
272 /* .. External Functions .. */
273 /* .. */
274 /* .. External Subroutines .. */
275 /* .. */
276 /* .. Intrinsic Functions .. */
277 /* .. */
278 /* .. Executable Statements .. */
279 /* .. */
280 /* The first N entries of WORK are reserved for the eigenvalues */
281 
282 /* Table of constant values */
283 
284  Treal c_b5 = 0.;
285  integer c__1 = 1;
286  integer c__2 = 2;
287 
288 
289  /* Parameter adjustments */
290  --d__;
291  --l;
292  --isplit;
293  --w;
294  --werr;
295  --wgap;
296  --iblock;
297  --indexw;
298  --gers;
299  z_dim1 = *ldz;
300  z_offset = 1 + z_dim1;
301  z__ -= z_offset;
302  --isuppz;
303  --work;
304  --iwork;
305 
306  /* Function Body */
307  indld = *n + 1;
308  indlld = (*n << 1) + 1;
309  indwrk = *n * 3 + 1;
310  minwsize = *n * 12;
311  i__1 = minwsize;
312  for (i__ = 1; i__ <= i__1; ++i__) {
313  work[i__] = 0.;
314 /* L5: */
315  }
316 /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
317 /* factorization used to compute the FP vector */
318  iindr = 0;
319 /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
320 /* layer and the one above. */
321  iindc1 = *n;
322  iindc2 = *n << 1;
323  iindwk = *n * 3 + 1;
324  miniwsize = *n * 7;
325  i__1 = miniwsize;
326  for (i__ = 1; i__ <= i__1; ++i__) {
327  iwork[i__] = 0;
328 /* L10: */
329  }
330  zusedl = 1;
331  if (*dol > 1) {
332 /* Set lower bound for use of Z */
333  zusedl = *dol - 1;
334  }
335  zusedu = *m;
336  if (*dou < *m) {
337 /* Set lower bound for use of Z */
338  zusedu = *dou + 1;
339  }
340 /* The width of the part of Z that is used */
341  zusedw = zusedu - zusedl + 1;
342  template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
343  eps = template_lapack_lamch("Precision",(Treal)0);
344  rqtol = eps * 2.;
345 
346 /* Set expert flags for standard code. */
347  tryrqc = TRUE_;
348  if (*dol == 1 && *dou == *m) {
349  } else {
350 /* Only selected eigenpairs are computed. Since the other evalues */
351 /* are not refined by RQ iteration, bisection has to compute to full */
352 /* accuracy. */
353  *rtol1 = eps * 4.;
354  *rtol2 = eps * 4.;
355  }
356 /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
357 /* desired eigenvalues. The support of the nonzero eigenvector */
358 /* entries is contained in the interval IBEGIN:IEND. */
359 /* Remark that if k eigenpairs are desired, then the eigenvectors */
360 /* are stored in k contiguous columns of Z. */
361 /* DONE is the number of eigenvectors already computed */
362  done = 0;
363  ibegin = 1;
364  wbegin = 1;
365  i__1 = iblock[*m];
366  for (jblk = 1; jblk <= i__1; ++jblk) {
367  iend = isplit[jblk];
368  sigma = l[iend];
369 /* Find the eigenvectors of the submatrix indexed IBEGIN */
370 /* through IEND. */
371  wend = wbegin - 1;
372 L15:
373  if (wend < *m) {
374  if (iblock[wend + 1] == jblk) {
375  ++wend;
376  goto L15;
377  }
378  }
379  if (wend < wbegin) {
380  ibegin = iend + 1;
381  goto L170;
382  } else if (wend < *dol || wbegin > *dou) {
383  ibegin = iend + 1;
384  wbegin = wend + 1;
385  goto L170;
386  }
387 /* Find local spectral diameter of the block */
388  gl = gers[(ibegin << 1) - 1];
389  gu = gers[ibegin * 2];
390  i__2 = iend;
391  for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
392 /* Computing MIN */
393  d__1 = gers[(i__ << 1) - 1];
394  gl = minMACRO(d__1,gl);
395 /* Computing MAX */
396  d__1 = gers[i__ * 2];
397  gu = maxMACRO(d__1,gu);
398 /* L20: */
399  }
400  spdiam = gu - gl;
401 /* OLDIEN is the last index of the previous block */
402  oldien = ibegin - 1;
403 /* Calculate the size of the current block */
404  in = iend - ibegin + 1;
405 /* The number of eigenvalues in the current block */
406  im = wend - wbegin + 1;
407 /* This is for a 1x1 block */
408  if (ibegin == iend) {
409  ++done;
410  z__[ibegin + wbegin * z_dim1] = 1.;
411  isuppz[(wbegin << 1) - 1] = ibegin;
412  isuppz[wbegin * 2] = ibegin;
413  w[wbegin] += sigma;
414  work[wbegin] = w[wbegin];
415  ibegin = iend + 1;
416  ++wbegin;
417  goto L170;
418  }
419 /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
420 /* Note that these can be approximations, in this case, the corresp. */
421 /* entries of WERR give the size of the uncertainty interval. */
422 /* The eigenvalue approximations will be refined when necessary as */
423 /* high relative accuracy is required for the computation of the */
424 /* corresponding eigenvectors. */
425  template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
426 /* We store in W the eigenvalue approximations w.r.t. the original */
427 /* matrix T. */
428  i__2 = im;
429  for (i__ = 1; i__ <= i__2; ++i__) {
430  w[wbegin + i__ - 1] += sigma;
431 /* L30: */
432  }
433 /* NDEPTH is the current depth of the representation tree */
434  ndepth = 0;
435 /* PARITY is either 1 or 0 */
436  parity = 1;
437 /* NCLUS is the number of clusters for the next level of the */
438 /* representation tree, we start with NCLUS = 1 for the root */
439  nclus = 1;
440  iwork[iindc1 + 1] = 1;
441  iwork[iindc1 + 2] = im;
442 /* IDONE is the number of eigenvectors already computed in the current */
443 /* block */
444  idone = 0;
445 /* loop while( IDONE.LT.IM ) */
446 /* generate the representation tree for the current block and */
447 /* compute the eigenvectors */
448 L40:
449  if (idone < im) {
450 /* This is a crude protection against infinitely deep trees */
451  if (ndepth > *m) {
452  *info = -2;
453  return 0;
454  }
455 /* breadth first processing of the current level of the representation */
456 /* tree: OLDNCL = number of clusters on current level */
457  oldncl = nclus;
458 /* reset NCLUS to count the number of child clusters */
459  nclus = 0;
460 
461  parity = 1 - parity;
462  if (parity == 0) {
463  oldcls = iindc1;
464  newcls = iindc2;
465  } else {
466  oldcls = iindc2;
467  newcls = iindc1;
468  }
469 /* Process the clusters on the current level */
470  i__2 = oldncl;
471  for (i__ = 1; i__ <= i__2; ++i__) {
472  j = oldcls + (i__ << 1);
473 /* OLDFST, OLDLST = first, last index of current cluster. */
474 /* cluster indices start with 1 and are relative */
475 /* to WBEGIN when accessing W, WGAP, WERR, Z */
476  oldfst = iwork[j - 1];
477  oldlst = iwork[j];
478  if (ndepth > 0) {
479 /* Retrieve relatively robust representation (RRR) of cluster */
480 /* that has been computed at the previous level */
481 /* The RRR is stored in Z and overwritten once the eigenvectors */
482 /* have been computed or when the cluster is refined */
483  if (*dol == 1 && *dou == *m) {
484 /* Get representation from location of the leftmost evalue */
485 /* of the cluster */
486  j = wbegin + oldfst - 1;
487  } else {
488  if (wbegin + oldfst - 1 < *dol) {
489 /* Get representation from the left end of Z array */
490  j = *dol - 1;
491  } else if (wbegin + oldfst - 1 > *dou) {
492 /* Get representation from the right end of Z array */
493  j = *dou;
494  } else {
495  j = wbegin + oldfst - 1;
496  }
497  }
498  template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
499 , &c__1);
500  i__3 = in - 1;
501  template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
502  ibegin], &c__1);
503  sigma = z__[iend + (j + 1) * z_dim1];
504 /* Set the corresponding entries in Z to zero */
505  template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j
506  * z_dim1], ldz);
507  }
508 /* Compute DL and DLL of current RRR */
509  i__3 = iend - 1;
510  for (j = ibegin; j <= i__3; ++j) {
511  tmp = d__[j] * l[j];
512  work[indld - 1 + j] = tmp;
513  work[indlld - 1 + j] = tmp * l[j];
514 /* L50: */
515  }
516  if (ndepth > 0) {
517 /* P and Q are index of the first and last eigenvalue to compute */
518 /* within the current block */
519  p = indexw[wbegin - 1 + oldfst];
520  q = indexw[wbegin - 1 + oldlst];
521 /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
522 /* thru' Q-OFFSET elements of these arrays are to be used. */
523 /* OFFSET = P-OLDFST */
524  offset = indexw[wbegin] - 1;
525 /* perform limited bisection (if necessary) to get approximate */
526 /* eigenvalues to the precision needed. */
527  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p,
528  &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
529  wbegin], &werr[wbegin], &work[indwrk], &iwork[
530  iindwk], pivmin, &spdiam, &in, &iinfo);
531  if (iinfo != 0) {
532  *info = -1;
533  return 0;
534  }
535 /* We also recompute the extremal gaps. W holds all eigenvalues */
536 /* of the unshifted matrix and must be used for computation */
537 /* of WGAP, the entries of WORK might stem from RRRs with */
538 /* different shifts. The gaps from WBEGIN-1+OLDFST to */
539 /* WBEGIN-1+OLDLST are correctly computed in DLARRB. */
540 /* However, we only allow the gaps to become greater since */
541 /* this is what should happen when we decrease WERR */
542  if (oldfst > 1) {
543 /* Computing MAX */
544  d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin +
545  oldfst - 1] - werr[wbegin + oldfst - 1] - w[
546  wbegin + oldfst - 2] - werr[wbegin + oldfst -
547  2];
548  wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2);
549  }
550  if (wbegin + oldlst - 1 < wend) {
551 /* Computing MAX */
552  d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin +
553  oldlst] - werr[wbegin + oldlst] - w[wbegin +
554  oldlst - 1] - werr[wbegin + oldlst - 1];
555  wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2);
556  }
557 /* Each time the eigenvalues in WORK get refined, we store */
558 /* the newly found approximation with all shifts applied in W */
559  i__3 = oldlst;
560  for (j = oldfst; j <= i__3; ++j) {
561  w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
562 /* L53: */
563  }
564  }
565 /* Process the current node. */
566  newfst = oldfst;
567  i__3 = oldlst;
568  for (j = oldfst; j <= i__3; ++j) {
569  if (j == oldlst) {
570 /* we are at the right end of the cluster, this is also the */
571 /* boundary of the child cluster */
572  newlst = j;
573  } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
574  wbegin + j - 1], absMACRO(d__1))) {
575 /* the right relative gap is big enough, the child cluster */
576 /* (NEWFST,..,NEWLST) is well separated from the following */
577  newlst = j;
578  } else {
579 /* inside a child cluster, the relative gap is not */
580 /* big enough. */
581  goto L140;
582  }
583 /* Compute size of child cluster found */
584  newsiz = newlst - newfst + 1;
585 /* NEWFTT is the place in Z where the new RRR or the computed */
586 /* eigenvector is to be stored */
587  if (*dol == 1 && *dou == *m) {
588 /* Store representation at location of the leftmost evalue */
589 /* of the cluster */
590  newftt = wbegin + newfst - 1;
591  } else {
592  if (wbegin + newfst - 1 < *dol) {
593 /* Store representation at the left end of Z array */
594  newftt = *dol - 1;
595  } else if (wbegin + newfst - 1 > *dou) {
596 /* Store representation at the right end of Z array */
597  newftt = *dou;
598  } else {
599  newftt = wbegin + newfst - 1;
600  }
601  }
602  if (newsiz > 1) {
603 
604 /* Current child is not a singleton but a cluster. */
605 /* Compute and store new representation of child. */
606 
607 
608 /* Compute left and right cluster gap. */
609 
610 /* LGAP and RGAP are not computed from WORK because */
611 /* the eigenvalue approximations may stem from RRRs */
612 /* different shifts. However, W hold all eigenvalues */
613 /* of the unshifted matrix. Still, the entries in WGAP */
614 /* have to be computed from WORK since the entries */
615 /* in W might be of the same order so that gaps are not */
616 /* exhibited correctly for very close eigenvalues. */
617  if (newfst == 1) {
618 /* Computing MAX */
619  d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
620  lgap = maxMACRO(d__1,d__2);
621  } else {
622  lgap = wgap[wbegin + newfst - 2];
623  }
624  rgap = wgap[wbegin + newlst - 1];
625 
626 /* Compute left- and rightmost eigenvalue of child */
627 /* to high precision in order to shift as close */
628 /* as possible and obtain as large relative gaps */
629 /* as possible */
630 
631  for (k = 1; k <= 2; ++k) {
632  if (k == 1) {
633  p = indexw[wbegin - 1 + newfst];
634  } else {
635  p = indexw[wbegin - 1 + newlst];
636  }
637  offset = indexw[wbegin] - 1;
638  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
639  - 1], &p, &p, &rqtol, &rqtol, &offset, &
640  work[wbegin], &wgap[wbegin], &werr[wbegin]
641 , &work[indwrk], &iwork[iindwk], pivmin, &
642  spdiam, &in, &iinfo);
643 /* L55: */
644  }
645 
646  if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1
647  > *dou) {
648 /* if the cluster contains no desired eigenvalues */
649 /* skip the computation of that branch of the rep. tree */
650 
651 /* We could skip before the refinement of the extremal */
652 /* eigenvalues of the child, but then the representation */
653 /* tree could be different from the one when nothing is */
654 /* skipped. For this reason we skip at this place. */
655  idone = idone + newlst - newfst + 1;
656  goto L139;
657  }
658 
659 /* Compute RRR of child cluster. */
660 /* Note that the new RRR is stored in Z */
661 
662 /* DLARRF needs LWORK = 2*N */
663  template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld +
664  ibegin - 1], &newfst, &newlst, &work[wbegin],
665  &wgap[wbegin], &werr[wbegin], &spdiam, &lgap,
666  &rgap, pivmin, &tau, &z__[ibegin + newftt *
667  z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
668  &work[indwrk], &iinfo);
669  if (iinfo == 0) {
670 /* a new RRR for the cluster was found by DLARRF */
671 /* update shift and store it */
672  ssigma = sigma + tau;
673  z__[iend + (newftt + 1) * z_dim1] = ssigma;
674 /* WORK() are the midpoints and WERR() the semi-width */
675 /* Note that the entries in W are unchanged. */
676  i__4 = newlst;
677  for (k = newfst; k <= i__4; ++k) {
678  fudge = eps * 3. * (d__1 = work[wbegin + k -
679  1], absMACRO(d__1));
680  work[wbegin + k - 1] -= tau;
681  fudge += eps * 4. * (d__1 = work[wbegin + k -
682  1], absMACRO(d__1));
683 /* Fudge errors */
684  werr[wbegin + k - 1] += fudge;
685 /* Gaps are not fudged. Provided that WERR is small */
686 /* when eigenvalues are close, a zero gap indicates */
687 /* that a new representation is needed for resolving */
688 /* the cluster. A fudge could lead to a wrong decision */
689 /* of judging eigenvalues 'separated' which in */
690 /* reality are not. This could have a negative impact */
691 /* on the orthogonality of the computed eigenvectors. */
692 /* L116: */
693  }
694  ++nclus;
695  k = newcls + (nclus << 1);
696  iwork[k - 1] = newfst;
697  iwork[k] = newlst;
698  } else {
699  *info = -2;
700  return 0;
701  }
702  } else {
703 
704 /* Compute eigenvector of singleton */
705 
706  iter = 0;
707 
708  tol = template_blas_log((Treal) in) * 4. * eps;
709 
710  k = newfst;
711  windex = wbegin + k - 1;
712 /* Computing MAX */
713  i__4 = windex - 1;
714  windmn = maxMACRO(i__4,1);
715 /* Computing MIN */
716  i__4 = windex + 1;
717  windpl = minMACRO(i__4,*m);
718  lambda = work[windex];
719  ++done;
720 /* Check if eigenvector computation is to be skipped */
721  if (windex < *dol || windex > *dou) {
722  eskip = TRUE_;
723  goto L125;
724  } else {
725  eskip = FALSE_;
726  }
727  left = work[windex] - werr[windex];
728  right = work[windex] + werr[windex];
729  indeig = indexw[windex];
730 /* Note that since we compute the eigenpairs for a child, */
731 /* all eigenvalue approximations are w.r.t the same shift. */
732 /* In this case, the entries in WORK should be used for */
733 /* computing the gaps since they exhibit even very small */
734 /* differences in the eigenvalues, as opposed to the */
735 /* entries in W which might "look" the same. */
736  if (k == 1) {
737 /* In the case RANGE='I' and with not much initial */
738 /* accuracy in LAMBDA and VL, the formula */
739 /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
740 /* can lead to an overestimation of the left gap and */
741 /* thus to inadequately early RQI 'convergence'. */
742 /* Prevent this by forcing a small left gap. */
743 /* Computing MAX */
744  d__1 = absMACRO(left), d__2 = absMACRO(right);
745  lgap = eps * maxMACRO(d__1,d__2);
746  } else {
747  lgap = wgap[windmn];
748  }
749  if (k == im) {
750 /* In the case RANGE='I' and with not much initial */
751 /* accuracy in LAMBDA and VU, the formula */
752 /* can lead to an overestimation of the right gap and */
753 /* thus to inadequately early RQI 'convergence'. */
754 /* Prevent this by forcing a small right gap. */
755 /* Computing MAX */
756  d__1 = absMACRO(left), d__2 = absMACRO(right);
757  rgap = eps * maxMACRO(d__1,d__2);
758  } else {
759  rgap = wgap[windex];
760  }
761  gap = minMACRO(lgap,rgap);
762  if (k == 1 || k == im) {
763 /* The eigenvector support can become wrong */
764 /* because significant entries could be cut off due to a */
765 /* large GAPTOL parameter in LAR1V. Prevent this. */
766  gaptol = 0.;
767  } else {
768  gaptol = gap * eps;
769  }
770  isupmn = in;
771  isupmx = 1;
772 /* Update WGAP so that it holds the minimum gap */
773 /* to the left or the right. This is crucial in the */
774 /* case where bisection is used to ensure that the */
775 /* eigenvalue is refined up to the required precision. */
776 /* The correct value is restored afterwards. */
777  savgap = wgap[windex];
778  wgap[windex] = gap;
779 /* We want to use the Rayleigh Quotient Correction */
780 /* as often as possible since it converges quadratically */
781 /* when we are close enough to the desired eigenvalue. */
782 /* However, the Rayleigh Quotient can have the wrong sign */
783 /* and lead us away from the desired eigenvalue. In this */
784 /* case, the best we can do is to use bisection. */
785  usedbs = FALSE_;
786  usedrq = FALSE_;
787 /* Bisection is initially turned off unless it is forced */
788  needbs = ! tryrqc;
789 L120:
790 /* Check if bisection should be used to refine eigenvalue */
791  if (needbs) {
792 /* Take the bisection as new iterate */
793  usedbs = TRUE_;
794  itmp1 = iwork[iindr + windex];
795  offset = indexw[wbegin] - 1;
796  d__1 = eps * 2.;
797  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
798  - 1], &indeig, &indeig, &c_b5, &d__1, &
799  offset, &work[wbegin], &wgap[wbegin], &
800  werr[wbegin], &work[indwrk], &iwork[
801  iindwk], pivmin, &spdiam, &itmp1, &iinfo);
802  if (iinfo != 0) {
803  *info = -3;
804  return 0;
805  }
806  lambda = work[windex];
807 /* Reset twist index from inaccurate LAMBDA to */
808 /* force computation of true MINGMA */
809  iwork[iindr + windex] = 0;
810  }
811 /* Given LAMBDA, compute the eigenvector. */
812  L__1 = ! usedbs;
813  template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
814  ibegin], &work[indld + ibegin - 1], &work[
815  indlld + ibegin - 1], pivmin, &gaptol, &z__[
816  ibegin + windex * z_dim1], &L__1, &negcnt, &
817  ztz, &mingma, &iwork[iindr + windex], &isuppz[
818  (windex << 1) - 1], &nrminv, &resid, &rqcorr,
819  &work[indwrk]);
820  if (iter == 0) {
821  bstres = resid;
822  bstw = lambda;
823  } else if (resid < bstres) {
824  bstres = resid;
825  bstw = lambda;
826  }
827 /* Computing MIN */
828  i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
829  isupmn = minMACRO(i__4,i__5);
830 /* Computing MAX */
831  i__4 = isupmx, i__5 = isuppz[windex * 2];
832  isupmx = maxMACRO(i__4,i__5);
833  ++iter;
834 /* sin alpha <= |resid|/gap */
835 /* Note that both the residual and the gap are */
836 /* proportional to the matrix, so ||T|| doesn't play */
837 /* a role in the quotient */
838 
839 /* Convergence test for Rayleigh-Quotient iteration */
840 /* (omitted when Bisection has been used) */
841 
842  if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO(
843  lambda) && ! usedbs) {
844 /* We need to check that the RQCORR update doesn't */
845 /* move the eigenvalue away from the desired one and */
846 /* towards a neighbor. -> protection with bisection */
847  if (indeig <= negcnt) {
848 /* The wanted eigenvalue lies to the left */
849  sgndef = -1.;
850  } else {
851 /* The wanted eigenvalue lies to the right */
852  sgndef = 1.;
853  }
854 /* We only use the RQCORR if it improves the */
855 /* the iterate reasonably. */
856  if (rqcorr * sgndef >= 0. && lambda + rqcorr <=
857  right && lambda + rqcorr >= left) {
858  usedrq = TRUE_;
859 /* Store new midpoint of bisection interval in WORK */
860  if (sgndef == 1.) {
861 /* The current LAMBDA is on the left of the true */
862 /* eigenvalue */
863  left = lambda;
864 /* We prefer to assume that the error estimate */
865 /* is correct. We could make the interval not */
866 /* as a bracket but to be modified if the RQCORR */
867 /* chooses to. In this case, the RIGHT side should */
868 /* be modified as follows: */
869 /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
870  } else {
871 /* The current LAMBDA is on the right of the true */
872 /* eigenvalue */
873  right = lambda;
874 /* See comment about assuming the error estimate is */
875 /* correct above. */
876 /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */
877  }
878  work[windex] = (right + left) * .5;
879 /* Take RQCORR since it has the correct sign and */
880 /* improves the iterate reasonably */
881  lambda += rqcorr;
882 /* Update width of error interval */
883  werr[windex] = (right - left) * .5;
884  } else {
885  needbs = TRUE_;
886  }
887  if (right - left < rqtol * absMACRO(lambda)) {
888 /* The eigenvalue is computed to bisection accuracy */
889 /* compute eigenvector and stop */
890  usedbs = TRUE_;
891  goto L120;
892  } else if (iter < 10) {
893  goto L120;
894  } else if (iter == 10) {
895  needbs = TRUE_;
896  goto L120;
897  } else {
898  *info = 5;
899  return 0;
900  }
901  } else {
902  stp2ii = FALSE_;
903  if (usedrq && usedbs && bstres <= resid) {
904  lambda = bstw;
905  stp2ii = TRUE_;
906  }
907  if (stp2ii) {
908 /* improve error angle by second step */
909  L__1 = ! usedbs;
910  template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin]
911 , &l[ibegin], &work[indld + ibegin -
912  1], &work[indlld + ibegin - 1],
913  pivmin, &gaptol, &z__[ibegin + windex
914  * z_dim1], &L__1, &negcnt, &ztz, &
915  mingma, &iwork[iindr + windex], &
916  isuppz[(windex << 1) - 1], &nrminv, &
917  resid, &rqcorr, &work[indwrk]);
918  }
919  work[windex] = lambda;
920  }
921 
922 /* Compute FP-vector support w.r.t. whole matrix */
923 
924  isuppz[(windex << 1) - 1] += oldien;
925  isuppz[windex * 2] += oldien;
926  zfrom = isuppz[(windex << 1) - 1];
927  zto = isuppz[windex * 2];
928  isupmn += oldien;
929  isupmx += oldien;
930 /* Ensure vector is ok if support in the RQI has changed */
931  if (isupmn < zfrom) {
932  i__4 = zfrom - 1;
933  for (ii = isupmn; ii <= i__4; ++ii) {
934  z__[ii + windex * z_dim1] = 0.;
935 /* L122: */
936  }
937  }
938  if (isupmx > zto) {
939  i__4 = isupmx;
940  for (ii = zto + 1; ii <= i__4; ++ii) {
941  z__[ii + windex * z_dim1] = 0.;
942 /* L123: */
943  }
944  }
945  i__4 = zto - zfrom + 1;
946  template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1],
947  &c__1);
948 L125:
949 /* Update W */
950  w[windex] = lambda + sigma;
951 /* Recompute the gaps on the left and right */
952 /* But only allow them to become larger and not */
953 /* smaller (which can only happen through "bad" */
954 /* cancellation and doesn't reflect the theory */
955 /* where the initial gaps are underestimated due */
956 /* to WERR being too crude.) */
957  if (! eskip) {
958  if (k > 1) {
959 /* Computing MAX */
960  d__1 = wgap[windmn], d__2 = w[windex] - werr[
961  windex] - w[windmn] - werr[windmn];
962  wgap[windmn] = maxMACRO(d__1,d__2);
963  }
964  if (windex < wend) {
965 /* Computing MAX */
966  d__1 = savgap, d__2 = w[windpl] - werr[windpl]
967  - w[windex] - werr[windex];
968  wgap[windex] = maxMACRO(d__1,d__2);
969  }
970  }
971  ++idone;
972  }
973 /* here ends the code for the current child */
974 
975 L139:
976 /* Proceed to any remaining child nodes */
977  newfst = j + 1;
978 L140:
979  ;
980  }
981 /* L150: */
982  }
983  ++ndepth;
984  goto L40;
985  }
986  ibegin = iend + 1;
987  wbegin = wend + 1;
988 L170:
989  ;
990  }
991 
992  return 0;
993 
994 /* End of DLARRV */
995 
996 } /* dlarrv_ */
997 
998 #endif
gu
static const real gu
Definition: fun-pz81.c:68
template_lapack_lamch
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
template_blas_scal
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
minMACRO
#define minMACRO(a, b)
Definition: template_blas_common.h:46
template_lapack_larrf
int template_lapack_larrf(integer *n, Treal *d__, Treal *l, Treal *ld, integer *clstrt, integer *clend, Treal *w, Treal *wgap, Treal *werr, Treal *spdiam, Treal *clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, Treal *dplus, Treal *lplus, Treal *work, integer *info)
Definition: template_lapack_larrf.h:42
absMACRO
#define absMACRO(x)
Definition: template_blas_common.h:47
template_lapack_laset
int template_lapack_laset(const char *uplo, const integer *m, const integer *n, const Treal *alpha, const Treal *beta, Treal *a, const integer *lda)
Definition: template_lapack_laset.h:42
mat::left
@ left
Definition: Matrix.h:75
template_lapack_lar1v
int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal *lambda, Treal *d__, Treal *l, Treal *ld, Treal *lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, Treal *rqcorr, Treal *work)
Definition: template_lapack_lar1v.h:41
logical
bool logical
Definition: template_blas_common.h:41
template_lapack_lar1v.h
template_lapack_larrb
int template_lapack_larrb(integer *n, Treal *d__, Treal *lld, integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2, integer *offset, Treal *w, Treal *wgap, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *twist, integer *info)
Definition: template_lapack_larrb.h:45
mat::right
@ right
Definition: Matrix.h:75
template_lapack_larrv
int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu, Treal *d__, Treal *l, Treal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, Treal *minrgp, Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr, Treal *wgap, integer *iblock, integer *indexw, Treal *gers, Treal *z__, const integer *ldz, integer *isuppz, Treal *work, integer *iwork, integer *info)
Definition: template_lapack_larrv.h:45
template_blas_log
Treal template_blas_log(Treal x)
template_blas_copy
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:42
TRUE_
#define TRUE_
Definition: template_lapack_common.h:42
integer
int integer
Definition: template_blas_common.h:40
FALSE_
#define FALSE_
Definition: template_lapack_common.h:43
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45