ergo
template_lapack_larrv.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LARRV_HEADER
36 #define TEMPLATE_LAPACK_LARRV_HEADER
37 
38 
39 #include "template_lapack_lar1v.h"
40 
41 
42 template<class Treal>
43 int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu,
44  Treal *d__, Treal *l, Treal *pivmin, integer *isplit,
45  integer *m, integer *dol, integer *dou, Treal *minrgp,
46  Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr,
47  Treal *wgap, integer *iblock, integer *indexw, Treal *gers,
48  Treal *z__, const integer *ldz, integer *isuppz, Treal *work,
49  integer *iwork, integer *info)
50 {
51  /* System generated locals */
52  integer z_dim1, z_offset, i__1, i__2, i__3, i__4, i__5;
53  Treal d__1, d__2;
54  logical L__1;
55 
56 
57  /* Local variables */
58  integer minwsize, i__, j, k, p, q, miniwsize, ii;
59  Treal gl;
60  integer im, in;
61  Treal gu, gap, eps, tau, tol, tmp;
62  integer zto;
63  Treal ztz;
64  integer iend, jblk;
65  Treal lgap;
66  integer done;
67  Treal rgap, left;
68  integer wend, iter;
69  Treal bstw = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
70  integer itmp1;
71  integer indld;
72  Treal fudge;
73  integer idone;
74  Treal sigma;
75  integer iinfo, iindr;
76  Treal resid;
77  logical eskip;
78  Treal right;
79  integer nclus, zfrom;
80  Treal rqtol;
81  integer iindc1, iindc2;
82  logical stp2ii;
83  Treal lambda;
84  integer ibegin, indeig;
85  logical needbs;
86  integer indlld;
87  Treal sgndef, mingma;
88  integer oldien, oldncl, wbegin;
89  Treal spdiam;
90  integer negcnt;
91  integer oldcls;
92  Treal savgap;
93  integer ndepth;
94  Treal ssigma;
95  logical usedbs;
96  integer iindwk, offset;
97  Treal gaptol;
98  integer newcls, oldfst, indwrk, windex, oldlst;
99  logical usedrq;
100  integer newfst, newftt, parity, windmn, windpl, isupmn, newlst, zusedl;
101  Treal bstres = 0; // EMANUEL COMMENT: initialize to get rid of compiler warning
102  integer newsiz, zusedu, zusedw;
103  Treal nrminv, rqcorr;
104  logical tryrqc;
105  integer isupmx;
106 
107 
108 /* -- LAPACK auxiliary routine (version 3.2) -- */
109 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
110 /* November 2006 */
111 
112 /* .. Scalar Arguments .. */
113 /* .. */
114 /* .. Array Arguments .. */
115 /* .. */
116 
117 /* Purpose */
118 /* ======= */
119 
120 /* DLARRV computes the eigenvectors of the tridiagonal matrix */
121 /* T = L D L^T given L, D and APPROXIMATIONS to the eigenvalues of L D L^T. */
122 /* The input eigenvalues should have been computed by DLARRE. */
123 
124 /* Arguments */
125 /* ========= */
126 
127 /* N (input) INTEGER */
128 /* The order of the matrix. N >= 0. */
129 
130 /* VL (input) DOUBLE PRECISION */
131 /* VU (input) DOUBLE PRECISION */
132 /* Lower and upper bounds of the interval that contains the desired */
133 /* eigenvalues. VL < VU. Needed to compute gaps on the left or right */
134 /* end of the extremal eigenvalues in the desired RANGE. */
135 
136 /* D (input/output) DOUBLE PRECISION array, dimension (N) */
137 /* On entry, the N diagonal elements of the diagonal matrix D. */
138 /* On exit, D may be overwritten. */
139 
140 /* L (input/output) DOUBLE PRECISION array, dimension (N) */
141 /* On entry, the (N-1) subdiagonal elements of the unit */
142 /* bidiagonal matrix L are in elements 1 to N-1 of L */
143 /* (if the matrix is not splitted.) At the end of each block */
144 /* is stored the corresponding shift as given by DLARRE. */
145 /* On exit, L is overwritten. */
146 
147 /* PIVMIN (in) DOUBLE PRECISION */
148 /* The minimum pivot allowed in the Sturm sequence. */
149 
150 /* ISPLIT (input) INTEGER array, dimension (N) */
151 /* The splitting points, at which T breaks up into blocks. */
152 /* The first block consists of rows/columns 1 to */
153 /* ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1 */
154 /* through ISPLIT( 2 ), etc. */
155 
156 /* M (input) INTEGER */
157 /* The total number of input eigenvalues. 0 <= M <= N. */
158 
159 /* DOL (input) INTEGER */
160 /* DOU (input) INTEGER */
161 /* If the user wants to compute only selected eigenvectors from all */
162 /* the eigenvalues supplied, he can specify an index range DOL:DOU. */
163 /* Or else the setting DOL=1, DOU=M should be applied. */
164 /* Note that DOL and DOU refer to the order in which the eigenvalues */
165 /* are stored in W. */
166 /* If the user wants to compute only selected eigenpairs, then */
167 /* the columns DOL-1 to DOU+1 of the eigenvector space Z contain the */
168 /* computed eigenvectors. All other columns of Z are set to zero. */
169 
170 /* MINRGP (input) DOUBLE PRECISION */
171 
172 /* RTOL1 (input) DOUBLE PRECISION */
173 /* RTOL2 (input) DOUBLE PRECISION */
174 /* Parameters for bisection. */
175 /* An interval [LEFT,RIGHT] has converged if */
176 /* RIGHT-LEFT.LT.MAX( RTOL1*GAP, RTOL2*MAX(|LEFT|,|RIGHT|) ) */
177 
178 /* W (input/output) DOUBLE PRECISION array, dimension (N) */
179 /* The first M elements of W contain the APPROXIMATE eigenvalues for */
180 /* which eigenvectors are to be computed. The eigenvalues */
181 /* should be grouped by split-off block and ordered from */
182 /* smallest to largest within the block ( The output array */
183 /* W from DLARRE is expected here ). Furthermore, they are with */
184 /* respect to the shift of the corresponding root representation */
185 /* for their block. On exit, W holds the eigenvalues of the */
186 /* UNshifted matrix. */
187 
188 /* WERR (input/output) DOUBLE PRECISION array, dimension (N) */
189 /* The first M elements contain the semiwidth of the uncertainty */
190 /* interval of the corresponding eigenvalue in W */
191 
192 /* WGAP (input/output) DOUBLE PRECISION array, dimension (N) */
193 /* The separation from the right neighbor eigenvalue in W. */
194 
195 /* IBLOCK (input) INTEGER array, dimension (N) */
196 /* The indices of the blocks (submatrices) associated with the */
197 /* corresponding eigenvalues in W; IBLOCK(i)=1 if eigenvalue */
198 /* W(i) belongs to the first block from the top, =2 if W(i) */
199 /* belongs to the second block, etc. */
200 
201 /* INDEXW (input) INTEGER array, dimension (N) */
202 /* The indices of the eigenvalues within each block (submatrix); */
203 /* for example, INDEXW(i)= 10 and IBLOCK(i)=2 imply that the */
204 /* i-th eigenvalue W(i) is the 10-th eigenvalue in the second block. */
205 
206 /* GERS (input) DOUBLE PRECISION array, dimension (2*N) */
207 /* The N Gerschgorin intervals (the i-th Gerschgorin interval */
208 /* is (GERS(2*i-1), GERS(2*i)). The Gerschgorin intervals should */
209 /* be computed from the original UNshifted matrix. */
210 
211 /* Z (output) DOUBLE PRECISION array, dimension (LDZ, max(1,M) ) */
212 /* If INFO = 0, the first M columns of Z contain the */
213 /* orthonormal eigenvectors of the matrix T */
214 /* corresponding to the input eigenvalues, with the i-th */
215 /* column of Z holding the eigenvector associated with W(i). */
216 /* Note: the user must ensure that at least max(1,M) columns are */
217 /* supplied in the array Z. */
218 
219 /* LDZ (input) INTEGER */
220 /* The leading dimension of the array Z. LDZ >= 1, and if */
221 /* JOBZ = 'V', LDZ >= max(1,N). */
222 
223 /* ISUPPZ (output) INTEGER array, dimension ( 2*max(1,M) ) */
224 /* The support of the eigenvectors in Z, i.e., the indices */
225 /* indicating the nonzero elements in Z. The I-th eigenvector */
226 /* is nonzero only in elements ISUPPZ( 2*I-1 ) through */
227 /* ISUPPZ( 2*I ). */
228 
229 /* WORK (workspace) DOUBLE PRECISION array, dimension (12*N) */
230 
231 /* IWORK (workspace) INTEGER array, dimension (7*N) */
232 
233 /* INFO (output) INTEGER */
234 /* = 0: successful exit */
235 
236 /* > 0: A problem occured in DLARRV. */
237 /* < 0: One of the called subroutines signaled an internal problem. */
238 /* Needs inspection of the corresponding parameter IINFO */
239 /* for further information. */
240 
241 /* =-1: Problem in DLARRB when refining a child's eigenvalues. */
242 /* =-2: Problem in DLARRF when computing the RRR of a child. */
243 /* When a child is inside a tight cluster, it can be difficult */
244 /* to find an RRR. A partial remedy from the user's point of */
245 /* view is to make the parameter MINRGP smaller and recompile. */
246 /* However, as the orthogonality of the computed vectors is */
247 /* proportional to 1/MINRGP, the user should be aware that */
248 /* he might be trading in precision when he decreases MINRGP. */
249 /* =-3: Problem in DLARRB when refining a single eigenvalue */
250 /* after the Rayleigh correction was rejected. */
251 /* = 5: The Rayleigh Quotient Iteration failed to converge to */
252 /* full accuracy in MAXITR steps. */
253 
254 /* Further Details */
255 /* =============== */
256 
257 /* Based on contributions by */
258 /* Beresford Parlett, University of California, Berkeley, USA */
259 /* Jim Demmel, University of California, Berkeley, USA */
260 /* Inderjit Dhillon, University of Texas, Austin, USA */
261 /* Osni Marques, LBNL/NERSC, USA */
262 /* Christof Voemel, University of California, Berkeley, USA */
263 
264 /* ===================================================================== */
265 
266 /* .. Parameters .. */
267 /* .. */
268 /* .. Local Scalars .. */
269 /* .. */
270 /* .. External Functions .. */
271 /* .. */
272 /* .. External Subroutines .. */
273 /* .. */
274 /* .. Intrinsic Functions .. */
275 /* .. */
276 /* .. Executable Statements .. */
277 /* .. */
278 /* The first N entries of WORK are reserved for the eigenvalues */
279 
280 /* Table of constant values */
281 
282  Treal c_b5 = 0.;
283  integer c__1 = 1;
284  integer c__2 = 2;
285 
286 
287  /* Parameter adjustments */
288  --d__;
289  --l;
290  --isplit;
291  --w;
292  --werr;
293  --wgap;
294  --iblock;
295  --indexw;
296  --gers;
297  z_dim1 = *ldz;
298  z_offset = 1 + z_dim1;
299  z__ -= z_offset;
300  --isuppz;
301  --work;
302  --iwork;
303 
304  /* Function Body */
305  indld = *n + 1;
306  indlld = (*n << 1) + 1;
307  indwrk = *n * 3 + 1;
308  minwsize = *n * 12;
309  i__1 = minwsize;
310  for (i__ = 1; i__ <= i__1; ++i__) {
311  work[i__] = 0.;
312 /* L5: */
313  }
314 /* IWORK(IINDR+1:IINDR+N) hold the twist indices R for the */
315 /* factorization used to compute the FP vector */
316  iindr = 0;
317 /* IWORK(IINDC1+1:IINC2+N) are used to store the clusters of the current */
318 /* layer and the one above. */
319  iindc1 = *n;
320  iindc2 = *n << 1;
321  iindwk = *n * 3 + 1;
322  miniwsize = *n * 7;
323  i__1 = miniwsize;
324  for (i__ = 1; i__ <= i__1; ++i__) {
325  iwork[i__] = 0;
326 /* L10: */
327  }
328  zusedl = 1;
329  if (*dol > 1) {
330 /* Set lower bound for use of Z */
331  zusedl = *dol - 1;
332  }
333  zusedu = *m;
334  if (*dou < *m) {
335 /* Set lower bound for use of Z */
336  zusedu = *dou + 1;
337  }
338 /* The width of the part of Z that is used */
339  zusedw = zusedu - zusedl + 1;
340  template_lapack_laset("Full", n, &zusedw, &c_b5, &c_b5, &z__[zusedl * z_dim1 + 1], ldz);
341  eps = template_lapack_lamch("Precision",(Treal)0);
342  rqtol = eps * 2.;
343 
344 /* Set expert flags for standard code. */
345  tryrqc = TRUE_;
346  if (*dol == 1 && *dou == *m) {
347  } else {
348 /* Only selected eigenpairs are computed. Since the other evalues */
349 /* are not refined by RQ iteration, bisection has to compute to full */
350 /* accuracy. */
351  *rtol1 = eps * 4.;
352  *rtol2 = eps * 4.;
353  }
354 /* The entries WBEGIN:WEND in W, WERR, WGAP correspond to the */
355 /* desired eigenvalues. The support of the nonzero eigenvector */
356 /* entries is contained in the interval IBEGIN:IEND. */
357 /* Remark that if k eigenpairs are desired, then the eigenvectors */
358 /* are stored in k contiguous columns of Z. */
359 /* DONE is the number of eigenvectors already computed */
360  done = 0;
361  ibegin = 1;
362  wbegin = 1;
363  i__1 = iblock[*m];
364  for (jblk = 1; jblk <= i__1; ++jblk) {
365  iend = isplit[jblk];
366  sigma = l[iend];
367 /* Find the eigenvectors of the submatrix indexed IBEGIN */
368 /* through IEND. */
369  wend = wbegin - 1;
370 L15:
371  if (wend < *m) {
372  if (iblock[wend + 1] == jblk) {
373  ++wend;
374  goto L15;
375  }
376  }
377  if (wend < wbegin) {
378  ibegin = iend + 1;
379  goto L170;
380  } else if (wend < *dol || wbegin > *dou) {
381  ibegin = iend + 1;
382  wbegin = wend + 1;
383  goto L170;
384  }
385 /* Find local spectral diameter of the block */
386  gl = gers[(ibegin << 1) - 1];
387  gu = gers[ibegin * 2];
388  i__2 = iend;
389  for (i__ = ibegin + 1; i__ <= i__2; ++i__) {
390 /* Computing MIN */
391  d__1 = gers[(i__ << 1) - 1];
392  gl = minMACRO(d__1,gl);
393 /* Computing MAX */
394  d__1 = gers[i__ * 2];
395  gu = maxMACRO(d__1,gu);
396 /* L20: */
397  }
398  spdiam = gu - gl;
399 /* OLDIEN is the last index of the previous block */
400  oldien = ibegin - 1;
401 /* Calculate the size of the current block */
402  in = iend - ibegin + 1;
403 /* The number of eigenvalues in the current block */
404  im = wend - wbegin + 1;
405 /* This is for a 1x1 block */
406  if (ibegin == iend) {
407  ++done;
408  z__[ibegin + wbegin * z_dim1] = 1.;
409  isuppz[(wbegin << 1) - 1] = ibegin;
410  isuppz[wbegin * 2] = ibegin;
411  w[wbegin] += sigma;
412  work[wbegin] = w[wbegin];
413  ibegin = iend + 1;
414  ++wbegin;
415  goto L170;
416  }
417 /* The desired (shifted) eigenvalues are stored in W(WBEGIN:WEND) */
418 /* Note that these can be approximations, in this case, the corresp. */
419 /* entries of WERR give the size of the uncertainty interval. */
420 /* The eigenvalue approximations will be refined when necessary as */
421 /* high relative accuracy is required for the computation of the */
422 /* corresponding eigenvectors. */
423  template_blas_copy(&im, &w[wbegin], &c__1, &work[wbegin], &c__1);
424 /* We store in W the eigenvalue approximations w.r.t. the original */
425 /* matrix T. */
426  i__2 = im;
427  for (i__ = 1; i__ <= i__2; ++i__) {
428  w[wbegin + i__ - 1] += sigma;
429 /* L30: */
430  }
431 /* NDEPTH is the current depth of the representation tree */
432  ndepth = 0;
433 /* PARITY is either 1 or 0 */
434  parity = 1;
435 /* NCLUS is the number of clusters for the next level of the */
436 /* representation tree, we start with NCLUS = 1 for the root */
437  nclus = 1;
438  iwork[iindc1 + 1] = 1;
439  iwork[iindc1 + 2] = im;
440 /* IDONE is the number of eigenvectors already computed in the current */
441 /* block */
442  idone = 0;
443 /* loop while( IDONE.LT.IM ) */
444 /* generate the representation tree for the current block and */
445 /* compute the eigenvectors */
446 L40:
447  if (idone < im) {
448 /* This is a crude protection against infinitely deep trees */
449  if (ndepth > *m) {
450  *info = -2;
451  return 0;
452  }
453 /* breadth first processing of the current level of the representation */
454 /* tree: OLDNCL = number of clusters on current level */
455  oldncl = nclus;
456 /* reset NCLUS to count the number of child clusters */
457  nclus = 0;
458 
459  parity = 1 - parity;
460  if (parity == 0) {
461  oldcls = iindc1;
462  newcls = iindc2;
463  } else {
464  oldcls = iindc2;
465  newcls = iindc1;
466  }
467 /* Process the clusters on the current level */
468  i__2 = oldncl;
469  for (i__ = 1; i__ <= i__2; ++i__) {
470  j = oldcls + (i__ << 1);
471 /* OLDFST, OLDLST = first, last index of current cluster. */
472 /* cluster indices start with 1 and are relative */
473 /* to WBEGIN when accessing W, WGAP, WERR, Z */
474  oldfst = iwork[j - 1];
475  oldlst = iwork[j];
476  if (ndepth > 0) {
477 /* Retrieve relatively robust representation (RRR) of cluster */
478 /* that has been computed at the previous level */
479 /* The RRR is stored in Z and overwritten once the eigenvectors */
480 /* have been computed or when the cluster is refined */
481  if (*dol == 1 && *dou == *m) {
482 /* Get representation from location of the leftmost evalue */
483 /* of the cluster */
484  j = wbegin + oldfst - 1;
485  } else {
486  if (wbegin + oldfst - 1 < *dol) {
487 /* Get representation from the left end of Z array */
488  j = *dol - 1;
489  } else if (wbegin + oldfst - 1 > *dou) {
490 /* Get representation from the right end of Z array */
491  j = *dou;
492  } else {
493  j = wbegin + oldfst - 1;
494  }
495  }
496  template_blas_copy(&in, &z__[ibegin + j * z_dim1], &c__1, &d__[ibegin]
497 , &c__1);
498  i__3 = in - 1;
499  template_blas_copy(&i__3, &z__[ibegin + (j + 1) * z_dim1], &c__1, &l[
500  ibegin], &c__1);
501  sigma = z__[iend + (j + 1) * z_dim1];
502 /* Set the corresponding entries in Z to zero */
503  template_lapack_laset("Full", &in, &c__2, &c_b5, &c_b5, &z__[ibegin + j
504  * z_dim1], ldz);
505  }
506 /* Compute DL and DLL of current RRR */
507  i__3 = iend - 1;
508  for (j = ibegin; j <= i__3; ++j) {
509  tmp = d__[j] * l[j];
510  work[indld - 1 + j] = tmp;
511  work[indlld - 1 + j] = tmp * l[j];
512 /* L50: */
513  }
514  if (ndepth > 0) {
515 /* P and Q are index of the first and last eigenvalue to compute */
516 /* within the current block */
517  p = indexw[wbegin - 1 + oldfst];
518  q = indexw[wbegin - 1 + oldlst];
519 /* Offset for the arrays WORK, WGAP and WERR, i.e., th P-OFFSET */
520 /* thru' Q-OFFSET elements of these arrays are to be used. */
521 /* OFFSET = P-OLDFST */
522  offset = indexw[wbegin] - 1;
523 /* perform limited bisection (if necessary) to get approximate */
524 /* eigenvalues to the precision needed. */
525  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin - 1], &p,
526  &q, rtol1, rtol2, &offset, &work[wbegin], &wgap[
527  wbegin], &werr[wbegin], &work[indwrk], &iwork[
528  iindwk], pivmin, &spdiam, &in, &iinfo);
529  if (iinfo != 0) {
530  *info = -1;
531  return 0;
532  }
533 /* We also recompute the extremal gaps. W holds all eigenvalues */
534 /* of the unshifted matrix and must be used for computation */
535 /* of WGAP, the entries of WORK might stem from RRRs with */
536 /* different shifts. The gaps from WBEGIN-1+OLDFST to */
537 /* WBEGIN-1+OLDLST are correctly computed in DLARRB. */
538 /* However, we only allow the gaps to become greater since */
539 /* this is what should happen when we decrease WERR */
540  if (oldfst > 1) {
541 /* Computing MAX */
542  d__1 = wgap[wbegin + oldfst - 2], d__2 = w[wbegin +
543  oldfst - 1] - werr[wbegin + oldfst - 1] - w[
544  wbegin + oldfst - 2] - werr[wbegin + oldfst -
545  2];
546  wgap[wbegin + oldfst - 2] = maxMACRO(d__1,d__2);
547  }
548  if (wbegin + oldlst - 1 < wend) {
549 /* Computing MAX */
550  d__1 = wgap[wbegin + oldlst - 1], d__2 = w[wbegin +
551  oldlst] - werr[wbegin + oldlst] - w[wbegin +
552  oldlst - 1] - werr[wbegin + oldlst - 1];
553  wgap[wbegin + oldlst - 1] = maxMACRO(d__1,d__2);
554  }
555 /* Each time the eigenvalues in WORK get refined, we store */
556 /* the newly found approximation with all shifts applied in W */
557  i__3 = oldlst;
558  for (j = oldfst; j <= i__3; ++j) {
559  w[wbegin + j - 1] = work[wbegin + j - 1] + sigma;
560 /* L53: */
561  }
562  }
563 /* Process the current node. */
564  newfst = oldfst;
565  i__3 = oldlst;
566  for (j = oldfst; j <= i__3; ++j) {
567  if (j == oldlst) {
568 /* we are at the right end of the cluster, this is also the */
569 /* boundary of the child cluster */
570  newlst = j;
571  } else if (wgap[wbegin + j - 1] >= *minrgp * (d__1 = work[
572  wbegin + j - 1], absMACRO(d__1))) {
573 /* the right relative gap is big enough, the child cluster */
574 /* (NEWFST,..,NEWLST) is well separated from the following */
575  newlst = j;
576  } else {
577 /* inside a child cluster, the relative gap is not */
578 /* big enough. */
579  goto L140;
580  }
581 /* Compute size of child cluster found */
582  newsiz = newlst - newfst + 1;
583 /* NEWFTT is the place in Z where the new RRR or the computed */
584 /* eigenvector is to be stored */
585  if (*dol == 1 && *dou == *m) {
586 /* Store representation at location of the leftmost evalue */
587 /* of the cluster */
588  newftt = wbegin + newfst - 1;
589  } else {
590  if (wbegin + newfst - 1 < *dol) {
591 /* Store representation at the left end of Z array */
592  newftt = *dol - 1;
593  } else if (wbegin + newfst - 1 > *dou) {
594 /* Store representation at the right end of Z array */
595  newftt = *dou;
596  } else {
597  newftt = wbegin + newfst - 1;
598  }
599  }
600  if (newsiz > 1) {
601 
602 /* Current child is not a singleton but a cluster. */
603 /* Compute and store new representation of child. */
604 
605 
606 /* Compute left and right cluster gap. */
607 
608 /* LGAP and RGAP are not computed from WORK because */
609 /* the eigenvalue approximations may stem from RRRs */
610 /* different shifts. However, W hold all eigenvalues */
611 /* of the unshifted matrix. Still, the entries in WGAP */
612 /* have to be computed from WORK since the entries */
613 /* in W might be of the same order so that gaps are not */
614 /* exhibited correctly for very close eigenvalues. */
615  if (newfst == 1) {
616 /* Computing MAX */
617  d__1 = 0., d__2 = w[wbegin] - werr[wbegin] - *vl;
618  lgap = maxMACRO(d__1,d__2);
619  } else {
620  lgap = wgap[wbegin + newfst - 2];
621  }
622  rgap = wgap[wbegin + newlst - 1];
623 
624 /* Compute left- and rightmost eigenvalue of child */
625 /* to high precision in order to shift as close */
626 /* as possible and obtain as large relative gaps */
627 /* as possible */
628 
629  for (k = 1; k <= 2; ++k) {
630  if (k == 1) {
631  p = indexw[wbegin - 1 + newfst];
632  } else {
633  p = indexw[wbegin - 1 + newlst];
634  }
635  offset = indexw[wbegin] - 1;
636  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
637  - 1], &p, &p, &rqtol, &rqtol, &offset, &
638  work[wbegin], &wgap[wbegin], &werr[wbegin]
639 , &work[indwrk], &iwork[iindwk], pivmin, &
640  spdiam, &in, &iinfo);
641 /* L55: */
642  }
643 
644  if (wbegin + newlst - 1 < *dol || wbegin + newfst - 1
645  > *dou) {
646 /* if the cluster contains no desired eigenvalues */
647 /* skip the computation of that branch of the rep. tree */
648 
649 /* We could skip before the refinement of the extremal */
650 /* eigenvalues of the child, but then the representation */
651 /* tree could be different from the one when nothing is */
652 /* skipped. For this reason we skip at this place. */
653  idone = idone + newlst - newfst + 1;
654  goto L139;
655  }
656 
657 /* Compute RRR of child cluster. */
658 /* Note that the new RRR is stored in Z */
659 
660 /* DLARRF needs LWORK = 2*N */
661  template_lapack_larrf(&in, &d__[ibegin], &l[ibegin], &work[indld +
662  ibegin - 1], &newfst, &newlst, &work[wbegin],
663  &wgap[wbegin], &werr[wbegin], &spdiam, &lgap,
664  &rgap, pivmin, &tau, &z__[ibegin + newftt *
665  z_dim1], &z__[ibegin + (newftt + 1) * z_dim1],
666  &work[indwrk], &iinfo);
667  if (iinfo == 0) {
668 /* a new RRR for the cluster was found by DLARRF */
669 /* update shift and store it */
670  ssigma = sigma + tau;
671  z__[iend + (newftt + 1) * z_dim1] = ssigma;
672 /* WORK() are the midpoints and WERR() the semi-width */
673 /* Note that the entries in W are unchanged. */
674  i__4 = newlst;
675  for (k = newfst; k <= i__4; ++k) {
676  fudge = eps * 3. * (d__1 = work[wbegin + k -
677  1], absMACRO(d__1));
678  work[wbegin + k - 1] -= tau;
679  fudge += eps * 4. * (d__1 = work[wbegin + k -
680  1], absMACRO(d__1));
681 /* Fudge errors */
682  werr[wbegin + k - 1] += fudge;
683 /* Gaps are not fudged. Provided that WERR is small */
684 /* when eigenvalues are close, a zero gap indicates */
685 /* that a new representation is needed for resolving */
686 /* the cluster. A fudge could lead to a wrong decision */
687 /* of judging eigenvalues 'separated' which in */
688 /* reality are not. This could have a negative impact */
689 /* on the orthogonality of the computed eigenvectors. */
690 /* L116: */
691  }
692  ++nclus;
693  k = newcls + (nclus << 1);
694  iwork[k - 1] = newfst;
695  iwork[k] = newlst;
696  } else {
697  *info = -2;
698  return 0;
699  }
700  } else {
701 
702 /* Compute eigenvector of singleton */
703 
704  iter = 0;
705 
706  tol = template_blas_log((Treal) in) * 4. * eps;
707 
708  k = newfst;
709  windex = wbegin + k - 1;
710 /* Computing MAX */
711  i__4 = windex - 1;
712  windmn = maxMACRO(i__4,1);
713 /* Computing MIN */
714  i__4 = windex + 1;
715  windpl = minMACRO(i__4,*m);
716  lambda = work[windex];
717  ++done;
718 /* Check if eigenvector computation is to be skipped */
719  if (windex < *dol || windex > *dou) {
720  eskip = TRUE_;
721  goto L125;
722  } else {
723  eskip = FALSE_;
724  }
725  left = work[windex] - werr[windex];
726  right = work[windex] + werr[windex];
727  indeig = indexw[windex];
728 /* Note that since we compute the eigenpairs for a child, */
729 /* all eigenvalue approximations are w.r.t the same shift. */
730 /* In this case, the entries in WORK should be used for */
731 /* computing the gaps since they exhibit even very small */
732 /* differences in the eigenvalues, as opposed to the */
733 /* entries in W which might "look" the same. */
734  if (k == 1) {
735 /* In the case RANGE='I' and with not much initial */
736 /* accuracy in LAMBDA and VL, the formula */
737 /* LGAP = MAX( ZERO, (SIGMA - VL) + LAMBDA ) */
738 /* can lead to an overestimation of the left gap and */
739 /* thus to inadequately early RQI 'convergence'. */
740 /* Prevent this by forcing a small left gap. */
741 /* Computing MAX */
742  d__1 = absMACRO(left), d__2 = absMACRO(right);
743  lgap = eps * maxMACRO(d__1,d__2);
744  } else {
745  lgap = wgap[windmn];
746  }
747  if (k == im) {
748 /* In the case RANGE='I' and with not much initial */
749 /* accuracy in LAMBDA and VU, the formula */
750 /* can lead to an overestimation of the right gap and */
751 /* thus to inadequately early RQI 'convergence'. */
752 /* Prevent this by forcing a small right gap. */
753 /* Computing MAX */
754  d__1 = absMACRO(left), d__2 = absMACRO(right);
755  rgap = eps * maxMACRO(d__1,d__2);
756  } else {
757  rgap = wgap[windex];
758  }
759  gap = minMACRO(lgap,rgap);
760  if (k == 1 || k == im) {
761 /* The eigenvector support can become wrong */
762 /* because significant entries could be cut off due to a */
763 /* large GAPTOL parameter in LAR1V. Prevent this. */
764  gaptol = 0.;
765  } else {
766  gaptol = gap * eps;
767  }
768  isupmn = in;
769  isupmx = 1;
770 /* Update WGAP so that it holds the minimum gap */
771 /* to the left or the right. This is crucial in the */
772 /* case where bisection is used to ensure that the */
773 /* eigenvalue is refined up to the required precision. */
774 /* The correct value is restored afterwards. */
775  savgap = wgap[windex];
776  wgap[windex] = gap;
777 /* We want to use the Rayleigh Quotient Correction */
778 /* as often as possible since it converges quadratically */
779 /* when we are close enough to the desired eigenvalue. */
780 /* However, the Rayleigh Quotient can have the wrong sign */
781 /* and lead us away from the desired eigenvalue. In this */
782 /* case, the best we can do is to use bisection. */
783  usedbs = FALSE_;
784  usedrq = FALSE_;
785 /* Bisection is initially turned off unless it is forced */
786  needbs = ! tryrqc;
787 L120:
788 /* Check if bisection should be used to refine eigenvalue */
789  if (needbs) {
790 /* Take the bisection as new iterate */
791  usedbs = TRUE_;
792  itmp1 = iwork[iindr + windex];
793  offset = indexw[wbegin] - 1;
794  d__1 = eps * 2.;
795  template_lapack_larrb(&in, &d__[ibegin], &work[indlld + ibegin
796  - 1], &indeig, &indeig, &c_b5, &d__1, &
797  offset, &work[wbegin], &wgap[wbegin], &
798  werr[wbegin], &work[indwrk], &iwork[
799  iindwk], pivmin, &spdiam, &itmp1, &iinfo);
800  if (iinfo != 0) {
801  *info = -3;
802  return 0;
803  }
804  lambda = work[windex];
805 /* Reset twist index from inaccurate LAMBDA to */
806 /* force computation of true MINGMA */
807  iwork[iindr + windex] = 0;
808  }
809 /* Given LAMBDA, compute the eigenvector. */
810  L__1 = ! usedbs;
811  template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin], &l[
812  ibegin], &work[indld + ibegin - 1], &work[
813  indlld + ibegin - 1], pivmin, &gaptol, &z__[
814  ibegin + windex * z_dim1], &L__1, &negcnt, &
815  ztz, &mingma, &iwork[iindr + windex], &isuppz[
816  (windex << 1) - 1], &nrminv, &resid, &rqcorr,
817  &work[indwrk]);
818  if (iter == 0) {
819  bstres = resid;
820  bstw = lambda;
821  } else if (resid < bstres) {
822  bstres = resid;
823  bstw = lambda;
824  }
825 /* Computing MIN */
826  i__4 = isupmn, i__5 = isuppz[(windex << 1) - 1];
827  isupmn = minMACRO(i__4,i__5);
828 /* Computing MAX */
829  i__4 = isupmx, i__5 = isuppz[windex * 2];
830  isupmx = maxMACRO(i__4,i__5);
831  ++iter;
832 /* sin alpha <= |resid|/gap */
833 /* Note that both the residual and the gap are */
834 /* proportional to the matrix, so ||T|| doesn't play */
835 /* a role in the quotient */
836 
837 /* Convergence test for Rayleigh-Quotient iteration */
838 /* (omitted when Bisection has been used) */
839 
840  if (resid > tol * gap && absMACRO(rqcorr) > rqtol * absMACRO(
841  lambda) && ! usedbs) {
842 /* We need to check that the RQCORR update doesn't */
843 /* move the eigenvalue away from the desired one and */
844 /* towards a neighbor. -> protection with bisection */
845  if (indeig <= negcnt) {
846 /* The wanted eigenvalue lies to the left */
847  sgndef = -1.;
848  } else {
849 /* The wanted eigenvalue lies to the right */
850  sgndef = 1.;
851  }
852 /* We only use the RQCORR if it improves the */
853 /* the iterate reasonably. */
854  if (rqcorr * sgndef >= 0. && lambda + rqcorr <=
855  right && lambda + rqcorr >= left) {
856  usedrq = TRUE_;
857 /* Store new midpoint of bisection interval in WORK */
858  if (sgndef == 1.) {
859 /* The current LAMBDA is on the left of the true */
860 /* eigenvalue */
861  left = lambda;
862 /* We prefer to assume that the error estimate */
863 /* is correct. We could make the interval not */
864 /* as a bracket but to be modified if the RQCORR */
865 /* chooses to. In this case, the RIGHT side should */
866 /* be modified as follows: */
867 /* RIGHT = MAX(RIGHT, LAMBDA + RQCORR) */
868  } else {
869 /* The current LAMBDA is on the right of the true */
870 /* eigenvalue */
871  right = lambda;
872 /* See comment about assuming the error estimate is */
873 /* correct above. */
874 /* LEFT = MIN(LEFT, LAMBDA + RQCORR) */
875  }
876  work[windex] = (right + left) * .5;
877 /* Take RQCORR since it has the correct sign and */
878 /* improves the iterate reasonably */
879  lambda += rqcorr;
880 /* Update width of error interval */
881  werr[windex] = (right - left) * .5;
882  } else {
883  needbs = TRUE_;
884  }
885  if (right - left < rqtol * absMACRO(lambda)) {
886 /* The eigenvalue is computed to bisection accuracy */
887 /* compute eigenvector and stop */
888  usedbs = TRUE_;
889  goto L120;
890  } else if (iter < 10) {
891  goto L120;
892  } else if (iter == 10) {
893  needbs = TRUE_;
894  goto L120;
895  } else {
896  *info = 5;
897  return 0;
898  }
899  } else {
900  stp2ii = FALSE_;
901  if (usedrq && usedbs && bstres <= resid) {
902  lambda = bstw;
903  stp2ii = TRUE_;
904  }
905  if (stp2ii) {
906 /* improve error angle by second step */
907  L__1 = ! usedbs;
908  template_lapack_lar1v(&in, &c__1, &in, &lambda, &d__[ibegin]
909 , &l[ibegin], &work[indld + ibegin -
910  1], &work[indlld + ibegin - 1],
911  pivmin, &gaptol, &z__[ibegin + windex
912  * z_dim1], &L__1, &negcnt, &ztz, &
913  mingma, &iwork[iindr + windex], &
914  isuppz[(windex << 1) - 1], &nrminv, &
915  resid, &rqcorr, &work[indwrk]);
916  }
917  work[windex] = lambda;
918  }
919 
920 /* Compute FP-vector support w.r.t. whole matrix */
921 
922  isuppz[(windex << 1) - 1] += oldien;
923  isuppz[windex * 2] += oldien;
924  zfrom = isuppz[(windex << 1) - 1];
925  zto = isuppz[windex * 2];
926  isupmn += oldien;
927  isupmx += oldien;
928 /* Ensure vector is ok if support in the RQI has changed */
929  if (isupmn < zfrom) {
930  i__4 = zfrom - 1;
931  for (ii = isupmn; ii <= i__4; ++ii) {
932  z__[ii + windex * z_dim1] = 0.;
933 /* L122: */
934  }
935  }
936  if (isupmx > zto) {
937  i__4 = isupmx;
938  for (ii = zto + 1; ii <= i__4; ++ii) {
939  z__[ii + windex * z_dim1] = 0.;
940 /* L123: */
941  }
942  }
943  i__4 = zto - zfrom + 1;
944  template_blas_scal(&i__4, &nrminv, &z__[zfrom + windex * z_dim1],
945  &c__1);
946 L125:
947 /* Update W */
948  w[windex] = lambda + sigma;
949 /* Recompute the gaps on the left and right */
950 /* But only allow them to become larger and not */
951 /* smaller (which can only happen through "bad" */
952 /* cancellation and doesn't reflect the theory */
953 /* where the initial gaps are underestimated due */
954 /* to WERR being too crude.) */
955  if (! eskip) {
956  if (k > 1) {
957 /* Computing MAX */
958  d__1 = wgap[windmn], d__2 = w[windex] - werr[
959  windex] - w[windmn] - werr[windmn];
960  wgap[windmn] = maxMACRO(d__1,d__2);
961  }
962  if (windex < wend) {
963 /* Computing MAX */
964  d__1 = savgap, d__2 = w[windpl] - werr[windpl]
965  - w[windex] - werr[windex];
966  wgap[windex] = maxMACRO(d__1,d__2);
967  }
968  }
969  ++idone;
970  }
971 /* here ends the code for the current child */
972 
973 L139:
974 /* Proceed to any remaining child nodes */
975  newfst = j + 1;
976 L140:
977  ;
978  }
979 /* L150: */
980  }
981  ++ndepth;
982  goto L40;
983  }
984  ibegin = iend + 1;
985  wbegin = wend + 1;
986 L170:
987  ;
988  }
989 
990  return 0;
991 
992 /* End of DLARRV */
993 
994 } /* dlarrv_ */
995 
996 #endif
static const real gu
Definition: fun-pz81.c:71
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:41
#define absMACRO(x)
Definition: template_blas_common.h:45
Definition: Matrix.h:73
int template_lapack_larrb(integer *n, Treal *d__, Treal *lld, integer *ifirst, integer *ilast, Treal *rtol1, Treal *rtol2, integer *offset, Treal *w, Treal *wgap, Treal *werr, Treal *work, integer *iwork, Treal *pivmin, Treal *spdiam, integer *twist, integer *info)
Definition: template_lapack_larrb.h:43
int template_lapack_larrf(integer *n, Treal *d__, Treal *l, Treal *ld, integer *clstrt, integer *clend, Treal *w, Treal *wgap, Treal *werr, Treal *spdiam, Treal *clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, Treal *dplus, Treal *lplus, Treal *work, integer *info)
Definition: template_lapack_larrf.h:40
int template_lapack_laset(const char *uplo, const integer *m, const integer *n, const Treal *alpha, const Treal *beta, Treal *a, const integer *lda)
Definition: template_lapack_laset.h:40
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
#define minMACRO(a, b)
Definition: template_blas_common.h:44
int template_lapack_larrv(const integer *n, Treal *vl, Treal *vu, Treal *d__, Treal *l, Treal *pivmin, integer *isplit, integer *m, integer *dol, integer *dou, Treal *minrgp, Treal *rtol1, Treal *rtol2, Treal *w, Treal *werr, Treal *wgap, integer *iblock, integer *indexw, Treal *gers, Treal *z__, const integer *ldz, integer *isuppz, Treal *work, integer *iwork, integer *info)
Definition: template_lapack_larrv.h:43
Treal template_blas_log(Treal x)
int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal *lambda, Treal *d__, Treal *l, Treal *ld, Treal *lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, Treal *rqcorr, Treal *work)
Definition: template_lapack_lar1v.h:39
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
bool logical
Definition: template_blas_common.h:39
#define TRUE_
Definition: template_lapack_common.h:40
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:40
#define FALSE_
Definition: template_lapack_common.h:41
Definition: Matrix.h:73