ergo
template_lapack_stein.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_STEIN_HEADER
38 #define TEMPLATE_LAPACK_STEIN_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e,
43  const integer *m, const Treal *w, const integer *iblock, const integer *isplit,
44  Treal *z__, const integer *ldz, Treal *work, integer *iwork,
45  integer *ifail, integer *info)
46 {
47 /* -- LAPACK routine (version 3.0) --
48  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
49  Courant Institute, Argonne National Lab, and Rice University
50  September 30, 1994
51 
52 
53  Purpose
54  =======
55 
56  DSTEIN computes the eigenvectors of a real symmetric tridiagonal
57  matrix T corresponding to specified eigenvalues, using inverse
58  iteration.
59 
60  The maximum number of iterations allowed for each eigenvector is
61  specified by an internal parameter MAXITS (currently set to 5).
62 
63  Arguments
64  =========
65 
66  N (input) INTEGER
67  The order of the matrix. N >= 0.
68 
69  D (input) DOUBLE PRECISION array, dimension (N)
70  The n diagonal elements of the tridiagonal matrix T.
71 
72  E (input) DOUBLE PRECISION array, dimension (N)
73  The (n-1) subdiagonal elements of the tridiagonal matrix
74  T, in elements 1 to N-1. E(N) need not be set.
75 
76  M (input) INTEGER
77  The number of eigenvectors to be found. 0 <= M <= N.
78 
79  W (input) DOUBLE PRECISION array, dimension (N)
80  The first M elements of W contain the eigenvalues for
81  which eigenvectors are to be computed. The eigenvalues
82  should be grouped by split-off block and ordered from
83  smallest to largest within the block. ( The output array
84  W from DSTEBZ with ORDER = 'B' is expected here. )
85 
86  IBLOCK (input) INTEGER array, dimension (N)
87  The submatrix indices associated with the corresponding
88  eigenvalues in W; IBLOCK(i)=1 if eigenvalue W(i) belongs to
89  the first submatrix from the top, =2 if W(i) belongs to
90  the second submatrix, etc. ( The output array IBLOCK
91  from DSTEBZ is expected here. )
92 
93  ISPLIT (input) INTEGER array, dimension (N)
94  The splitting points, at which T breaks up into submatrices.
95  The first submatrix consists of rows/columns 1 to
96  ISPLIT( 1 ), the second of rows/columns ISPLIT( 1 )+1
97  through ISPLIT( 2 ), etc.
98  ( The output array ISPLIT from DSTEBZ is expected here. )
99 
100  Z (output) DOUBLE PRECISION array, dimension (LDZ, M)
101  The computed eigenvectors. The eigenvector associated
102  with the eigenvalue W(i) is stored in the i-th column of
103  Z. Any vector which fails to converge is set to its current
104  iterate after MAXITS iterations.
105 
106  LDZ (input) INTEGER
107  The leading dimension of the array Z. LDZ >= max(1,N).
108 
109  WORK (workspace) DOUBLE PRECISION array, dimension (5*N)
110 
111  IWORK (workspace) INTEGER array, dimension (N)
112 
113  IFAIL (output) INTEGER array, dimension (M)
114  On normal exit, all elements of IFAIL are zero.
115  If one or more eigenvectors fail to converge after
116  MAXITS iterations, then their indices are stored in
117  array IFAIL.
118 
119  INFO (output) INTEGER
120  = 0: successful exit.
121  < 0: if INFO = -i, the i-th argument had an illegal value
122  > 0: if INFO = i, then i eigenvectors failed to converge
123  in MAXITS iterations. Their indices are stored in
124  array IFAIL.
125 
126  Internal Parameters
127  ===================
128 
129  MAXITS INTEGER, default = 5
130  The maximum number of iterations performed.
131 
132  EXTRA INTEGER, default = 2
133  The number of iterations performed after norm growth
134  criterion is satisfied, should be at least 1.
135 
136  =====================================================================
137 
138 
139  Test the input parameters.
140 
141  Parameter adjustments */
142  /* Table of constant values */
143  integer c__2 = 2;
144  integer c__1 = 1;
145  integer c_n1 = -1;
146 
147  /* System generated locals */
148  integer z_dim1, z_offset, i__1, i__2, i__3;
149  Treal d__1, d__2, d__3, d__4, d__5;
150  /* Local variables */
151  integer jblk, nblk;
152  integer jmax;
153  integer i__, j;
154  integer iseed[4], gpind, iinfo;
155  integer b1;
156  integer j1;
157  Treal ortol;
158  integer indrv1, indrv2, indrv3, indrv4, indrv5, bn;
159  Treal xj;
160  integer nrmchk;
161  integer blksiz;
162  Treal onenrm, dtpcrt, pertol, scl, eps, sep, nrm, tol;
163  integer its;
164  Treal xjm, ztr, eps1;
165 #define z___ref(a_1,a_2) z__[(a_2)*z_dim1 + a_1]
166 
167 
168  --d__;
169  --e;
170  --w;
171  --iblock;
172  --isplit;
173  z_dim1 = *ldz;
174  z_offset = 1 + z_dim1 * 1;
175  z__ -= z_offset;
176  --work;
177  --iwork;
178  --ifail;
179 
180  /* Initialization added by Elias to get rid of compiler warnings. */
181  ortol = dtpcrt = xjm = onenrm = gpind = 0;
182  /* Function Body */
183  *info = 0;
184  i__1 = *m;
185  for (i__ = 1; i__ <= i__1; ++i__) {
186  ifail[i__] = 0;
187 /* L10: */
188  }
189 
190  if (*n < 0) {
191  *info = -1;
192  } else if (*m < 0 || *m > *n) {
193  *info = -4;
194  } else if (*ldz < maxMACRO(1,*n)) {
195  *info = -9;
196  } else {
197  i__1 = *m;
198  for (j = 2; j <= i__1; ++j) {
199  if (iblock[j] < iblock[j - 1]) {
200  *info = -6;
201  goto L30;
202  }
203  if (iblock[j] == iblock[j - 1] && w[j] < w[j - 1]) {
204  *info = -5;
205  goto L30;
206  }
207 /* L20: */
208  }
209 L30:
210  ;
211  }
212 
213  if (*info != 0) {
214  i__1 = -(*info);
215  template_blas_erbla("STEIN ", &i__1);
216  return 0;
217  }
218 
219 /* Quick return if possible */
220 
221  if (*n == 0 || *m == 0) {
222  return 0;
223  } else if (*n == 1) {
224  z___ref(1, 1) = 1.;
225  return 0;
226  }
227 
228 /* Get machine constants. */
229 
230  eps = template_lapack_lamch("Precision", (Treal)0);
231 
232 /* Initialize seed for random number generator DLARNV. */
233 
234  for (i__ = 1; i__ <= 4; ++i__) {
235  iseed[i__ - 1] = 1;
236 /* L40: */
237  }
238 
239 /* Initialize pointers. */
240 
241  indrv1 = 0;
242  indrv2 = indrv1 + *n;
243  indrv3 = indrv2 + *n;
244  indrv4 = indrv3 + *n;
245  indrv5 = indrv4 + *n;
246 
247 /* Compute eigenvectors of matrix blocks. */
248 
249  j1 = 1;
250  i__1 = iblock[*m];
251  for (nblk = 1; nblk <= i__1; ++nblk) {
252 
253 /* Find starting and ending indices of block nblk. */
254 
255  if (nblk == 1) {
256  b1 = 1;
257  } else {
258  b1 = isplit[nblk - 1] + 1;
259  }
260  bn = isplit[nblk];
261  blksiz = bn - b1 + 1;
262  if (blksiz == 1) {
263  goto L60;
264  }
265  gpind = b1;
266 
267 /* Compute reorthogonalization criterion and stopping criterion. */
268 
269  onenrm = (d__1 = d__[b1], absMACRO(d__1)) + (d__2 = e[b1], absMACRO(d__2));
270 /* Computing MAX */
271  d__3 = onenrm, d__4 = (d__1 = d__[bn], absMACRO(d__1)) + (d__2 = e[bn - 1],
272  absMACRO(d__2));
273  onenrm = maxMACRO(d__3,d__4);
274  i__2 = bn - 1;
275  for (i__ = b1 + 1; i__ <= i__2; ++i__) {
276 /* Computing MAX */
277  d__4 = onenrm, d__5 = (d__1 = d__[i__], absMACRO(d__1)) + (d__2 = e[
278  i__ - 1], absMACRO(d__2)) + (d__3 = e[i__], absMACRO(d__3));
279  onenrm = maxMACRO(d__4,d__5);
280 /* L50: */
281  }
282  ortol = onenrm * .001;
283 
284  dtpcrt = template_blas_sqrt(.1 / blksiz);
285 
286 /* Loop through eigenvalues of block nblk. */
287 
288 L60:
289  jblk = 0;
290  i__2 = *m;
291  for (j = j1; j <= i__2; ++j) {
292  if (iblock[j] != nblk) {
293  j1 = j;
294  goto L160;
295  }
296  ++jblk;
297  xj = w[j];
298 
299 /* Skip all the work if the block size is one. */
300 
301  if (blksiz == 1) {
302  work[indrv1 + 1] = 1.;
303  goto L120;
304  }
305 
306 /* If eigenvalues j and j-1 are too close, add a relatively
307  small perturbation. */
308 
309  if (jblk > 1) {
310  eps1 = (d__1 = eps * xj, absMACRO(d__1));
311  pertol = eps1 * 10.;
312  sep = xj - xjm;
313  if (sep < pertol) {
314  xj = xjm + pertol;
315  }
316  }
317 
318  its = 0;
319  nrmchk = 0;
320 
321 /* Get random starting vector. */
322 
323  template_lapack_larnv(&c__2, iseed, &blksiz, &work[indrv1 + 1]);
324 
325 /* Copy the matrix T so it won't be destroyed in factorization. */
326 
327  template_blas_copy(&blksiz, &d__[b1], &c__1, &work[indrv4 + 1], &c__1);
328  i__3 = blksiz - 1;
329  template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv2 + 2], &c__1);
330  i__3 = blksiz - 1;
331  template_blas_copy(&i__3, &e[b1], &c__1, &work[indrv3 + 1], &c__1);
332 
333 /* Compute LU factors with partial pivoting ( PT = LU ) */
334 
335  tol = 0.;
336  template_lapack_lagtf(&blksiz, &work[indrv4 + 1], &xj, &work[indrv2 + 2], &work[
337  indrv3 + 1], &tol, &work[indrv5 + 1], &iwork[1], &iinfo);
338 
339 /* Update iteration count. */
340 
341 L70:
342  ++its;
343  if (its > 5) {
344  goto L100;
345  }
346 
347 /* Normalize and scale the righthand side vector Pb.
348 
349  Computing MAX */
350  d__2 = eps, d__3 = (d__1 = work[indrv4 + blksiz], absMACRO(d__1));
351  scl = blksiz * onenrm * maxMACRO(d__2,d__3) / template_blas_asum(&blksiz, &work[
352  indrv1 + 1], &c__1);
353  template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
354 
355 /* Solve the system LU = Pb. */
356 
357  template_lapack_lagts(&c_n1, &blksiz, &work[indrv4 + 1], &work[indrv2 + 2], &
358  work[indrv3 + 1], &work[indrv5 + 1], &iwork[1], &work[
359  indrv1 + 1], &tol, &iinfo);
360 
361 /* Reorthogonalize by modified Gram-Schmidt if eigenvalues are
362  close enough. */
363 
364  if (jblk == 1) {
365  goto L90;
366  }
367  if ((d__1 = xj - xjm, absMACRO(d__1)) > ortol) {
368  gpind = j;
369  }
370  if (gpind != j) {
371  i__3 = j - 1;
372  for (i__ = gpind; i__ <= i__3; ++i__) {
373  ztr = -template_blas_dot(&blksiz, &work[indrv1 + 1], &c__1, &z___ref(
374  b1, i__), &c__1);
375  template_blas_axpy(&blksiz, &ztr, &z___ref(b1, i__), &c__1, &work[
376  indrv1 + 1], &c__1);
377 /* L80: */
378  }
379  }
380 
381 /* Check the infinity norm of the iterate. */
382 
383 L90:
384  jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
385  nrm = (d__1 = work[indrv1 + jmax], absMACRO(d__1));
386 
387 /* Continue for additional iterations after norm reaches
388  stopping criterion. */
389 
390  if (nrm < dtpcrt) {
391  goto L70;
392  }
393  ++nrmchk;
394  if (nrmchk < 3) {
395  goto L70;
396  }
397 
398  goto L110;
399 
400 /* If stopping criterion was not satisfied, update info and
401  store eigenvector number in array ifail. */
402 
403 L100:
404  ++(*info);
405  ifail[*info] = j;
406 
407 /* Accept iterate as jth eigenvector. */
408 
409 L110:
410  scl = 1. / template_blas_nrm2(&blksiz, &work[indrv1 + 1], &c__1);
411  jmax = template_blas_idamax(&blksiz, &work[indrv1 + 1], &c__1);
412  if (work[indrv1 + jmax] < 0.) {
413  scl = -scl;
414  }
415  template_blas_scal(&blksiz, &scl, &work[indrv1 + 1], &c__1);
416 L120:
417  i__3 = *n;
418  for (i__ = 1; i__ <= i__3; ++i__) {
419  z___ref(i__, j) = 0.;
420 /* L130: */
421  }
422  i__3 = blksiz;
423  for (i__ = 1; i__ <= i__3; ++i__) {
424  z___ref(b1 + i__ - 1, j) = work[indrv1 + i__];
425 /* L140: */
426  }
427 
428 /* Save the shift to check eigenvalue spacing at next
429  iteration. */
430 
431  xjm = xj;
432 
433 /* L150: */
434  }
435 L160:
436  ;
437  }
438 
439  return 0;
440 
441 /* End of DSTEIN */
442 
443 } /* dstein_ */
444 
445 #undef z___ref
446 
447 
448 #endif
template_blas_sqrt
Treal template_blas_sqrt(Treal x)
template_blas_idamax
integer template_blas_idamax(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_idamax.h:42
template_lapack_lamch
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:202
template_blas_dot
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:43
template_blas_scal
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
template_lapack_larnv
int template_lapack_larnv(const integer *idist, integer *iseed, const integer *n, Treal *x)
Definition: template_lapack_larnv.h:42
absMACRO
#define absMACRO(x)
Definition: template_blas_common.h:47
template_lapack_lagtf
int template_lapack_lagtf(const integer *n, Treal *a, const Treal *lambda, Treal *b, Treal *c__, const Treal *tol, Treal *d__, integer *in, integer *info)
Definition: template_lapack_lagtf.h:42
template_lapack_lagts
int template_lapack_lagts(const integer *job, const integer *n, const Treal *a, const Treal *b, const Treal *c__, const Treal *d__, const integer *in, Treal *y, Treal *tol, integer *info)
Definition: template_lapack_lagts.h:42
template_blas_nrm2
Treal template_blas_nrm2(const integer *n, const Treal *x, const integer *incx)
Definition: template_blas_nrm2.h:42
template_blas_asum
Treal template_blas_asum(const integer *n, const Treal *dx, const integer *incx)
Definition: template_blas_asum.h:42
template_blas_erbla
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
template_blas_copy
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_copy.h:42
template_blas_axpy
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:43
integer
int integer
Definition: template_blas_common.h:40
z___ref
#define z___ref(a_1, a_2)
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
template_lapack_stein
int template_lapack_stein(const integer *n, const Treal *d__, const Treal *e, const integer *m, const Treal *w, const integer *iblock, const integer *isplit, Treal *z__, const integer *ldz, Treal *work, integer *iwork, integer *ifail, integer *info)
Definition: template_lapack_stein.h:42