ergo
template_lapack_sytrd.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_SYTRD_HEADER
38 #define TEMPLATE_LAPACK_SYTRD_HEADER
39 
40 #include "template_lapack_common.h"
41 
42 template<class Treal>
43 int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *
44  lda, Treal *d__, Treal *e, Treal *tau, Treal *
45  work, const integer *lwork, integer *info)
46 {
47 /* -- LAPACK routine (version 3.0) --
48  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
49  Courant Institute, Argonne National Lab, and Rice University
50  June 30, 1999
51 
52 
53  Purpose
54  =======
55 
56  DSYTRD reduces a real symmetric matrix A to real symmetric
57  tridiagonal form T by an orthogonal similarity transformation:
58  Q**T * A * Q = T.
59 
60  Arguments
61  =========
62 
63  UPLO (input) CHARACTER*1
64  = 'U': Upper triangle of A is stored;
65  = 'L': Lower triangle of A is stored.
66 
67  N (input) INTEGER
68  The order of the matrix A. N >= 0.
69 
70  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
71  On entry, the symmetric matrix A. If UPLO = 'U', the leading
72  N-by-N upper triangular part of A contains the upper
73  triangular part of the matrix A, and the strictly lower
74  triangular part of A is not referenced. If UPLO = 'L', the
75  leading N-by-N lower triangular part of A contains the lower
76  triangular part of the matrix A, and the strictly upper
77  triangular part of A is not referenced.
78  On exit, if UPLO = 'U', the diagonal and first superdiagonal
79  of A are overwritten by the corresponding elements of the
80  tridiagonal matrix T, and the elements above the first
81  superdiagonal, with the array TAU, represent the orthogonal
82  matrix Q as a product of elementary reflectors; if UPLO
83  = 'L', the diagonal and first subdiagonal of A are over-
84  written by the corresponding elements of the tridiagonal
85  matrix T, and the elements below the first subdiagonal, with
86  the array TAU, represent the orthogonal matrix Q as a product
87  of elementary reflectors. See Further Details.
88 
89  LDA (input) INTEGER
90  The leading dimension of the array A. LDA >= max(1,N).
91 
92  D (output) DOUBLE PRECISION array, dimension (N)
93  The diagonal elements of the tridiagonal matrix T:
94  D(i) = A(i,i).
95 
96  E (output) DOUBLE PRECISION array, dimension (N-1)
97  The off-diagonal elements of the tridiagonal matrix T:
98  E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
99 
100  TAU (output) DOUBLE PRECISION array, dimension (N-1)
101  The scalar factors of the elementary reflectors (see Further
102  Details).
103 
104  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
105  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
106 
107  LWORK (input) INTEGER
108  The dimension of the array WORK. LWORK >= 1.
109  For optimum performance LWORK >= N*NB, where NB is the
110  optimal blocksize.
111 
112  If LWORK = -1, then a workspace query is assumed; the routine
113  only calculates the optimal size of the WORK array, returns
114  this value as the first entry of the WORK array, and no error
115  message related to LWORK is issued by XERBLA.
116 
117  INFO (output) INTEGER
118  = 0: successful exit
119  < 0: if INFO = -i, the i-th argument had an illegal value
120 
121  Further Details
122  ===============
123 
124  If UPLO = 'U', the matrix Q is represented as a product of elementary
125  reflectors
126 
127  Q = H(n-1) . . . H(2) H(1).
128 
129  Each H(i) has the form
130 
131  H(i) = I - tau * v * v'
132 
133  where tau is a real scalar, and v is a real vector with
134  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
135  A(1:i-1,i+1), and tau in TAU(i).
136 
137  If UPLO = 'L', the matrix Q is represented as a product of elementary
138  reflectors
139 
140  Q = H(1) H(2) . . . H(n-1).
141 
142  Each H(i) has the form
143 
144  H(i) = I - tau * v * v'
145 
146  where tau is a real scalar, and v is a real vector with
147  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
148  and tau in TAU(i).
149 
150  The contents of A on exit are illustrated by the following examples
151  with n = 5:
152 
153  if UPLO = 'U': if UPLO = 'L':
154 
155  ( d e v2 v3 v4 ) ( d )
156  ( d e v3 v4 ) ( e d )
157  ( d e v4 ) ( v1 e d )
158  ( d e ) ( v1 v2 e d )
159  ( d ) ( v1 v2 v3 e d )
160 
161  where d and e denote diagonal and off-diagonal elements of T, and vi
162  denotes an element of the vector defining H(i).
163 
164  =====================================================================
165 
166 
167  Test the input parameters
168 
169  Parameter adjustments */
170  /* Table of constant values */
171  integer c__1 = 1;
172  integer c_n1 = -1;
173  integer c__3 = 3;
174  integer c__2 = 2;
175  Treal c_b22 = -1.;
176  Treal c_b23 = 1.;
177 
178  /* System generated locals */
179  integer a_dim1, a_offset, i__1, i__2, i__3;
180  /* Local variables */
181  integer i__, j;
182  integer nbmin, iinfo;
183  logical upper;
184  integer nb, kk, nx;
185  integer ldwork, lwkopt;
186  logical lquery;
187  integer iws;
188 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
189 
190 
191  a_dim1 = *lda;
192  a_offset = 1 + a_dim1 * 1;
193  a -= a_offset;
194  --d__;
195  --e;
196  --tau;
197  --work;
198 
199  /* Initialization added by Elias to get rid of compiler warnings. */
200  lwkopt = 0;
201  /* Function Body */
202  *info = 0;
203  upper = template_blas_lsame(uplo, "U");
204  lquery = *lwork == -1;
205  if (! upper && ! template_blas_lsame(uplo, "L")) {
206  *info = -1;
207  } else if (*n < 0) {
208  *info = -2;
209  } else if (*lda < maxMACRO(1,*n)) {
210  *info = -4;
211  } else if (*lwork < 1 && ! lquery) {
212  *info = -9;
213  }
214 
215  if (*info == 0) {
216 
217 /* Determine the block size. */
218 
219  nb = template_lapack_ilaenv(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
220  (ftnlen)1);
221  lwkopt = *n * nb;
222  work[1] = (Treal) lwkopt;
223  }
224 
225  if (*info != 0) {
226  i__1 = -(*info);
227  template_blas_erbla("SYTRD ", &i__1);
228  return 0;
229  } else if (lquery) {
230  return 0;
231  }
232 
233 /* Quick return if possible */
234 
235  if (*n == 0) {
236  work[1] = 1.;
237  return 0;
238  }
239 
240  nx = *n;
241  iws = 1;
242  if (nb > 1 && nb < *n) {
243 
244 /* Determine when to cross over from blocked to unblocked code
245  (last block is always handled by unblocked code).
246 
247  Computing MAX */
248  i__1 = nb, i__2 = template_lapack_ilaenv(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, &
249  c_n1, (ftnlen)6, (ftnlen)1);
250  nx = maxMACRO(i__1,i__2);
251  if (nx < *n) {
252 
253 /* Determine if workspace is large enough for blocked code. */
254 
255  ldwork = *n;
256  iws = ldwork * nb;
257  if (*lwork < iws) {
258 
259 /* Not enough workspace to use optimal NB: determine the
260  minimum value of NB, and reduce NB or force use of
261  unblocked code by setting NX = N.
262 
263  Computing MAX */
264  i__1 = *lwork / ldwork;
265  nb = maxMACRO(i__1,1);
266  nbmin = template_lapack_ilaenv(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
267  (ftnlen)6, (ftnlen)1);
268  if (nb < nbmin) {
269  nx = *n;
270  }
271  }
272  } else {
273  nx = *n;
274  }
275  } else {
276  nb = 1;
277  }
278 
279  if (upper) {
280 
281 /* Reduce the upper triangle of A.
282  Columns 1:kk are handled by the unblocked method. */
283 
284  kk = *n - (*n - nx + nb - 1) / nb * nb;
285  i__1 = kk + 1;
286  i__2 = -nb;
287  for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
288  i__2) {
289 
290 /* Reduce columns i:i+nb-1 to tridiagonal form and form the
291  matrix W which is needed to update the unreduced part of
292  the matrix */
293 
294  i__3 = i__ + nb - 1;
295  template_lapack_latrd(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
296  work[1], &ldwork);
297 
298 /* Update the unreduced submatrix A(1:i-1,1:i-1), using an
299  update of the form: A := A - V*W' - W*V' */
300 
301  i__3 = i__ - 1;
302  template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__),
303  lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);
304 
305 /* Copy superdiagonal elements back into A, and diagonal
306  elements into D */
307 
308  i__3 = i__ + nb - 1;
309  for (j = i__; j <= i__3; ++j) {
310  a_ref(j - 1, j) = e[j - 1];
311  d__[j] = a_ref(j, j);
312 /* L10: */
313  }
314 /* L20: */
315  }
316 
317 /* Use unblocked code to reduce the last or only block */
318 
319  template_lapack_sytd2(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
320  } else {
321 
322 /* Reduce the lower triangle of A */
323 
324  i__2 = *n - nx;
325  i__1 = nb;
326  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
327 
328 /* Reduce columns i:i+nb-1 to tridiagonal form and form the
329  matrix W which is needed to update the unreduced part of
330  the matrix */
331 
332  i__3 = *n - i__ + 1;
333  template_lapack_latrd(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[
334  i__], &work[1], &ldwork);
335 
336 /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
337  an update of the form: A := A - V*W' - W*V' */
338 
339  i__3 = *n - i__ - nb + 1;
340  template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb,
341  i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ +
342  nb, i__ + nb), lda);
343 
344 /* Copy subdiagonal elements back into A, and diagonal
345  elements into D */
346 
347  i__3 = i__ + nb - 1;
348  for (j = i__; j <= i__3; ++j) {
349  a_ref(j + 1, j) = e[j];
350  d__[j] = a_ref(j, j);
351 /* L30: */
352  }
353 /* L40: */
354  }
355 
356 /* Use unblocked code to reduce the last or only block */
357 
358  i__1 = *n - i__ + 1;
359  template_lapack_sytd2(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[
360  i__], &iinfo);
361  }
362 
363  work[1] = (Treal) lwkopt;
364  return 0;
365 
366 /* End of DSYTRD */
367 
368 } /* dsytrd_ */
369 
370 #undef a_ref
371 
372 
373 #endif
a_ref
#define a_ref(a_1, a_2)
ftnlen
int ftnlen
Definition: template_blas_common.h:42
template_lapack_ilaenv
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
template_lapack_sytd2
int template_lapack_sytd2(const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *d__, Treal *e, Treal *tau, integer *info)
Definition: template_lapack_sytd2.h:43
logical
bool logical
Definition: template_blas_common.h:41
template_lapack_common.h
template_blas_syr2k
int template_blas_syr2k(const char *uplo, const char *trans, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_syr2k.h:42
template_blas_erbla
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
template_lapack_sytrd
int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *d__, Treal *e, Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_sytrd.h:43
template_blas_lsame
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
integer
int integer
Definition: template_blas_common.h:40
template_lapack_latrd
int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *a, const integer *lda, Treal *e, Treal *tau, Treal *w, const integer *ldw)
Definition: template_lapack_latrd.h:42
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45