ergo
template_lapack_latrd.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LATRD_HEADER
38 #define TEMPLATE_LAPACK_LATRD_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *
43  a, const integer *lda, Treal *e, Treal *tau, Treal *w,
44  const integer *ldw)
45 {
46 /* -- LAPACK auxiliary routine (version 3.0) --
47  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48  Courant Institute, Argonne National Lab, and Rice University
49  October 31, 1992
50 
51 
52  Purpose
53  =======
54 
55  DLATRD reduces NB rows and columns of a real symmetric matrix A to
56  symmetric tridiagonal form by an orthogonal similarity
57  transformation Q' * A * Q, and returns the matrices V and W which are
58  needed to apply the transformation to the unreduced part of A.
59 
60  If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
61  matrix, of which the upper triangle is supplied;
62  if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
63  matrix, of which the lower triangle is supplied.
64 
65  This is an auxiliary routine called by DSYTRD.
66 
67  Arguments
68  =========
69 
70  UPLO (input) CHARACTER
71  Specifies whether the upper or lower triangular part of the
72  symmetric matrix A is stored:
73  = 'U': Upper triangular
74  = 'L': Lower triangular
75 
76  N (input) INTEGER
77  The order of the matrix A.
78 
79  NB (input) INTEGER
80  The number of rows and columns to be reduced.
81 
82  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
83  On entry, the symmetric matrix A. If UPLO = 'U', the leading
84  n-by-n upper triangular part of A contains the upper
85  triangular part of the matrix A, and the strictly lower
86  triangular part of A is not referenced. If UPLO = 'L', the
87  leading n-by-n lower triangular part of A contains the lower
88  triangular part of the matrix A, and the strictly upper
89  triangular part of A is not referenced.
90  On exit:
91  if UPLO = 'U', the last NB columns have been reduced to
92  tridiagonal form, with the diagonal elements overwriting
93  the diagonal elements of A; the elements above the diagonal
94  with the array TAU, represent the orthogonal matrix Q as a
95  product of elementary reflectors;
96  if UPLO = 'L', the first NB columns have been reduced to
97  tridiagonal form, with the diagonal elements overwriting
98  the diagonal elements of A; the elements below the diagonal
99  with the array TAU, represent the orthogonal matrix Q as a
100  product of elementary reflectors.
101  See Further Details.
102 
103  LDA (input) INTEGER
104  The leading dimension of the array A. LDA >= (1,N).
105 
106  E (output) DOUBLE PRECISION array, dimension (N-1)
107  If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
108  elements of the last NB columns of the reduced matrix;
109  if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
110  the first NB columns of the reduced matrix.
111 
112  TAU (output) DOUBLE PRECISION array, dimension (N-1)
113  The scalar factors of the elementary reflectors, stored in
114  TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
115  See Further Details.
116 
117  W (output) DOUBLE PRECISION array, dimension (LDW,NB)
118  The n-by-nb matrix W required to update the unreduced part
119  of A.
120 
121  LDW (input) INTEGER
122  The leading dimension of the array W. LDW >= max(1,N).
123 
124  Further Details
125  ===============
126 
127  If UPLO = 'U', the matrix Q is represented as a product of elementary
128  reflectors
129 
130  Q = H(n) H(n-1) . . . H(n-nb+1).
131 
132  Each H(i) has the form
133 
134  H(i) = I - tau * v * v'
135 
136  where tau is a real scalar, and v is a real vector with
137  v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
138  and tau in TAU(i-1).
139 
140  If UPLO = 'L', the matrix Q is represented as a product of elementary
141  reflectors
142 
143  Q = H(1) H(2) . . . H(nb).
144 
145  Each H(i) has the form
146 
147  H(i) = I - tau * v * v'
148 
149  where tau is a real scalar, and v is a real vector with
150  v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
151  and tau in TAU(i).
152 
153  The elements of the vectors v together form the n-by-nb matrix V
154  which is needed, with W, to apply the transformation to the unreduced
155  part of the matrix, using a symmetric rank-2k update of the form:
156  A := A - V*W' - W*V'.
157 
158  The contents of A on exit are illustrated by the following examples
159  with n = 5 and nb = 2:
160 
161  if UPLO = 'U': if UPLO = 'L':
162 
163  ( a a a v4 v5 ) ( d )
164  ( a a v4 v5 ) ( 1 d )
165  ( a 1 v5 ) ( v1 1 a )
166  ( d 1 ) ( v1 v2 a a )
167  ( d ) ( v1 v2 a a a )
168 
169  where d denotes a diagonal element of the reduced matrix, a denotes
170  an element of the original matrix that is unchanged, and vi denotes
171  an element of the vector defining H(i).
172 
173  =====================================================================
174 
175 
176  Quick return if possible
177 
178  Parameter adjustments */
179  /* Table of constant values */
180  Treal c_b5 = -1.;
181  Treal c_b6 = 1.;
182  integer c__1 = 1;
183  Treal c_b16 = 0.;
184 
185  /* System generated locals */
186  integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
187  /* Local variables */
188  integer i__;
189  Treal alpha;
190  integer iw;
191 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
192 #define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1]
193 
194 
195  a_dim1 = *lda;
196  a_offset = 1 + a_dim1 * 1;
197  a -= a_offset;
198  --e;
199  --tau;
200  w_dim1 = *ldw;
201  w_offset = 1 + w_dim1 * 1;
202  w -= w_offset;
203 
204  /* Function Body */
205  if (*n <= 0) {
206  return 0;
207  }
208 
209  if (template_blas_lsame(uplo, "U")) {
210 
211 /* Reduce last NB columns of upper triangle */
212 
213  i__1 = *n - *nb + 1;
214  for (i__ = *n; i__ >= i__1; --i__) {
215  iw = i__ - *n + *nb;
216  if (i__ < *n) {
217 
218 /* Update A(1:i,i) */
219 
220  i__2 = *n - i__;
221  template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1),
222  lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__),
223  &c__1);
224  i__2 = *n - i__;
225  template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1),
226  ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__),
227  &c__1);
228  }
229  if (i__ > 1) {
230 
231 /* Generate elementary reflector H(i) to annihilate
232  A(1:i-2,i) */
233 
234  i__2 = i__ - 1;
235  template_lapack_larfg(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, &
236  tau[i__ - 1]);
237  e[i__ - 1] = a_ref(i__ - 1, i__);
238  a_ref(i__ - 1, i__) = 1.;
239 
240 /* Compute W(1:i-1,i) */
241 
242  i__2 = i__ - 1;
243  template_blas_symv("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1,
244  i__), &c__1, &c_b16, &w_ref(1, iw), &c__1);
245  if (i__ < *n) {
246  i__2 = i__ - 1;
247  i__3 = *n - i__;
248  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1)
249  , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__
250  + 1, iw), &c__1);
251  i__2 = i__ - 1;
252  i__3 = *n - i__;
253  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__
254  + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
255  w_ref(1, iw), &c__1);
256  i__2 = i__ - 1;
257  i__3 = *n - i__;
258  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ +
259  1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref(
260  i__ + 1, iw), &c__1);
261  i__2 = i__ - 1;
262  i__3 = *n - i__;
263  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw
264  + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
265  w_ref(1, iw), &c__1);
266  }
267  i__2 = i__ - 1;
268  template_blas_scal(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1);
269  i__2 = i__ - 1;
270  alpha = tau[i__ - 1] * -.5 * template_blas_dot(&i__2, &w_ref(1, iw), &
271  c__1, &a_ref(1, i__), &c__1);
272  i__2 = i__ - 1;
273  template_blas_axpy(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), &
274  c__1);
275  }
276 
277 /* L10: */
278  }
279  } else {
280 
281 /* Reduce first NB columns of lower triangle */
282 
283  i__1 = *nb;
284  for (i__ = 1; i__ <= i__1; ++i__) {
285 
286 /* Update A(i:n,i) */
287 
288  i__2 = *n - i__ + 1;
289  i__3 = i__ - 1;
290  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, &
291  w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1);
292  i__2 = *n - i__ + 1;
293  i__3 = i__ - 1;
294  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, &
295  a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1);
296  if (i__ < *n) {
297 
298 /* Generate elementary reflector H(i) to annihilate
299  A(i+2:n,i)
300 
301  Computing MIN */
302  i__2 = i__ + 2;
303  i__3 = *n - i__;
304  template_lapack_larfg(&i__3, &a_ref(i__ + 1, i__), &a_ref(minMACRO(i__2,*n), i__)
305  , &c__1, &tau[i__]);
306  e[i__] = a_ref(i__ + 1, i__);
307  a_ref(i__ + 1, i__) = 1.;
308 
309 /* Compute W(i+1:n,i) */
310 
311  i__2 = *n - i__;
312  template_blas_symv("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, &
313  a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1,
314  i__), &c__1);
315  i__2 = *n - i__;
316  i__3 = i__ - 1;
317  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1),
318  ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
319  i__), &c__1);
320  i__2 = *n - i__;
321  i__3 = i__ - 1;
322  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1)
323  , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
324  i__), &c__1);
325  i__2 = *n - i__;
326  i__3 = i__ - 1;
327  template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1),
328  lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
329  i__), &c__1);
330  i__2 = *n - i__;
331  i__3 = i__ - 1;
332  template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1)
333  , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
334  i__), &c__1);
335  i__2 = *n - i__;
336  template_blas_scal(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1);
337  i__2 = *n - i__;
338  alpha = tau[i__] * -.5 * template_blas_dot(&i__2, &w_ref(i__ + 1, i__), &
339  c__1, &a_ref(i__ + 1, i__), &c__1);
340  i__2 = *n - i__;
341  template_blas_axpy(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__
342  + 1, i__), &c__1);
343  }
344 
345 /* L20: */
346  }
347  }
348 
349  return 0;
350 
351 /* End of DLATRD */
352 
353 } /* dlatrd_ */
354 
355 #undef w_ref
356 #undef a_ref
357 
358 
359 #endif
template_blas_dot
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition: template_blas_dot.h:43
template_lapack_larfg
int template_lapack_larfg(const integer *n, Treal *alpha, Treal *x, const integer *incx, Treal *tau)
Definition: template_lapack_larfg.h:43
template_blas_scal
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition: template_blas_scal.h:43
minMACRO
#define minMACRO(a, b)
Definition: template_blas_common.h:46
w_ref
#define w_ref(a_1, a_2)
template_blas_symv
int template_blas_symv(const char *uplo, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition: template_blas_symv.h:42
template_blas_gemv
int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition: template_blas_gemv.h:43
template_blas_lsame
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
template_blas_axpy
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition: template_blas_axpy.h:43
integer
int integer
Definition: template_blas_common.h:40
template_lapack_latrd
int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *a, const integer *lda, Treal *e, Treal *tau, Treal *w, const integer *ldw)
Definition: template_lapack_latrd.h:42
a_ref
#define a_ref(a_1, a_2)