ergo
template_lapack_orgql.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_ORGQL_HEADER
38 #define TEMPLATE_LAPACK_ORGQL_HEADER
39 
40 
41 template<class Treal>
42 int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal *
43  a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork,
44  integer *info)
45 {
46 /* -- LAPACK routine (version 3.0) --
47  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48  Courant Institute, Argonne National Lab, and Rice University
49  June 30, 1999
50 
51 
52  Purpose
53  =======
54 
55  DORGQL generates an M-by-N real matrix Q with orthonormal columns,
56  which is defined as the last N columns of a product of K elementary
57  reflectors of order M
58 
59  Q = H(k) . . . H(2) H(1)
60 
61  as returned by DGEQLF.
62 
63  Arguments
64  =========
65 
66  M (input) INTEGER
67  The number of rows of the matrix Q. M >= 0.
68 
69  N (input) INTEGER
70  The number of columns of the matrix Q. M >= N >= 0.
71 
72  K (input) INTEGER
73  The number of elementary reflectors whose product defines the
74  matrix Q. N >= K >= 0.
75 
76  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
77  On entry, the (n-k+i)-th column must contain the vector which
78  defines the elementary reflector H(i), for i = 1,2,...,k, as
79  returned by DGEQLF in the last k columns of its array
80  argument A.
81  On exit, the M-by-N matrix Q.
82 
83  LDA (input) INTEGER
84  The first dimension of the array A. LDA >= max(1,M).
85 
86  TAU (input) DOUBLE PRECISION array, dimension (K)
87  TAU(i) must contain the scalar factor of the elementary
88  reflector H(i), as returned by DGEQLF.
89 
90  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
91  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92 
93  LWORK (input) INTEGER
94  The dimension of the array WORK. LWORK >= max(1,N).
95  For optimum performance LWORK >= N*NB, where NB is the
96  optimal blocksize.
97 
98  If LWORK = -1, then a workspace query is assumed; the routine
99  only calculates the optimal size of the WORK array, returns
100  this value as the first entry of the WORK array, and no error
101  message related to LWORK is issued by XERBLA.
102 
103  INFO (output) INTEGER
104  = 0: successful exit
105  < 0: if INFO = -i, the i-th argument has an illegal value
106 
107  =====================================================================
108 
109 
110  Test the input arguments
111 
112  Parameter adjustments */
113  /* Table of constant values */
114  integer c__1 = 1;
115  integer c_n1 = -1;
116  integer c__3 = 3;
117  integer c__2 = 2;
118 
119  /* System generated locals */
120  integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
121  /* Local variables */
122  integer i__, j, l, nbmin, iinfo;
123  integer ib, nb, kk;
124  integer nx;
125  integer ldwork, lwkopt;
126  logical lquery;
127  integer iws;
128 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
129 
130 
131  a_dim1 = *lda;
132  a_offset = 1 + a_dim1 * 1;
133  a -= a_offset;
134  --tau;
135  --work;
136 
137  /* Function Body */
138  *info = 0;
139  nb = template_lapack_ilaenv(&c__1, "DORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
140  lwkopt = maxMACRO(1,*n) * nb;
141  work[1] = (Treal) lwkopt;
142  lquery = *lwork == -1;
143  if (*m < 0) {
144  *info = -1;
145  } else if (*n < 0 || *n > *m) {
146  *info = -2;
147  } else if (*k < 0 || *k > *n) {
148  *info = -3;
149  } else if (*lda < maxMACRO(1,*m)) {
150  *info = -5;
151  } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
152  *info = -8;
153  }
154  if (*info != 0) {
155  i__1 = -(*info);
156  template_blas_erbla("ORGQL ", &i__1);
157  return 0;
158  } else if (lquery) {
159  return 0;
160  }
161 
162 /* Quick return if possible */
163 
164  if (*n <= 0) {
165  work[1] = 1.;
166  return 0;
167  }
168 
169  nbmin = 2;
170  nx = 0;
171  iws = *n;
172  if (nb > 1 && nb < *k) {
173 
174 /* Determine when to cross over from blocked to unblocked code.
175 
176  Computing MAX */
177  i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQL", " ", m, n, k, &c_n1, (
178  ftnlen)6, (ftnlen)1);
179  nx = maxMACRO(i__1,i__2);
180  if (nx < *k) {
181 
182 /* Determine if workspace is large enough for blocked code. */
183 
184  ldwork = *n;
185  iws = ldwork * nb;
186  if (*lwork < iws) {
187 
188 /* Not enough workspace to use optimal NB: reduce NB and
189  determine the minimum value of NB. */
190 
191  nb = *lwork / ldwork;
192 /* Computing MAX */
193  i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQL", " ", m, n, k, &c_n1,
194  (ftnlen)6, (ftnlen)1);
195  nbmin = maxMACRO(i__1,i__2);
196  }
197  }
198  }
199 
200  if (nb >= nbmin && nb < *k && nx < *k) {
201 
202 /* Use blocked code after the first block.
203  The last kk columns are handled by the block method.
204 
205  Computing MIN */
206  i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb;
207  kk = minMACRO(i__1,i__2);
208 
209 /* Set A(m-kk+1:m,1:n-kk) to zero. */
210 
211  i__1 = *n - kk;
212  for (j = 1; j <= i__1; ++j) {
213  i__2 = *m;
214  for (i__ = *m - kk + 1; i__ <= i__2; ++i__) {
215  a_ref(i__, j) = 0.;
216 /* L10: */
217  }
218 /* L20: */
219  }
220  } else {
221  kk = 0;
222  }
223 
224 /* Use unblocked code for the first or only block. */
225 
226  i__1 = *m - kk;
227  i__2 = *n - kk;
228  i__3 = *k - kk;
229  template_lapack_org2l(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo)
230  ;
231 
232  if (kk > 0) {
233 
234 /* Use blocked code */
235 
236  i__1 = *k;
237  i__2 = nb;
238  for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
239  i__2) {
240 /* Computing MIN */
241  i__3 = nb, i__4 = *k - i__ + 1;
242  ib = minMACRO(i__3,i__4);
243  if (*n - *k + i__ > 1) {
244 
245 /* Form the triangular factor of the block reflector
246  H = H(i+ib-1) . . . H(i+1) H(i) */
247 
248  i__3 = *m - *k + i__ + ib - 1;
249  template_lapack_larft("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - *
250  k + i__), lda, &tau[i__], &work[1], &ldwork);
251 
252 /* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
253 
254  i__3 = *m - *k + i__ + ib - 1;
255  i__4 = *n - *k + i__ - 1;
256  template_lapack_larfb("Left", "No transpose", "Backward", "Columnwise", &
257  i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, &
258  work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
259  ldwork);
260  }
261 
262 /* Apply H to rows 1:m-k+i+ib-1 of current block */
263 
264  i__3 = *m - *k + i__ + ib - 1;
265  template_lapack_org2l(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__],
266  &work[1], &iinfo);
267 
268 /* Set rows m-k+i+ib:m of current block to zero */
269 
270  i__3 = *n - *k + i__ + ib - 1;
271  for (j = *n - *k + i__; j <= i__3; ++j) {
272  i__4 = *m;
273  for (l = *m - *k + i__ + ib; l <= i__4; ++l) {
274  a_ref(l, j) = 0.;
275 /* L30: */
276  }
277 /* L40: */
278  }
279 /* L50: */
280  }
281  }
282 
283  work[1] = (Treal) iws;
284  return 0;
285 
286 /* End of DORGQL */
287 
288 } /* dorgql_ */
289 
290 #undef a_ref
291 
292 
293 #endif
ftnlen
int ftnlen
Definition: template_blas_common.h:42
template_lapack_ilaenv
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:281
minMACRO
#define minMACRO(a, b)
Definition: template_blas_common.h:46
template_lapack_orgql
int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_orgql.h:42
logical
bool logical
Definition: template_blas_common.h:41
template_blas_erbla
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
integer
int integer
Definition: template_blas_common.h:40
template_lapack_larft
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition: template_lapack_larft.h:42
template_lapack_org2l
int template_lapack_org2l(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, integer *info)
Definition: template_lapack_org2l.h:42
a_ref
#define a_ref(a_1, a_2)
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45
template_lapack_larfb
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition: template_lapack_larfb.h:42