ergo
template_lapack_lasq5.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_LAPACK_LASQ5_HEADER
38 #define TEMPLATE_LAPACK_LASQ5_HEADER
39 
40 template<class Treal>
41 int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__,
42  integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1,
43  Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2,
44  logical *ieee)
45 {
46  /* System generated locals */
47  integer i__1;
48  Treal d__1, d__2;
49 
50  /* Local variables */
51  Treal d__;
52  integer j4, j4p2;
53  Treal emin, temp;
54 
55 
56 /* -- LAPACK routine (version 3.2) -- */
57 
58 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
59 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
60 /* -- Berkeley -- */
61 /* -- November 2008 -- */
62 
63 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
64 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
65 
66 /* .. Scalar Arguments .. */
67 /* .. */
68 /* .. Array Arguments .. */
69 /* .. */
70 
71 /* Purpose */
72 /* ======= */
73 
74 /* DLASQ5 computes one dqds transform in ping-pong form, one */
75 /* version for IEEE machines another for non IEEE machines. */
76 
77 /* Arguments */
78 /* ========= */
79 
80 /* I0 (input) INTEGER */
81 /* First index. */
82 
83 /* N0 (input) INTEGER */
84 /* Last index. */
85 
86 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
87 /* Z holds the qd array. EMIN is stored in Z(4*N0) to avoid */
88 /* an extra argument. */
89 
90 /* PP (input) INTEGER */
91 /* PP=0 for ping, PP=1 for pong. */
92 
93 /* TAU (input) DOUBLE PRECISION */
94 /* This is the shift. */
95 
96 /* DMIN (output) DOUBLE PRECISION */
97 /* Minimum value of d. */
98 
99 /* DMIN1 (output) DOUBLE PRECISION */
100 /* Minimum value of d, excluding D( N0 ). */
101 
102 /* DMIN2 (output) DOUBLE PRECISION */
103 /* Minimum value of d, excluding D( N0 ) and D( N0-1 ). */
104 
105 /* DN (output) DOUBLE PRECISION */
106 /* d(N0), the last value of d. */
107 
108 /* DNM1 (output) DOUBLE PRECISION */
109 /* d(N0-1). */
110 
111 /* DNM2 (output) DOUBLE PRECISION */
112 /* d(N0-2). */
113 
114 /* IEEE (input) LOGICAL */
115 /* Flag for IEEE or non IEEE arithmetic. */
116 
117 /* ===================================================================== */
118 
119 /* .. Parameter .. */
120 /* .. */
121 /* .. Local Scalars .. */
122 /* .. */
123 /* .. Intrinsic Functions .. */
124 /* .. */
125 /* .. Executable Statements .. */
126 
127  /* Parameter adjustments */
128  --z__;
129 
130  /* Function Body */
131  if (*n0 - *i0 - 1 <= 0) {
132  return 0;
133  }
134 
135  j4 = (*i0 << 2) + *pp - 3;
136  emin = z__[j4 + 4];
137  d__ = z__[j4] - *tau;
138  *dmin__ = d__;
139  *dmin1 = -z__[j4];
140 
141  if (*ieee) {
142 
143 /* Code for IEEE arithmetic. */
144 
145  if (*pp == 0) {
146  i__1 = ( *n0 - 3 ) << 2;
147  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
148  z__[j4 - 2] = d__ + z__[j4 - 1];
149  temp = z__[j4 + 1] / z__[j4 - 2];
150  d__ = d__ * temp - *tau;
151  *dmin__ = minMACRO(*dmin__,d__);
152  z__[j4] = z__[j4 - 1] * temp;
153 /* Computing MIN */
154  d__1 = z__[j4];
155  emin = minMACRO(d__1,emin);
156 /* L10: */
157  }
158  } else {
159  i__1 = ( *n0 - 3 ) << 2;
160  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
161  z__[j4 - 3] = d__ + z__[j4];
162  temp = z__[j4 + 2] / z__[j4 - 3];
163  d__ = d__ * temp - *tau;
164  *dmin__ = minMACRO(*dmin__,d__);
165  z__[j4 - 1] = z__[j4] * temp;
166 /* Computing MIN */
167  d__1 = z__[j4 - 1];
168  emin = minMACRO(d__1,emin);
169 /* L20: */
170  }
171  }
172 
173 /* Unroll last two steps. */
174 
175  *dnm2 = d__;
176  *dmin2 = *dmin__;
177  j4 = ( ( *n0 - 2 ) << 2) - *pp;
178  j4p2 = j4 + (*pp << 1) - 1;
179  z__[j4 - 2] = *dnm2 + z__[j4p2];
180  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
181  *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
182  *dmin__ = minMACRO(*dmin__,*dnm1);
183 
184  *dmin1 = *dmin__;
185  j4 += 4;
186  j4p2 = j4 + (*pp << 1) - 1;
187  z__[j4 - 2] = *dnm1 + z__[j4p2];
188  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
189  *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
190  *dmin__ = minMACRO(*dmin__,*dn);
191 
192  } else {
193 
194 /* Code for non IEEE arithmetic. */
195 
196  if (*pp == 0) {
197  i__1 = ( *n0 - 3 ) << 2;
198  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
199  z__[j4 - 2] = d__ + z__[j4 - 1];
200  if (d__ < 0.) {
201  return 0;
202  } else {
203  z__[j4] = z__[j4 + 1] * (z__[j4 - 1] / z__[j4 - 2]);
204  d__ = z__[j4 + 1] * (d__ / z__[j4 - 2]) - *tau;
205  }
206  *dmin__ = minMACRO(*dmin__,d__);
207 /* Computing MIN */
208  d__1 = emin, d__2 = z__[j4];
209  emin = minMACRO(d__1,d__2);
210 /* L30: */
211  }
212  } else {
213  i__1 = ( *n0 - 3 ) << 2;
214  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
215  z__[j4 - 3] = d__ + z__[j4];
216  if (d__ < 0.) {
217  return 0;
218  } else {
219  z__[j4 - 1] = z__[j4 + 2] * (z__[j4] / z__[j4 - 3]);
220  d__ = z__[j4 + 2] * (d__ / z__[j4 - 3]) - *tau;
221  }
222  *dmin__ = minMACRO(*dmin__,d__);
223 /* Computing MIN */
224  d__1 = emin, d__2 = z__[j4 - 1];
225  emin = minMACRO(d__1,d__2);
226 /* L40: */
227  }
228  }
229 
230 /* Unroll last two steps. */
231 
232  *dnm2 = d__;
233  *dmin2 = *dmin__;
234  j4 = ( ( *n0 - 2 ) << 2) - *pp;
235  j4p2 = j4 + (*pp << 1) - 1;
236  z__[j4 - 2] = *dnm2 + z__[j4p2];
237  if (*dnm2 < 0.) {
238  return 0;
239  } else {
240  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
241  *dnm1 = z__[j4p2 + 2] * (*dnm2 / z__[j4 - 2]) - *tau;
242  }
243  *dmin__ = minMACRO(*dmin__,*dnm1);
244 
245  *dmin1 = *dmin__;
246  j4 += 4;
247  j4p2 = j4 + (*pp << 1) - 1;
248  z__[j4 - 2] = *dnm1 + z__[j4p2];
249  if (*dnm1 < 0.) {
250  return 0;
251  } else {
252  z__[j4] = z__[j4p2 + 2] * (z__[j4p2] / z__[j4 - 2]);
253  *dn = z__[j4p2 + 2] * (*dnm1 / z__[j4 - 2]) - *tau;
254  }
255  *dmin__ = minMACRO(*dmin__,*dn);
256 
257  }
258 
259  z__[j4 + 2] = *dn;
260  z__[(*n0 << 2) - *pp] = emin;
261  return 0;
262 
263 /* End of DLASQ5 */
264 
265 } /* dlasq5_ */
266 
267 #endif
template_lapack_lasq5
int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2, logical *ieee)
Definition: template_lapack_lasq5.h:41
minMACRO
#define minMACRO(a, b)
Definition: template_blas_common.h:46
logical
bool logical
Definition: template_blas_common.h:41
integer
int integer
Definition: template_blas_common.h:40