ergo
template_blas_trmm.h
Go to the documentation of this file.
1 /* Ergo, version 3.8, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2019 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4  * and Anastasia Kruchinina.
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  *
19  * Primary academic reference:
20  * Ergo: An open-source program for linear-scaling electronic structure
21  * calculations,
22  * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23  * Kruchinina,
24  * SoftwareX 7, 107 (2018),
25  * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26  *
27  * For further information about Ergo, see <http://www.ergoscf.org>.
28  */
29 
30  /* This file belongs to the template_lapack part of the Ergo source
31  * code. The source files in the template_lapack directory are modified
32  * versions of files originally distributed as CLAPACK, see the
33  * Copyright/license notice in the file template_lapack/COPYING.
34  */
35 
36 
37 #ifndef TEMPLATE_BLAS_TRMM_HEADER
38 #define TEMPLATE_BLAS_TRMM_HEADER
39 
40 #include "template_blas_common.h"
41 
42 template<class Treal>
43 int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag,
44  const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *
45  lda, Treal *b, const integer *ldb)
46 {
47  /* System generated locals */
48  integer a_dim1, a_offset, b_dim1, b_offset, i__1, i__2, i__3;
49  /* Local variables */
50  integer info;
51  Treal temp;
52  integer i__, j, k;
53  logical lside;
54  integer nrowa;
55  logical upper;
56  logical nounit;
57 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
58 #define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
59 /* Purpose
60  =======
61  DTRMM performs one of the matrix-matrix operations
62  B := alpha*op( A )*B, or B := alpha*B*op( A ),
63  where alpha is a scalar, B is an m by n matrix, A is a unit, or
64  non-unit, upper or lower triangular matrix and op( A ) is one of
65  op( A ) = A or op( A ) = A'.
66  Parameters
67  ==========
68  SIDE - CHARACTER*1.
69  On entry, SIDE specifies whether op( A ) multiplies B from
70  the left or right as follows:
71  SIDE = 'L' or 'l' B := alpha*op( A )*B.
72  SIDE = 'R' or 'r' B := alpha*B*op( A ).
73  Unchanged on exit.
74  UPLO - CHARACTER*1.
75  On entry, UPLO specifies whether the matrix A is an upper or
76  lower triangular matrix as follows:
77  UPLO = 'U' or 'u' A is an upper triangular matrix.
78  UPLO = 'L' or 'l' A is a lower triangular matrix.
79  Unchanged on exit.
80  TRANSA - CHARACTER*1.
81  On entry, TRANSA specifies the form of op( A ) to be used in
82  the matrix multiplication as follows:
83  TRANSA = 'N' or 'n' op( A ) = A.
84  TRANSA = 'T' or 't' op( A ) = A'.
85  TRANSA = 'C' or 'c' op( A ) = A'.
86  Unchanged on exit.
87  DIAG - CHARACTER*1.
88  On entry, DIAG specifies whether or not A is unit triangular
89  as follows:
90  DIAG = 'U' or 'u' A is assumed to be unit triangular.
91  DIAG = 'N' or 'n' A is not assumed to be unit
92  triangular.
93  Unchanged on exit.
94  M - INTEGER.
95  On entry, M specifies the number of rows of B. M must be at
96  least zero.
97  Unchanged on exit.
98  N - INTEGER.
99  On entry, N specifies the number of columns of B. N must be
100  at least zero.
101  Unchanged on exit.
102  ALPHA - DOUBLE PRECISION.
103  On entry, ALPHA specifies the scalar alpha. When alpha is
104  zero then A is not referenced and B need not be set before
105  entry.
106  Unchanged on exit.
107  A - DOUBLE PRECISION array of DIMENSION ( LDA, k ), where k is m
108  when SIDE = 'L' or 'l' and is n when SIDE = 'R' or 'r'.
109  Before entry with UPLO = 'U' or 'u', the leading k by k
110  upper triangular part of the array A must contain the upper
111  triangular matrix and the strictly lower triangular part of
112  A is not referenced.
113  Before entry with UPLO = 'L' or 'l', the leading k by k
114  lower triangular part of the array A must contain the lower
115  triangular matrix and the strictly upper triangular part of
116  A is not referenced.
117  Note that when DIAG = 'U' or 'u', the diagonal elements of
118  A are not referenced either, but are assumed to be unity.
119  Unchanged on exit.
120  LDA - INTEGER.
121  On entry, LDA specifies the first dimension of A as declared
122  in the calling (sub) program. When SIDE = 'L' or 'l' then
123  LDA must be at least max( 1, m ), when SIDE = 'R' or 'r'
124  then LDA must be at least max( 1, n ).
125  Unchanged on exit.
126  B - DOUBLE PRECISION array of DIMENSION ( LDB, n ).
127  Before entry, the leading m by n part of the array B must
128  contain the matrix B, and on exit is overwritten by the
129  transformed matrix.
130  LDB - INTEGER.
131  On entry, LDB specifies the first dimension of B as declared
132  in the calling (sub) program. LDB must be at least
133  max( 1, m ).
134  Unchanged on exit.
135  Level 3 Blas routine.
136  -- Written on 8-February-1989.
137  Jack Dongarra, Argonne National Laboratory.
138  Iain Duff, AERE Harwell.
139  Jeremy Du Croz, Numerical Algorithms Group Ltd.
140  Sven Hammarling, Numerical Algorithms Group Ltd.
141  Test the input parameters.
142  Parameter adjustments */
143  a_dim1 = *lda;
144  a_offset = 1 + a_dim1 * 1;
145  a -= a_offset;
146  b_dim1 = *ldb;
147  b_offset = 1 + b_dim1 * 1;
148  b -= b_offset;
149  /* Function Body */
150  lside = template_blas_lsame(side, "L");
151  if (lside) {
152  nrowa = *m;
153  } else {
154  nrowa = *n;
155  }
156  nounit = template_blas_lsame(diag, "N");
157  upper = template_blas_lsame(uplo, "U");
158  info = 0;
159  if (! lside && ! template_blas_lsame(side, "R")) {
160  info = 1;
161  } else if (! upper && ! template_blas_lsame(uplo, "L")) {
162  info = 2;
163  } else if (! template_blas_lsame(transa, "N") && ! template_blas_lsame(transa,
164  "T") && ! template_blas_lsame(transa, "C")) {
165  info = 3;
166  } else if (! template_blas_lsame(diag, "U") && ! template_blas_lsame(diag,
167  "N")) {
168  info = 4;
169  } else if (*m < 0) {
170  info = 5;
171  } else if (*n < 0) {
172  info = 6;
173  } else if (*lda < maxMACRO(1,nrowa)) {
174  info = 9;
175  } else if (*ldb < maxMACRO(1,*m)) {
176  info = 11;
177  }
178  if (info != 0) {
179  template_blas_erbla("TRMM ", &info);
180  return 0;
181  }
182 /* Quick return if possible. */
183  if (*n == 0) {
184  return 0;
185  }
186 /* And when alpha.eq.zero. */
187  if (*alpha == 0.) {
188  i__1 = *n;
189  for (j = 1; j <= i__1; ++j) {
190  i__2 = *m;
191  for (i__ = 1; i__ <= i__2; ++i__) {
192  b_ref(i__, j) = 0.;
193 /* L10: */
194  }
195 /* L20: */
196  }
197  return 0;
198  }
199 /* Start the operations. */
200  if (lside) {
201  if (template_blas_lsame(transa, "N")) {
202 /* Form B := alpha*A*B. */
203  if (upper) {
204  i__1 = *n;
205  for (j = 1; j <= i__1; ++j) {
206  i__2 = *m;
207  for (k = 1; k <= i__2; ++k) {
208  if (b_ref(k, j) != 0.) {
209  temp = *alpha * b_ref(k, j);
210  i__3 = k - 1;
211  for (i__ = 1; i__ <= i__3; ++i__) {
212  b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
213  i__, k);
214 /* L30: */
215  }
216  if (nounit) {
217  temp *= a_ref(k, k);
218  }
219  b_ref(k, j) = temp;
220  }
221 /* L40: */
222  }
223 /* L50: */
224  }
225  } else {
226  i__1 = *n;
227  for (j = 1; j <= i__1; ++j) {
228  for (k = *m; k >= 1; --k) {
229  if (b_ref(k, j) != 0.) {
230  temp = *alpha * b_ref(k, j);
231  b_ref(k, j) = temp;
232  if (nounit) {
233  b_ref(k, j) = b_ref(k, j) * a_ref(k, k);
234  }
235  i__2 = *m;
236  for (i__ = k + 1; i__ <= i__2; ++i__) {
237  b_ref(i__, j) = b_ref(i__, j) + temp * a_ref(
238  i__, k);
239 /* L60: */
240  }
241  }
242 /* L70: */
243  }
244 /* L80: */
245  }
246  }
247  } else {
248 /* Form B := alpha*A'*B. */
249  if (upper) {
250  i__1 = *n;
251  for (j = 1; j <= i__1; ++j) {
252  for (i__ = *m; i__ >= 1; --i__) {
253  temp = b_ref(i__, j);
254  if (nounit) {
255  temp *= a_ref(i__, i__);
256  }
257  i__2 = i__ - 1;
258  for (k = 1; k <= i__2; ++k) {
259  temp += a_ref(k, i__) * b_ref(k, j);
260 /* L90: */
261  }
262  b_ref(i__, j) = *alpha * temp;
263 /* L100: */
264  }
265 /* L110: */
266  }
267  } else {
268  i__1 = *n;
269  for (j = 1; j <= i__1; ++j) {
270  i__2 = *m;
271  for (i__ = 1; i__ <= i__2; ++i__) {
272  temp = b_ref(i__, j);
273  if (nounit) {
274  temp *= a_ref(i__, i__);
275  }
276  i__3 = *m;
277  for (k = i__ + 1; k <= i__3; ++k) {
278  temp += a_ref(k, i__) * b_ref(k, j);
279 /* L120: */
280  }
281  b_ref(i__, j) = *alpha * temp;
282 /* L130: */
283  }
284 /* L140: */
285  }
286  }
287  }
288  } else {
289  if (template_blas_lsame(transa, "N")) {
290 /* Form B := alpha*B*A. */
291  if (upper) {
292  for (j = *n; j >= 1; --j) {
293  temp = *alpha;
294  if (nounit) {
295  temp *= a_ref(j, j);
296  }
297  i__1 = *m;
298  for (i__ = 1; i__ <= i__1; ++i__) {
299  b_ref(i__, j) = temp * b_ref(i__, j);
300 /* L150: */
301  }
302  i__1 = j - 1;
303  for (k = 1; k <= i__1; ++k) {
304  if (a_ref(k, j) != 0.) {
305  temp = *alpha * a_ref(k, j);
306  i__2 = *m;
307  for (i__ = 1; i__ <= i__2; ++i__) {
308  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
309  i__, k);
310 /* L160: */
311  }
312  }
313 /* L170: */
314  }
315 /* L180: */
316  }
317  } else {
318  i__1 = *n;
319  for (j = 1; j <= i__1; ++j) {
320  temp = *alpha;
321  if (nounit) {
322  temp *= a_ref(j, j);
323  }
324  i__2 = *m;
325  for (i__ = 1; i__ <= i__2; ++i__) {
326  b_ref(i__, j) = temp * b_ref(i__, j);
327 /* L190: */
328  }
329  i__2 = *n;
330  for (k = j + 1; k <= i__2; ++k) {
331  if (a_ref(k, j) != 0.) {
332  temp = *alpha * a_ref(k, j);
333  i__3 = *m;
334  for (i__ = 1; i__ <= i__3; ++i__) {
335  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
336  i__, k);
337 /* L200: */
338  }
339  }
340 /* L210: */
341  }
342 /* L220: */
343  }
344  }
345  } else {
346 /* Form B := alpha*B*A'. */
347  if (upper) {
348  i__1 = *n;
349  for (k = 1; k <= i__1; ++k) {
350  i__2 = k - 1;
351  for (j = 1; j <= i__2; ++j) {
352  if (a_ref(j, k) != 0.) {
353  temp = *alpha * a_ref(j, k);
354  i__3 = *m;
355  for (i__ = 1; i__ <= i__3; ++i__) {
356  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
357  i__, k);
358 /* L230: */
359  }
360  }
361 /* L240: */
362  }
363  temp = *alpha;
364  if (nounit) {
365  temp *= a_ref(k, k);
366  }
367  if (temp != 1.) {
368  i__2 = *m;
369  for (i__ = 1; i__ <= i__2; ++i__) {
370  b_ref(i__, k) = temp * b_ref(i__, k);
371 /* L250: */
372  }
373  }
374 /* L260: */
375  }
376  } else {
377  for (k = *n; k >= 1; --k) {
378  i__1 = *n;
379  for (j = k + 1; j <= i__1; ++j) {
380  if (a_ref(j, k) != 0.) {
381  temp = *alpha * a_ref(j, k);
382  i__2 = *m;
383  for (i__ = 1; i__ <= i__2; ++i__) {
384  b_ref(i__, j) = b_ref(i__, j) + temp * b_ref(
385  i__, k);
386 /* L270: */
387  }
388  }
389 /* L280: */
390  }
391  temp = *alpha;
392  if (nounit) {
393  temp *= a_ref(k, k);
394  }
395  if (temp != 1.) {
396  i__1 = *m;
397  for (i__ = 1; i__ <= i__1; ++i__) {
398  b_ref(i__, k) = temp * b_ref(i__, k);
399 /* L290: */
400  }
401  }
402 /* L300: */
403  }
404  }
405  }
406  }
407  return 0;
408 /* End of DTRMM . */
409 } /* dtrmm_ */
410 #undef b_ref
411 #undef a_ref
412 
413 #endif
b_ref
#define b_ref(a_1, a_2)
template_blas_common.h
mat::side
side
Definition: Matrix.h:75
a_ref
#define a_ref(a_1, a_2)
logical
bool logical
Definition: template_blas_common.h:41
template_blas_trmm
int template_blas_trmm(const char *side, const char *uplo, const char *transa, const char *diag, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, Treal *b, const integer *ldb)
Definition: template_blas_trmm.h:43
template_blas_erbla
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:146
template_blas_lsame
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:46
integer
int integer
Definition: template_blas_common.h:40
maxMACRO
#define maxMACRO(a, b)
Definition: template_blas_common.h:45