ergo
template_blas_gemm.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_BLAS_GEMM_HEADER
38#define TEMPLATE_BLAS_GEMM_HEADER
39
41
42template<class Treal>
43int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *
44 n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda,
45 const Treal *b, const integer *ldb, const Treal *beta, Treal *c__,
46 const integer *ldc)
47{
48 /* System generated locals */
49 integer a_dim1, a_offset, b_dim1, b_offset, c_dim1, c_offset, i__1, i__2,
50 i__3;
51 /* Local variables */
52 integer info;
53 logical nota, notb;
54 Treal temp;
55 integer i__, j, l;
56 integer nrowa, nrowb;
57#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
58#define b_ref(a_1,a_2) b[(a_2)*b_dim1 + a_1]
59#define c___ref(a_1,a_2) c__[(a_2)*c_dim1 + a_1]
60/* Purpose
61 =======
62 DGEMM performs one of the matrix-matrix operations
63 C := alpha*op( A )*op( B ) + beta*C,
64 where op( X ) is one of
65 op( X ) = X or op( X ) = X',
66 alpha and beta are scalars, and A, B and C are matrices, with op( A )
67 an m by k matrix, op( B ) a k by n matrix and C an m by n matrix.
68 Parameters
69 ==========
70 TRANSA - CHARACTER*1.
71 On entry, TRANSA specifies the form of op( A ) to be used in
72 the matrix multiplication as follows:
73 TRANSA = 'N' or 'n', op( A ) = A.
74 TRANSA = 'T' or 't', op( A ) = A'.
75 TRANSA = 'C' or 'c', op( A ) = A'.
76 Unchanged on exit.
77 TRANSB - CHARACTER*1.
78 On entry, TRANSB specifies the form of op( B ) to be used in
79 the matrix multiplication as follows:
80 TRANSB = 'N' or 'n', op( B ) = B.
81 TRANSB = 'T' or 't', op( B ) = B'.
82 TRANSB = 'C' or 'c', op( B ) = B'.
83 Unchanged on exit.
84 M - INTEGER.
85 On entry, M specifies the number of rows of the matrix
86 op( A ) and of the matrix C. M must be at least zero.
87 Unchanged on exit.
88 N - INTEGER.
89 On entry, N specifies the number of columns of the matrix
90 op( B ) and the number of columns of the matrix C. N must be
91 at least zero.
92 Unchanged on exit.
93 K - INTEGER.
94 On entry, K specifies the number of columns of the matrix
95 op( A ) and the number of rows of the matrix op( B ). K must
96 be at least zero.
97 Unchanged on exit.
98 ALPHA - DOUBLE PRECISION.
99 On entry, ALPHA specifies the scalar alpha.
100 Unchanged on exit.
101 A - DOUBLE PRECISION array of DIMENSION ( LDA, ka ), where ka is
102 k when TRANSA = 'N' or 'n', and is m otherwise.
103 Before entry with TRANSA = 'N' or 'n', the leading m by k
104 part of the array A must contain the matrix A, otherwise
105 the leading k by m part of the array A must contain the
106 matrix A.
107 Unchanged on exit.
108 LDA - INTEGER.
109 On entry, LDA specifies the first dimension of A as declared
110 in the calling (sub) program. When TRANSA = 'N' or 'n' then
111 LDA must be at least max( 1, m ), otherwise LDA must be at
112 least max( 1, k ).
113 Unchanged on exit.
114 B - DOUBLE PRECISION array of DIMENSION ( LDB, kb ), where kb is
115 n when TRANSB = 'N' or 'n', and is k otherwise.
116 Before entry with TRANSB = 'N' or 'n', the leading k by n
117 part of the array B must contain the matrix B, otherwise
118 the leading n by k part of the array B must contain the
119 matrix B.
120 Unchanged on exit.
121 LDB - INTEGER.
122 On entry, LDB specifies the first dimension of B as declared
123 in the calling (sub) program. When TRANSB = 'N' or 'n' then
124 LDB must be at least max( 1, k ), otherwise LDB must be at
125 least max( 1, n ).
126 Unchanged on exit.
127 BETA - DOUBLE PRECISION.
128 On entry, BETA specifies the scalar beta. When BETA is
129 supplied as zero then C need not be set on input.
130 Unchanged on exit.
131 C - DOUBLE PRECISION array of DIMENSION ( LDC, n ).
132 Before entry, the leading m by n part of the array C must
133 contain the matrix C, except when beta is zero, in which
134 case C need not be set on entry.
135 On exit, the array C is overwritten by the m by n matrix
136 ( alpha*op( A )*op( B ) + beta*C ).
137 LDC - INTEGER.
138 On entry, LDC specifies the first dimension of C as declared
139 in the calling (sub) program. LDC must be at least
140 max( 1, m ).
141 Unchanged on exit.
142 Level 3 Blas routine.
143 -- Written on 8-February-1989.
144 Jack Dongarra, Argonne National Laboratory.
145 Iain Duff, AERE Harwell.
146 Jeremy Du Croz, Numerical Algorithms Group Ltd.
147 Sven Hammarling, Numerical Algorithms Group Ltd.
148 Set NOTA and NOTB as true if A and B respectively are not
149 transposed and set NROWA, NCOLA and NROWB as the number of rows
150 and columns of A and the number of rows of B respectively.
151 Parameter adjustments */
152 a_dim1 = *lda;
153 a_offset = 1 + a_dim1 * 1;
154 a -= a_offset;
155 b_dim1 = *ldb;
156 b_offset = 1 + b_dim1 * 1;
157 b -= b_offset;
158 c_dim1 = *ldc;
159 c_offset = 1 + c_dim1 * 1;
160 c__ -= c_offset;
161 /* Function Body */
162 nota = template_blas_lsame(transa, "N");
163 notb = template_blas_lsame(transb, "N");
164 if (nota) {
165 nrowa = *m;
166 } else {
167 nrowa = *k;
168 }
169 if (notb) {
170 nrowb = *k;
171 } else {
172 nrowb = *n;
173 }
174/* Test the input parameters. */
175 info = 0;
176 if (! nota && ! template_blas_lsame(transa, "C") && ! template_blas_lsame(
177 transa, "T")) {
178 info = 1;
179 } else if (! notb && ! template_blas_lsame(transb, "C") && !
180 template_blas_lsame(transb, "T")) {
181 info = 2;
182 } else if (*m < 0) {
183 info = 3;
184 } else if (*n < 0) {
185 info = 4;
186 } else if (*k < 0) {
187 info = 5;
188 } else if (*lda < maxMACRO(1,nrowa)) {
189 info = 8;
190 } else if (*ldb < maxMACRO(1,nrowb)) {
191 info = 10;
192 } else if (*ldc < maxMACRO(1,*m)) {
193 info = 13;
194 }
195 if (info != 0) {
196 template_blas_erbla("DGEMM ", &info);
197 return 0;
198 }
199/* Quick return if possible. */
200 if (*m == 0 || *n == 0 || ( (*alpha == 0. || *k == 0) && *beta == 1.) ) {
201 return 0;
202 }
203/* And if alpha.eq.zero. */
204 if (*alpha == 0.) {
205 if (*beta == 0.) {
206 i__1 = *n;
207 for (j = 1; j <= i__1; ++j) {
208 i__2 = *m;
209 for (i__ = 1; i__ <= i__2; ++i__) {
210 c___ref(i__, j) = 0.;
211/* L10: */
212 }
213/* L20: */
214 }
215 } else {
216 i__1 = *n;
217 for (j = 1; j <= i__1; ++j) {
218 i__2 = *m;
219 for (i__ = 1; i__ <= i__2; ++i__) {
220 c___ref(i__, j) = *beta * c___ref(i__, j);
221/* L30: */
222 }
223/* L40: */
224 }
225 }
226 return 0;
227 }
228/* Start the operations. */
229 if (notb) {
230 if (nota) {
231/* Form C := alpha*A*B + beta*C. */
232 i__1 = *n;
233 for (j = 1; j <= i__1; ++j) {
234 if (*beta == 0.) {
235 i__2 = *m;
236 for (i__ = 1; i__ <= i__2; ++i__) {
237 c___ref(i__, j) = 0.;
238/* L50: */
239 }
240 } else if (*beta != 1.) {
241 i__2 = *m;
242 for (i__ = 1; i__ <= i__2; ++i__) {
243 c___ref(i__, j) = *beta * c___ref(i__, j);
244/* L60: */
245 }
246 }
247 i__2 = *k;
248 for (l = 1; l <= i__2; ++l) {
249 if (b_ref(l, j) != 0.) {
250 temp = *alpha * b_ref(l, j);
251 i__3 = *m;
252 for (i__ = 1; i__ <= i__3; ++i__) {
253 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
254 i__, l);
255/* L70: */
256 }
257 }
258/* L80: */
259 }
260/* L90: */
261 }
262 } else {
263/* Form C := alpha*A'*B + beta*C */
264 i__1 = *n;
265 for (j = 1; j <= i__1; ++j) {
266 i__2 = *m;
267 for (i__ = 1; i__ <= i__2; ++i__) {
268 temp = 0.;
269 i__3 = *k;
270 for (l = 1; l <= i__3; ++l) {
271 temp += a_ref(l, i__) * b_ref(l, j);
272/* L100: */
273 }
274 if (*beta == 0.) {
275 c___ref(i__, j) = *alpha * temp;
276 } else {
277 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
278 j);
279 }
280/* L110: */
281 }
282/* L120: */
283 }
284 }
285 } else {
286 if (nota) {
287/* Form C := alpha*A*B' + beta*C */
288 i__1 = *n;
289 for (j = 1; j <= i__1; ++j) {
290 if (*beta == 0.) {
291 i__2 = *m;
292 for (i__ = 1; i__ <= i__2; ++i__) {
293 c___ref(i__, j) = 0.;
294/* L130: */
295 }
296 } else if (*beta != 1.) {
297 i__2 = *m;
298 for (i__ = 1; i__ <= i__2; ++i__) {
299 c___ref(i__, j) = *beta * c___ref(i__, j);
300/* L140: */
301 }
302 }
303 i__2 = *k;
304 for (l = 1; l <= i__2; ++l) {
305 if (b_ref(j, l) != 0.) {
306 temp = *alpha * b_ref(j, l);
307 i__3 = *m;
308 for (i__ = 1; i__ <= i__3; ++i__) {
309 c___ref(i__, j) = c___ref(i__, j) + temp * a_ref(
310 i__, l);
311/* L150: */
312 }
313 }
314/* L160: */
315 }
316/* L170: */
317 }
318 } else {
319/* Form C := alpha*A'*B' + beta*C */
320 i__1 = *n;
321 for (j = 1; j <= i__1; ++j) {
322 i__2 = *m;
323 for (i__ = 1; i__ <= i__2; ++i__) {
324 temp = 0.;
325 i__3 = *k;
326 for (l = 1; l <= i__3; ++l) {
327 temp += a_ref(l, i__) * b_ref(j, l);
328/* L180: */
329 }
330 if (*beta == 0.) {
331 c___ref(i__, j) = *alpha * temp;
332 } else {
333 c___ref(i__, j) = *alpha * temp + *beta * c___ref(i__,
334 j);
335 }
336/* L190: */
337 }
338/* L200: */
339 }
340 }
341 }
342 return 0;
343/* End of DGEMM . */
344} /* dgemm_ */
345#undef c___ref
346#undef b_ref
347#undef a_ref
348
349#endif
int template_blas_erbla(const char *srname, integer *info)
Definition template_blas_common.cc:146
logical template_blas_lsame(const char *ca, const char *cb)
Definition template_blas_common.cc:46
int integer
Definition template_blas_common.h:40
#define maxMACRO(a, b)
Definition template_blas_common.h:45
bool logical
Definition template_blas_common.h:41
int template_blas_gemm(const char *transa, const char *transb, const integer *m, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition template_blas_gemm.h:43
#define c___ref(a_1, a_2)
#define a_ref(a_1, a_2)
#define b_ref(a_1, a_2)