ergo
template_lapack_orgql.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_ORGQL_HEADER
38#define TEMPLATE_LAPACK_ORGQL_HEADER
39
40
41template<class Treal>
42int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal *
43 a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork,
44 integer *info)
45{
46/* -- LAPACK routine (version 3.0) --
47 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48 Courant Institute, Argonne National Lab, and Rice University
49 June 30, 1999
50
51
52 Purpose
53 =======
54
55 DORGQL generates an M-by-N real matrix Q with orthonormal columns,
56 which is defined as the last N columns of a product of K elementary
57 reflectors of order M
58
59 Q = H(k) . . . H(2) H(1)
60
61 as returned by DGEQLF.
62
63 Arguments
64 =========
65
66 M (input) INTEGER
67 The number of rows of the matrix Q. M >= 0.
68
69 N (input) INTEGER
70 The number of columns of the matrix Q. M >= N >= 0.
71
72 K (input) INTEGER
73 The number of elementary reflectors whose product defines the
74 matrix Q. N >= K >= 0.
75
76 A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
77 On entry, the (n-k+i)-th column must contain the vector which
78 defines the elementary reflector H(i), for i = 1,2,...,k, as
79 returned by DGEQLF in the last k columns of its array
80 argument A.
81 On exit, the M-by-N matrix Q.
82
83 LDA (input) INTEGER
84 The first dimension of the array A. LDA >= max(1,M).
85
86 TAU (input) DOUBLE PRECISION array, dimension (K)
87 TAU(i) must contain the scalar factor of the elementary
88 reflector H(i), as returned by DGEQLF.
89
90 WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
91 On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
92
93 LWORK (input) INTEGER
94 The dimension of the array WORK. LWORK >= max(1,N).
95 For optimum performance LWORK >= N*NB, where NB is the
96 optimal blocksize.
97
98 If LWORK = -1, then a workspace query is assumed; the routine
99 only calculates the optimal size of the WORK array, returns
100 this value as the first entry of the WORK array, and no error
101 message related to LWORK is issued by XERBLA.
102
103 INFO (output) INTEGER
104 = 0: successful exit
105 < 0: if INFO = -i, the i-th argument has an illegal value
106
107 =====================================================================
108
109
110 Test the input arguments
111
112 Parameter adjustments */
113 /* Table of constant values */
114 integer c__1 = 1;
115 integer c_n1 = -1;
116 integer c__3 = 3;
117 integer c__2 = 2;
118
119 /* System generated locals */
120 integer a_dim1, a_offset, i__1, i__2, i__3, i__4;
121 /* Local variables */
122 integer i__, j, l, nbmin, iinfo;
123 integer ib, nb, kk;
124 integer nx;
125 integer ldwork, lwkopt;
126 logical lquery;
127 integer iws;
128#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
129
130
131 a_dim1 = *lda;
132 a_offset = 1 + a_dim1 * 1;
133 a -= a_offset;
134 --tau;
135 --work;
136
137 /* Function Body */
138 *info = 0;
139 nb = template_lapack_ilaenv(&c__1, "DORGQL", " ", m, n, k, &c_n1, (ftnlen)6, (ftnlen)1);
140 lwkopt = maxMACRO(1,*n) * nb;
141 work[1] = (Treal) lwkopt;
142 lquery = *lwork == -1;
143 if (*m < 0) {
144 *info = -1;
145 } else if (*n < 0 || *n > *m) {
146 *info = -2;
147 } else if (*k < 0 || *k > *n) {
148 *info = -3;
149 } else if (*lda < maxMACRO(1,*m)) {
150 *info = -5;
151 } else if (*lwork < maxMACRO(1,*n) && ! lquery) {
152 *info = -8;
153 }
154 if (*info != 0) {
155 i__1 = -(*info);
156 template_blas_erbla("ORGQL ", &i__1);
157 return 0;
158 } else if (lquery) {
159 return 0;
160 }
161
162/* Quick return if possible */
163
164 if (*n <= 0) {
165 work[1] = 1.;
166 return 0;
167 }
168
169 nbmin = 2;
170 nx = 0;
171 iws = *n;
172 if (nb > 1 && nb < *k) {
173
174/* Determine when to cross over from blocked to unblocked code.
175
176 Computing MAX */
177 i__1 = 0, i__2 = template_lapack_ilaenv(&c__3, "DORGQL", " ", m, n, k, &c_n1, (
178 ftnlen)6, (ftnlen)1);
179 nx = maxMACRO(i__1,i__2);
180 if (nx < *k) {
181
182/* Determine if workspace is large enough for blocked code. */
183
184 ldwork = *n;
185 iws = ldwork * nb;
186 if (*lwork < iws) {
187
188/* Not enough workspace to use optimal NB: reduce NB and
189 determine the minimum value of NB. */
190
191 nb = *lwork / ldwork;
192/* Computing MAX */
193 i__1 = 2, i__2 = template_lapack_ilaenv(&c__2, "DORGQL", " ", m, n, k, &c_n1,
194 (ftnlen)6, (ftnlen)1);
195 nbmin = maxMACRO(i__1,i__2);
196 }
197 }
198 }
199
200 if (nb >= nbmin && nb < *k && nx < *k) {
201
202/* Use blocked code after the first block.
203 The last kk columns are handled by the block method.
204
205 Computing MIN */
206 i__1 = *k, i__2 = (*k - nx + nb - 1) / nb * nb;
207 kk = minMACRO(i__1,i__2);
208
209/* Set A(m-kk+1:m,1:n-kk) to zero. */
210
211 i__1 = *n - kk;
212 for (j = 1; j <= i__1; ++j) {
213 i__2 = *m;
214 for (i__ = *m - kk + 1; i__ <= i__2; ++i__) {
215 a_ref(i__, j) = 0.;
216/* L10: */
217 }
218/* L20: */
219 }
220 } else {
221 kk = 0;
222 }
223
224/* Use unblocked code for the first or only block. */
225
226 i__1 = *m - kk;
227 i__2 = *n - kk;
228 i__3 = *k - kk;
229 template_lapack_org2l(&i__1, &i__2, &i__3, &a[a_offset], lda, &tau[1], &work[1], &iinfo)
230 ;
231
232 if (kk > 0) {
233
234/* Use blocked code */
235
236 i__1 = *k;
237 i__2 = nb;
238 for (i__ = *k - kk + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
239 i__2) {
240/* Computing MIN */
241 i__3 = nb, i__4 = *k - i__ + 1;
242 ib = minMACRO(i__3,i__4);
243 if (*n - *k + i__ > 1) {
244
245/* Form the triangular factor of the block reflector
246 H = H(i+ib-1) . . . H(i+1) H(i) */
247
248 i__3 = *m - *k + i__ + ib - 1;
249 template_lapack_larft("Backward", "Columnwise", &i__3, &ib, &a_ref(1, *n - *
250 k + i__), lda, &tau[i__], &work[1], &ldwork);
251
252/* Apply H to A(1:m-k+i+ib-1,1:n-k+i-1) from the left */
253
254 i__3 = *m - *k + i__ + ib - 1;
255 i__4 = *n - *k + i__ - 1;
256 template_lapack_larfb("Left", "No transpose", "Backward", "Columnwise", &
257 i__3, &i__4, &ib, &a_ref(1, *n - *k + i__), lda, &
258 work[1], &ldwork, &a[a_offset], lda, &work[ib + 1], &
259 ldwork);
260 }
261
262/* Apply H to rows 1:m-k+i+ib-1 of current block */
263
264 i__3 = *m - *k + i__ + ib - 1;
265 template_lapack_org2l(&i__3, &ib, &ib, &a_ref(1, *n - *k + i__), lda, &tau[i__],
266 &work[1], &iinfo);
267
268/* Set rows m-k+i+ib:m of current block to zero */
269
270 i__3 = *n - *k + i__ + ib - 1;
271 for (j = *n - *k + i__; j <= i__3; ++j) {
272 i__4 = *m;
273 for (l = *m - *k + i__ + ib; l <= i__4; ++l) {
274 a_ref(l, j) = 0.;
275/* L30: */
276 }
277/* L40: */
278 }
279/* L50: */
280 }
281 }
282
283 work[1] = (Treal) iws;
284 return 0;
285
286/* End of DORGQL */
287
288} /* dorgql_ */
289
290#undef a_ref
291
292
293#endif
int template_blas_erbla(const char *srname, integer *info)
Definition template_blas_common.cc:146
int integer
Definition template_blas_common.h:40
int ftnlen
Definition template_blas_common.h:42
#define minMACRO(a, b)
Definition template_blas_common.h:46
#define maxMACRO(a, b)
Definition template_blas_common.h:45
bool logical
Definition template_blas_common.h:41
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition template_lapack_common.cc:281
int template_lapack_larfb(const char *side, const char *trans, const char *direct, const char *storev, const integer *m, const integer *n, const integer *k, const Treal *v, const integer *ldv, const Treal *t, const integer *ldt, Treal *c__, const integer *ldc, Treal *work, const integer *ldwork)
Definition template_lapack_larfb.h:42
int template_lapack_larft(const char *direct, const char *storev, const integer *n, const integer *k, Treal *v, const integer *ldv, const Treal *tau, Treal *t, const integer *ldt)
Definition template_lapack_larft.h:42
int template_lapack_org2l(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, integer *info)
Definition template_lapack_org2l.h:42
#define a_ref(a_1, a_2)
int template_lapack_orgql(const integer *m, const integer *n, const integer *k, Treal *a, const integer *lda, const Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition template_lapack_orgql.h:42