ergo
template_lapack_latrd.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LATRD_HEADER
38#define TEMPLATE_LAPACK_LATRD_HEADER
39
40
41template<class Treal>
42int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *
43 a, const integer *lda, Treal *e, Treal *tau, Treal *w,
44 const integer *ldw)
45{
46/* -- LAPACK auxiliary routine (version 3.0) --
47 Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
48 Courant Institute, Argonne National Lab, and Rice University
49 October 31, 1992
50
51
52 Purpose
53 =======
54
55 DLATRD reduces NB rows and columns of a real symmetric matrix A to
56 symmetric tridiagonal form by an orthogonal similarity
57 transformation Q' * A * Q, and returns the matrices V and W which are
58 needed to apply the transformation to the unreduced part of A.
59
60 If UPLO = 'U', DLATRD reduces the last NB rows and columns of a
61 matrix, of which the upper triangle is supplied;
62 if UPLO = 'L', DLATRD reduces the first NB rows and columns of a
63 matrix, of which the lower triangle is supplied.
64
65 This is an auxiliary routine called by DSYTRD.
66
67 Arguments
68 =========
69
70 UPLO (input) CHARACTER
71 Specifies whether the upper or lower triangular part of the
72 symmetric matrix A is stored:
73 = 'U': Upper triangular
74 = 'L': Lower triangular
75
76 N (input) INTEGER
77 The order of the matrix A.
78
79 NB (input) INTEGER
80 The number of rows and columns to be reduced.
81
82 A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
83 On entry, the symmetric matrix A. If UPLO = 'U', the leading
84 n-by-n upper triangular part of A contains the upper
85 triangular part of the matrix A, and the strictly lower
86 triangular part of A is not referenced. If UPLO = 'L', the
87 leading n-by-n lower triangular part of A contains the lower
88 triangular part of the matrix A, and the strictly upper
89 triangular part of A is not referenced.
90 On exit:
91 if UPLO = 'U', the last NB columns have been reduced to
92 tridiagonal form, with the diagonal elements overwriting
93 the diagonal elements of A; the elements above the diagonal
94 with the array TAU, represent the orthogonal matrix Q as a
95 product of elementary reflectors;
96 if UPLO = 'L', the first NB columns have been reduced to
97 tridiagonal form, with the diagonal elements overwriting
98 the diagonal elements of A; the elements below the diagonal
99 with the array TAU, represent the orthogonal matrix Q as a
100 product of elementary reflectors.
101 See Further Details.
102
103 LDA (input) INTEGER
104 The leading dimension of the array A. LDA >= (1,N).
105
106 E (output) DOUBLE PRECISION array, dimension (N-1)
107 If UPLO = 'U', E(n-nb:n-1) contains the superdiagonal
108 elements of the last NB columns of the reduced matrix;
109 if UPLO = 'L', E(1:nb) contains the subdiagonal elements of
110 the first NB columns of the reduced matrix.
111
112 TAU (output) DOUBLE PRECISION array, dimension (N-1)
113 The scalar factors of the elementary reflectors, stored in
114 TAU(n-nb:n-1) if UPLO = 'U', and in TAU(1:nb) if UPLO = 'L'.
115 See Further Details.
116
117 W (output) DOUBLE PRECISION array, dimension (LDW,NB)
118 The n-by-nb matrix W required to update the unreduced part
119 of A.
120
121 LDW (input) INTEGER
122 The leading dimension of the array W. LDW >= max(1,N).
123
124 Further Details
125 ===============
126
127 If UPLO = 'U', the matrix Q is represented as a product of elementary
128 reflectors
129
130 Q = H(n) H(n-1) . . . H(n-nb+1).
131
132 Each H(i) has the form
133
134 H(i) = I - tau * v * v'
135
136 where tau is a real scalar, and v is a real vector with
137 v(i:n) = 0 and v(i-1) = 1; v(1:i-1) is stored on exit in A(1:i-1,i),
138 and tau in TAU(i-1).
139
140 If UPLO = 'L', the matrix Q is represented as a product of elementary
141 reflectors
142
143 Q = H(1) H(2) . . . H(nb).
144
145 Each H(i) has the form
146
147 H(i) = I - tau * v * v'
148
149 where tau is a real scalar, and v is a real vector with
150 v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
151 and tau in TAU(i).
152
153 The elements of the vectors v together form the n-by-nb matrix V
154 which is needed, with W, to apply the transformation to the unreduced
155 part of the matrix, using a symmetric rank-2k update of the form:
156 A := A - V*W' - W*V'.
157
158 The contents of A on exit are illustrated by the following examples
159 with n = 5 and nb = 2:
160
161 if UPLO = 'U': if UPLO = 'L':
162
163 ( a a a v4 v5 ) ( d )
164 ( a a v4 v5 ) ( 1 d )
165 ( a 1 v5 ) ( v1 1 a )
166 ( d 1 ) ( v1 v2 a a )
167 ( d ) ( v1 v2 a a a )
168
169 where d denotes a diagonal element of the reduced matrix, a denotes
170 an element of the original matrix that is unchanged, and vi denotes
171 an element of the vector defining H(i).
172
173 =====================================================================
174
175
176 Quick return if possible
177
178 Parameter adjustments */
179 /* Table of constant values */
180 Treal c_b5 = -1.;
181 Treal c_b6 = 1.;
182 integer c__1 = 1;
183 Treal c_b16 = 0.;
184
185 /* System generated locals */
186 integer a_dim1, a_offset, w_dim1, w_offset, i__1, i__2, i__3;
187 /* Local variables */
188 integer i__;
189 Treal alpha;
190 integer iw;
191#define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
192#define w_ref(a_1,a_2) w[(a_2)*w_dim1 + a_1]
193
194
195 a_dim1 = *lda;
196 a_offset = 1 + a_dim1 * 1;
197 a -= a_offset;
198 --e;
199 --tau;
200 w_dim1 = *ldw;
201 w_offset = 1 + w_dim1 * 1;
202 w -= w_offset;
203
204 /* Function Body */
205 if (*n <= 0) {
206 return 0;
207 }
208
209 if (template_blas_lsame(uplo, "U")) {
210
211/* Reduce last NB columns of upper triangle */
212
213 i__1 = *n - *nb + 1;
214 for (i__ = *n; i__ >= i__1; --i__) {
215 iw = i__ - *n + *nb;
216 if (i__ < *n) {
217
218/* Update A(1:i,i) */
219
220 i__2 = *n - i__;
221 template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &a_ref(1, i__ + 1),
222 lda, &w_ref(i__, iw + 1), ldw, &c_b6, &a_ref(1, i__),
223 &c__1);
224 i__2 = *n - i__;
225 template_blas_gemv("No transpose", &i__, &i__2, &c_b5, &w_ref(1, iw + 1),
226 ldw, &a_ref(i__, i__ + 1), lda, &c_b6, &a_ref(1, i__),
227 &c__1);
228 }
229 if (i__ > 1) {
230
231/* Generate elementary reflector H(i) to annihilate
232 A(1:i-2,i) */
233
234 i__2 = i__ - 1;
235 template_lapack_larfg(&i__2, &a_ref(i__ - 1, i__), &a_ref(1, i__), &c__1, &
236 tau[i__ - 1]);
237 e[i__ - 1] = a_ref(i__ - 1, i__);
238 a_ref(i__ - 1, i__) = 1.;
239
240/* Compute W(1:i-1,i) */
241
242 i__2 = i__ - 1;
243 template_blas_symv("Upper", &i__2, &c_b6, &a[a_offset], lda, &a_ref(1,
244 i__), &c__1, &c_b16, &w_ref(1, iw), &c__1);
245 if (i__ < *n) {
246 i__2 = i__ - 1;
247 i__3 = *n - i__;
248 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(1, iw + 1)
249 , ldw, &a_ref(1, i__), &c__1, &c_b16, &w_ref(i__
250 + 1, iw), &c__1);
251 i__2 = i__ - 1;
252 i__3 = *n - i__;
253 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(1, i__
254 + 1), lda, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
255 w_ref(1, iw), &c__1);
256 i__2 = i__ - 1;
257 i__3 = *n - i__;
258 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(1, i__ +
259 1), lda, &a_ref(1, i__), &c__1, &c_b16, &w_ref(
260 i__ + 1, iw), &c__1);
261 i__2 = i__ - 1;
262 i__3 = *n - i__;
263 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(1, iw
264 + 1), ldw, &w_ref(i__ + 1, iw), &c__1, &c_b6, &
265 w_ref(1, iw), &c__1);
266 }
267 i__2 = i__ - 1;
268 template_blas_scal(&i__2, &tau[i__ - 1], &w_ref(1, iw), &c__1);
269 i__2 = i__ - 1;
270 alpha = tau[i__ - 1] * -.5 * template_blas_dot(&i__2, &w_ref(1, iw), &
271 c__1, &a_ref(1, i__), &c__1);
272 i__2 = i__ - 1;
273 template_blas_axpy(&i__2, &alpha, &a_ref(1, i__), &c__1, &w_ref(1, iw), &
274 c__1);
275 }
276
277/* L10: */
278 }
279 } else {
280
281/* Reduce first NB columns of lower triangle */
282
283 i__1 = *nb;
284 for (i__ = 1; i__ <= i__1; ++i__) {
285
286/* Update A(i:n,i) */
287
288 i__2 = *n - i__ + 1;
289 i__3 = i__ - 1;
290 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__, 1), lda, &
291 w_ref(i__, 1), ldw, &c_b6, &a_ref(i__, i__), &c__1);
292 i__2 = *n - i__ + 1;
293 i__3 = i__ - 1;
294 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__, 1), ldw, &
295 a_ref(i__, 1), lda, &c_b6, &a_ref(i__, i__), &c__1);
296 if (i__ < *n) {
297
298/* Generate elementary reflector H(i) to annihilate
299 A(i+2:n,i)
300
301 Computing MIN */
302 i__2 = i__ + 2;
303 i__3 = *n - i__;
304 template_lapack_larfg(&i__3, &a_ref(i__ + 1, i__), &a_ref(minMACRO(i__2,*n), i__)
305 , &c__1, &tau[i__]);
306 e[i__] = a_ref(i__ + 1, i__);
307 a_ref(i__ + 1, i__) = 1.;
308
309/* Compute W(i+1:n,i) */
310
311 i__2 = *n - i__;
312 template_blas_symv("Lower", &i__2, &c_b6, &a_ref(i__ + 1, i__ + 1), lda, &
313 a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(i__ + 1,
314 i__), &c__1);
315 i__2 = *n - i__;
316 i__3 = i__ - 1;
317 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &w_ref(i__ + 1, 1),
318 ldw, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
319 i__), &c__1);
320 i__2 = *n - i__;
321 i__3 = i__ - 1;
322 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &a_ref(i__ + 1, 1)
323 , lda, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
324 i__), &c__1);
325 i__2 = *n - i__;
326 i__3 = i__ - 1;
327 template_blas_gemv("Transpose", &i__2, &i__3, &c_b6, &a_ref(i__ + 1, 1),
328 lda, &a_ref(i__ + 1, i__), &c__1, &c_b16, &w_ref(1,
329 i__), &c__1);
330 i__2 = *n - i__;
331 i__3 = i__ - 1;
332 template_blas_gemv("No transpose", &i__2, &i__3, &c_b5, &w_ref(i__ + 1, 1)
333 , ldw, &w_ref(1, i__), &c__1, &c_b6, &w_ref(i__ + 1,
334 i__), &c__1);
335 i__2 = *n - i__;
336 template_blas_scal(&i__2, &tau[i__], &w_ref(i__ + 1, i__), &c__1);
337 i__2 = *n - i__;
338 alpha = tau[i__] * -.5 * template_blas_dot(&i__2, &w_ref(i__ + 1, i__), &
339 c__1, &a_ref(i__ + 1, i__), &c__1);
340 i__2 = *n - i__;
341 template_blas_axpy(&i__2, &alpha, &a_ref(i__ + 1, i__), &c__1, &w_ref(i__
342 + 1, i__), &c__1);
343 }
344
345/* L20: */
346 }
347 }
348
349 return 0;
350
351/* End of DLATRD */
352
353} /* dlatrd_ */
354
355#undef w_ref
356#undef a_ref
357
358
359#endif
int template_blas_axpy(const integer *n, const Treal *da, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition template_blas_axpy.h:43
logical template_blas_lsame(const char *ca, const char *cb)
Definition template_blas_common.cc:46
int integer
Definition template_blas_common.h:40
#define minMACRO(a, b)
Definition template_blas_common.h:46
Treal template_blas_dot(const integer *n, const Treal *dx, const integer *incx, const Treal *dy, const integer *incy)
Definition template_blas_dot.h:43
int template_blas_gemv(const char *trans, const integer *m, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition template_blas_gemv.h:43
int template_blas_scal(const integer *n, const Treal *da, Treal *dx, const integer *incx)
Definition template_blas_scal.h:43
int template_blas_symv(const char *uplo, const integer *n, const Treal *alpha, const Treal *a, const integer *lda, const Treal *x, const integer *incx, const Treal *beta, Treal *y, const integer *incy)
Definition template_blas_symv.h:42
int template_lapack_larfg(const integer *n, Treal *alpha, Treal *x, const integer *incx, Treal *tau)
Definition template_lapack_larfg.h:43
#define a_ref(a_1, a_2)
int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *a, const integer *lda, Treal *e, Treal *tau, Treal *w, const integer *ldw)
Definition template_lapack_latrd.h:42
#define w_ref(a_1, a_2)