ergo
template_lapack_larrf.h
Go to the documentation of this file.
1/* Ergo, version 3.8.2, a program for linear scaling electronic structure
2 * calculations.
3 * Copyright (C) 2023 Elias Rudberg, Emanuel H. Rubensson, Pawel Salek,
4 * and Anastasia Kruchinina.
5 *
6 * This program is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, either version 3 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program. If not, see <http://www.gnu.org/licenses/>.
18 *
19 * Primary academic reference:
20 * Ergo: An open-source program for linear-scaling electronic structure
21 * calculations,
22 * Elias Rudberg, Emanuel H. Rubensson, Pawel Salek, and Anastasia
23 * Kruchinina,
24 * SoftwareX 7, 107 (2018),
25 * <http://dx.doi.org/10.1016/j.softx.2018.03.005>
26 *
27 * For further information about Ergo, see <http://www.ergoscf.org>.
28 */
29
30 /* This file belongs to the template_lapack part of the Ergo source
31 * code. The source files in the template_lapack directory are modified
32 * versions of files originally distributed as CLAPACK, see the
33 * Copyright/license notice in the file template_lapack/COPYING.
34 */
35
36
37#ifndef TEMPLATE_LAPACK_LARRF_HEADER
38#define TEMPLATE_LAPACK_LARRF_HEADER
39
40
41template<class Treal>
42int template_lapack_larrf(integer *n, Treal *d__, Treal *l,
43 Treal *ld, integer *clstrt, integer *clend, Treal *w,
44 Treal *wgap, Treal *werr, Treal *spdiam, Treal *
45 clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma,
46 Treal *dplus, Treal *lplus, Treal *work, integer *info)
47{
48 /* System generated locals */
49 integer i__1;
50 Treal d__1, d__2, d__3;
51
52 /* Local variables */
53 integer i__;
54 Treal s, bestshift, smlgrowth, eps, tmp, max1, max2, rrr1, rrr2,
55 znm2, growthbound, fail, fact, oldp;
56 integer indx;
57 Treal prod;
58 integer ktry;
59 Treal fail2, avgap, ldmax, rdmax;
60 integer shift;
61 logical dorrr1;
62 Treal ldelta;
63 logical nofail;
64 Treal mingap, lsigma, rdelta;
65 logical forcer;
66 Treal rsigma, clwdth;
67 logical sawnan1, sawnan2, tryrrr1;
68
69
70/* -- LAPACK auxiliary routine (version 3.2) -- */
71/* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
72/* November 2006 */
73/* * */
74/* .. Scalar Arguments .. */
75/* .. */
76/* .. Array Arguments .. */
77/* .. */
78
79/* Purpose */
80/* ======= */
81
82/* Given the initial representation L D L^T and its cluster of close */
83/* eigenvalues (in a relative measure), W( CLSTRT ), W( CLSTRT+1 ), ... */
84/* W( CLEND ), DLARRF finds a new relatively robust representation */
85/* L D L^T - SIGMA I = L(+) D(+) L(+)^T such that at least one of the */
86/* eigenvalues of L(+) D(+) L(+)^T is relatively isolated. */
87
88/* Arguments */
89/* ========= */
90
91/* N (input) INTEGER */
92/* The order of the matrix (subblock, if the matrix splitted). */
93
94/* D (input) DOUBLE PRECISION array, dimension (N) */
95/* The N diagonal elements of the diagonal matrix D. */
96
97/* L (input) DOUBLE PRECISION array, dimension (N-1) */
98/* The (N-1) subdiagonal elements of the unit bidiagonal */
99/* matrix L. */
100
101/* LD (input) DOUBLE PRECISION array, dimension (N-1) */
102/* The (N-1) elements L(i)*D(i). */
103
104/* CLSTRT (input) INTEGER */
105/* The index of the first eigenvalue in the cluster. */
106
107/* CLEND (input) INTEGER */
108/* The index of the last eigenvalue in the cluster. */
109
110/* W (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
111/* The eigenvalue APPROXIMATIONS of L D L^T in ascending order. */
112/* W( CLSTRT ) through W( CLEND ) form the cluster of relatively */
113/* close eigenalues. */
114
115/* WGAP (input/output) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
116/* The separation from the right neighbor eigenvalue in W. */
117
118/* WERR (input) DOUBLE PRECISION array, dimension >= (CLEND-CLSTRT+1) */
119/* WERR contain the semiwidth of the uncertainty */
120/* interval of the corresponding eigenvalue APPROXIMATION in W */
121
122/* SPDIAM (input) estimate of the spectral diameter obtained from the */
123/* Gerschgorin intervals */
124
125/* CLGAPL, CLGAPR (input) absolute gap on each end of the cluster. */
126/* Set by the calling routine to protect against shifts too close */
127/* to eigenvalues outside the cluster. */
128
129/* PIVMIN (input) DOUBLE PRECISION */
130/* The minimum pivot allowed in the Sturm sequence. */
131
132/* SIGMA (output) DOUBLE PRECISION */
133/* The shift used to form L(+) D(+) L(+)^T. */
134
135/* DPLUS (output) DOUBLE PRECISION array, dimension (N) */
136/* The N diagonal elements of the diagonal matrix D(+). */
137
138/* LPLUS (output) DOUBLE PRECISION array, dimension (N-1) */
139/* The first (N-1) elements of LPLUS contain the subdiagonal */
140/* elements of the unit bidiagonal matrix L(+). */
141
142/* WORK (workspace) DOUBLE PRECISION array, dimension (2*N) */
143/* Workspace. */
144
145/* Further Details */
146/* =============== */
147
148/* Based on contributions by */
149/* Beresford Parlett, University of California, Berkeley, USA */
150/* Jim Demmel, University of California, Berkeley, USA */
151/* Inderjit Dhillon, University of Texas, Austin, USA */
152/* Osni Marques, LBNL/NERSC, USA */
153/* Christof Voemel, University of California, Berkeley, USA */
154
155/* ===================================================================== */
156
157/* .. Parameters .. */
158/* .. */
159/* .. Local Scalars .. */
160/* .. */
161/* .. External Functions .. */
162/* .. */
163/* .. External Subroutines .. */
164/* .. */
165/* .. Intrinsic Functions .. */
166/* .. */
167/* .. Executable Statements .. */
168
169/* Table of constant values */
170
171 integer c__1 = 1;
172
173
174
175 /* Parameter adjustments */
176 --work;
177 --lplus;
178 --dplus;
179 --werr;
180 --wgap;
181 --w;
182 --ld;
183 --l;
184 --d__;
185
186 /* Initialization added by Elias to get rid of compiler warnings. */
187 indx = 0;
188 /* Function Body */
189 *info = 0;
190 fact = 2.;
191 eps = template_lapack_lamch("Precision", (Treal)0);
192 shift = 0;
193 forcer = FALSE_;
194/* Note that we cannot guarantee that for any of the shifts tried, */
195/* the factorization has a small or even moderate element growth. */
196/* There could be Ritz values at both ends of the cluster and despite */
197/* backing off, there are examples where all factorizations tried */
198/* (in IEEE mode, allowing zero pivots & infinities) have INFINITE */
199/* element growth. */
200/* For this reason, we should use PIVMIN in this subroutine so that at */
201/* least the L D L^T factorization exists. It can be checked afterwards */
202/* whether the element growth caused bad residuals/orthogonality. */
203/* Decide whether the code should accept the best among all */
204/* representations despite large element growth or signal INFO=1 */
205 nofail = TRUE_;
206
207/* Compute the average gap length of the cluster */
208 clwdth = (d__1 = w[*clend] - w[*clstrt], absMACRO(d__1)) + werr[*clend] + werr[
209 *clstrt];
210 avgap = clwdth / (Treal) (*clend - *clstrt);
211 mingap = minMACRO(*clgapl,*clgapr);
212/* Initial values for shifts to both ends of cluster */
213/* Computing MIN */
214 d__1 = w[*clstrt], d__2 = w[*clend];
215 lsigma = minMACRO(d__1,d__2) - werr[*clstrt];
216/* Computing MAX */
217 d__1 = w[*clstrt], d__2 = w[*clend];
218 rsigma = maxMACRO(d__1,d__2) + werr[*clend];
219/* Use a small fudge to make sure that we really shift to the outside */
220 lsigma -= absMACRO(lsigma) * 4. * eps;
221 rsigma += absMACRO(rsigma) * 4. * eps;
222/* Compute upper bounds for how much to back off the initial shifts */
223 ldmax = mingap * .25 + *pivmin * 2.;
224 rdmax = mingap * .25 + *pivmin * 2.;
225/* Computing MAX */
226 d__1 = avgap, d__2 = wgap[*clstrt];
227 ldelta = maxMACRO(d__1,d__2) / fact;
228/* Computing MAX */
229 d__1 = avgap, d__2 = wgap[*clend - 1];
230 rdelta = maxMACRO(d__1,d__2) / fact;
231
232/* Initialize the record of the best representation found */
233
234 s = template_lapack_lamch("S", (Treal)0);
235 smlgrowth = 1. / s;
236 fail = (Treal) (*n - 1) * mingap / (*spdiam * eps);
237 fail2 = (Treal) (*n - 1) * mingap / (*spdiam * template_blas_sqrt(eps));
238 bestshift = lsigma;
239
240/* while (KTRY <= KTRYMAX) */
241 ktry = 0;
242 growthbound = *spdiam * 8.;
243L5:
244 sawnan1 = FALSE_;
245 sawnan2 = FALSE_;
246/* Ensure that we do not back off too much of the initial shifts */
247 ldelta = minMACRO(ldmax,ldelta);
248 rdelta = minMACRO(rdmax,rdelta);
249/* Compute the element growth when shifting to both ends of the cluster */
250/* accept the shift if there is no element growth at one of the two ends */
251/* Left end */
252 s = -lsigma;
253 dplus[1] = d__[1] + s;
254 if (absMACRO(dplus[1]) < *pivmin) {
255 dplus[1] = -(*pivmin);
256/* Need to set SAWNAN1 because refined RRR test should not be used */
257/* in this case */
258 sawnan1 = TRUE_;
259 }
260 max1 = absMACRO(dplus[1]);
261 i__1 = *n - 1;
262 for (i__ = 1; i__ <= i__1; ++i__) {
263 lplus[i__] = ld[i__] / dplus[i__];
264 s = s * lplus[i__] * l[i__] - lsigma;
265 dplus[i__ + 1] = d__[i__ + 1] + s;
266 if ((d__1 = dplus[i__ + 1], absMACRO(d__1)) < *pivmin) {
267 dplus[i__ + 1] = -(*pivmin);
268/* Need to set SAWNAN1 because refined RRR test should not be used */
269/* in this case */
270 sawnan1 = TRUE_;
271 }
272/* Computing MAX */
273 d__2 = max1, d__3 = (d__1 = dplus[i__ + 1], absMACRO(d__1));
274 max1 = maxMACRO(d__2,d__3);
275/* L6: */
276 }
277 sawnan1 = sawnan1 || template_lapack_isnan(&max1);
278 if (forcer || ( max1 <= growthbound && ! sawnan1 ) ) {
279 *sigma = lsigma;
280 shift = 1;
281 goto L100;
282 }
283/* Right end */
284 s = -rsigma;
285 work[1] = d__[1] + s;
286 if (absMACRO(work[1]) < *pivmin) {
287 work[1] = -(*pivmin);
288/* Need to set SAWNAN2 because refined RRR test should not be used */
289/* in this case */
290 sawnan2 = TRUE_;
291 }
292 max2 = absMACRO(work[1]);
293 i__1 = *n - 1;
294 for (i__ = 1; i__ <= i__1; ++i__) {
295 work[*n + i__] = ld[i__] / work[i__];
296 s = s * work[*n + i__] * l[i__] - rsigma;
297 work[i__ + 1] = d__[i__ + 1] + s;
298 if ((d__1 = work[i__ + 1], absMACRO(d__1)) < *pivmin) {
299 work[i__ + 1] = -(*pivmin);
300/* Need to set SAWNAN2 because refined RRR test should not be used */
301/* in this case */
302 sawnan2 = TRUE_;
303 }
304/* Computing MAX */
305 d__2 = max2, d__3 = (d__1 = work[i__ + 1], absMACRO(d__1));
306 max2 = maxMACRO(d__2,d__3);
307/* L7: */
308 }
309 sawnan2 = sawnan2 || template_lapack_isnan(&max2);
310 if (forcer || ( max2 <= growthbound && ! sawnan2 ) ) {
311 *sigma = rsigma;
312 shift = 2;
313 goto L100;
314 }
315/* If we are at this point, both shifts led to too much element growth */
316/* Record the better of the two shifts (provided it didn't lead to NaN) */
317 if (sawnan1 && sawnan2) {
318/* both MAX1 and MAX2 are NaN */
319 goto L50;
320 } else {
321 if (! sawnan1) {
322 indx = 1;
323 if (max1 <= smlgrowth) {
324 smlgrowth = max1;
325 bestshift = lsigma;
326 }
327 }
328 if (! sawnan2) {
329 if (sawnan1 || max2 <= max1) {
330 indx = 2;
331 }
332 if (max2 <= smlgrowth) {
333 smlgrowth = max2;
334 bestshift = rsigma;
335 }
336 }
337 }
338/* If we are here, both the left and the right shift led to */
339/* element growth. If the element growth is moderate, then */
340/* we may still accept the representation, if it passes a */
341/* refined test for RRR. This test supposes that no NaN occurred. */
342/* Moreover, we use the refined RRR test only for isolated clusters. */
343 if (clwdth < mingap / 128. && minMACRO(max1,max2) < fail2 && ! sawnan1 && !
344 sawnan2) {
345 dorrr1 = TRUE_;
346 } else {
347 dorrr1 = FALSE_;
348 }
349 tryrrr1 = TRUE_;
350 if (tryrrr1 && dorrr1) {
351 if (indx == 1) {
352 tmp = (d__1 = dplus[*n], absMACRO(d__1));
353 znm2 = 1.;
354 prod = 1.;
355 oldp = 1.;
356 for (i__ = *n - 1; i__ >= 1; --i__) {
357 if (prod <= eps) {
358 prod = dplus[i__ + 1] * work[*n + i__ + 1] / (dplus[i__] *
359 work[*n + i__]) * oldp;
360 } else {
361 prod *= (d__1 = work[*n + i__], absMACRO(d__1));
362 }
363 oldp = prod;
364/* Computing 2nd power */
365 d__1 = prod;
366 znm2 += d__1 * d__1;
367/* Computing MAX */
368 d__2 = tmp, d__3 = (d__1 = dplus[i__] * prod, absMACRO(d__1));
369 tmp = maxMACRO(d__2,d__3);
370/* L15: */
371 }
372 rrr1 = tmp / (*spdiam * template_blas_sqrt(znm2));
373 if (rrr1 <= 8.) {
374 *sigma = lsigma;
375 shift = 1;
376 goto L100;
377 }
378 } else if (indx == 2) {
379 tmp = (d__1 = work[*n], absMACRO(d__1));
380 znm2 = 1.;
381 prod = 1.;
382 oldp = 1.;
383 for (i__ = *n - 1; i__ >= 1; --i__) {
384 if (prod <= eps) {
385 prod = work[i__ + 1] * lplus[i__ + 1] / (work[i__] *
386 lplus[i__]) * oldp;
387 } else {
388 prod *= (d__1 = lplus[i__], absMACRO(d__1));
389 }
390 oldp = prod;
391/* Computing 2nd power */
392 d__1 = prod;
393 znm2 += d__1 * d__1;
394/* Computing MAX */
395 d__2 = tmp, d__3 = (d__1 = work[i__] * prod, absMACRO(d__1));
396 tmp = maxMACRO(d__2,d__3);
397/* L16: */
398 }
399 rrr2 = tmp / (*spdiam * template_blas_sqrt(znm2));
400 if (rrr2 <= 8.) {
401 *sigma = rsigma;
402 shift = 2;
403 goto L100;
404 }
405 }
406 }
407L50:
408 if (ktry < 1) {
409/* If we are here, both shifts failed also the RRR test. */
410/* Back off to the outside */
411/* Computing MAX */
412 d__1 = lsigma - ldelta, d__2 = lsigma - ldmax;
413 lsigma = maxMACRO(d__1,d__2);
414/* Computing MIN */
415 d__1 = rsigma + rdelta, d__2 = rsigma + rdmax;
416 rsigma = minMACRO(d__1,d__2);
417 ldelta *= 2.;
418 rdelta *= 2.;
419 ++ktry;
420 goto L5;
421 } else {
422/* None of the representations investigated satisfied our */
423/* criteria. Take the best one we found. */
424 if (smlgrowth < fail || nofail) {
425 lsigma = bestshift;
426 rsigma = bestshift;
427 forcer = TRUE_;
428 goto L5;
429 } else {
430 *info = 1;
431 return 0;
432 }
433 }
434L100:
435 if (shift == 1) {
436 } else if (shift == 2) {
437/* store new L and D back into DPLUS, LPLUS */
438 template_blas_copy(n, &work[1], &c__1, &dplus[1], &c__1);
439 i__1 = *n - 1;
440 template_blas_copy(&i__1, &work[*n + 1], &c__1, &lplus[1], &c__1);
441 }
442 return 0;
443
444/* End of DLARRF */
445
446} /* dlarrf_ */
447
448#endif
Treal template_blas_sqrt(Treal x)
int integer
Definition template_blas_common.h:40
#define absMACRO(x)
Definition template_blas_common.h:47
#define minMACRO(a, b)
Definition template_blas_common.h:46
#define maxMACRO(a, b)
Definition template_blas_common.h:45
bool logical
Definition template_blas_common.h:41
int template_blas_copy(const integer *n, const Treal *dx, const integer *incx, Treal *dy, const integer *incy)
Definition template_blas_copy.h:42
#define TRUE_
Definition template_lapack_common.h:42
#define FALSE_
Definition template_lapack_common.h:43
logical template_lapack_isnan(Treal *din)
Definition template_lapack_isnan.h:45
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition template_lapack_lamch.h:202
int template_lapack_larrf(integer *n, Treal *d__, Treal *l, Treal *ld, integer *clstrt, integer *clend, Treal *w, Treal *wgap, Treal *werr, Treal *spdiam, Treal *clgapl, Treal *clgapr, Treal *pivmin, Treal *sigma, Treal *dplus, Treal *lplus, Treal *work, integer *info)
Definition template_lapack_larrf.h:42