Actual source code: ispai.c

/*
   3/99 Modified by Stephen Barnard to support SPAI version 3.0
*/

/*
      Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
   Code written by Stephen Barnard.

      Note: there are some BAD memory leaks below!

      This code needs work

   1) get rid of all memory leaks
   2) fix the PETSc/SPAI interface so that it determines whether the matrix is symmetric
      from the matrix itself rather than from the sp flag in PC_SPAI
   3) fix to set the block size based on the matrix block size
*/
#if !defined(PETSC_SKIP_COMPLEX)
  #define PETSC_SKIP_COMPLEX /* since spai uses I which conflicts with some complex implementations */
#endif

#include <petsc/private/pcimpl.h>
#include <../src/ksp/pc/impls/spai/petscspai.h>

/*
    These are the SPAI include files
*/
EXTERN_C_BEGIN
#define SPAI_USE_MPI /* required for setting SPAI_Comm correctly in basics.h */
#include <spai.h>
#include <matrix.h>
EXTERN_C_END

static PetscErrorCode ConvertMatToMatrix(MPI_Comm, Mat, Mat, matrix **);
static PetscErrorCode ConvertMatrixToMat(MPI_Comm, matrix *, Mat *);

typedef struct {
  matrix *B;  /* matrix in SPAI format */
  matrix *BT; /* transpose of matrix in SPAI format */
  matrix *M;  /* the approximate inverse in SPAI format */

  Mat PM; /* the approximate inverse in PETSc format */

  double epsilon;    /* tolerance */
  int    nbsteps;    /* max number of "improvement" steps per line */
  int    max;        /* max dimensions of is_I, q, etc. */
  int    maxnew;     /* max number of new entries per step */
  int    block_size; /* constant block size */
  int    cache_size; /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;    /* SPAI prints timing and statistics */

  int      sp;        /* symmetric nonzero pattern */
  MPI_Comm comm_spai; /* communicator to be used with spai */
} PC_SPAI;

static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;
  Mat      AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, pc->pmat, &ispai->B));
  } else {
    /* Use the transpose to get the column nonzero structure; it is destroyed again below. */
    PetscCall(MatTranspose(pc->pmat, MAT_INITIAL_MATRIX, &AT));
    PetscCall(ConvertMatToMatrix(ispai->comm_spai, pc->pmat, AT, &ispai->B));
    PetscCall(MatDestroy(&AT));
  }

  /* construct SPAI preconditioner */
  /* FILE *messages */    /* file for warning messages */
  /* double epsilon */    /* tolerance */
  /* int nbsteps */       /* max number of "improvement" steps per line */
  /* int max */           /* max dimensions of is_I, q, etc. */
  /* int maxnew */        /* max number of new entries per step */
  /* int block_size */    /* block_size == 1 specifies scalar elements
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */    /* one of (1,2,3,4,5,6) indicating size of cache. cache_size == 0 indicates no caching */
  /* int    verbose    */ /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  PetscCallExternal(bspai, ispai->B, &ispai->M, stdout, ispai->epsilon, ispai->nbsteps, ispai->max, ispai->maxnew, ispai->block_size, ispai->cache_size, ispai->verbose);

  PetscCall(ConvertMatrixToMat(PetscObjectComm((PetscObject)pc), ispai->M, &ispai->PM));

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCApply_SPAI(PC pc, Vec xx, Vec y)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMult(ispai->PM, xx, y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCMatApply_SPAI(PC pc, Mat X, Mat Y)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  /* Now using PETSc's multiply */
  PetscCall(MatMatMult(ispai->PM, X, MAT_REUSE_MATRIX, PETSC_DEFAULT, &Y));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCDestroy_SPAI(PC pc)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&ispai->PM));
  PetscCallMPI(MPI_Comm_free(&(ispai->comm_spai)));
  PetscCall(PetscFree(pc->data));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCView_SPAI(PC pc, PetscViewer viewer)
{
  PC_SPAI  *ispai = (PC_SPAI *)pc->data;
  PetscBool iascii;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPrintf(viewer, "    epsilon %g\n", ispai->epsilon));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    nbsteps %d\n", ispai->nbsteps));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    max %d\n", ispai->max));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    maxnew %d\n", ispai->maxnew));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    block_size %d\n", ispai->block_size));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    cache_size %d\n", ispai->cache_size));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    verbose %d\n", ispai->verbose));
    PetscCall(PetscViewerASCIIPrintf(viewer, "    sp %d\n", ispai->sp));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetEpsilon_SPAI(PC pc, PetscReal epsilon1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->epsilon = (double)epsilon1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetNBSteps_SPAI(PC pc, PetscInt nbsteps1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->nbsteps = (int)nbsteps1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* added 1/7/99 g.h. */
static PetscErrorCode PCSPAISetMax_SPAI(PC pc, PetscInt max1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->max = (int)max1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetMaxNew_SPAI(PC pc, PetscInt maxnew1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->maxnew = (int)maxnew1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetBlockSize_SPAI(PC pc, PetscInt block_size1)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->block_size = (int)block_size1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetCacheSize_SPAI(PC pc, PetscInt cache_size)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->cache_size = (int)cache_size;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetVerbose_SPAI(PC pc, PetscInt verbose)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->verbose = (int)verbose;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode PCSPAISetSp_SPAI(PC pc, PetscInt sp)
{
  PC_SPAI *ispai = (PC_SPAI *)pc->data;

  PetscFunctionBegin;
  ispai->sp = (int)sp;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetEpsilon - Set the tolerance for the `PCSPAI` preconditioner

  Input Parameters:
+ pc       - the preconditioner
- epsilon1 - epsilon (default .4)

  Note:
  Epsilon must be between 0 and 1. It controls the
  quality of the approximation of M to the inverse of
  A. Higher values of epsilon lead to more work, more
  fill, and usually better preconditioners. In many
  cases the best choice of epsilon is the one that
  divides the total solution time equally between the
  preconditioner and the solver.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetEpsilon(PC pc, PetscReal epsilon1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetEpsilon_C", (PC, PetscReal), (pc, epsilon1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
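
/*
   Example usage (a minimal sketch; `pc` is assumed to already be of type `PCSPAI`,
   e.g. via PCSetType(pc, PCSPAI)):

     PetscCall(PCSPAISetEpsilon(pc, 0.3));

   or equivalently on the command line:

     -pc_spai_epsilon 0.3
*/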

/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
  the `PCSPAI` preconditioner

  Input Parameters:
+ pc       - the preconditioner
- nbsteps1 - number of steps (default 5)

  Note:
  `PCSPAI` constructs an approximation to every column of
  the exact inverse of A in a series of improvement
  steps. The quality of the approximation is determined
  by epsilon. If an approximation achieving an accuracy
  of epsilon is not obtained after nbsteps steps, SPAI simply
  uses the best approximation constructed so far.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetMaxNew()`
@*/
PetscErrorCode PCSPAISetNBSteps(PC pc, PetscInt nbsteps1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetNBSteps_C", (PC, PetscInt), (pc, nbsteps1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
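
/*
   Example usage (a sketch under the same assumption that `pc` is a `PCSPAI`
   preconditioner): allow up to 10 improvement steps per column instead of
   the default 5:

     PetscCall(PCSPAISetNBSteps(pc, 10));   or   -pc_spai_nbsteps 10
*/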

/* added 1/7/99 g.h. */
/*@
  PCSPAISetMax - set the size of various working buffers in
  the `PCSPAI` preconditioner

  Input Parameters:
+ pc   - the preconditioner
- max1 - size (default is 5000)

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetMax(PC pc, PetscInt max1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMax_C", (PC, PetscInt), (pc, max1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
  in the `PCSPAI` preconditioner

  Input Parameters:
+ pc      - the preconditioner
- maxnew1 - maximum number (default 5)

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`, `PCSPAISetNBSteps()`
@*/
PetscErrorCode PCSPAISetMaxNew(PC pc, PetscInt maxnew1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetMaxNew_C", (PC, PetscInt), (pc, maxnew1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetBlockSize - set the block size for the `PCSPAI` preconditioner

  Input Parameters:
+ pc          - the preconditioner
- block_size1 - block size (default 1)

  Notes:
  A block size of 1 treats A as a matrix of scalar
  elements. A block size of s > 1 treats A as a matrix
  of sxs blocks. A block size of 0 treats A as a matrix
  with variable sized blocks, which are determined by
  searching for dense square diagonal blocks in A.
  This can be very effective for finite-element
  matrices.

  SPAI will convert A to block form, use a block
  version of the preconditioner algorithm, and then
  convert the result back to scalar form.

  In many cases a block size other than 1 can lead to
  very significant improvements in performance.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetBlockSize(PC pc, PetscInt block_size1)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetBlockSize_C", (PC, PetscInt), (pc, block_size1));
  PetscFunctionReturn(PETSC_SUCCESS);
}
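
/*
   Example usage (illustrative only; the best block size is problem dependent):
   for a finite-element matrix with 3 degrees of freedom per node one might try

     PetscCall(PCSPAISetBlockSize(pc, 3));   or   -pc_spai_block_size 3

   while -pc_spai_block_size 0 lets SPAI search for variable-sized dense diagonal
   blocks itself.
*/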

/*@
  PCSPAISetCacheSize - specify cache size in the `PCSPAI` preconditioner

  Input Parameters:
+ pc         - the preconditioner
- cache_size - cache size {0,1,2,3,4,5} (default 5)

  Note:
  `PCSPAI` uses a hash table to cache messages and avoid
  redundant communication. We suggest always using
  5. This parameter is irrelevant in the serial
  version.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetCacheSize(PC pc, PetscInt cache_size)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetCacheSize_C", (PC, PetscInt), (pc, cache_size));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetVerbose - verbosity level for the `PCSPAI` preconditioner

  Input Parameters:
+ pc      - the preconditioner
- verbose - level (default 0)

  Note:
  A level of 1 prints parameters, timings, and matrix statistics

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetVerbose(PC pc, PetscInt verbose)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetVerbose_C", (PC, PetscInt), (pc, verbose));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the `PCSPAI` preconditioner

  Input Parameters:
+ pc - the preconditioner
- sp - 0 or 1

  Note:
  If A has a symmetric nonzero pattern use -pc_spai_sp 1 to
  improve performance by eliminating some communication
  in the parallel version. Even if A does not have a
  symmetric nonzero pattern -pc_spai_sp 1 may well lead to good
  results, but the code will not follow the published
  SPAI algorithm exactly.

  Level: intermediate

.seealso: `PCSPAI`, `PCSetType()`
@*/
PetscErrorCode PCSPAISetSp(PC pc, PetscInt sp)
{
  PetscFunctionBegin;
  PetscTryMethod(pc, "PCSPAISetSp_C", (PC, PetscInt), (pc, sp));
  PetscFunctionReturn(PETSC_SUCCESS);
}
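
/*
   Example usage (a sketch): for a matrix whose nonzero pattern is structurally
   symmetric,

     PetscCall(PCSPAISetSp(pc, 1));   or   -pc_spai_sp 1

   lets PCSetUp_SPAI() above skip forming an explicit transpose of the matrix.
*/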

static PetscErrorCode PCSetFromOptions_SPAI(PC pc, PetscOptionItems *PetscOptionsObject)
{
  PC_SPAI  *ispai = (PC_SPAI *)pc->data;
  PetscInt  nbsteps1, max1, maxnew1, block_size1, cache_size, verbose, sp;
  PetscReal epsilon1;
  PetscBool flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "SPAI options");
  PetscCall(PetscOptionsReal("-pc_spai_epsilon", "", "PCSPAISetEpsilon", ispai->epsilon, &epsilon1, &flg));
  if (flg) PetscCall(PCSPAISetEpsilon(pc, epsilon1));
  PetscCall(PetscOptionsInt("-pc_spai_nbsteps", "", "PCSPAISetNBSteps", ispai->nbsteps, &nbsteps1, &flg));
  if (flg) PetscCall(PCSPAISetNBSteps(pc, nbsteps1));
  /* added 1/7/99 g.h. */
  PetscCall(PetscOptionsInt("-pc_spai_max", "", "PCSPAISetMax", ispai->max, &max1, &flg));
  if (flg) PetscCall(PCSPAISetMax(pc, max1));
  PetscCall(PetscOptionsInt("-pc_spai_maxnew", "", "PCSPAISetMaxNew", ispai->maxnew, &maxnew1, &flg));
  if (flg) PetscCall(PCSPAISetMaxNew(pc, maxnew1));
  PetscCall(PetscOptionsInt("-pc_spai_block_size", "", "PCSPAISetBlockSize", ispai->block_size, &block_size1, &flg));
  if (flg) PetscCall(PCSPAISetBlockSize(pc, block_size1));
  PetscCall(PetscOptionsInt("-pc_spai_cache_size", "", "PCSPAISetCacheSize", ispai->cache_size, &cache_size, &flg));
  if (flg) PetscCall(PCSPAISetCacheSize(pc, cache_size));
  PetscCall(PetscOptionsInt("-pc_spai_verbose", "", "PCSPAISetVerbose", ispai->verbose, &verbose, &flg));
  if (flg) PetscCall(PCSPAISetVerbose(pc, verbose));
  PetscCall(PetscOptionsInt("-pc_spai_sp", "", "PCSPAISetSp", ispai->sp, &sp, &flg));
  if (flg) PetscCall(PCSPAISetSp(pc, sp));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   PCSPAI - Use the Sparse Approximate Inverse method

   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - set verbosity level

   Level: beginner

   Note:
    This only works with `MATAIJ` matrices.

   References:
 . * - M. Grote and T. Huckle, "Parallel Preconditioning with Sparse Approximate Inverses", SIAM J. Sci. Comput., 18(3), 1997

.seealso: `PCCreate()`, `PCSetType()`, `PCType`, `PC`,
          `PCSPAISetEpsilon()`, `PCSPAISetMax()`, `PCSPAISetMaxNew()`, `PCSPAISetBlockSize()`,
          `PCSPAISetVerbose()`, `PCSPAISetSp()`, `PCSPAISetNBSteps()`, `PCSPAISetCacheSize()`
M*/
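
/*
   Example usage (a minimal sketch; assumes an already assembled `MATAIJ` matrix A
   and vectors b, x, with error handling abbreviated):

     KSP ksp;
     PC  pc;
     PetscCall(KSPCreate(PETSC_COMM_WORLD, &ksp));
     PetscCall(KSPSetOperators(ksp, A, A));
     PetscCall(KSPGetPC(ksp, &pc));
     PetscCall(PCSetType(pc, PCSPAI));
     PetscCall(KSPSetFromOptions(ksp));
     PetscCall(KSPSolve(ksp, b, x));

   run with, for example:

     -pc_type spai -pc_spai_epsilon 0.4 -pc_spai_verbose 1
*/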

PETSC_EXTERN PetscErrorCode PCCreate_SPAI(PC pc)
{
  PC_SPAI *ispai;

  PetscFunctionBegin;
  PetscCall(PetscNew(&ispai));
  pc->data = ispai;

  pc->ops->destroy         = PCDestroy_SPAI;
  pc->ops->apply           = PCApply_SPAI;
  pc->ops->matapply        = PCMatApply_SPAI;
  pc->ops->applyrichardson = NULL;
  pc->ops->setup           = PCSetUp_SPAI;
  pc->ops->view            = PCView_SPAI;
  pc->ops->setfromoptions  = PCSetFromOptions_SPAI;

  ispai->epsilon    = .4;
  ispai->nbsteps    = 5;
  ispai->max        = 5000;
  ispai->maxnew     = 5;
  ispai->block_size = 1;
  ispai->cache_size = 5;
  ispai->verbose    = 0;

  ispai->sp = 1;
  PetscCallMPI(MPI_Comm_dup(PetscObjectComm((PetscObject)pc), &(ispai->comm_spai)));

  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetEpsilon_C", PCSPAISetEpsilon_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetNBSteps_C", PCSPAISetNBSteps_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMax_C", PCSPAISetMax_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetMaxNew_C", PCSPAISetMaxNew_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetBlockSize_C", PCSPAISetBlockSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetCacheSize_C", PCSPAISetCacheSize_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetVerbose_C", PCSPAISetVerbose_SPAI));
  PetscCall(PetscObjectComposeFunction((PetscObject)pc, "PCSPAISetSp_C", PCSPAISetSp_SPAI));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Converts from a PETSc matrix to an SPAI matrix
*/
static PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A, Mat AT, matrix **B)
{
  matrix                  *M;
  int                      i, j, col;
  int                      row_indx;
  int                      len, pe, local_indx, start_indx;
  int                     *mapping;
  const int               *cols;
  const double            *vals;
  int                      n, mnl, nnl, nz, rstart, rend;
  PetscMPIInt              size, rank;
  struct compressed_lines *rows;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(MatGetSize(A, &n, &n));
  PetscCall(MatGetLocalSize(A, &mnl, &nnl));

  /*
    not sure why a barrier is required. commenting out
  PetscCallMPI(MPI_Barrier(comm));
  */

  M = new_matrix((SPAI_Comm)comm);

  M->n              = n;
  M->bs             = 1;
  M->max_block_size = 1;

  M->mnls          = (int *)malloc(sizeof(int) * size);
  M->start_indices = (int *)malloc(sizeof(int) * size);
  M->pe            = (int *)malloc(sizeof(int) * n);
  M->block_sizes   = (int *)malloc(sizeof(int) * n);
  for (i = 0; i < n; i++) M->block_sizes[i] = 1;

  PetscCallMPI(MPI_Allgather(&mnl, 1, MPI_INT, M->mnls, 1, MPI_INT, comm));

  M->start_indices[0] = 0;
  for (i = 1; i < size; i++) M->start_indices[i] = M->start_indices[i - 1] + M->mnls[i - 1];

  M->mnl            = M->mnls[M->myid];
  M->my_start_index = M->start_indices[M->myid];

  for (i = 0; i < size; i++) {
    start_indx = M->start_indices[i];
    for (j = 0; j < M->mnls[i]; j++) M->pe[start_indx + j] = i;
  }

  if (AT) {
    M->lines = new_compressed_lines(M->mnls[rank], 1);
  } else {
    M->lines = new_compressed_lines(M->mnls[rank], 0);
  }

  rows = M->lines;

  /* Determine the mapping from global indices to pointers */
  PetscCall(PetscMalloc1(M->n, &mapping));
  pe         = 0;
  local_indx = 0;
  for (i = 0; i < M->n; i++) {
    if (local_indx >= M->mnls[pe]) {
      pe++;
      local_indx = 0;
    }
    mapping[i] = local_indx + M->start_indices[pe];
    local_indx++;
  }
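
  /*
     Worked example (illustrative): with size = 2 and mnls = {2, 3},
     start_indices = {0, 2}, and the loop yields mapping = {0, 1, 2, 3, 4}.
     For PETSc's contiguous ownership ranges this appears to reduce to the
     identity map; the indirection is kept for generality.
  */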

  /************** Set up the row structure *****************/

  PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
  for (i = rstart; i < rend; i++) {
    row_indx = i - rstart;
    PetscCall(MatGetRow(A, i, &nz, &cols, &vals));
    /* allocate buffers */
    rows->ptrs[row_indx] = (int *)malloc(nz * sizeof(int));
    rows->A[row_indx]    = (double *)malloc(nz * sizeof(double));
    /* copy the matrix */
    for (j = 0; j < nz; j++) {
      col = cols[j];
      len = rows->len[row_indx]++;

      rows->ptrs[row_indx][len] = mapping[col];
      rows->A[row_indx][len]    = vals[j];
    }
    rows->slen[row_indx] = rows->len[row_indx];

    PetscCall(MatRestoreRow(A, i, &nz, &cols, &vals));
  }

  /************** Set up the column structure *****************/

  if (AT) {
    for (i = rstart; i < rend; i++) {
      row_indx = i - rstart;
      PetscCall(MatGetRow(AT, i, &nz, &cols, &vals));
      /* allocate buffers */
      rows->rptrs[row_indx] = (int *)malloc(nz * sizeof(int));
      /* copy the matrix (i.e., the structure) */
      for (j = 0; j < nz; j++) {
        col = cols[j];
        len = rows->rlen[row_indx]++;

        rows->rptrs[row_indx][len] = mapping[col];
      }
      PetscCall(MatRestoreRow(AT, i, &nz, &cols, &vals));
    }
  }

  PetscCall(PetscFree(mapping));

  order_pointers(M);
  M->maxnz = calc_maxnz(M);
  *B       = M;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Converts from an SPAI matrix B to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
   COMPRESSED-ROW format.
*/
static PetscErrorCode ConvertMatrixToMat(MPI_Comm comm, matrix *B, Mat *PB)
{
  PetscMPIInt size, rank;
  int         m, n, M, N;
  int         d_nz, o_nz;
  int        *d_nnz, *o_nnz;
  int         i, k, global_row, global_col, first_diag_col, last_diag_col;
  PetscScalar val;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  m = n = B->mnls[rank];
  d_nz = o_nz = 0;

  /* Determine preallocation for MatCreateAIJ */
  PetscCall(PetscMalloc1(m, &d_nnz));
  PetscCall(PetscMalloc1(m, &o_nnz));
  for (i = 0; i < m; i++) d_nnz[i] = o_nnz[i] = 0;
  first_diag_col = B->start_indices[rank];
  last_diag_col  = first_diag_col + B->mnls[rank];
  for (i = 0; i < B->mnls[rank]; i++) {
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];
      if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
      else o_nnz[i]++;
    }
  }
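
  /*
     Worked example (illustrative): with two ranks, B->n = 5, and rank 1 owning
     3 rows, first_diag_col = 2 and last_diag_col = 5; an entry of a local row
     in global column 3 is counted in d_nnz (diagonal block), while an entry in
     global column 0 is counted in o_nnz (off-diagonal block).
  */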

  M = N = B->n;
  /* Here we only know how to create AIJ format */
  PetscCall(MatCreate(comm, PB));
  PetscCall(MatSetSizes(*PB, m, n, M, N));
  PetscCall(MatSetType(*PB, MATAIJ));
  PetscCall(MatSeqAIJSetPreallocation(*PB, d_nz, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(*PB, d_nz, d_nnz, o_nz, o_nnz));

  for (i = 0; i < B->mnls[rank]; i++) {
    global_row = B->start_indices[rank] + i;
    for (k = 0; k < B->lines->len[i]; k++) {
      global_col = B->lines->ptrs[i][k];

      val = B->lines->A[i][k];
      PetscCall(MatSetValues(*PB, 1, &global_row, 1, &global_col, &val, ADD_VALUES));
    }
  }

  PetscCall(PetscFree(d_nnz));
  PetscCall(PetscFree(o_nnz));

  PetscCall(MatAssemblyBegin(*PB, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*PB, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}