Actual source code: comb.c
1: /*
2: Split phase global vector reductions with support for combining the
3: communication portion of several operations. Using MPI-1.1 support only
5: The idea for this and much of the initial code is contributed by
6: Victor Eijkhout.
8: Usage:
9: VecDotBegin(Vec,Vec,PetscScalar *);
10: VecNormBegin(Vec,NormType,PetscReal *);
11: ....
12: VecDotEnd(Vec,Vec,PetscScalar *);
13: VecNormEnd(Vec,NormType,PetscReal *);
15: Limitations:
16: - The order of the xxxEnd() functions MUST be in the same order
17: as the xxxBegin(). There is extensive error checking to try to
18: insure that the user calls the routines in the correct order
19: */
21: #include <petsc/private/vecimpl.h>
23: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf, void *recvbuf, PetscMPIInt count, MPI_Datatype datatype, MPI_Op op, MPI_Comm comm, MPI_Request *request)
24: {
25: PetscFunctionBegin;
26: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
27: PetscCallMPI(MPI_Iallreduce(sendbuf, recvbuf, count, datatype, op, comm, request));
28: #else
29: PetscCall(MPIU_Allreduce(sendbuf, recvbuf, count, datatype, op, comm));
30: *request = MPI_REQUEST_NULL;
31: #endif
32: PetscFunctionReturn(PETSC_SUCCESS);
33: }
35: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *);
37: /*
38: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
39: */
40: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm, PetscSplitReduction **sr)
41: {
42: PetscFunctionBegin;
43: PetscCall(PetscNew(sr));
44: (*sr)->numopsbegin = 0;
45: (*sr)->numopsend = 0;
46: (*sr)->state = STATE_BEGIN;
47: #define MAXOPS 32
48: (*sr)->maxops = MAXOPS;
49: PetscCall(PetscMalloc6(MAXOPS, &(*sr)->lvalues, MAXOPS, &(*sr)->gvalues, MAXOPS, &(*sr)->invecs, MAXOPS, &(*sr)->reducetype, MAXOPS, &(*sr)->lvalues_mix, MAXOPS, &(*sr)->gvalues_mix));
50: #undef MAXOPS
51: (*sr)->comm = comm;
52: (*sr)->request = MPI_REQUEST_NULL;
53: (*sr)->mix = PETSC_FALSE;
54: (*sr)->async = PETSC_FALSE;
55: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
56: (*sr)->async = PETSC_TRUE; /* Enable by default */
57: #endif
58: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
59: PetscCall(PetscOptionsGetBool(NULL, NULL, "-splitreduction_async", &(*sr)->async, NULL));
60: PetscFunctionReturn(PETSC_SUCCESS);
61: }
63: /*
64: This function is the MPI reduction operation used when there is
65: a combination of sums and max in the reduction. The call below to
66: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
67: MPI operator PetscSplitReduction_Op.
68: */
69: MPI_Op PetscSplitReduction_Op = 0;
71: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in, void *out, PetscMPIInt *cnt, MPI_Datatype *datatype)
72: {
73: struct PetscScalarInt {
74: PetscScalar v;
75: PetscInt i;
76: };
77: struct PetscScalarInt *xin = (struct PetscScalarInt *)in;
78: struct PetscScalarInt *xout = (struct PetscScalarInt *)out;
79: PetscInt i, count = (PetscInt)*cnt;
81: PetscFunctionBegin;
82: if (*datatype != MPIU_SCALAR_INT) {
83: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types"));
84: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
85: }
86: for (i = 0; i < count; i++) {
87: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
88: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
89: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v), PetscRealPart(xin[i].v));
90: else {
91: PetscCallAbort(MPI_COMM_SELF, (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN"));
92: PETSCABORT(MPI_COMM_SELF, PETSC_ERR_ARG_WRONG);
93: }
94: }
95: PetscFunctionReturnVoid();
96: }
98: /*@
99: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
101: Collective but not synchronizing
103: Input Parameter:
104: . comm - communicator on which split reduction has been queued
106: Level: advanced
108: Note:
109: Calling this function is optional when using split-mode reduction. On supporting hardware,
110: calling this after all VecXxxBegin() allows the reduction to make asynchronous progress
111: before the result is needed (in VecXxxEnd()).
113: .seealso: `VecNormBegin()`, `VecNormEnd()`, `VecDotBegin()`, `VecDotEnd()`, `VecTDotBegin()`, `VecTDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`
114: @*/
115: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
116: {
117: PetscSplitReduction *sr;
119: PetscFunctionBegin;
120: PetscCall(PetscSplitReductionGet(comm, &sr));
121: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
122: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
123: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
124: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
125: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
126: MPI_Comm comm = sr->comm;
127: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
129: PetscCall(PetscLogEventBegin(VEC_ReduceBegin, 0, 0, 0, 0));
130: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
131: if (size == 1) {
132: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
133: } else {
134: /* determine if all reductions are sum, max, or min */
135: for (i = 0; i < numops; i++) {
136: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
137: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
138: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
139: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
140: }
141: PetscCheck(sum_flg + max_flg + min_flg <= 1 || !sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
142: if (sum_flg + max_flg + min_flg > 1) {
143: sr->mix = PETSC_TRUE;
144: for (i = 0; i < numops; i++) {
145: sr->lvalues_mix[i].v = lvalues[i];
146: sr->lvalues_mix[i].i = reducetype[i];
147: }
148: PetscCall(MPIPetsc_Iallreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm, &sr->request));
149: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
150: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm, &sr->request));
151: } else if (min_flg) {
152: PetscCall(MPIPetsc_Iallreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm, &sr->request));
153: } else {
154: PetscCall(MPIPetsc_Iallreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm, &sr->request));
155: }
156: }
157: sr->state = STATE_PENDING;
158: sr->numopsend = 0;
159: PetscCall(PetscLogEventEnd(VEC_ReduceBegin, 0, 0, 0, 0));
160: } else {
161: PetscCall(PetscSplitReductionApply(sr));
162: }
163: PetscFunctionReturn(PETSC_SUCCESS);
164: }
166: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
167: {
168: PetscFunctionBegin;
169: switch (sr->state) {
170: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
171: PetscCall(PetscSplitReductionApply(sr));
172: break;
173: case STATE_PENDING:
174: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
175: PetscCall(PetscLogEventBegin(VEC_ReduceEnd, 0, 0, 0, 0));
176: if (sr->request != MPI_REQUEST_NULL) PetscCallMPI(MPI_Wait(&sr->request, MPI_STATUS_IGNORE));
177: sr->state = STATE_END;
178: if (sr->mix) {
179: PetscInt i;
180: for (i = 0; i < sr->numopsbegin; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
181: sr->mix = PETSC_FALSE;
182: }
183: PetscCall(PetscLogEventEnd(VEC_ReduceEnd, 0, 0, 0, 0));
184: break;
185: default:
186: break; /* everything is already done */
187: }
188: PetscFunctionReturn(PETSC_SUCCESS);
189: }
191: /*
192: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
193: */
194: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
195: {
196: PetscInt i, numops = sr->numopsbegin, *reducetype = sr->reducetype;
197: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
198: PetscInt sum_flg = 0, max_flg = 0, min_flg = 0;
199: MPI_Comm comm = sr->comm;
200: PetscMPIInt size, cmul = sizeof(PetscScalar) / sizeof(PetscReal);
202: PetscFunctionBegin;
203: PetscCheck(sr->numopsend <= 0, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Cannot call this after VecxxxEnd() has been called");
204: PetscCall(PetscLogEventBegin(VEC_ReduceCommunication, 0, 0, 0, 0));
205: PetscCallMPI(MPI_Comm_size(sr->comm, &size));
206: if (size == 1) {
207: PetscCall(PetscArraycpy(gvalues, lvalues, numops));
208: } else {
209: /* determine if all reductions are sum, max, or min */
210: for (i = 0; i < numops; i++) {
211: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
212: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
213: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
214: else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
215: }
216: if (sum_flg + max_flg + min_flg > 1) {
217: PetscCheck(!sr->mix, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in PetscSplitReduction() data structure, probably memory corruption");
218: for (i = 0; i < numops; i++) {
219: sr->lvalues_mix[i].v = lvalues[i];
220: sr->lvalues_mix[i].i = reducetype[i];
221: }
222: PetscCall(MPIU_Allreduce(sr->lvalues_mix, sr->gvalues_mix, numops, MPIU_SCALAR_INT, PetscSplitReduction_Op, comm));
223: for (i = 0; i < numops; i++) sr->gvalues[i] = sr->gvalues_mix[i].v;
224: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
225: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MAX, comm));
226: } else if (min_flg) {
227: PetscCall(MPIU_Allreduce((PetscReal *)lvalues, (PetscReal *)gvalues, cmul * numops, MPIU_REAL, MPIU_MIN, comm));
228: } else {
229: PetscCall(MPIU_Allreduce(lvalues, gvalues, numops, MPIU_SCALAR, MPIU_SUM, comm));
230: }
231: }
232: sr->state = STATE_END;
233: sr->numopsend = 0;
234: PetscCall(PetscLogEventEnd(VEC_ReduceCommunication, 0, 0, 0, 0));
235: PetscFunctionReturn(PETSC_SUCCESS);
236: }
238: /*
239: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
240: */
241: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
242: {
243: struct PetscScalarInt {
244: PetscScalar v;
245: PetscInt i;
246: };
247: PetscInt maxops = sr->maxops, *reducetype = sr->reducetype;
248: PetscScalar *lvalues = sr->lvalues, *gvalues = sr->gvalues;
249: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt *)sr->lvalues_mix;
250: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt *)sr->gvalues_mix;
251: void **invecs = sr->invecs;
253: PetscFunctionBegin;
254: sr->maxops = 2 * maxops;
255: PetscCall(PetscMalloc6(2 * maxops, &sr->lvalues, 2 * maxops, &sr->gvalues, 2 * maxops, &sr->reducetype, 2 * maxops, &sr->invecs, 2 * maxops, &sr->lvalues_mix, 2 * maxops, &sr->gvalues_mix));
256: PetscCall(PetscArraycpy(sr->lvalues, lvalues, maxops));
257: PetscCall(PetscArraycpy(sr->gvalues, gvalues, maxops));
258: PetscCall(PetscArraycpy(sr->reducetype, reducetype, maxops));
259: PetscCall(PetscArraycpy(sr->invecs, invecs, maxops));
260: PetscCall(PetscArraycpy(sr->lvalues_mix, lvalues_mix, maxops));
261: PetscCall(PetscArraycpy(sr->gvalues_mix, gvalues_mix, maxops));
262: PetscCall(PetscFree6(lvalues, gvalues, reducetype, invecs, lvalues_mix, gvalues_mix));
263: PetscFunctionReturn(PETSC_SUCCESS);
264: }
266: static PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
267: {
268: PetscFunctionBegin;
269: PetscCall(PetscFree6(sr->lvalues, sr->gvalues, sr->reducetype, sr->invecs, sr->lvalues_mix, sr->gvalues_mix));
270: PetscCall(PetscFree(sr));
271: PetscFunctionReturn(PETSC_SUCCESS);
272: }
274: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
276: /*
277: Private routine to delete internal storage when a communicator is freed.
278: This is called by MPI, not by users.
280: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
281: it was MPI_Comm *comm.
282: */
283: static PetscMPIInt MPIAPI Petsc_DelReduction(MPI_Comm comm, PetscMPIInt keyval, void *attr_val, void *extra_state)
284: {
285: PetscFunctionBegin;
286: (void)keyval;
287: (void)extra_state;
288: PetscCallMPI(PetscInfo(0, "Deleting reduction data in an MPI_Comm %ld\n", (long)comm));
289: PetscCallMPI(PetscSplitReductionDestroy((PetscSplitReduction *)attr_val));
290: PetscFunctionReturn(PETSC_SUCCESS);
291: }
293: /*
294: PetscSplitReductionGet - Gets the split reduction object from a
295: PETSc vector, creates if it does not exit.
297: */
298: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm, PetscSplitReduction **sr)
299: {
300: PetscMPIInt flag;
302: PetscFunctionBegin;
303: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
304: /*
305: The calling sequence of the 2nd argument to this function changed
306: between MPI Standard 1.0 and the revisions 1.1 Here we match the
307: new standard, if you are using an MPI implementation that uses
308: the older version you will get a warning message about the next line;
309: it is only a warning message and should do no harm.
310: */
311: PetscCallMPI(MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN, Petsc_DelReduction, &Petsc_Reduction_keyval, NULL));
312: }
313: PetscCallMPI(MPI_Comm_get_attr(comm, Petsc_Reduction_keyval, (void **)sr, &flag));
314: if (!flag) { /* doesn't exist yet so create it and put it in */
315: PetscCall(PetscSplitReductionCreate(comm, sr));
316: PetscCallMPI(MPI_Comm_set_attr(comm, Petsc_Reduction_keyval, *sr));
317: PetscCall(PetscInfo(0, "Putting reduction data in an MPI_Comm %ld\n", (long)comm));
318: }
319: PetscFunctionReturn(PETSC_SUCCESS);
320: }
322: /* ----------------------------------------------------------------------------------------------------*/
324: /*@
325: VecDotBegin - Starts a split phase dot product computation.
327: Input Parameters:
328: + x - the first vector
329: . y - the second vector
330: - result - where the result will go (can be NULL)
332: Level: advanced
334: Notes:
335: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
337: .seealso: `VecDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
338: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
339: @*/
340: PetscErrorCode VecDotBegin(Vec x, Vec y, PetscScalar *result)
341: {
342: PetscSplitReduction *sr;
343: MPI_Comm comm;
345: PetscFunctionBegin;
348: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
349: PetscCall(PetscSplitReductionGet(comm, &sr));
350: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
351: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
352: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
353: sr->invecs[sr->numopsbegin] = (void *)x;
354: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
355: PetscUseTypeMethod(x, dot_local, y, sr->lvalues + sr->numopsbegin++);
356: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
357: PetscFunctionReturn(PETSC_SUCCESS);
358: }
360: /*@
361: VecDotEnd - Ends a split phase dot product computation.
363: Input Parameters:
364: + x - the first vector (can be `NULL`)
365: . y - the second vector (can be `NULL`)
366: - result - where the result will go
368: Level: advanced
370: Notes:
371: Each call to `VecDotBegin()` should be paired with a call to `VecDotEnd()`.
373: .seealso: `VecDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
374: `VecTDotBegin()`, `VecTDotEnd()`, `PetscCommSplitReductionBegin()`
375: @*/
376: PetscErrorCode VecDotEnd(Vec x, Vec y, PetscScalar *result)
377: {
378: PetscSplitReduction *sr;
379: MPI_Comm comm;
381: PetscFunctionBegin;
382: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
383: PetscCall(PetscSplitReductionGet(comm, &sr));
384: PetscCall(PetscSplitReductionEnd(sr));
386: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
387: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
388: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
389: *result = sr->gvalues[sr->numopsend++];
391: /*
392: We are finished getting all the results so reset to no outstanding requests
393: */
394: if (sr->numopsend == sr->numopsbegin) {
395: sr->state = STATE_BEGIN;
396: sr->numopsend = 0;
397: sr->numopsbegin = 0;
398: sr->mix = PETSC_FALSE;
399: }
400: PetscFunctionReturn(PETSC_SUCCESS);
401: }
403: /*@
404: VecTDotBegin - Starts a split phase transpose dot product computation.
406: Input Parameters:
407: + x - the first vector
408: . y - the second vector
409: - result - where the result will go (can be `NULL`)
411: Level: advanced
413: Notes:
414: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
416: .seealso: `VecTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
417: `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
418: @*/
419: PetscErrorCode VecTDotBegin(Vec x, Vec y, PetscScalar *result)
420: {
421: PetscSplitReduction *sr;
422: MPI_Comm comm;
424: PetscFunctionBegin;
425: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
426: PetscCall(PetscSplitReductionGet(comm, &sr));
427: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
428: if (sr->numopsbegin >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
429: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
430: sr->invecs[sr->numopsbegin] = (void *)x;
431: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
432: PetscUseTypeMethod(x, tdot_local, y, sr->lvalues + sr->numopsbegin++);
433: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
434: PetscFunctionReturn(PETSC_SUCCESS);
435: }
437: /*@
438: VecTDotEnd - Ends a split phase transpose dot product computation.
440: Input Parameters:
441: + x - the first vector (can be `NULL`)
442: . y - the second vector (can be `NULL`)
443: - result - where the result will go
445: Level: advanced
447: Notes:
448: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
450: .seealso: `VecTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
451: `VecDotBegin()`, `VecDotEnd()`
452: @*/
453: PetscErrorCode VecTDotEnd(Vec x, Vec y, PetscScalar *result)
454: {
455: PetscFunctionBegin;
456: /*
457: TDotEnd() is the same as DotEnd() so reuse the code
458: */
459: PetscCall(VecDotEnd(x, y, result));
460: PetscFunctionReturn(PETSC_SUCCESS);
461: }
463: /* -------------------------------------------------------------------------*/
465: /*@
466: VecNormBegin - Starts a split phase norm computation.
468: Input Parameters:
469: + x - the first vector
470: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
471: - result - where the result will go (can be `NULL`)
473: Level: advanced
475: Notes:
476: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
478: .seealso: `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
479: @*/
480: PetscErrorCode VecNormBegin(Vec x, NormType ntype, PetscReal *result)
481: {
482: PetscSplitReduction *sr;
483: PetscReal lresult[2];
484: MPI_Comm comm;
486: PetscFunctionBegin;
488: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
489: PetscCall(PetscSplitReductionGet(comm, &sr));
490: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
491: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops - 1 && ntype == NORM_1_AND_2)) PetscCall(PetscSplitReductionExtend(sr));
493: sr->invecs[sr->numopsbegin] = (void *)x;
494: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
495: PetscUseTypeMethod(x, norm_local, ntype, lresult);
496: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
497: if (ntype == NORM_2) lresult[0] = lresult[0] * lresult[0];
498: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1] * lresult[1];
499: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
500: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
501: sr->lvalues[sr->numopsbegin++] = lresult[0];
502: if (ntype == NORM_1_AND_2) {
503: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
504: sr->lvalues[sr->numopsbegin++] = lresult[1];
505: }
506: PetscFunctionReturn(PETSC_SUCCESS);
507: }
509: /*@
510: VecNormEnd - Ends a split phase norm computation.
512: Input Parameters:
513: + x - the first vector
514: . ntype - norm type, one of `NORM_1`, `NORM_2`, `NORM_MAX`, `NORM_1_AND_2`
515: - result - where the result will go
517: Level: advanced
519: Notes:
520: Each call to `VecNormBegin()` should be paired with a call to `VecNormEnd()`.
522: The `x` vector is not allowed to be `NULL`, otherwise the vector would not have its correctly cached norm value
524: .seealso: `VecNormBegin()`, `VecNorm()`, `VecDot()`, `VecMDot()`, `VecDotBegin()`, `VecDotEnd()`, `PetscCommSplitReductionBegin()`
525: @*/
526: PetscErrorCode VecNormEnd(Vec x, NormType ntype, PetscReal *result)
527: {
528: PetscSplitReduction *sr;
529: MPI_Comm comm;
531: PetscFunctionBegin;
533: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
534: PetscCall(PetscSplitReductionGet(comm, &sr));
535: PetscCall(PetscSplitReductionEnd(sr));
537: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
538: PetscCheck((void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
539: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_MAX || ntype != NORM_MAX, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
540: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
542: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
543: else if (ntype == NORM_1_AND_2) {
544: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
545: result[1] = PetscSqrtReal(result[1]);
546: }
547: if (ntype != NORM_1_AND_2) PetscCall(PetscObjectComposedDataSetReal((PetscObject)x, NormIds[ntype], result[0]));
549: if (sr->numopsend == sr->numopsbegin) {
550: sr->state = STATE_BEGIN;
551: sr->numopsend = 0;
552: sr->numopsbegin = 0;
553: }
554: PetscFunctionReturn(PETSC_SUCCESS);
555: }
557: /*
558: Possibly add
560: PetscReductionSumBegin/End()
561: PetscReductionMaxBegin/End()
562: PetscReductionMinBegin/End()
563: or have more like MPI with a single function with flag for Op? Like first better
564: */
566: /*@
567: VecMDotBegin - Starts a split phase multiple dot product computation.
569: Input Parameters:
570: + x - the first vector
571: . nv - number of vectors
572: . y - array of vectors
573: - result - where the result will go (can be `NULL`)
575: Level: advanced
577: Notes:
578: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
580: .seealso: `VecMDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
581: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
582: @*/
583: PetscErrorCode VecMDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
584: {
585: PetscSplitReduction *sr;
586: MPI_Comm comm;
587: PetscInt i;
589: PetscFunctionBegin;
590: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
591: PetscCall(PetscSplitReductionGet(comm, &sr));
592: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
593: for (i = 0; i < nv; i++) {
594: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
595: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
596: sr->invecs[sr->numopsbegin + i] = (void *)x;
597: }
598: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
599: PetscUseTypeMethod(x, mdot_local, nv, y, sr->lvalues + sr->numopsbegin);
600: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
601: sr->numopsbegin += nv;
602: PetscFunctionReturn(PETSC_SUCCESS);
603: }
605: /*@
606: VecMDotEnd - Ends a split phase multiple dot product computation.
608: Input Parameters:
609: + x - the first vector (can be `NULL`)
610: . nv - number of vectors
611: - y - array of vectors (can be `NULL`)
613: Output Parameter:
614: . result - where the result will go
616: Level: advanced
618: Notes:
619: Each call to `VecMDotBegin()` should be paired with a call to `VecMDotEnd()`.
621: .seealso: `VecMDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
622: `VecTDotBegin()`, `VecTDotEnd()`, `VecMTDotBegin()`, `VecMTDotEnd()`, `PetscCommSplitReductionBegin()`
623: @*/
624: PetscErrorCode VecMDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
625: {
626: PetscSplitReduction *sr;
627: MPI_Comm comm;
628: PetscInt i;
630: PetscFunctionBegin;
631: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
632: PetscCall(PetscSplitReductionGet(comm, &sr));
633: PetscCall(PetscSplitReductionEnd(sr));
635: PetscCheck(sr->numopsend < sr->numopsbegin, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() more times then VecxxxBegin()");
636: PetscCheck(!x || (void *)x == sr->invecs[sr->numopsend], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
637: PetscCheck(sr->reducetype[sr->numopsend] == PETSC_SR_REDUCE_SUM, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Called VecDotEnd() on a reduction started with VecNormBegin()");
638: for (i = 0; i < nv; i++) result[i] = sr->gvalues[sr->numopsend++];
640: /*
641: We are finished getting all the results so reset to no outstanding requests
642: */
643: if (sr->numopsend == sr->numopsbegin) {
644: sr->state = STATE_BEGIN;
645: sr->numopsend = 0;
646: sr->numopsbegin = 0;
647: }
648: PetscFunctionReturn(PETSC_SUCCESS);
649: }
651: /*@
652: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
654: Input Parameters:
655: + x - the first vector
656: . nv - number of vectors
657: . y - array of vectors
658: - result - where the result will go (can be `NULL`)
660: Level: advanced
662: Notes:
663: Each call to `VecMTDotBegin()` should be paired with a call to `VecMTDotEnd()`.
665: .seealso: `VecMTDotEnd()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
666: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
667: @*/
668: PetscErrorCode VecMTDotBegin(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
669: {
670: PetscSplitReduction *sr;
671: MPI_Comm comm;
672: PetscInt i;
674: PetscFunctionBegin;
675: PetscCall(PetscObjectGetComm((PetscObject)x, &comm));
676: PetscCall(PetscSplitReductionGet(comm, &sr));
677: PetscCheck(sr->state == STATE_BEGIN, PETSC_COMM_SELF, PETSC_ERR_ORDER, "Called before all VecxxxEnd() called");
678: for (i = 0; i < nv; i++) {
679: if (sr->numopsbegin + i >= sr->maxops) PetscCall(PetscSplitReductionExtend(sr));
680: sr->reducetype[sr->numopsbegin + i] = PETSC_SR_REDUCE_SUM;
681: sr->invecs[sr->numopsbegin + i] = (void *)x;
682: }
683: PetscCall(PetscLogEventBegin(VEC_ReduceArithmetic, 0, 0, 0, 0));
684: PetscUseTypeMethod(x, mtdot_local, nv, y, sr->lvalues + sr->numopsbegin);
685: PetscCall(PetscLogEventEnd(VEC_ReduceArithmetic, 0, 0, 0, 0));
686: sr->numopsbegin += nv;
687: PetscFunctionReturn(PETSC_SUCCESS);
688: }
690: /*@
691: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
693: Input Parameters:
694: + x - the first vector (can be `NULL`)
695: . nv - number of vectors
696: - y - array of vectors (can be `NULL`)
698: Output Parameter:
699: . result - where the result will go
701: Level: advanced
703: Notes:
704: Each call to `VecTDotBegin()` should be paired with a call to `VecTDotEnd()`.
706: .seealso: `VecMTDotBegin()`, `VecNormBegin()`, `VecNormEnd()`, `VecNorm()`, `VecDot()`, `VecMDot()`,
707: `VecDotBegin()`, `VecDotEnd()`, `VecMDotBegin()`, `VecMDotEnd()`, `PetscCommSplitReductionBegin()`
708: @*/
709: PetscErrorCode VecMTDotEnd(Vec x, PetscInt nv, const Vec y[], PetscScalar result[])
710: {
711: PetscFunctionBegin;
712: /*
713: MTDotEnd() is the same as MDotEnd() so reuse the code
714: */
715: PetscCall(VecMDotEnd(x, nv, y, result));
716: PetscFunctionReturn(PETSC_SUCCESS);
717: }