Actual source code: ex9.c

  1: static char help[]= "This example shows 1) how to transfer vectors from a parent communicator to vectors on a child communicator and vice versa;\n\
  2:   2) how to transfer vectors from a subcommunicator to vectors on another subcommunicator. The two subcommunicators are not\n\
  3:   required to cover all processes in PETSC_COMM_WORLD; 3) how to copy a vector from a parent communicator to vectors on its child communicators.\n\
  4:   To run any example with VECCUDA vectors, add -vectype cuda to the argument list\n\n";

  6: #include <petscvec.h>
  7: int main(int argc,char **argv)
  8: {
  9:   PetscErrorCode ierr;
 10:   PetscMPIInt    nproc,grank,mycolor;
 11:   PetscInt       i,n,N = 20,low,high;
 12:   MPI_Comm       subcomm;
 13:   Vec            x  = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
 14:   Vec            yg = PETSC_NULL; /* global vectors on PETSC_COMM_WORLD */
 15:   VecScatter     vscat;
 16:   IS             ix,iy;
 17:   PetscBool      iscuda = PETSC_FALSE;      /* Option to use VECCUDA vectors */
 18:   PetscBool      optionflag, compareflag;
 19:   char           vectypename[PETSC_MAX_PATH_LEN];
 20:   PetscBool      world2sub  = PETSC_FALSE;  /* Copy a vector from WORLD to a subcomm? */
 21:   PetscBool      sub2sub    = PETSC_FALSE;  /* Copy a vector from a subcomm to another subcomm? */
 22:   PetscBool      world2subs = PETSC_FALSE;  /* Copy a vector from WORLD to multiple subcomms? */

 24:   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
 25:   MPI_Comm_size(PETSC_COMM_WORLD,&nproc);
 26:   MPI_Comm_rank(PETSC_COMM_WORLD,&grank);

 28:   if (nproc < 2) SETERRQ(PETSC_COMM_WORLD,PETSC_ERR_ARG_SIZ,"This test must have at least two processes to run");

 30:   PetscOptionsGetBool(NULL,NULL,"-world2sub",&world2sub,NULL);
 31:   PetscOptionsGetBool(NULL,NULL,"-sub2sub",&sub2sub,NULL);
 32:   PetscOptionsGetBool(NULL,NULL,"-world2subs",&world2subs,NULL);
 33:   PetscOptionsGetString(NULL,NULL,"-vectype",vectypename,sizeof(vectypename),&optionflag);
 34:   if (optionflag) {
 35:     PetscStrncmp(vectypename, "cuda", (size_t)4, &compareflag);
 36:     if (compareflag) iscuda = PETSC_TRUE;
 37:   }

 39:   /* Split PETSC_COMM_WORLD into three subcomms. Each process can only see the subcomm it belongs to */
 40:   mycolor = grank % 3;
 41:   MPI_Comm_split(PETSC_COMM_WORLD,mycolor,grank,&subcomm);
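 42:   /* In terms of PETSC_COMM_WORLD ranks: subcomm0 = {0,3,6,...}, subcomm1 = {1,4,7,...}, subcomm2 = {2,5,8,...} */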

 43:   /*===========================================================================
 44:    *  Transfer a vector x defined on PETSC_COMM_WORLD to a vector y defined on
 45:    *  a subcommunicator of PETSC_COMM_WORLD and vice versa.
 46:    *===========================================================================*/
 47:   if (world2sub) {
 48:     VecCreate(PETSC_COMM_WORLD, &x);
 49:     VecSetSizes(x, PETSC_DECIDE, N);
 50:     if (iscuda) {
 51:       VecSetType(x, VECCUDA);
 52:     } else {
 53:       VecSetType(x, VECSTANDARD);
 54:     }
 55:     VecSetUp(x);
 56:     PetscObjectSetName((PetscObject)x,"x_commworld"); /* Give a name to view x clearly */

 58:     /* Initialize x to [-0.0, -1.0, -2.0, ..., -19.0] */
 59:     VecGetOwnershipRange(x,&low,&high);
 60:     for (i=low; i<high; i++) {
 61:       PetscScalar val = -i;
 62:       VecSetValue(x,i,val,INSERT_VALUES);
 63:     }
 64:     VecAssemblyBegin(x);
 65:     VecAssemblyEnd(x);

 67:     /* Transfer x to a vector y only defined on subcomm0 and vice versa */
 68:     if (mycolor == 0) { /* subcomm0 contains ranks 0, 3, 6, ... in PETSC_COMM_WORLD */
 69:       Vec         y;
 70:       PetscScalar *yvalue;
 71:       VecCreate(subcomm, &y);
 72:       VecSetSizes(y, PETSC_DECIDE, N);
 73:       if (iscuda) {
 74:         VecSetType(y, VECCUDA);
 75:       } else {
 76:         VecSetType(y, VECSTANDARD);
 77:       }
 78:       VecSetUp(y);
 79:       PetscObjectSetName((PetscObject)y,"y_subcomm_0"); /* Give a name to view y clearly */
 80:       VecGetLocalSize(y,&n);
 81:       if (iscuda) {
 82:         #if defined(PETSC_HAVE_CUDA)
 83:           VecCUDAGetArray(y,&yvalue);
 84:         #endif
 85:       } else {
 86:         VecGetArray(y,&yvalue);
 87:       }
 88:       /* Create yg on PETSC_COMM_WORLD and alias yg with y. They share the memory pointed by yvalue.
 89:         Note this is a collective call. All processes have to call it and supply consistent N.
 90:       */
 91:       if (iscuda) {
 92:         #if defined(PETSC_HAVE_CUDA)
 93:           VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg);
 94:         #endif
 95:       } else {
 96:         VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,N,yvalue,&yg);
 97:       }

 99:       /* Create an identity map that makes yg[i] = x[i], i=0..N-1 */
100:       VecGetOwnershipRange(yg,&low,&high); /* low, high are global indices */
101:       ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix);
102:       ISDuplicate(ix,&iy);

104:       /* Union of ix's on subcomm0 covers the full range of [0,N) */
105:       VecScatterCreate(x,ix,yg,iy,&vscat);
106:       VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
107:       VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);

109:       /* Once yg has received the data from x, we return yvalue to y so that we can use y in other operations.
110:         VecGetArray must be paired with VecRestoreArray (and likewise for the CUDA variants).
111:       */
112:       if (iscuda) {
113:          #if defined(PETSC_HAVE_CUDA)
114:            VecCUDARestoreArray(y,&yvalue);
115:          #endif
116:       } else {
117:         VecRestoreArray(y,&yvalue);
118:       }

120:       /* Libraries on subcomm0 can safely use y now, for example, view and scale it */
121:       VecView(y,PETSC_VIEWER_STDOUT_(subcomm));
122:       VecScale(y,2.0);

124:       /* Send the new y back to x */
125:       VecGetArray(y,&yvalue); /* If VecScale is done on GPU, Petsc will prepare a valid yvalue for access */
126:       /* Supply new yvalue to yg without memory copying */
127:       VecPlaceArray(yg,yvalue);
128:       VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
129:       VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
130:       VecResetArray(yg);
131:       /* yvalue came from VecGetArray above. Even for VECCUDA vectors, VecGetArray returns an
132:         up-to-date host pointer, so it must be paired with VecRestoreArray rather than
133:         VecCUDARestoreArray.
134:       */
135:       VecRestoreArray(y,&yvalue);

139:       VecDestroy(&y);
140:     } else {
141:       /* Ranks outside of subcomm0 do not supply values to yg */
142:       if (iscuda) {
143:         #if defined(PETSC_HAVE_CUDA)
144:           VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg);
145:         #endif
146:       } else {
147:         VecCreateMPIWithArray(PETSC_COMM_WORLD,1,0/*n*/,N,NULL,&yg);
148:       }

150:       /* Ranks in subcomm0 already specified the full range of the identity map. The remaining
151:         ranks just need to create empty ISes to cheat VecScatterCreate.
152:       */
153:       ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix);
154:       ISDuplicate(ix,&iy);

156:       VecScatterCreate(x,ix,yg,iy,&vscat);
157:       VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
158:       VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);

160:       /* Send the new y back to x. Ranks outside of subcomm0 actually have nothing to send.
161:         But they have to call VecScatterBegin/End since these routines are collective.
162:       */
163:       VecScatterBegin(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
164:       VecScatterEnd(vscat,yg,x,INSERT_VALUES,SCATTER_REVERSE);
165:     }

167:     VecView(x,PETSC_VIEWER_STDOUT_WORLD);
168:     ISDestroy(&ix);
169:     ISDestroy(&iy);
170:     VecDestroy(&x);
171:     VecDestroy(&yg);
172:     VecScatterDestroy(&vscat);
173:   } /* world2sub */

175:   /*===========================================================================
176:    *  Transfer a vector x defined on subcomm0 to a vector y defined on
177:    *  subcomm1. The two subcomms are not overlapping and their union is
178:    *  not necessarily equal to PETSC_COMM_WORLD.
179:    *===========================================================================*/
180:   if (sub2sub) {
181:     if (mycolor == 0) {
182:       /* Intentionally declare N as a local variable so that processes in subcomm1 do not know its value */
183:       PetscInt          n,N = 22;
184:       Vec               x,xg,yg;
185:       IS                ix,iy;
186:       VecScatter        vscat;
187:       const PetscScalar *xvalue;
188:       MPI_Comm          intercomm,parentcomm;
189:       PetscMPIInt       lrank;

191:       MPI_Comm_rank(subcomm,&lrank);
192:       /* x is on subcomm */
193:       VecCreate(subcomm, &x);
194:       VecSetSizes(x, PETSC_DECIDE, N);
195:       if (iscuda) {
196:         VecSetType(x, VECCUDA);
197:       } else {
198:         VecSetType(x, VECSTANDARD);
199:       }
200:       VecSetUp(x);
201:       VecGetOwnershipRange(x,&low,&high);

203:       /* initialize x = [0.0, 1.0, 2.0, ..., 21.0] */
204:       for (i=low; i<high; i++) {
205:         PetscScalar val = i;
206:         VecSetValue(x,i,val,INSERT_VALUES);
207:       }
208:       VecAssemblyBegin(x);
209:       VecAssemblyEnd(x);
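210:       /* local leader = rank 0 of subcomm0; remote leader = rank 1 of the peer comm PETSC_COMM_WORLD, i.e., rank 0 of subcomm1 */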

211:       MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,1,100/*tag*/,&intercomm);

213:       /* Tell rank 0 of subcomm1 the global size of x */
214:       if (!lrank) {MPI_Send(&N,1,MPIU_INT,0/*receiver's rank in remote comm, i.e., subcomm1*/,200/*tag*/,intercomm);}

216:       /* Create an intracomm Petsc can work on. Ranks in subcomm0 are ordered before ranks in subcomm1 in parentcomm.
217:         But this order actually does not matter, since what we care about is the vector y, which is defined on subcomm1.
218:       */
219:       MPI_Intercomm_merge(intercomm,0/*low*/,&parentcomm);

221:       /* Create a vector xg on parentcomm, which shares memory with x */
222:       VecGetLocalSize(x,&n);
223:       if (iscuda) {
224:         #if defined(PETSC_HAVE_CUDA)
225:           VecCUDAGetArrayRead(x,&xvalue);
226:           VecCreateMPICUDAWithArray(parentcomm,1,n,N,xvalue,&xg);
227:         #endif
228:       } else {
229:         VecGetArrayRead(x,&xvalue);
230:         VecCreateMPIWithArray(parentcomm,1,n,N,xvalue,&xg);
231:       }

233:       /* Ranks in subcomm0 have nothing on yg, so they simply have n=0, array=NULL */
234:       if (iscuda) {
235:         #if defined(PETSC_HAVE_CUDA)
236:           VecCreateMPICUDAWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg);
237:         #endif
238:       } else {
239:         VecCreateMPIWithArray(parentcomm,1,0/*n*/,N,NULL/*array*/,&yg);
240:       }

242:       /* Create the vecscatter, which does identity map by setting yg[i] = xg[i], i=0..N-1. */
243:       VecGetOwnershipRange(xg,&low,&high); /* low, high are global indices of xg */
244:       ISCreateStride(PETSC_COMM_SELF,high-low,low,1,&ix);
245:       ISDuplicate(ix,&iy);
246:       VecScatterCreate(xg,ix,yg,iy,&vscat);

248:       /* Scatter values from xg to yg */
249:       VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);
250:       VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);

252:       /* After the VecScatter is done, xg is idle so we can safely return xvalue to x */
253:       if (iscuda) {
254:         #if defined(PETSC_HAVE_CUDA)
255:           VecCUDARestoreArrayRead(x,&xvalue);
256:         #endif
257:       } else {
258:         VecRestoreArrayRead(x,&xvalue);
259:       }
260:       VecDestroy(&x);
261:       ISDestroy(&ix);
262:       ISDestroy(&iy);
263:       VecDestroy(&xg);
264:       VecDestroy(&yg);
265:       VecScatterDestroy(&vscat);
266:       MPI_Comm_free(&intercomm);
267:       MPI_Comm_free(&parentcomm);
268:     } else if (mycolor == 1) { /* subcomm 1, containing ranks 1, 4, 7, ... in PETSC_COMM_WORLD */
269:       PetscInt    n,N;
270:       Vec         y,xg,yg;
271:       IS          ix,iy;
272:       VecScatter  vscat;
273:       PetscScalar *yvalue;
274:       MPI_Comm    intercomm,parentcomm;
275:       PetscMPIInt lrank;

277:       MPI_Comm_rank(subcomm,&lrank);
278:       MPI_Intercomm_create(subcomm,0,PETSC_COMM_WORLD/*peer_comm*/,0/*remote_leader*/,100/*tag*/,&intercomm);

280:       /* The two rank-0 processes are talking */
281:       if (!lrank) {MPI_Recv(&N,1,MPIU_INT,0/*sender's rank in remote comm, i.e. subcomm0*/,200/*tag*/,intercomm,MPI_STATUS_IGNORE);}
282:       /* Rank 0 of subcomm1 bcasts N to its members */
283:       MPI_Bcast(&N,1,MPIU_INT,0/*local root*/,subcomm);

285:       /* Create an intracomm Petsc can work on */
286:       MPI_Intercomm_merge(intercomm,1/*high*/,&parentcomm);
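287:       /* With high=1 here and high=0 on subcomm0, ranks of subcomm0 are ordered before ranks of subcomm1 in parentcomm */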

288:       /* Ranks in subcomm1 have nothing on xg, so they simply have n=0, array=NULL.*/
289:       if (iscuda) {
290:         #if defined(PETSC_HAVE_CUDA)
291:           VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg);
292:         #endif
293:       } else {
294:         VecCreateMPIWithArray(parentcomm,1/*bs*/,0/*n*/,N,NULL/*array*/,&xg);
295:       }

297:       VecCreate(subcomm, &y);
298:       VecSetSizes(y, PETSC_DECIDE, N);
299:       if (iscuda) {
300:         VecSetType(y, VECCUDA);
301:       } else {
302:         VecSetType(y, VECSTANDARD);
303:       }
304:       VecSetUp(y);

306:       PetscObjectSetName((PetscObject)y,"y_subcomm_1"); /* Give a name to view y clearly */
307:       VecGetLocalSize(y,&n);
308:       if (iscuda) {
309:         #if defined(PETSC_HAVE_CUDA)
310:           VecCUDAGetArray(y,&yvalue);
311:         #endif
312:       } else {
313:         VecGetArray(y,&yvalue);
314:       }
315:       /* Create a vector yg on parentcomm, which shares memory with y. xg and yg must be
316:         created in the same order in subcomm0/1. For example, we cannot reverse the order of
317:         creating xg and yg in subcomm1.
318:       */
319:       if (iscuda) {
320:         #if defined(PETSC_HAVE_CUDA)
321:           VecCreateMPICUDAWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg);
322:         #endif
323:       } else {
324:         VecCreateMPIWithArray(parentcomm,1/*bs*/,n,N,yvalue,&yg);
325:       }

327:       /* Ranks in subcomm0 already specified the full range of the identity map.
328:         Ranks in subcomm1 just need to create empty ISes to cheat VecScatterCreate.
329:       */
330:       ISCreateStride(PETSC_COMM_SELF,0,0,1,&ix);
331:       ISDuplicate(ix,&iy);
332:       VecScatterCreate(xg,ix,yg,iy,&vscat);

334:       /* Scatter values from xg to yg */
335:       VecScatterBegin(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);
336:       VecScatterEnd(vscat,xg,yg,INSERT_VALUES,SCATTER_FORWARD);

338:       /* After the VecScatter is done, values in yg are available. y is our interest, so we return yvalue to y */
339:       if (iscuda) {
340:         #if defined(PETSC_HAVE_CUDA)
341:           VecCUDARestoreArray(y,&yvalue);
342:         #endif
343:       } else {
344:         VecRestoreArray(y,&yvalue);
345:       }

347:       /* Libraries on subcomm1 can safely use y now, for example, view it */
348:       VecView(y,PETSC_VIEWER_STDOUT_(subcomm));

350:       VecDestroy(&y);
351:       ISDestroy(&ix);
352:       ISDestroy(&iy);
353:       VecDestroy(&xg);
354:       VecDestroy(&yg);
355:       VecScatterDestroy(&vscat);
356:       MPI_Comm_free(&intercomm);
357:       MPI_Comm_free(&parentcomm);
358:     } else if (mycolor == 2) { /* subcomm2 */
359:       /* Processes in subcomm2 do not participate in the VecScatter. They can freely do unrelated things on subcomm2 */
360:     }
361:   } /* sub2sub */

363:   /*===========================================================================
364:    *  Copy a vector x defined on PETSC_COMM_WORLD to vectors y defined on
365:    *  every subcommunicator of PETSC_COMM_WORLD. We could use multiple transfers
366:    *  as we did in case 1, but that is not efficient. Instead, we use one vecscatter
367:    *  to achieve that.
368:    *===========================================================================*/
369:   if (world2subs) {
370:     Vec         y;
371:     PetscInt    n,N=15,xstart,ystart,low,high;
372:     PetscScalar *yvalue;

374:     /* Initialize x to [0, 1, 2, 3, ..., N-1] */
375:     VecCreate(PETSC_COMM_WORLD, &x);
376:     VecSetSizes(x, PETSC_DECIDE, N);
377:     if (iscuda) {
378:       VecSetType(x, VECCUDA);
379:     } else {
380:       VecSetType(x, VECSTANDARD);
381:     }
382:     VecSetUp(x);
383:     VecGetOwnershipRange(x,&low,&high);
384:     for (i=low; i<high; i++) {VecSetValue(x,i,(PetscScalar)i,INSERT_VALUES);}
385:     VecAssemblyBegin(x);
386:     VecAssemblyEnd(x);

388:     /* Every subcomm has a y with the same global size as x */
389:     VecCreate(subcomm, &y);
390:     VecSetSizes(y, PETSC_DECIDE, N);
391:     if (iscuda) {
392:       VecSetType(y, VECCUDA);
393:     } else {
394:       VecSetType(y, VECSTANDARD);
395:     }
396:     VecSetUp(y);
397:     VecGetLocalSize(y,&n);

399:     /* Create a global vector yg on PETSC_COMM_WORLD using y's memory. yg's global size = N*(number of subcommunicators).
400:        Each rank in the subcomms contributes a piece to construct the global yg. Keep in mind that pieces from a subcomm are not
401:        necessarily consecutive in yg. That depends on how PETSC_COMM_WORLD is split. In our case, subcomm0 is made of ranks
402:        0, 3, 6, etc. from PETSC_COMM_WORLD. So subcomm0's pieces are interleaved with pieces from other subcomms in yg.
403:     */
404:     if (iscuda) {
405:       #if defined(PETSC_HAVE_CUDA)
406:         VecCUDAGetArray(y,&yvalue);
407:         VecCreateMPICUDAWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg);
408:       #endif
409:     } else {
410:       VecGetArray(y,&yvalue);
411:       VecCreateMPIWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,yvalue,&yg);
412:     }
413:     PetscObjectSetName((PetscObject)yg,"yg_on_subcomms"); /* Give a name to view yg clearly */

415:     /* The following two lines are key. From xstart, we know where to pull entries from x. Note that we get xstart from y,
416:        since the first entry of y on this rank is from x[xstart]. From ystart, we know where to put entries into yg.
417:      */
418:     VecGetOwnershipRange(y,&xstart,NULL);
419:     VecGetOwnershipRange(yg,&ystart,NULL);

421:     ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
422:     ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
423:     VecScatterCreate(x,ix,yg,iy,&vscat);
424:     VecScatterBegin(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);
425:     VecScatterEnd(vscat,x,yg,INSERT_VALUES,SCATTER_FORWARD);

427:     /* View yg on PETSC_COMM_WORLD before destroying it. We shall see the interleaving effect in the output. */
428:     VecView(yg,PETSC_VIEWER_STDOUT_WORLD);
429:     VecDestroy(&yg);

431:     /* Restore yvalue so that processes in the subcomm can use y from now on. */
432:     if (iscuda) {
433:       #if defined(PETSC_HAVE_CUDA)
434:         VecCUDARestoreArray(y,&yvalue);
435:       #endif
436:     } else {
437:       VecRestoreArray(y,&yvalue);
438:     }
439:     VecScale(y,3.0);

441:     ISDestroy(&ix); /* One can also destroy ix, iy immediately after VecScatterCreate() */
442:     ISDestroy(&iy);
443:     VecDestroy(&x);
444:     VecDestroy(&y);
445:     VecScatterDestroy(&vscat);
446:   } /* world2subs */

448:   MPI_Comm_free(&subcomm);
449:   ierr = PetscFinalize();
450:   return ierr;
451: }

453: /*TEST

455:    build:
456:      requires: !define(PETSC_HAVE_MPIUNI)
457:    testset:
458:      nsize: 7

460:      test:
461:        suffix: 1
462:        args: -world2sub

464:      test:
465:        suffix: 2
466:        args: -sub2sub

468:      test:
469:        suffix: 3
470:        args: -world2subs

472:      test:
473:        suffix: 4
474:        args: -world2sub -vectype cuda
475:        requires: cuda

477:      test:
478:        suffix: 5
479:        args: -sub2sub -vectype cuda
480:        requires: cuda

482:      test:
483:        suffix: 6
484:        args: -world2subs -vectype cuda
485:        requires: cuda

487:      test:
488:        suffix: 7
489:        args: -world2sub -sf_type neighbor
490:        output_file: output/ex9_1.out
491:        # OpenMPI has a bug wrt MPI_Neighbor_alltoallv etc (https://github.com/open-mpi/ompi/pull/6782). Once the patch is in, we can remove !define(PETSC_HAVE_OMPI_MAJOR_VERSION)
492:        requires: define(PETSC_HAVE_MPI_NEIGHBORHOOD_COLLECTIVES) !define(PETSC_HAVE_OMPI_MAJOR_VERSION)
493: TEST*/