Actual source code: sfwindow.c
1: #include <petsc/private/sfimpl.h>
3: typedef struct _n_PetscSFDataLink *PetscSFDataLink;
4: typedef struct _n_PetscSFWinLink *PetscSFWinLink;
6: typedef struct {
7: PetscSFWindowSyncType sync; /* FENCE, LOCK, or ACTIVE synchronization */
8: PetscSFDataLink link; /* List of MPI data types, lazily constructed for each data type */
9: PetscSFWinLink wins; /* List of active windows */
10: PetscSFWindowFlavorType flavor; /* Current PETSCSF_WINDOW_FLAVOR_ */
11: PetscSF dynsf;
12: MPI_Info info;
13: } PetscSF_Window;
15: struct _n_PetscSFDataLink {
16: MPI_Datatype unit;
17: MPI_Datatype *mine;
18: MPI_Datatype *remote;
19: PetscSFDataLink next;
20: };
22: struct _n_PetscSFWinLink {
23: PetscBool inuse;
24: size_t bytes;
25: void *addr;
26: void *paddr;
27: MPI_Win win;
28: MPI_Request *reqs;
29: PetscSFWindowFlavorType flavor;
30: MPI_Aint *dyn_target_addr;
31: PetscBool epoch;
32: PetscSFWinLink next;
33: };
/* String tables for the option parser: value names, then enum type name, then enum prefix, NULL terminated */
const char *const PetscSFWindowSyncTypes[] = {
  "FENCE",
  "LOCK",
  "ACTIVE",
  "PetscSFWindowSyncType",
  "PETSCSF_WINDOW_SYNC_",
  NULL
};

const char *const PetscSFWindowFlavorTypes[] = {
  "CREATE",
  "DYNAMIC",
  "ALLOCATE",
  "SHARED",
  "PetscSFWindowFlavorType",
  "PETSCSF_WINDOW_FLAVOR_",
  NULL
};
38: /* Built-in MPI_Ops act elementwise inside MPI_Accumulate, but cannot be used with composite types inside collectives (MPI_Allreduce) */
39: static PetscErrorCode PetscSFWindowOpTranslate(MPI_Op *op)
40: {
42: if (*op == MPIU_SUM) *op = MPI_SUM;
43: else if (*op == MPIU_MAX) *op = MPI_MAX;
44: else if (*op == MPIU_MIN) *op = MPI_MIN;
45: return(0);
46: }
48: /*@C
49: PetscSFWindowGetDataTypes - gets composite local and remote data types for each rank
51: Not Collective
53: Input Parameters:
54: + sf - star forest
55: - unit - data type for each node
57: Output Parameters:
58: + localtypes - types describing part of local leaf buffer referencing each remote rank
59: - remotetypes - types describing part of remote root buffer referenced for each remote rank
61: Level: developer
63: .seealso: PetscSFSetGraph(), PetscSFView()
64: @*/
65: static PetscErrorCode PetscSFWindowGetDataTypes(PetscSF sf,MPI_Datatype unit,const MPI_Datatype **localtypes,const MPI_Datatype **remotetypes)
66: {
67: PetscSF_Window *w = (PetscSF_Window*)sf->data;
68: PetscErrorCode ierr;
69: PetscSFDataLink link;
70: PetscInt i,nranks;
71: const PetscInt *roffset,*rmine,*rremote;
72: const PetscMPIInt *ranks;
75: /* Look for types in cache */
76: for (link=w->link; link; link=link->next) {
77: PetscBool match;
78: MPIPetsc_Type_compare(unit,link->unit,&match);
79: if (match) {
80: *localtypes = link->mine;
81: *remotetypes = link->remote;
82: return(0);
83: }
84: }
86: /* Create new composite types for each send rank */
87: PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
88: PetscNew(&link);
89: MPI_Type_dup(unit,&link->unit);
90: PetscMalloc2(nranks,&link->mine,nranks,&link->remote);
91: for (i=0; i<nranks; i++) {
92: PetscInt rcount = roffset[i+1] - roffset[i];
93: PetscMPIInt *rmine,*rremote;
94: #if !defined(PETSC_USE_64BIT_INDICES)
95: rmine = sf->rmine + sf->roffset[i];
96: rremote = sf->rremote + sf->roffset[i];
97: #else
98: PetscInt j;
99: PetscMalloc2(rcount,&rmine,rcount,&rremote);
100: for (j=0; j<rcount; j++) {
101: PetscMPIIntCast(sf->rmine[sf->roffset[i]+j],rmine+j);
102: PetscMPIIntCast(sf->rremote[sf->roffset[i]+j],rremote+j);
103: }
104: #endif
106: MPI_Type_create_indexed_block(rcount,1,rmine,link->unit,&link->mine[i]);
107: MPI_Type_create_indexed_block(rcount,1,rremote,link->unit,&link->remote[i]);
108: #if defined(PETSC_USE_64BIT_INDICES)
109: PetscFree2(rmine,rremote);
110: #endif
111: MPI_Type_commit(&link->mine[i]);
112: MPI_Type_commit(&link->remote[i]);
113: }
114: link->next = w->link;
115: w->link = link;
117: *localtypes = link->mine;
118: *remotetypes = link->remote;
119: return(0);
120: }
122: /*@C
123: PetscSFWindowSetFlavorType - Set flavor type for MPI_Win creation
125: Logically Collective
127: Input Parameters:
128: + sf - star forest for communication
129: - flavor - flavor type
131: Options Database Key:
132: . -sf_window_flavor <flavor> - sets the flavor type CREATE, DYNAMIC, ALLOCATE or SHARED (see PetscSFWindowFlavorType)
134: Level: advanced
136: Notes: Windows reusage follow this rules:
138: PETSCSF_WINDOW_FLAVOR_CREATE: creates a new window every time, uses MPI_Win_create
140: PETSCSF_WINDOW_FLAVOR_DYNAMIC: uses MPI_Win_create_dynamic/MPI_Win_attach and tries to reuse windows by comparing the root array. Intended to be used on repeated applications of the same SF, e.g.
141: for i=1 to K
142: PetscSFOperationBegin(rootdata1,leafdata_whatever);
143: PetscSFOperationEnd(rootdata1,leafdata_whatever);
144: ...
145: PetscSFOperationBegin(rootdataN,leafdata_whatever);
146: PetscSFOperationEnd(rootdataN,leafdata_whatever);
147: endfor
148: The following pattern will instead raise an error
149: PetscSFOperationBegin(rootdata1,leafdata_whatever);
150: PetscSFOperationEnd(rootdata1,leafdata_whatever);
151: PetscSFOperationBegin(rank ? rootdata1 : rootdata2,leafdata_whatever);
152: PetscSFOperationEnd(rank ? rootdata1 : rootdata2,leafdata_whatever);
154: PETSCSF_WINDOW_FLAVOR_ALLOCATE: uses MPI_Win_allocate, reuses any pre-existing window which fits the data and it is not in use
156: PETSCSF_WINDOW_FLAVOR_SHARED: uses MPI_Win_allocate_shared, reusage policy as for PETSCSF_WINDOW_FLAVOR_ALLOCATE
158: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetFlavorType()
159: @*/
160: PetscErrorCode PetscSFWindowSetFlavorType(PetscSF sf,PetscSFWindowFlavorType flavor)
161: {
167: PetscTryMethod(sf,"PetscSFWindowSetFlavorType_C",(PetscSF,PetscSFWindowFlavorType),(sf,flavor));
168: return(0);
169: }
171: static PetscErrorCode PetscSFWindowSetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType flavor)
172: {
173: PetscSF_Window *w = (PetscSF_Window*)sf->data;
176: w->flavor = flavor;
177: return(0);
178: }
180: /*@C
181: PetscSFWindowGetFlavorType - Get flavor type for PetscSF communication
183: Logically Collective
185: Input Parameter:
186: . sf - star forest for communication
188: Output Parameter:
189: . flavor - flavor type
191: Level: advanced
193: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetFlavorType()
194: @*/
195: PetscErrorCode PetscSFWindowGetFlavorType(PetscSF sf,PetscSFWindowFlavorType *flavor)
196: {
202: PetscUseMethod(sf,"PetscSFWindowGetFlavorType_C",(PetscSF,PetscSFWindowFlavorType*),(sf,flavor));
203: return(0);
204: }
206: static PetscErrorCode PetscSFWindowGetFlavorType_Window(PetscSF sf,PetscSFWindowFlavorType *flavor)
207: {
208: PetscSF_Window *w = (PetscSF_Window*)sf->data;
211: *flavor = w->flavor;
212: return(0);
213: }
215: /*@C
216: PetscSFWindowSetSyncType - Set synchronization type for PetscSF communication
218: Logically Collective
220: Input Parameters:
221: + sf - star forest for communication
222: - sync - synchronization type
224: Options Database Key:
225: . -sf_window_sync <sync> - sets the synchronization type FENCE, LOCK, or ACTIVE (see PetscSFWindowSyncType)
227: Level: advanced
229: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetSyncType()
230: @*/
231: PetscErrorCode PetscSFWindowSetSyncType(PetscSF sf,PetscSFWindowSyncType sync)
232: {
238: PetscTryMethod(sf,"PetscSFWindowSetSyncType_C",(PetscSF,PetscSFWindowSyncType),(sf,sync));
239: return(0);
240: }
242: static PetscErrorCode PetscSFWindowSetSyncType_Window(PetscSF sf,PetscSFWindowSyncType sync)
243: {
244: PetscSF_Window *w = (PetscSF_Window*)sf->data;
247: w->sync = sync;
248: return(0);
249: }
251: /*@C
252: PetscSFWindowGetSyncType - Get synchronization type for PetscSF communication
254: Logically Collective
256: Input Parameter:
257: . sf - star forest for communication
259: Output Parameter:
260: . sync - synchronization type
262: Level: advanced
264: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetSyncType()
265: @*/
266: PetscErrorCode PetscSFWindowGetSyncType(PetscSF sf,PetscSFWindowSyncType *sync)
267: {
273: PetscUseMethod(sf,"PetscSFWindowGetSyncType_C",(PetscSF,PetscSFWindowSyncType*),(sf,sync));
274: return(0);
275: }
277: static PetscErrorCode PetscSFWindowGetSyncType_Window(PetscSF sf,PetscSFWindowSyncType *sync)
278: {
279: PetscSF_Window *w = (PetscSF_Window*)sf->data;
282: *sync = w->sync;
283: return(0);
284: }
286: /*@C
287: PetscSFWindowSetInfo - Set the MPI_Info handle that will be used for subsequent windows allocation
289: Logically Collective
291: Input Parameters:
292: + sf - star forest for communication
293: - info - MPI_Info handle
295: Level: advanced
297: Notes: the info handle is duplicated with a call to MPI_Info_dup unless info = MPI_INFO_NULL.
299: .seealso: PetscSFSetFromOptions(), PetscSFWindowGetInfo()
300: @*/
301: PetscErrorCode PetscSFWindowSetInfo(PetscSF sf,MPI_Info info)
302: {
307: PetscTryMethod(sf,"PetscSFWindowSetInfo_C",(PetscSF,MPI_Info),(sf,info));
308: return(0);
309: }
311: static PetscErrorCode PetscSFWindowSetInfo_Window(PetscSF sf,MPI_Info info)
312: {
313: PetscSF_Window *w = (PetscSF_Window*)sf->data;
317: if (w->info != MPI_INFO_NULL) {
318: MPI_Info_free(&w->info);
319: }
320: if (info != MPI_INFO_NULL) {
321: MPI_Info_dup(info,&w->info);
322: }
323: return(0);
324: }
326: /*@C
327: PetscSFWindowGetInfo - Get the MPI_Info handle used for windows allocation
329: Logically Collective
331: Input Parameter:
332: . sf - star forest for communication
334: Output Parameter:
335: . info - MPI_Info handle
337: Level: advanced
339: Notes: if PetscSFWindowSetInfo() has not be called, this returns MPI_INFO_NULL
341: .seealso: PetscSFSetFromOptions(), PetscSFWindowSetInfo()
342: @*/
343: PetscErrorCode PetscSFWindowGetInfo(PetscSF sf,MPI_Info *info)
344: {
350: PetscUseMethod(sf,"PetscSFWindowGetInfo_C",(PetscSF,MPI_Info*),(sf,info));
351: return(0);
352: }
354: static PetscErrorCode PetscSFWindowGetInfo_Window(PetscSF sf,MPI_Info *info)
355: {
356: PetscSF_Window *w = (PetscSF_Window*)sf->data;
359: *info = w->info;
360: return(0);
361: }
363: /*
364: PetscSFGetWindow - Get a window for use with a given data type
366: Collective on PetscSF
368: Input Parameters:
369: + sf - star forest
370: . unit - data type
371: . array - array to be sent
372: . sync - type of synchronization PetscSFWindowSyncType
373: . epoch - PETSC_TRUE to acquire the window and start an epoch, PETSC_FALSE to just acquire the window
374: . fenceassert - assert parameter for call to MPI_Win_fence(), if sync == PETSCSF_WINDOW_SYNC_FENCE
375: . postassert - assert parameter for call to MPI_Win_post(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
376: - startassert - assert parameter for call to MPI_Win_start(), if sync == PETSCSF_WINDOW_SYNC_ACTIVE
378: Output Parameters:
379: + target_disp - target_disp argument for RMA calls (significative for PETSCSF_WINDOW_FLAVOR_DYNAMIC only)
380: + reqs - array of requests (significative for sync == PETSCSF_WINDOW_SYNC_LOCK only)
381: - win - window
383: Level: developer
384: .seealso: PetscSFGetRootRanks(), PetscSFWindowGetDataTypes()
385: */
386: static PetscErrorCode PetscSFGetWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscMPIInt postassert,PetscMPIInt startassert,const MPI_Aint **target_disp, MPI_Request **reqs, MPI_Win *win)
387: {
388: PetscSF_Window *w = (PetscSF_Window*)sf->data;
390: MPI_Aint lb,lb_true,bytes,bytes_true;
391: PetscSFWinLink link;
392: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
393: MPI_Aint winaddr;
394: PetscInt nranks;
395: #endif
396: PetscBool reuse = PETSC_FALSE, update = PETSC_FALSE;
397: PetscBool dummy[2];
398: MPI_Aint wsize;
401: MPI_Type_get_extent(unit,&lb,&bytes);
402: MPI_Type_get_true_extent(unit,&lb_true,&bytes_true);
403: if (lb != 0 || lb_true != 0) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with nonzero lower bound, write petsc-maint@mcs.anl.gov if you want this feature");
404: if (bytes != bytes_true) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for unit type with modified extent, write petsc-maint@mcs.anl.gov if you want this feature");
405: if (w->flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
406: for (link=w->wins; reuse && link; link=link->next) {
407: PetscBool winok = PETSC_FALSE;
408: if (w->flavor != link->flavor) continue;
409: switch (w->flavor) {
410: case PETSCSF_WINDOW_FLAVOR_DYNAMIC: /* check available matching array, error if in use (we additionally check that the matching condition is the same across processes) */
411: if (array == link->addr) {
412: if (PetscDefined(USE_DEBUG)) {
413: dummy[0] = PETSC_TRUE;
414: dummy[1] = PETSC_TRUE;
415: MPI_Allreduce(MPI_IN_PLACE,dummy,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));
416: MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));
417: if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
418: }
419: if (link->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window in use");
420: if (epoch && link->epoch) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Window epoch not finished");
421: winok = PETSC_TRUE;
422: link->paddr = array;
423: } else if (PetscDefined(USE_DEBUG)) {
424: dummy[0] = PETSC_FALSE;
425: dummy[1] = PETSC_FALSE;
426: MPI_Allreduce(MPI_IN_PLACE,dummy ,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)sf));
427: MPI_Allreduce(MPI_IN_PLACE,dummy+1,1,MPIU_BOOL,MPI_LOR ,PetscObjectComm((PetscObject)sf));
428: if (dummy[0] != dummy[1]) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"PETSCSF_WINDOW_FLAVOR_DYNAMIC requires root pointers to be consistently used across the comm. Use PETSCSF_WINDOW_FLAVOR_CREATE or PETSCSF_WINDOW_FLAVOR_ALLOCATE instead");
429: }
430: break;
431: case PETSCSF_WINDOW_FLAVOR_ALLOCATE: /* check available by matching size, allocate if in use */
432: case PETSCSF_WINDOW_FLAVOR_SHARED:
433: if (!link->inuse && bytes == (MPI_Aint)link->bytes) {
434: update = PETSC_TRUE;
435: link->paddr = array;
436: winok = PETSC_TRUE;
437: }
438: break;
439: default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
440: }
441: if (winok) {
442: *win = link->win;
443: PetscInfo3(sf,"Reusing window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
444: goto found;
445: }
446: }
448: wsize = (MPI_Aint)bytes*sf->nroots;
449: PetscNew(&link);
450: link->bytes = bytes;
451: link->next = w->wins;
452: link->flavor = w->flavor;
453: link->dyn_target_addr = NULL;
454: link->reqs = NULL;
455: w->wins = link;
456: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
457: PetscInt i;
459: PetscMalloc1(sf->nranks,&link->reqs);
460: for (i = 0; i < sf->nranks; i++) link->reqs[i] = MPI_REQUEST_NULL;
461: }
462: switch (w->flavor) {
463: case PETSCSF_WINDOW_FLAVOR_CREATE:
464: MPI_Win_create(array,wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->win);
465: link->addr = array;
466: link->paddr = array;
467: break;
468: #if defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)
469: case PETSCSF_WINDOW_FLAVOR_DYNAMIC:
470: MPI_Win_create_dynamic(w->info,PetscObjectComm((PetscObject)sf),&link->win);
471: #if defined(PETSC_HAVE_OMPI_MAJOR_VERSION) /* some OpenMPI versions do not support MPI_Win_attach(win,NULL,0); */
472: MPI_Win_attach(link->win,wsize ? array : &ierr,wsize);
473: #else
474: MPI_Win_attach(link->win,array,wsize);
475: #endif
476: link->addr = array;
477: link->paddr = array;
478: if (!w->dynsf) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ORDER,"Must call PetscSFSetUp()");
479: PetscSFSetUp(w->dynsf);
480: PetscSFGetRootRanks(w->dynsf,&nranks,NULL,NULL,NULL,NULL);
481: PetscMalloc1(nranks,&link->dyn_target_addr);
482: MPI_Get_address(array,&winaddr);
483: PetscSFBcastBegin(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);
484: PetscSFBcastEnd(w->dynsf,MPI_AINT,&winaddr,link->dyn_target_addr,MPI_REPLACE);
485: break;
486: case PETSCSF_WINDOW_FLAVOR_ALLOCATE:
487: MPI_Win_allocate(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);
488: update = PETSC_TRUE;
489: link->paddr = array;
490: break;
491: #endif
492: #if defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY)
493: case PETSCSF_WINDOW_FLAVOR_SHARED:
494: MPI_Win_allocate_shared(wsize,(PetscMPIInt)bytes,w->info,PetscObjectComm((PetscObject)sf),&link->addr,&link->win);
495: update = PETSC_TRUE;
496: link->paddr = array;
497: break;
498: #endif
499: default: SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for flavor %s",PetscSFWindowFlavorTypes[w->flavor]);
500: }
501: PetscInfo3(sf,"New window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
502: *win = link->win;
504: found:
506: if (target_disp) *target_disp = link->dyn_target_addr;
507: if (reqs) *reqs = link->reqs;
508: if (update) { /* locks are needed for the "separate" memory model only, the fence guaranties memory-synchronization */
509: PetscMPIInt rank;
511: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
512: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_EXCLUSIVE,rank,MPI_MODE_NOCHECK,*win);}
513: PetscMemcpy(link->addr,array,sf->nroots*bytes);
514: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
515: MPI_Win_unlock(rank,*win);
516: MPI_Win_fence(0,*win);
517: }
518: }
519: link->inuse = PETSC_TRUE;
520: link->epoch = epoch;
521: if (epoch) {
522: switch (sync) {
523: case PETSCSF_WINDOW_SYNC_FENCE:
524: MPI_Win_fence(fenceassert,*win);
525: break;
526: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
527: break;
528: case PETSCSF_WINDOW_SYNC_ACTIVE: {
529: MPI_Group ingroup,outgroup;
530: PetscMPIInt isize,osize;
532: /* OpenMPI 4.0.2 with btl=vader does not like calling
533: - MPI_Win_complete when ogroup is empty
534: - MPI_Win_wait when igroup is empty
535: So, we do not even issue the corresponding start and post calls
536: The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
537: start(outgroup) has a matching post(ingroup)
538: and this is guaranteed by PetscSF
539: */
540: PetscSFGetGroups(sf,&ingroup,&outgroup);
541: MPI_Group_size(ingroup,&isize);
542: MPI_Group_size(outgroup,&osize);
543: if (isize) {MPI_Win_post(ingroup,postassert,*win);}
544: if (osize) {MPI_Win_start(outgroup,startassert,*win);}
545: } break;
546: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
547: }
548: }
549: return(0);
550: }
552: /*
553: PetscSFFindWindow - Finds a window that is already in use
555: Not Collective
557: Input Parameters:
558: + sf - star forest
559: . unit - data type
560: - array - array with which the window is associated
562: Output Parameters:
563: + win - window
564: - reqs - outstanding requests associated to the window
566: Level: developer
568: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
569: */
570: static PetscErrorCode PetscSFFindWindow(PetscSF sf,MPI_Datatype unit,const void *array,MPI_Win *win,MPI_Request **reqs)
571: {
572: PetscSF_Window *w = (PetscSF_Window*)sf->data;
573: PetscSFWinLink link;
577: *win = MPI_WIN_NULL;
578: for (link=w->wins; link; link=link->next) {
579: if (array == link->paddr) {
580: PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
581: *win = link->win;
582: *reqs = link->reqs;
583: return(0);
584: }
585: }
586: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
587: }
589: /*
590: PetscSFRestoreWindow - Restores a window obtained with PetscSFGetWindow()
592: Collective
594: Input Parameters:
595: + sf - star forest
596: . unit - data type
597: . array - array associated with window
598: . sync - type of synchronization PetscSFWindowSyncType
599: . epoch - close an epoch, must match argument to PetscSFGetWindow()
600: . update - if we have to update the local window array
601: - win - window
603: Level: developer
605: .seealso: PetscSFFindWindow()
606: */
607: static PetscErrorCode PetscSFRestoreWindow(PetscSF sf,MPI_Datatype unit,void *array,PetscSFWindowSyncType sync,PetscBool epoch,PetscMPIInt fenceassert,PetscBool update,MPI_Win *win)
608: {
609: PetscSF_Window *w = (PetscSF_Window*)sf->data;
610: PetscErrorCode ierr;
611: PetscSFWinLink *p,link;
612: PetscBool reuse = PETSC_FALSE;
613: PetscSFWindowFlavorType flavor;
614: void* laddr;
615: size_t bytes;
618: for (p=&w->wins; *p; p=&(*p)->next) {
619: link = *p;
620: if (*win == link->win) {
621: if (array != link->paddr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Matched window, but not array");
622: if (epoch != link->epoch) {
623: if (epoch) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"No epoch to end");
624: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Restoring window without ending epoch");
625: }
626: laddr = link->addr;
627: flavor = link->flavor;
628: bytes = link->bytes;
629: if (flavor != PETSCSF_WINDOW_FLAVOR_CREATE) reuse = PETSC_TRUE;
630: else { *p = link->next; update = PETSC_FALSE; } /* remove from list */
631: goto found;
632: }
633: }
634: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Requested window not in use");
636: found:
637: PetscInfo3(sf,"Window %d of flavor %d for comm %d\n",link->win,link->flavor,PetscObjectComm((PetscObject)sf));
638: if (epoch) {
639: switch (sync) {
640: case PETSCSF_WINDOW_SYNC_FENCE:
641: MPI_Win_fence(fenceassert,*win);
642: break;
643: case PETSCSF_WINDOW_SYNC_LOCK: /* Handled outside */
644: break;
645: case PETSCSF_WINDOW_SYNC_ACTIVE: {
646: MPI_Group ingroup,outgroup;
647: PetscMPIInt isize,osize;
649: /* OpenMPI 4.0.2 with btl=wader does not like calling
650: - MPI_Win_complete when ogroup is empty
651: - MPI_Win_wait when igroup is empty
652: The MPI standard (Sec. 11.5.2 of MPI 3.1) only requires that
653: - each process who issues a call to MPI_Win_start issues a call to MPI_Win_Complete
654: - each process who issues a call to MPI_Win_post issues a call to MPI_Win_Wait
655: */
656: PetscSFGetGroups(sf,&ingroup,&outgroup);
657: MPI_Group_size(ingroup,&isize);
658: MPI_Group_size(outgroup,&osize);
659: if (osize) {MPI_Win_complete(*win);}
660: if (isize) {MPI_Win_wait(*win);}
661: } break;
662: default: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_PLIB,"Unknown synchronization type");
663: }
664: }
665: if (update) {
666: if (sync == PETSCSF_WINDOW_SYNC_LOCK) {
667: MPI_Win_fence(MPI_MODE_NOPUT|MPI_MODE_NOSUCCEED,*win);
668: }
669: PetscMemcpy(array,laddr,sf->nroots*bytes);
670: }
671: link->epoch = PETSC_FALSE;
672: link->inuse = PETSC_FALSE;
673: link->paddr = NULL;
674: if (!reuse) {
675: PetscFree(link->dyn_target_addr);
676: PetscFree(link->reqs);
677: MPI_Win_free(&link->win);
678: PetscFree(link);
679: *win = MPI_WIN_NULL;
680: }
681: return(0);
682: }
684: static PetscErrorCode PetscSFSetUp_Window(PetscSF sf)
685: {
686: PetscSF_Window *w = (PetscSF_Window*)sf->data;
688: MPI_Group ingroup,outgroup;
691: PetscSFSetUpRanks(sf,MPI_GROUP_EMPTY);
692: if (!w->dynsf) {
693: PetscInt i;
694: PetscSFNode *remotes;
696: PetscMalloc1(sf->nranks,&remotes);
697: for (i=0;i<sf->nranks;i++) {
698: remotes[i].rank = sf->ranks[i];
699: remotes[i].index = 0;
700: }
701: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&w->dynsf);
702: PetscSFWindowSetFlavorType(w->dynsf,PETSCSF_WINDOW_FLAVOR_CREATE); /* break recursion */
703: PetscSFSetGraph(w->dynsf,1,sf->nranks,NULL,PETSC_OWN_POINTER,remotes,PETSC_OWN_POINTER);
704: PetscLogObjectParent((PetscObject)sf,(PetscObject)w->dynsf);
705: }
706: switch (w->sync) {
707: case PETSCSF_WINDOW_SYNC_ACTIVE:
708: PetscSFGetGroups(sf,&ingroup,&outgroup);
709: default:
710: break;
711: }
712: return(0);
713: }
715: static PetscErrorCode PetscSFSetFromOptions_Window(PetscOptionItems *PetscOptionsObject,PetscSF sf)
716: {
717: PetscSF_Window *w = (PetscSF_Window*)sf->data;
718: PetscErrorCode ierr;
719: PetscSFWindowFlavorType flavor = w->flavor;
722: PetscOptionsHead(PetscOptionsObject,"PetscSF Window options");
723: PetscOptionsEnum("-sf_window_sync","synchronization type to use for PetscSF Window communication","PetscSFWindowSetSyncType",PetscSFWindowSyncTypes,(PetscEnum)w->sync,(PetscEnum*)&w->sync,NULL);
724: PetscOptionsEnum("-sf_window_flavor","flavor to use for PetscSF Window creation","PetscSFWindowSetFlavorType",PetscSFWindowFlavorTypes,(PetscEnum)flavor,(PetscEnum*)&flavor,NULL);
725: PetscSFWindowSetFlavorType(sf,flavor);
726: PetscOptionsTail();
727: return(0);
728: }
730: static PetscErrorCode PetscSFReset_Window(PetscSF sf)
731: {
732: PetscSF_Window *w = (PetscSF_Window*)sf->data;
733: PetscErrorCode ierr;
734: PetscSFDataLink link,next;
735: PetscSFWinLink wlink,wnext;
736: PetscInt i;
739: for (link=w->link; link; link=next) {
740: next = link->next;
741: MPI_Type_free(&link->unit);
742: for (i=0; i<sf->nranks; i++) {
743: MPI_Type_free(&link->mine[i]);
744: MPI_Type_free(&link->remote[i]);
745: }
746: PetscFree2(link->mine,link->remote);
747: PetscFree(link);
748: }
749: w->link = NULL;
750: for (wlink=w->wins; wlink; wlink=wnext) {
751: wnext = wlink->next;
752: if (wlink->inuse) SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Window still in use with address %p",(void*)wlink->addr);
753: PetscFree(wlink->dyn_target_addr);
754: PetscFree(wlink->reqs);
755: MPI_Win_free(&wlink->win);
756: PetscFree(wlink);
757: }
758: w->wins = NULL;
759: PetscSFDestroy(&w->dynsf);
760: if (w->info != MPI_INFO_NULL) {
761: MPI_Info_free(&w->info);
762: }
763: return(0);
764: }
766: static PetscErrorCode PetscSFDestroy_Window(PetscSF sf)
767: {
771: PetscSFReset_Window(sf);
772: PetscFree(sf->data);
773: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",NULL);
774: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",NULL);
775: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",NULL);
776: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",NULL);
777: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",NULL);
778: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",NULL);
779: return(0);
780: }
782: static PetscErrorCode PetscSFView_Window(PetscSF sf,PetscViewer viewer)
783: {
784: PetscSF_Window *w = (PetscSF_Window*)sf->data;
785: PetscErrorCode ierr;
786: PetscBool iascii;
787: PetscViewerFormat format;
790: PetscViewerGetFormat(viewer,&format);
791: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
792: if (iascii) {
793: PetscViewerASCIIPrintf(viewer," current flavor=%s synchronization=%s MultiSF sort=%s\n",PetscSFWindowFlavorTypes[w->flavor],PetscSFWindowSyncTypes[w->sync],sf->rankorder ? "rank-order" : "unordered");
794: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
795: if (w->info != MPI_INFO_NULL) {
796: PetscMPIInt k,nkeys;
797: char key[MPI_MAX_INFO_KEY], value[MPI_MAX_INFO_VAL];
799: MPI_Info_get_nkeys(w->info,&nkeys);
800: PetscViewerASCIIPrintf(viewer," current info with %d keys. Ordered key-value pairs follow:\n",nkeys);
801: for (k = 0; k < nkeys; k++) {
802: PetscMPIInt flag;
804: MPI_Info_get_nthkey(w->info,k,key);
805: MPI_Info_get(w->info,key,MPI_MAX_INFO_VAL,value,&flag);
806: if (!flag) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Missing key %s",key);
807: PetscViewerASCIIPrintf(viewer," %s = %s\n",key,value);
808: }
809: } else {
810: PetscViewerASCIIPrintf(viewer," current info=MPI_INFO_NULL\n");
811: }
812: }
813: }
814: return(0);
815: }
817: static PetscErrorCode PetscSFDuplicate_Window(PetscSF sf,PetscSFDuplicateOption opt,PetscSF newsf)
818: {
819: PetscSF_Window *w = (PetscSF_Window*)sf->data;
820: PetscErrorCode ierr;
821: PetscSFWindowSyncType synctype;
824: synctype = w->sync;
825: /* HACK: Must use FENCE or LOCK when called from PetscSFGetGroups() because ACTIVE here would cause recursion. */
826: if (!sf->setupcalled) synctype = PETSCSF_WINDOW_SYNC_LOCK;
827: PetscSFWindowSetSyncType(newsf,synctype);
828: PetscSFWindowSetFlavorType(newsf,w->flavor);
829: PetscSFWindowSetInfo(newsf,w->info);
830: return(0);
831: }
833: static PetscErrorCode PetscSFBcastBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
834: {
835: PetscSF_Window *w = (PetscSF_Window*)sf->data;
836: PetscErrorCode ierr;
837: PetscInt i,nranks;
838: const PetscMPIInt *ranks;
839: const MPI_Aint *target_disp;
840: const MPI_Datatype *mine,*remote;
841: MPI_Request *reqs;
842: MPI_Win win;
845: if (op != MPI_REPLACE) SETERRQ(PetscObjectComm((PetscObject)sf), PETSC_ERR_SUP, "PetscSFBcastBegin_Window with op!=MPI_REPLACE has not been implemented");
846: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
847: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
848: PetscSFGetWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPUT|MPI_MODE_NOPRECEDE,MPI_MODE_NOPUT,0,&target_disp,&reqs,&win);
849: for (i=0; i<nranks; i++) {
850: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
852: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
853: MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);
854: #if defined(PETSC_HAVE_MPI_RGET)
855: MPI_Rget(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win,&reqs[i]);
856: #else
857: MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);
858: #endif
859: } else {
860: MPI_Get(leafdata,1,mine[i],ranks[i],tdp,1,remote[i],win);
861: }
862: }
863: return(0);
864: }
866: PetscErrorCode PetscSFBcastEnd_Window(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
867: {
868: PetscSF_Window *w = (PetscSF_Window*)sf->data;
870: MPI_Win win;
871: MPI_Request *reqs = NULL;
874: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
875: if (reqs) {MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);}
876: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {
877: PetscInt i,nranks;
878: const PetscMPIInt *ranks;
880: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
881: for (i=0; i<nranks; i++) {
882: MPI_Win_unlock(ranks[i],win);
883: }
884: }
885: PetscSFRestoreWindow(sf,unit,(void*)rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSTORE|MPI_MODE_NOSUCCEED,PETSC_FALSE,&win);
886: return(0);
887: }
889: PetscErrorCode PetscSFReduceBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
890: {
891: PetscSF_Window *w = (PetscSF_Window*)sf->data;
892: PetscErrorCode ierr;
893: PetscInt i,nranks;
894: const PetscMPIInt *ranks;
895: const MPI_Aint *target_disp;
896: const MPI_Datatype *mine,*remote;
897: MPI_Win win;
900: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
901: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
902: PetscSFWindowOpTranslate(&op);
903: PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);
904: for (i=0; i<nranks; i++) {
905: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
907: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],MPI_MODE_NOCHECK,win);}
908: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
909: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
910: }
911: return(0);
912: }
914: static PetscErrorCode PetscSFReduceEnd_Window(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
915: {
916: PetscSF_Window *w = (PetscSF_Window*)sf->data;
918: MPI_Win win;
919: MPI_Request *reqs = NULL;
922: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
923: if (reqs) {MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);}
924: PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);
925: return(0);
926: }
928: static PetscErrorCode PetscSFFetchAndOpBegin_Window(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,void *leafupdate,MPI_Op op)
929: {
930: PetscErrorCode ierr;
931: PetscInt i,nranks;
932: const PetscMPIInt *ranks;
933: const MPI_Datatype *mine,*remote;
934: const MPI_Aint *target_disp;
935: MPI_Win win;
936: PetscSF_Window *w = (PetscSF_Window*)sf->data;
937: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
938: PetscSFWindowFlavorType oldf;
939: #endif
942: PetscSFGetRootRanks(sf,&nranks,&ranks,NULL,NULL,NULL);
943: PetscSFWindowGetDataTypes(sf,unit,&mine,&remote);
944: PetscSFWindowOpTranslate(&op);
945: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
946: /* FetchAndOp without MPI_Get_Accumulate requires locking.
947: we create a new window every time to not interfere with user-defined MPI_Info which may have used "no_locks"="true" */
948: oldf = w->flavor;
949: w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
950: PetscSFGetWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,0,0,&target_disp,NULL,&win);
951: #else
952: PetscSFGetWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOPRECEDE,0,0,&target_disp,NULL,&win);
953: #endif
954: for (i=0; i<nranks; i++) {
955: MPI_Aint tdp = target_disp ? target_disp[i] : 0;
957: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
958: MPI_Win_lock(MPI_LOCK_EXCLUSIVE,ranks[i],0,win);
959: MPI_Get(leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],win);
960: MPI_Accumulate((void*)leafdata,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
961: MPI_Win_unlock(ranks[i],win);
962: #else
963: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_lock(MPI_LOCK_SHARED,ranks[i],0,win);}
964: MPI_Get_accumulate((void*)leafdata,1,mine[i],leafupdate,1,mine[i],ranks[i],tdp,1,remote[i],op,win);
965: if (w->sync == PETSCSF_WINDOW_SYNC_LOCK) {MPI_Win_unlock(ranks[i],win);}
966: #endif
967: }
968: #if !defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
969: w->flavor = oldf;
970: #endif
971: return(0);
972: }
974: static PetscErrorCode PetscSFFetchAndOpEnd_Window(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
975: {
977: MPI_Win win;
978: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
979: PetscSF_Window *w = (PetscSF_Window*)sf->data;
980: #endif
981: MPI_Request *reqs = NULL;
984: PetscSFFindWindow(sf,unit,rootdata,&win,&reqs);
985: if (reqs) {MPI_Waitall(sf->nranks,reqs,MPI_STATUSES_IGNORE);}
986: #if defined(PETSC_HAVE_MPI_GET_ACCUMULATE)
987: PetscSFRestoreWindow(sf,unit,rootdata,w->sync,PETSC_TRUE,MPI_MODE_NOSUCCEED,PETSC_TRUE,&win);
988: #else
989: PetscSFRestoreWindow(sf,unit,rootdata,PETSCSF_WINDOW_SYNC_LOCK,PETSC_FALSE,0,PETSC_TRUE,&win);
990: #endif
991: return(0);
992: }
994: PETSC_INTERN PetscErrorCode PetscSFCreate_Window(PetscSF sf)
995: {
996: PetscSF_Window *w = (PetscSF_Window*)sf->data;
1000: sf->ops->SetUp = PetscSFSetUp_Window;
1001: sf->ops->SetFromOptions = PetscSFSetFromOptions_Window;
1002: sf->ops->Reset = PetscSFReset_Window;
1003: sf->ops->Destroy = PetscSFDestroy_Window;
1004: sf->ops->View = PetscSFView_Window;
1005: sf->ops->Duplicate = PetscSFDuplicate_Window;
1006: sf->ops->BcastBegin = PetscSFBcastBegin_Window;
1007: sf->ops->BcastEnd = PetscSFBcastEnd_Window;
1008: sf->ops->ReduceBegin = PetscSFReduceBegin_Window;
1009: sf->ops->ReduceEnd = PetscSFReduceEnd_Window;
1010: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Window;
1011: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Window;
1013: PetscNewLog(sf,&w);
1014: sf->data = (void*)w;
1015: w->sync = PETSCSF_WINDOW_SYNC_FENCE;
1016: w->flavor = PETSCSF_WINDOW_FLAVOR_CREATE;
1017: w->info = MPI_INFO_NULL;
1019: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetSyncType_C",PetscSFWindowSetSyncType_Window);
1020: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetSyncType_C",PetscSFWindowGetSyncType_Window);
1021: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetFlavorType_C",PetscSFWindowSetFlavorType_Window);
1022: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetFlavorType_C",PetscSFWindowGetFlavorType_Window);
1023: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowSetInfo_C",PetscSFWindowSetInfo_Window);
1024: PetscObjectComposeFunction((PetscObject)sf,"PetscSFWindowGetInfo_C",PetscSFWindowGetInfo_Window);
1026: #if defined(OMPI_MAJOR_VERSION) && (OMPI_MAJOR_VERSION < 1 || (OMPI_MAJOR_VERSION == 1 && OMPI_MINOR_VERSION <= 6))
1027: {
1028: PetscBool ackbug = PETSC_FALSE;
1029: PetscOptionsGetBool(NULL,NULL,"-acknowledge_ompi_onesided_bug",&ackbug,NULL);
1030: if (ackbug) {
1031: PetscInfo(sf,"Acknowledged Open MPI bug, proceeding anyway. Expect memory corruption.\n");
1032: } else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_LIB,"Open MPI is known to be buggy (https://svn.open-mpi.org/trac/ompi/ticket/1905 and 2656), use -acknowledge_ompi_onesided_bug to proceed");
1033: }
1034: #endif
1035: return(0);
1036: }