Actual source code: sfalltoall.c
petsc-3.15.0 2021-03-30
1: #include <../src/vec/is/sf/impls/basic/allgatherv/sfallgatherv.h>
2: #include <../src/vec/is/sf/impls/basic/allgather/sfallgather.h>
3: #include <../src/vec/is/sf/impls/basic/gatherv/sfgatherv.h>
5: /* Reuse the type. The difference is some fields (e.g., displs, recvcounts) are not used, which is not a big deal */
6: typedef PetscSF_Allgatherv PetscSF_Alltoall;
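/* Why the reuse is safe: the Destroy/Reset/FetchAndOpEnd/GetRootRanks/SetUp callbacks installed in
   PetscSFCreate_Alltoall() below are borrowed from the Allgather(v) types, and they only touch fields
   that both structs share; the Allgatherv-specific displs and recvcounts simply stay unused.
   Conceptually, each rank holds one unit of root data and one unit of leaf data per process in the
   communicator, matching the count-1 MPIU_Ialltoall() calls further down. */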
8: /*===================================================================================*/
9: /* Implementations of SF public APIs */
10: /*===================================================================================*/
11: static PetscErrorCode PetscSFGetGraph_Alltoall(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
12: {
14: PetscInt i;
17: if (nroots) *nroots = sf->nroots;
18: if (nleaves) *nleaves = sf->nleaves;
19: if (ilocal) *ilocal = NULL; /* Contiguous local indices */
20: if (iremote) {
21: if (!sf->remote) {
22: PetscMalloc1(sf->nleaves,&sf->remote);
23: sf->remote_alloc = sf->remote;
24: for (i=0; i<sf->nleaves; i++) {
25: sf->remote[i].rank = i;
26: sf->remote[i].index = i;
27: }
28: }
29: *iremote = sf->remote;
30: }
31: return(0);
32: }
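/* The graph reported above is built lazily: the iremote array is allocated on the first query, cached
   in sf->remote (with sf->remote_alloc recording ownership so it can be freed later), and leaf i is
   reported as connected to root i on rank i, with contiguous local leaf indices (ilocal == NULL).

   A minimal query sketch (illustrative only; error checking omitted):

     PetscInt          nroots,nleaves;
     const PetscInt    *ilocal;
     const PetscSFNode *iremote;
     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
     // here ilocal is NULL and iremote[i] = { .rank = i, .index = i }
*/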
34: static PetscErrorCode PetscSFBcastBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
35: {
36: PetscErrorCode ierr;
37: PetscSFLink link;
38: MPI_Comm comm;
39: void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
40: MPI_Request *req;
43: PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_BCAST,&link);
44: PetscSFLinkPackRootData(sf,link,PETSCSF_REMOTE,rootdata);
45: PetscSFLinkCopyRootBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
46: PetscObjectGetComm((PetscObject)sf,&comm);
47: PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_ROOT2LEAF,&rootbuf,&leafbuf,&req,NULL);
48: PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_ROOT2LEAF);
49: MPIU_Ialltoall(rootbuf,1,unit,leafbuf,1,unit,comm,req);
50: return(0);
51: }
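/* Flow of the Bcast kickoff above: a PetscSFLink is created (or fetched from a cache) for this
   (unit, memtype, op) combination, the remote root data is packed into the link's MPI root buffer,
   and if GPU-aware MPI is not in use the device buffer is first staged to the host; after the GPU
   stream is synchronized, a single nonblocking MPIU_Ialltoall() with count 1 of 'unit' per rank moves
   rootbuf to leafbuf. The wait and the unpack into leafdata happen in PetscSFBcastEnd_Basic(), which
   PetscSFCreate_Alltoall() installs as the BcastEnd op.

   Caller-side sketch (illustrative; assumes the petsc-3.15 PetscSFBcastBegin/End calling sequence
   that takes an MPI_Op):

     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
     // ... overlap local computation here ...
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);
*/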
53: static PetscErrorCode PetscSFReduceBegin_Alltoall(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
54: {
55: PetscErrorCode ierr;
56: PetscSFLink link;
57: MPI_Comm comm;
58: void *rootbuf = NULL,*leafbuf = NULL; /* buffer used by MPI */
59: MPI_Request *req;
62: PetscSFLinkCreate(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op,PETSCSF_REDUCE,&link);
63: PetscSFLinkPackLeafData(sf,link,PETSCSF_REMOTE,leafdata);
64: PetscSFLinkCopyLeafBufferInCaseNotUseGpuAwareMPI(sf,link,PETSC_TRUE/* device2host before sending */);
65: PetscObjectGetComm((PetscObject)sf,&comm);
66: PetscSFLinkGetMPIBuffersAndRequests(sf,link,PETSCSF_LEAF2ROOT,&rootbuf,&leafbuf,&req,NULL);
67: PetscSFLinkSyncStreamBeforeCallMPI(sf,link,PETSCSF_LEAF2ROOT);
68: MPIU_Ialltoall(leafbuf,1,unit,rootbuf,1,unit,comm,req);
69: return(0);
70: }
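/* ReduceBegin is the mirror image of BcastBegin: remote leaf data is packed (and staged to the host
   when GPU-aware MPI is unavailable), and the same count-1 MPIU_Ialltoall() runs in the opposite
   direction, from leafbuf to rootbuf. The reduction op carried by the link is applied when the
   received data is unpacked into rootdata by PetscSFReduceEnd_Basic().

   Caller-side sketch (illustrative only):

     PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
     PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
*/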
72: static PetscErrorCode PetscSFCreateLocalSF_Alltoall(PetscSF sf,PetscSF *out)
73: {
75: PetscInt nroots = 1,nleaves = 1,*ilocal;
76: PetscSFNode *iremote = NULL;
77: PetscSF lsf;
78: PetscMPIInt rank;
81: nroots = 1;
82: nleaves = 1;
83: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
84: PetscMalloc1(nleaves,&ilocal);
85: PetscMalloc1(nleaves,&iremote);
86: ilocal[0] = rank;
87: iremote[0].rank = 0; /* rank in PETSC_COMM_SELF */
88: iremote[0].index = rank; /* LocalSF is an embedded SF. Indices are not remapped */
90: PetscSFCreate(PETSC_COMM_SELF,&lsf);
91: PetscSFSetGraph(lsf,nroots,nleaves,NULL/*contiguous leaves*/,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
92: PetscSFSetUp(lsf);
93: *out = lsf;
94: return(0);
95: }
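/* Only the "diagonal" connection of the alltoall graph is purely local: on each process, leaf 'rank'
   is connected to a root on the calling process itself. The local SF therefore lives on
   PETSC_COMM_SELF with a single root and a single leaf, and the indices (ilocal[0] = rank,
   iremote[0].index = rank) are kept as in the parent SF rather than being compacted to 0, as the
   comment above notes. */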
97: static PetscErrorCode PetscSFCreateEmbeddedRootSF_Alltoall(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
98: {
100: PetscInt i,*tmproots,*ilocal,ndranks,ndiranks;
101: PetscSFNode *iremote;
102: PetscMPIInt nroots,*roots,nleaves,*leaves,rank;
103: MPI_Comm comm;
104: PetscSF_Basic *bas;
105: PetscSF esf;
108: PetscObjectGetComm((PetscObject)sf,&comm);
109: MPI_Comm_rank(comm,&rank);
111: /* Uniq selected[] and store the result in roots[] */
112: PetscMalloc1(nselected,&tmproots);
113: PetscArraycpy(tmproots,selected,nselected);
114: PetscSortRemoveDupsInt(&nselected,tmproots); /* nselected might be changed */
115: if (tmproots[0] < 0 || tmproots[nselected-1] >= sf->nroots) SETERRQ3(comm,PETSC_ERR_ARG_OUTOFRANGE,"Min/Max root indices %D/%D are not in [0,%D)",tmproots[0],tmproots[nselected-1],sf->nroots);
116: nroots = nselected; /* For Alltoall, we know root indices will not overflow MPI_INT */
117: PetscMalloc1(nselected,&roots);
118: for (i=0; i<nselected; i++) roots[i] = tmproots[i];
119: PetscFree(tmproots);
121: /* Find out which leaves are still connected to roots in the embedded sf. We expect PetscCommBuildTwoSided() to be more scalable than MPI_Alltoall() */
122: PetscCommBuildTwoSided(comm,0/*empty msg*/,MPI_INT/*fake*/,nroots,roots,NULL/*todata*/,&nleaves,&leaves,NULL/*fromdata*/);
124: /* Move myself ahead if rank is in leaves[], since I am a distinguished rank */
125: ndranks = 0;
126: for (i=0; i<nleaves; i++) {
127: if (leaves[i] == rank) {leaves[i] = -rank; ndranks = 1; break;}
128: }
129: PetscSortMPIInt(nleaves,leaves);
130: if (nleaves && leaves[0] < 0) leaves[0] = rank;
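  /* The negate-and-sort trick above: if the calling rank appears in leaves[], it is temporarily
     negated so that PetscSortMPIInt() places it first, then restored; ndranks records whether such a
     "distinguished" (on-process) rank exists. The same idea is applied to roots[] further down before
     the root-side fields are filled. */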
132: /* Build esf and fill its fields manually (without calling PetscSFSetUp) */
133: PetscMalloc1(nleaves,&ilocal);
134: PetscMalloc1(nleaves,&iremote);
135: for (i=0; i<nleaves; i++) { /* 1:1 map from roots to leaves */
136: ilocal[i] = leaves[i];
137: iremote[i].rank = leaves[i];
138: iremote[i].index = leaves[i];
139: }
140: PetscSFCreate(comm,&esf);
141: PetscSFSetType(esf,PETSCSFBASIC); /* This optimized routine can only create a basic sf */
142: PetscSFSetGraph(esf,sf->nleaves,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
144: /* As if we are calling PetscSFSetUpRanks(esf,self's group) */
145: PetscMalloc4(nleaves,&esf->ranks,nleaves+1,&esf->roffset,nleaves,&esf->rmine,nleaves,&esf->rremote);
146: esf->nranks = nleaves;
147: esf->ndranks = ndranks;
148: esf->roffset[0] = 0;
149: for (i=0; i<nleaves; i++) {
150: esf->ranks[i] = leaves[i];
151: esf->roffset[i+1] = i+1;
152: esf->rmine[i] = leaves[i];
153: esf->rremote[i] = leaves[i];
154: }
156: /* Set up esf->data, the incoming communication (i.e., recv info), which is usually done by PetscSFSetUp_Basic */
157: bas = (PetscSF_Basic*)esf->data;
158: PetscMalloc2(nroots,&bas->iranks,nroots+1,&bas->ioffset);
159: PetscMalloc1(nroots,&bas->irootloc);
160: /* Move myself ahead if rank is in roots[], since I am a distinguished irank */
161: ndiranks = 0;
162: for (i=0; i<nroots; i++) {
163: if (roots[i] == rank) {roots[i] = -rank; ndiranks = 1; break;}
164: }
165: PetscSortMPIInt(nroots,roots);
166: if (nroots && roots[0] < 0) roots[0] = rank;
168: bas->niranks = nroots;
169: bas->ndiranks = ndiranks;
170: bas->ioffset[0] = 0;
171: bas->itotal = nroots;
172: for (i=0; i<nroots; i++) {
173: bas->iranks[i] = roots[i];
174: bas->ioffset[i+1] = i+1;
175: bas->irootloc[i] = roots[i];
176: }
178: /* See PetscSFCreateEmbeddedRootSF_Basic */
179: esf->nleafreqs = esf->nranks - esf->ndranks;
180: bas->nrootreqs = bas->niranks - bas->ndiranks;
181: esf->persistent = PETSC_TRUE;
182: /* Setup packing related fields */
183: PetscSFSetUpPackFields(esf);
185: esf->setupcalled = PETSC_TRUE; /* We have done setup ourselves! */
186: *newsf = esf;
187: return(0);
188: }
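/* Summary of the routine above: the selected roots are de-duplicated and range-checked;
   PetscCommBuildTwoSided() with empty payloads lets every process find out which of its leaves are
   still connected to a root of the embedded SF, and on which rank; the calling rank is moved to the
   front of each rank list as the distinguished rank; and the PETSCSFBASIC data structures are then
   filled by hand (ranks/roffset/rmine/rremote on the leaf side, iranks/ioffset/irootloc on the root
   side). This is possible because every connection is 1:1, so each offset array is simply 0,1,2,...
   Setting esf->setupcalled avoids a redundant PetscSFSetUp(). */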
190: PETSC_INTERN PetscErrorCode PetscSFCreate_Alltoall(PetscSF sf)
191: {
192: PetscErrorCode ierr;
193: PetscSF_Alltoall *dat = (PetscSF_Alltoall*)sf->data;
196: sf->ops->BcastEnd = PetscSFBcastEnd_Basic;
197: sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;
199: /* Inherit from Allgatherv. It is astonishing Alltoall can inherit so much from Allgather(v) */
200: sf->ops->Destroy = PetscSFDestroy_Allgatherv;
201: sf->ops->Reset = PetscSFReset_Allgatherv;
202: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Allgatherv;
203: sf->ops->GetRootRanks = PetscSFGetRootRanks_Allgatherv;
205: /* Inherit from Allgather. Every process gathers equal-sized data from others, which enables this inheritance. */
206: sf->ops->GetLeafRanks = PetscSFGetLeafRanks_Allgatherv;
207: sf->ops->SetUp = PetscSFSetUp_Allgather;
209: /* Inherit from Gatherv. Each root has only one leaf connected, which enables this inheritance */
210: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Gatherv;
212: /* Alltoall stuff */
213: sf->ops->GetGraph = PetscSFGetGraph_Alltoall;
214: sf->ops->BcastBegin = PetscSFBcastBegin_Alltoall;
215: sf->ops->ReduceBegin = PetscSFReduceBegin_Alltoall;
216: sf->ops->CreateLocalSF = PetscSFCreateLocalSF_Alltoall;
217: sf->ops->CreateEmbeddedRootSF = PetscSFCreateEmbeddedRootSF_Alltoall;
219: PetscNewLog(sf,&dat);
220: sf->data = (void*)dat;
221: return(0);
222: }
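/* End-to-end usage sketch (illustrative, not part of this file). Assumptions: an SF of this type is
   obtained through PetscSFSetGraphWithPattern() with PETSCSF_PATTERN_ALLTOALL (the layout argument is
   assumed to be ignorable for this pattern, so NULL is passed), and the petsc-3.15 Bcast calling
   sequence with an MPI_Op is used. Each rank owns 'size' roots and 'size' leaves.

     PetscSF     sf;
     PetscMPIInt size;
     PetscScalar *rootdata,*leafdata;                    // each of length 'size' on every rank

     MPI_Comm_size(PETSC_COMM_WORLD,&size);
     PetscMalloc2(size,&rootdata,size,&leafdata);
     PetscSFCreate(PETSC_COMM_WORLD,&sf);
     PetscSFSetGraphWithPattern(sf,NULL,PETSCSF_PATTERN_ALLTOALL);
     PetscSFSetUp(sf);
     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);  // starts MPIU_Ialltoall()
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata,MPI_REPLACE);    // waits and unpacks
     PetscFree2(rootdata,leafdata);
     PetscSFDestroy(&sf);
*/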