/*
   mpihip.hip.cpp: routines for parallel vector operations
*/

#define PETSC_SKIP_SPINLOCK

#include <petscconf.h>
#include <../src/vec/vec/impls/mpi/pvecimpl.h>
#include <petsc/private/hipvecimpl.h>

/*MC
   VECHIP - VECHIP = "hip" - A VECSEQHIP on a single-process communicator, and VECMPIHIP otherwise.

   Options Database Keys:
.  -vec_type hip - sets the vector type to VECHIP during a call to VecSetFromOptions()

   Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECSEQHIP, VECMPIHIP, VECSTANDARD, VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/

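/* Illustrative usage sketch (editor's addition, not part of the original source):
   the usual way to obtain a VECHIP is to let VecSetFromOptions() pick the type
   from the options database. Assumes PETSc was configured with HIP support and
   PetscInitialize() has already run; the size 100 is arbitrary.

     Vec            x;
     PetscErrorCode ierr;

     ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
     ierr = VecSetSizes(x,PETSC_DECIDE,100);CHKERRQ(ierr);
     ierr = VecSetFromOptions(x);CHKERRQ(ierr);    // honors -vec_type hip
     ierr = VecDestroy(&x);CHKERRQ(ierr);
*/
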
PetscErrorCode VecDestroy_MPIHIP(Vec v)
{
  Vec_MPI        *vecmpi = (Vec_MPI*)v->data;
  Vec_HIP        *vechip;
  PetscErrorCode ierr;
  hipError_t     err;

  PetscFunctionBegin;
  if (v->spptr) {
    vechip = (Vec_HIP*)v->spptr;
    if (vechip->GPUarray_allocated) {
      err = hipFree(vechip->GPUarray_allocated);CHKERRHIP(err);
      vechip->GPUarray_allocated = NULL;
    }
    if (vechip->stream) {
      err = hipStreamDestroy(vechip->stream);CHKERRHIP(err);
    }
    if (v->pinned_memory) {
      ierr = PetscMallocSetHIPHost();CHKERRQ(ierr);
      ierr = PetscFree(vecmpi->array_allocated);CHKERRQ(ierr);
      ierr = PetscMallocResetHIPHost();CHKERRQ(ierr);
      v->pinned_memory = PETSC_FALSE;
    }
    ierr = PetscFree(v->spptr);CHKERRQ(ierr);
  }
  ierr = VecDestroy_MPI(v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

PetscErrorCode VecNorm_MPIHIP(Vec xin,NormType type,PetscReal *z)
{
  PetscReal      sum,work = 0.0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (type == NORM_2 || type == NORM_FROBENIUS) {
    ierr  = VecNorm_SeqHIP(xin,NORM_2,&work);CHKERRQ(ierr);
    work *= work;
    ierr  = MPIU_Allreduce(&work,&sum,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
    *z    = PetscSqrtReal(sum);
  } else if (type == NORM_1) {
    /* Find the local part */
    ierr = VecNorm_SeqHIP(xin,NORM_1,&work);CHKERRQ(ierr);
    /* Find the global sum */
    ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  } else if (type == NORM_INFINITY) {
    /* Find the local max */
    ierr = VecNorm_SeqHIP(xin,NORM_INFINITY,&work);CHKERRQ(ierr);
    /* Find the global max */
    ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  } else if (type == NORM_1_AND_2) {
    PetscReal temp[2];
    ierr = VecNorm_SeqHIP(xin,NORM_1,temp);CHKERRQ(ierr);
    ierr = VecNorm_SeqHIP(xin,NORM_2,temp+1);CHKERRQ(ierr);
    temp[1] = temp[1]*temp[1];
    ierr = MPIU_Allreduce(temp,z,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
    z[1] = PetscSqrtReal(z[1]);
  }
  PetscFunctionReturn(0);
}

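/* Editor's note: the pattern above is local-kernel-then-reduce. For NORM_2 the
   global norm satisfies ||x||_2 = sqrt( sum_p ||x_p||_2^2 ), where x_p is the
   portion of x owned by rank p, so each rank squares its local 2-norm, the
   squares are combined with MPIU_SUM, and the square root is taken once at the
   end. NORM_1 sums local sums, while NORM_INFINITY takes a global MPIU_MAX. */
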
PetscErrorCode VecDot_MPIHIP(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecDot_SeqHIP(xin,yin,&work);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  *z   = sum;
  PetscFunctionReturn(0);
}

PetscErrorCode VecTDot_MPIHIP(Vec xin,Vec yin,PetscScalar *z)
{
  PetscScalar    sum,work;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecTDot_SeqHIP(xin,yin,&work);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(&work,&sum,1,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  *z   = sum;
  PetscFunctionReturn(0);
}

PetscErrorCode VecMDot_MPIHIP(Vec xin,PetscInt nv,const Vec y[],PetscScalar *z)
{
  PetscScalar    awork[128],*work = awork;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (nv > 128) {
    ierr = PetscMalloc1(nv,&work);CHKERRQ(ierr);
  }
  ierr = VecMDot_SeqHIP(xin,nv,y,work);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(work,z,nv,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  if (nv > 128) {
    ierr = PetscFree(work);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*MC
   VECMPIHIP - VECMPIHIP = "mpihip" - The basic parallel vector, modified to use HIP

   Options Database Keys:
.  -vec_type mpihip - sets the vector type to VECMPIHIP during a call to VecSetFromOptions()

   Level: beginner

.seealso: VecCreate(), VecSetType(), VecSetFromOptions(), VecCreateMPIWithArray(), VECMPI, VecType, VecCreateMPI(), VecSetPinnedMemoryMin()
M*/

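/* Illustrative usage sketch (editor's addition): forcing the parallel HIP type
   programmatically instead of through the options database. Assumes PETSc was
   configured with HIP support; the size 100 is arbitrary.

     Vec            x;
     PetscErrorCode ierr;

     ierr = VecCreate(PETSC_COMM_WORLD,&x);CHKERRQ(ierr);
     ierr = VecSetSizes(x,PETSC_DECIDE,100);CHKERRQ(ierr);
     ierr = VecSetType(x,VECMPIHIP);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
*/
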
PetscErrorCode VecDuplicate_MPIHIP(Vec win,Vec *v)
{
  PetscErrorCode ierr;
  Vec_MPI        *vw,*w = (Vec_MPI*)win->data;
  PetscScalar    *array;

  PetscFunctionBegin;
  ierr = VecCreate(PetscObjectComm((PetscObject)win),v);CHKERRQ(ierr);
  ierr = PetscLayoutReference(win->map,&(*v)->map);CHKERRQ(ierr);

  ierr = VecCreate_MPIHIP_Private(*v,PETSC_TRUE,w->nghost,NULL);CHKERRQ(ierr);
  vw   = (Vec_MPI*)(*v)->data;
  ierr = PetscMemcpy((*v)->ops,win->ops,sizeof(struct _VecOps));CHKERRQ(ierr);

  /* save local representation of the parallel vector (and scatter) if it exists */
  if (w->localrep) {
    ierr = VecGetArray(*v,&array);CHKERRQ(ierr);
    ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,win->map->n+w->nghost,array,&vw->localrep);CHKERRQ(ierr);
    ierr = PetscMemcpy(vw->localrep->ops,w->localrep->ops,sizeof(struct _VecOps));CHKERRQ(ierr);
    ierr = VecRestoreArray(*v,&array);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)*v,(PetscObject)vw->localrep);CHKERRQ(ierr);
    vw->localupdate = w->localupdate;
    if (vw->localupdate) {
      ierr = PetscObjectReference((PetscObject)vw->localupdate);CHKERRQ(ierr);
    }
  }

  /* New vector should inherit stashing property of parent */
  (*v)->stash.donotstash   = win->stash.donotstash;
  (*v)->stash.ignorenegidx = win->stash.ignorenegidx;

  /* change type_name appropriately */
  ierr = VecHIPAllocateCheck(*v);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)(*v),VECMPIHIP);CHKERRQ(ierr);

  ierr = PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*v))->olist);CHKERRQ(ierr);
  ierr = PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*v))->qlist);CHKERRQ(ierr);
  (*v)->map->bs   = PetscAbs(win->map->bs);
  (*v)->bstash.bs = win->bstash.bs;
  PetscFunctionReturn(0);
}

PetscErrorCode VecDotNorm2_MPIHIP(Vec s,Vec t,PetscScalar *dp,PetscScalar *nm)
{
  PetscErrorCode ierr;
  PetscScalar    work[2],sum[2];

  PetscFunctionBegin;
  ierr = VecDotNorm2_SeqHIP(s,t,work,work+1);CHKERRQ(ierr);
  ierr = MPIU_Allreduce(work,sum,2,MPIU_SCALAR,MPIU_SUM,PetscObjectComm((PetscObject)s));CHKERRMPI(ierr);
  *dp  = sum[0];
  *nm  = sum[1];
  PetscFunctionReturn(0);
}

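/* Editor's note: VecDotNorm2() returns dp = the dot product of s and t together
   with nm = the squared 2-norm of t. Computing both local results first and
   combining them in a single 2-entry MPIU_Allreduce halves the number of global
   synchronizations compared with separate VecDot() and VecNorm() calls, which is
   why latency-sensitive iterative solvers (e.g. BiCGStab variants) use it. */
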
PetscErrorCode VecCreate_MPIHIP(Vec vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscHIPInitializeCheck();CHKERRQ(ierr);
  ierr = PetscLayoutSetUp(vv->map);CHKERRQ(ierr);
  ierr = VecHIPAllocateCheck(vv);CHKERRQ(ierr);
  ierr = VecCreate_MPIHIP_Private(vv,PETSC_FALSE,0,((Vec_HIP*)vv->spptr)->GPUarray_allocated);CHKERRQ(ierr);
  ierr = VecHIPAllocateCheckHost(vv);CHKERRQ(ierr);
  ierr = VecSet(vv,0.0);CHKERRQ(ierr);
  ierr = VecSet_Seq(vv,0.0);CHKERRQ(ierr);
  vv->offloadmask = PETSC_OFFLOAD_BOTH;
  PetscFunctionReturn(0);
}

PetscErrorCode VecCreate_HIP(Vec v)
{
  PetscErrorCode ierr;
  PetscMPIInt    size;

  PetscFunctionBegin;
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)v),&size);CHKERRMPI(ierr);
  if (size == 1) {
    ierr = VecSetType(v,VECSEQHIP);CHKERRQ(ierr);
  } else {
    ierr = VecSetType(v,VECMPIHIP);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

/*@
   VecCreateMPIHIP - Creates a standard, parallel, array-style vector for HIP devices.

   Collective

   Input Parameters:
+  comm - the MPI communicator to use
.  n    - local vector length (or PETSC_DECIDE to have it calculated if N is given)
-  N    - global vector length (or PETSC_DETERMINE to have it calculated if n is given)

   Output Parameter:
.  v - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   Level: intermediate

.seealso: VecCreateMPIHIPWithArray(), VecCreateMPIHIPWithArrays(), VecCreateSeqHIP(), VecCreateSeq(),
          VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPIWithArray(), VecCreateGhostWithArray(), VecMPISetGhost()

@*/
PetscErrorCode VecCreateMPIHIP(MPI_Comm comm,PetscInt n,PetscInt N,Vec *v)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreate(comm,v);CHKERRQ(ierr);
  ierr = VecSetSizes(*v,n,N);CHKERRQ(ierr);
  ierr = VecSetType(*v,VECMPIHIP);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

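/* Illustrative usage sketch (editor's addition): a typical call sequence, letting
   PETSc split a global length of 100 across the ranks of PETSC_COMM_WORLD.

     Vec            x;
     PetscErrorCode ierr;

     ierr = VecCreateMPIHIP(PETSC_COMM_WORLD,PETSC_DECIDE,100,&x);CHKERRQ(ierr);
     ierr = VecSet(x,1.0);CHKERRQ(ierr);
     ierr = VecDestroy(&x);CHKERRQ(ierr);
*/
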
/*@C
   VecCreateMPIHIPWithArray - Creates a parallel, array-style vector,
   where the user provides the GPU array space to store the vector values.

   Collective

   Input Parameters:
+  comm  - the MPI communicator to use
.  bs    - block size, same meaning as VecSetBlockSize()
.  n     - local vector length, cannot be PETSC_DECIDE
.  N     - global vector length (or PETSC_DECIDE to have it calculated)
-  array - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   If the user-provided array is NULL, then VecHIPPlaceArray() can be used
   at a later stage to SET the array for storing the vector values.

   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
   The user should not free the array until the vector is destroyed.

   Level: intermediate

.seealso: VecCreateSeqHIPWithArray(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecPlaceArray()

@*/
PetscErrorCode VecCreateMPIHIPWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar array[],Vec *vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (n == PETSC_DECIDE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Must set local size of vector");
  ierr = PetscHIPInitializeCheck();CHKERRQ(ierr);
  ierr = VecCreate(comm,vv);CHKERRQ(ierr);
  ierr = VecSetSizes(*vv,n,N);CHKERRQ(ierr);
  ierr = VecSetBlockSize(*vv,bs);CHKERRQ(ierr);
  ierr = VecCreate_MPIHIP_Private(*vv,PETSC_FALSE,0,array);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

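/* Illustrative usage sketch (editor's addition): wrapping a device buffer the
   caller allocated. The local length n is chosen by the caller; the buffer must
   hold at least n PetscScalars and must outlive the vector, since PETSc never
   frees user-provided arrays.

     PetscInt       n = 32;
     PetscScalar    *d_buf;
     Vec            x;
     hipError_t     herr;
     PetscErrorCode ierr;

     herr = hipMalloc((void**)&d_buf,n*sizeof(PetscScalar));CHKERRHIP(herr);
     ierr = VecCreateMPIHIPWithArray(PETSC_COMM_WORLD,1,n,PETSC_DECIDE,d_buf,&x);CHKERRQ(ierr);
     // ... use x ...
     ierr = VecDestroy(&x);CHKERRQ(ierr);
     herr = hipFree(d_buf);CHKERRHIP(herr);    // freed by the caller, not PETSc
*/
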
/*@C
   VecCreateMPIHIPWithArrays - Creates a parallel, array-style vector,
   where the user provides the GPU array space to store the vector values.

   Collective

   Input Parameters:
+  comm     - the MPI communicator to use
.  bs       - block size, same meaning as VecSetBlockSize()
.  n        - local vector length, cannot be PETSC_DECIDE
.  N        - global vector length (or PETSC_DECIDE to have it calculated)
.  cpuarray - the user provided CPU array to store the vector values
-  gpuarray - the user provided GPU array to store the vector values

   Output Parameter:
.  vv - the vector

   Notes:
   If both cpuarray and gpuarray are provided, the caller must ensure that
   the provided arrays have identical values.

   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
   same type as an existing vector.

   PETSc does NOT free the provided arrays when the vector is destroyed via
   VecDestroy(). The user should not free the arrays until the vector is
   destroyed.

   Level: intermediate

.seealso: VecCreateSeqHIPWithArrays(), VecCreateMPIWithArray(), VecCreateSeqWithArray(),
          VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost(),
          VecCreateMPI(), VecCreateGhostWithArray(), VecHIPPlaceArray(), VecPlaceArray(),
          VecHIPAllocateCheckHost()
@*/
PetscErrorCode VecCreateMPIHIPWithArrays(MPI_Comm comm,PetscInt bs,PetscInt n,PetscInt N,const PetscScalar cpuarray[],const PetscScalar gpuarray[],Vec *vv)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecCreateMPIHIPWithArray(comm,bs,n,N,gpuarray,vv);CHKERRQ(ierr);

  if (cpuarray && gpuarray) {
    Vec_MPI *s = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_BOTH;
  } else if (cpuarray) {
    Vec_MPI *s = (Vec_MPI*)((*vv)->data);
    s->array           = (PetscScalar*)cpuarray;
    (*vv)->offloadmask = PETSC_OFFLOAD_CPU;
  } else if (gpuarray) {
    (*vv)->offloadmask = PETSC_OFFLOAD_GPU;
  } else {
    (*vv)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
  }

  PetscFunctionReturn(0);
}

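/* Illustrative usage sketch (editor's addition): supplying matching host and
   device buffers so the vector starts with offloadmask PETSC_OFFLOAD_BOTH. The
   caller guarantees the two arrays hold identical values and keeps both alive
   until after VecDestroy(); the local length 16 is arbitrary.

     PetscScalar    h_buf[16] = {0};
     PetscScalar    *d_buf;
     Vec            x;
     hipError_t     herr;
     PetscErrorCode ierr;

     herr = hipMalloc((void**)&d_buf,16*sizeof(PetscScalar));CHKERRHIP(herr);
     herr = hipMemcpy(d_buf,h_buf,16*sizeof(PetscScalar),hipMemcpyHostToDevice);CHKERRHIP(herr);
     ierr = VecCreateMPIHIPWithArrays(PETSC_COMM_WORLD,1,16,PETSC_DECIDE,h_buf,d_buf,&x);CHKERRQ(ierr);
*/
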
PetscErrorCode VecMax_MPIHIP(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  ierr = VecMax_SeqHIP(xin,idx,&work);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPIUNI)
  *z = work;
#else
  if (!idx) {
    ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  } else {
    struct { PetscReal v; PetscInt i; } in,out;

    in.v = work;
    in.i = *idx + xin->map->rstart;
    ierr = MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MAXLOC,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
    *z   = out.v;
    *idx = out.i;
  }
#endif
  PetscFunctionReturn(0);
}

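/* Editor's note: in the indexed reduction above, each rank converts its local
   argmax to a global index by adding its ownership offset (xin->map->rstart)
   before the MPIU_MAXLOC reduction, so out.i is already a global index when the
   reduction completes. Per the MPI standard, MAXLOC breaks ties in favor of the
   smaller index. VecMin_MPIHIP below mirrors this with MPIU_MIN/MPIU_MINLOC. */
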
PetscErrorCode VecMin_MPIHIP(Vec xin,PetscInt *idx,PetscReal *z)
{
  PetscErrorCode ierr;
  PetscReal      work;

  PetscFunctionBegin;
  ierr = VecMin_SeqHIP(xin,idx,&work);CHKERRQ(ierr);
#if defined(PETSC_HAVE_MPIUNI)
  *z = work;
#else
  if (!idx) {
    ierr = MPIU_Allreduce(&work,z,1,MPIU_REAL,MPIU_MIN,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
  } else {
    struct { PetscReal v; PetscInt i; } in,out;

    in.v = work;
    in.i = *idx + xin->map->rstart;
    ierr = MPIU_Allreduce(&in,&out,1,MPIU_REAL_INT,MPIU_MINLOC,PetscObjectComm((PetscObject)xin));CHKERRMPI(ierr);
    *z   = out.v;
    *idx = out.i;
  }
#endif
  PetscFunctionReturn(0);
}

PetscErrorCode VecBindToCPU_MPIHIP(Vec V,PetscBool pin)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  V->boundtocpu = pin;
  if (pin) {
    ierr = VecHIPCopyFromGPU(V);CHKERRQ(ierr);
    V->offloadmask = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
    V->ops->dotnorm2               = NULL;
    V->ops->waxpy                  = VecWAXPY_Seq;
    V->ops->dot                    = VecDot_MPI;
    V->ops->mdot                   = VecMDot_MPI;
    V->ops->tdot                   = VecTDot_MPI;
    V->ops->norm                   = VecNorm_MPI;
    V->ops->scale                  = VecScale_Seq;
    V->ops->copy                   = VecCopy_Seq;
    V->ops->set                    = VecSet_Seq;
    V->ops->swap                   = VecSwap_Seq;
    V->ops->axpy                   = VecAXPY_Seq;
    V->ops->axpby                  = VecAXPBY_Seq;
    V->ops->maxpy                  = VecMAXPY_Seq;
    V->ops->aypx                   = VecAYPX_Seq;
    V->ops->axpbypcz               = VecAXPBYPCZ_Seq;
    V->ops->pointwisemult          = VecPointwiseMult_Seq;
    V->ops->setrandom              = VecSetRandom_Seq;
    V->ops->placearray             = VecPlaceArray_Seq;
    V->ops->replacearray           = VecReplaceArray_SeqHIP;
    V->ops->resetarray             = VecResetArray_Seq;
    V->ops->dot_local              = VecDot_Seq;
    V->ops->tdot_local             = VecTDot_Seq;
    V->ops->norm_local             = VecNorm_Seq;
    V->ops->mdot_local             = VecMDot_Seq;
    V->ops->pointwisedivide        = VecPointwiseDivide_Seq;
    V->ops->getlocalvector         = NULL;
    V->ops->restorelocalvector     = NULL;
    V->ops->getlocalvectorread     = NULL;
    V->ops->restorelocalvectorread = NULL;
    V->ops->getarraywrite          = NULL;
    V->ops->max                    = VecMax_MPI;
    V->ops->min                    = VecMin_MPI;
    V->ops->reciprocal             = VecReciprocal_Default;
    V->ops->sum                    = NULL;
    V->ops->shift                  = NULL;
  } else {
    V->ops->dotnorm2               = VecDotNorm2_MPIHIP;
    V->ops->waxpy                  = VecWAXPY_SeqHIP;
    V->ops->duplicate              = VecDuplicate_MPIHIP;
    V->ops->dot                    = VecDot_MPIHIP;
    V->ops->mdot                   = VecMDot_MPIHIP;
    V->ops->tdot                   = VecTDot_MPIHIP;
    V->ops->norm                   = VecNorm_MPIHIP;
    V->ops->scale                  = VecScale_SeqHIP;
    V->ops->copy                   = VecCopy_SeqHIP;
    V->ops->set                    = VecSet_SeqHIP;
    V->ops->swap                   = VecSwap_SeqHIP;
    V->ops->axpy                   = VecAXPY_SeqHIP;
    V->ops->axpby                  = VecAXPBY_SeqHIP;
    V->ops->maxpy                  = VecMAXPY_SeqHIP;
    V->ops->aypx                   = VecAYPX_SeqHIP;
    V->ops->axpbypcz               = VecAXPBYPCZ_SeqHIP;
    V->ops->pointwisemult          = VecPointwiseMult_SeqHIP;
    V->ops->setrandom              = VecSetRandom_SeqHIP;
    V->ops->placearray             = VecPlaceArray_SeqHIP;
    V->ops->replacearray           = VecReplaceArray_SeqHIP;
    V->ops->resetarray             = VecResetArray_SeqHIP;
    V->ops->dot_local              = VecDot_SeqHIP;
    V->ops->tdot_local             = VecTDot_SeqHIP;
    V->ops->norm_local             = VecNorm_SeqHIP;
    V->ops->mdot_local             = VecMDot_SeqHIP;
    V->ops->destroy                = VecDestroy_MPIHIP;
    V->ops->pointwisedivide        = VecPointwiseDivide_SeqHIP;
    V->ops->getlocalvector         = VecGetLocalVector_SeqHIP;
    V->ops->restorelocalvector     = VecRestoreLocalVector_SeqHIP;
    V->ops->getlocalvectorread     = VecGetLocalVectorRead_SeqHIP;
    V->ops->restorelocalvectorread = VecRestoreLocalVectorRead_SeqHIP;
    V->ops->getarraywrite          = VecGetArrayWrite_SeqHIP;
    V->ops->getarray               = VecGetArray_SeqHIP;
    V->ops->restorearray           = VecRestoreArray_SeqHIP;
    V->ops->getarrayandmemtype     = VecGetArrayAndMemType_SeqHIP;
    V->ops->restorearrayandmemtype = VecRestoreArrayAndMemType_SeqHIP;
    V->ops->max                    = VecMax_MPIHIP;
    V->ops->min                    = VecMin_MPIHIP;
    V->ops->reciprocal             = VecReciprocal_SeqHIP;
    V->ops->sum                    = VecSum_SeqHIP;
    V->ops->shift                  = VecShift_SeqHIP;
  }
  PetscFunctionReturn(0);
}

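/* Illustrative usage sketch (editor's addition): applications normally reach this
   routine through the public VecBindToCPU() interface, e.g. to keep a phase of
   the computation on the host for a Vec x of type VECMPIHIP:

     ierr = VecBindToCPU(x,PETSC_TRUE);CHKERRQ(ierr);     // ops tables switch to CPU kernels
     // ... host-side work on x ...
     ierr = VecBindToCPU(x,PETSC_FALSE);CHKERRQ(ierr);    // back to HIP kernels
*/
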
PetscErrorCode VecCreate_MPIHIP_Private(Vec vv,PetscBool alloc,PetscInt nghost,const PetscScalar array[])
{
  PetscErrorCode ierr;
  Vec_HIP        *vechip;

  PetscFunctionBegin;
  ierr = VecCreate_MPI_Private(vv,PETSC_FALSE,0,0);CHKERRQ(ierr);
  ierr = PetscObjectChangeTypeName((PetscObject)vv,VECMPIHIP);CHKERRQ(ierr);

  ierr = VecBindToCPU_MPIHIP(vv,PETSC_FALSE);CHKERRQ(ierr);
  vv->ops->bindtocpu = VecBindToCPU_MPIHIP;

  /* Later, functions check for the Vec_HIP structure existence, so do not create it without array */
  if (alloc && !array) {
    ierr = VecHIPAllocateCheck(vv);CHKERRQ(ierr);
    ierr = VecHIPAllocateCheckHost(vv);CHKERRQ(ierr);
    ierr = VecSet(vv,0.0);CHKERRQ(ierr);
    ierr = VecSet_Seq(vv,0.0);CHKERRQ(ierr);
    vv->offloadmask = PETSC_OFFLOAD_BOTH;
  }
  if (array) {
    if (!vv->spptr) {
      PetscReal pinned_memory_min;
      PetscBool flag;
      /* Cannot use PetscNew() here because spptr is void* */
      ierr = PetscCalloc(sizeof(Vec_HIP),&vv->spptr);CHKERRQ(ierr);
      vechip = (Vec_HIP*)vv->spptr;
      vv->minimum_bytes_pinned_memory = 0;

      /* Need to parse command line for minimum size to use for pinned memory allocations on host here.
         Note: This same code is duplicated in VecCreate_SeqHIP_Private() and VecHIPAllocateCheck(). Is there a good way to avoid this? */
      ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)vv),((PetscObject)vv)->prefix,"VECHIP Options","Vec");CHKERRQ(ierr);
      pinned_memory_min = vv->minimum_bytes_pinned_memory;
      ierr = PetscOptionsReal("-vec_pinned_memory_min","Minimum size (in bytes) for an allocation to use pinned memory on host","VecSetPinnedMemoryMin",pinned_memory_min,&pinned_memory_min,&flag);CHKERRQ(ierr);
      if (flag) vv->minimum_bytes_pinned_memory = pinned_memory_min;
      ierr = PetscOptionsEnd();CHKERRQ(ierr);
    }
    vechip = (Vec_HIP*)vv->spptr;
    vechip->GPUarray = (PetscScalar*)array;
    vv->offloadmask  = PETSC_OFFLOAD_GPU;
  }
  PetscFunctionReturn(0);
}

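/* Editor's note (illustrative): the -vec_pinned_memory_min option parsed above can
   be supplied on the command line, e.g.

     ./app -vec_type mpihip -vec_pinned_memory_min 1048576

   so that host-side allocations of at least 1 MiB are made in page-locked (pinned)
   memory, which speeds up host-to-device and device-to-host transfers. */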