Actual source code: device.cxx

  1: #include "cupmdevice.hpp" /* I "petscdevice.h" */
  2: #include <petsc/private/petscadvancedmacros.h>

  4: using namespace Petsc::Device;

  6: /*
  7:   Note to anyone adding more classes: the object name must be exactly ALL_CAPS_SHORT_NAME +
  8:   Device (e.g. CUDADevice) to be picked up by the switch-case macros below, because
       PETSC_DEVICE_CASE() forms the object name with PetscConcat_(IMPLS,Device). Each object is
       only defined when the matching PetscDefined(HAVE_<IMPLS>) check is true, and each is
       constructed with its PetscDeviceContextCreate_<IMPLS> routine.
  9: */
 10: #if PetscDefined(HAVE_CUDA)
 11: static CUPM::Device<CUPM::DeviceType::CUDA> CUDADevice(PetscDeviceContextCreate_CUDA);
 12: #endif
 13: #if PetscDefined(HAVE_HIP)
 14: static CUPM::Device<CUPM::DeviceType::HIP>  HIPDevice(PetscDeviceContextCreate_HIP);
 15: #endif
 16: #if PetscDefined(HAVE_SYCL)
 17: #include "sycldevice.hpp"
 18: static SYCL::Device                         SYCLDevice(PetscDeviceContextCreate_SYCL);
 19: #endif

 21: static_assert(Petsc::util::integral_value(PETSC_DEVICE_INVALID) == 0,"");
 22: static_assert(Petsc::util::integral_value(PETSC_DEVICE_CUDA)    == 1,"");
 23: static_assert(Petsc::util::integral_value(PETSC_DEVICE_HIP)     == 2,"");
 24: static_assert(Petsc::util::integral_value(PETSC_DEVICE_SYCL)    == 3,"");
 25: static_assert(Petsc::util::integral_value(PETSC_DEVICE_MAX)     == 4,"");
     /*
       String names of the PetscDeviceType values. The array is indexed directly by the enum
       value elsewhere in this file (PetscDeviceTypes[type]), which is why the static_asserts
       above pin the numeric value of every enumerator. The three trailing entries are the enum
       type name, the common enumerator prefix and a null terminator.
       NOTE(review): this trailing layout is presumably what PETSc's enum-option helpers expect
       -- confirm before reordering or inserting entries.
     */
 26: const char *const PetscDeviceTypes[] = {
 27:   "invalid",
 28:   "cuda",
 29:   "hip",
 30:   "sycl",
 31:   "max",
 32:   "PetscDeviceType",
 33:   "PETSC_DEVICE_",
 34:   PETSC_NULLPTR
 35: };

 37: static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_NONE)  == 0,"");
 38: static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_LAZY)  == 1,"");
 39: static_assert(Petsc::util::integral_value(PETSC_DEVICE_INIT_EAGER) == 2,"");
     /*
       String names of the PetscDeviceInitType values, indexed by enum value (pinned by the
       static_asserts above), followed by the enum type name, the common enumerator prefix and a
       null terminator. The first 3 entries are the choices offered by -device_enable in
       PetscDeviceInitializeFromOptions_Internal().
     */
 40: const char *const PetscDeviceInitTypes[] = {
 41:   "none",
 42:   "lazy",
 43:   "eager",
 44:   "PetscDeviceInitType",
 45:   "PETSC_DEVICE_INIT_",
 46:   PETSC_NULLPTR
 47: };
     /* 6 == 3 init types + type name + prefix + terminator; fails the build if an entry is added or removed */
 48: static_assert(
 49:   sizeof(PetscDeviceInitTypes)/sizeof(*PetscDeviceInitTypes) == 6,
 50:   "Must change CUPMDevice<T>::initialize number of enum values in -device_enable_cupm to match!"
 51: );

 53: #define PETSC_DEVICE_CASE(IMPLS,func,...)                                     \
 54:   case PetscConcat_(PETSC_DEVICE_,IMPLS): {                                   \
 55:     PetscConcat_(IMPLS,Device).func(__VA_ARGS__);                    \
 56:   } break

 58: /*
       PETSC_DEVICE_CASE(IMPLS,func,...) above emits a single case label, PETSC_DEVICE_<IMPLS>,
       whose body calls <IMPLS>Device.func(...) and then breaks out of the switch. The expansion
       ends in "break" without a semicolon, so the call site supplies the terminating ';'.

       PETSC_DEVICE_CASE_IF_PETSC_DEFINED() below is the conditional form of the same macro:

 59:   Suppose you have:

 61:   CUDADevice.myFunction(arg1,arg2)

 63:   that you would like to conditionally define and call in a switch-case:

 65:   switch(PetscDeviceType) {
 66:   #if PetscDefined(HAVE_CUDA)
 67:   case PETSC_DEVICE_CUDA: {
 68:     CUDADevice.myFunction(arg1,arg2);
 69:   } break;
 70:   #endif
 71:   }

 73:   then calling this macro:

 75:   PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,myFunction,arg1,arg2)

 77:   will expand to the following case statement:

 79:   case PETSC_DEVICE_CUDA: {
 80:     CUDADevice.myFunction(arg1,arg2);
 81:   } break

 83:   if PetscDefined(HAVE_CUDA) evaluates to 1, and expands to nothing otherwise (in which case
       the ';' written at the call site is all that remains, i.e. an empty statement)
 84: */
 85: #define PETSC_DEVICE_CASE_IF_PETSC_DEFINED(IMPLS,func,...)                                     \
 86:   PetscIfPetscDefined(PetscConcat_(HAVE_,IMPLS),PETSC_DEVICE_CASE,PetscExpandToNothing)(IMPLS,func,__VA_ARGS__)

 88: /*@C
 89:   PetscDeviceCreate - Get a new handle for a particular device type

 91:   Not Collective, Possibly Synchronous

 93:   Input Parameters:
 94: + type  - The type of PetscDevice
 95: - devid - The numeric ID# of the device (pass PETSC_DECIDE to assign automatically)

 97:   Output Parameter:
 98: . device - The PetscDevice

100:   Notes:
101:   This routine may initialize PetscDevice. If this is the case, this will most likely cause
102:   some sort of device synchronization.

104:   devid is what you might pass to cudaSetDevice() for example.

       The returned PetscDevice starts with a reference count of 1; release it with
       PetscDeviceDestroy(). An error is raised if type does not match any device implementation
       that PETSc was configured with.

106:   Level: beginner

108: .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(),
109: PetscDeviceInitialized(), PetscDeviceConfigure(), PetscDeviceView(), PetscDeviceDestroy()
110: @*/
111: PetscErrorCode PetscDeviceCreate(PetscDeviceType type, PetscInt devid, PetscDevice *device)
112: {
113:   static PetscInt PetscDeviceCounter = 0; /* running count of devices created by this function */
114:   PetscDevice     dev;

118:   PetscDeviceInitializePackage();
119:   PetscNew(&dev);
120:   dev->id     = PetscDeviceCounter++; /* creation-order id, independent of the devid argument */
121:   dev->type   = type;
122:   dev->refcnt = 1; /* the caller receives the first (and so far only) reference */
123:   /*
124:     if you are adding a device, you also need to add its initialization in
125:     PetscDeviceInitializeTypeFromOptions_Private() below
126:   */
127:   switch (type) {
         /* each line below is a complete case for a configured backend, or just ';' otherwise */
128:     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,getDevice,dev,devid);
129:     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,getDevice,dev,devid);
130:     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,getDevice,dev,devid);
131:   default:
132:     /* in case the above macros expand to nothing this silences any unused variable warnings */
133:     (void)(devid);
           /* NOTE(review): dev allocated above does not appear to be released on this error path -- confirm */
134:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
135:   }
136:   *device = dev;
137:   return 0;
138: }

140: /*@C
141:   PetscDeviceDestroy - Free a PetscDevice

143:   Not Collective, Asynchronous

145:   Input Parameter:
146: . device - The PetscDevice

       Notes:
       PetscDevice is reference counted. This routine drops one reference and only frees the
       device once no references remain. Passing a pointer to a NULL PetscDevice is a no-op; the
       pointer device itself is dereferenced unconditionally and so must not be NULL.

148:   Level: beginner

150: .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceView()
151: @*/
152: PetscErrorCode PetscDeviceDestroy(PetscDevice *device)
153: {
154:   if (!*device) return 0; /* nothing to destroy */
156:   PetscDeviceDereference_Internal(*device);
157:   if ((*device)->refcnt) {
         /* someone else still holds the device: only invalidate the caller's handle */
158:     *device = PETSC_NULLPTR;
159:     return 0;
160:   }
       /* last reference released: free the implementation data first, then the device itself */
161:   PetscFree((*device)->data);
162:   PetscFree(*device);
163:   return 0;
164: }

166: /*@C
167:   PetscDeviceConfigure - Configure a particular PetscDevice

169:   Not Collective, Asynchronous

171:   Input Parameter:
172: . device - The PetscDevice to configure

174:   Notes:
175:   The user should not assume that this is a cheap operation

       The work is delegated to the configure routine stored in the device's ops table. In debug
       builds the device type is first checked against the configured device implementations.

177:   Level: beginner

179: .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceView(), PetscDeviceDestroy()
180: @*/
181: PetscErrorCode PetscDeviceConfigure(PetscDevice device)
182: {
184:   if (PetscDefined(USE_DEBUG)) {
185:     /*
186:       if no configuration is available, this cascades all the way down to default
187:       and error
188:     */
         /*
           The missing breaks are intentional: a case only breaks when its own backend is
           configured, otherwise control falls into the next case.
           NOTE(review): because of that fall-through, a CUDA device also passes this check when
           only HIP or SYCL is configured (and a HIP device when only SYCL is) -- confirm this is
           acceptable.
         */
189:     switch (device->type) {
190:     case PETSC_DEVICE_CUDA: if (PetscDefined(HAVE_CUDA)) break;
191:     case PETSC_DEVICE_HIP:  if (PetscDefined(HAVE_HIP))  break;
192:     case PETSC_DEVICE_SYCL: if (PetscDefined(HAVE_SYCL)) break;
193:     default:
194:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[device->type]);
195:     }
196:   }
197:   (*device->ops->configure)(device); /* type-specific configuration via the ops table */
198:   return 0;
199: }

201: /*@C
202:   PetscDeviceView - View a PetscDevice

204:   Collective on viewer, Asynchronous

206:   Input Parameters:
207: + device - The PetscDevice to view
208: - viewer - The PetscViewer to view the device with (NULL for PETSC_VIEWER_STDOUT_WORLD)

       Notes:
       The actual output is produced by the view routine stored in the device's ops table.

210:   Level: beginner

212: .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy()
213: @*/
214: PetscErrorCode PetscDeviceView(PetscDevice device, PetscViewer viewer)
215: {
217:   if (!viewer) PetscViewerASCIIGetStdout(PETSC_COMM_WORLD,&viewer); /* default: ASCII stdout viewer on PETSC_COMM_WORLD */
219:   (*device->ops->view)(device,viewer);
220:   return 0;
221: }

223: /*@C
224:   PetscDeviceGetDeviceId - Get the device id

226:   Not collective

228:   Input Parameter:
229: . device - The PetscDevice

231:   Output Parameter:
232: . id - The device id

       Notes:
       This returns the deviceId field of the PetscDevice, which is a different field from the
       creation-order id that PetscDeviceCreate() stores in the device.
       NOTE(review): deviceId is presumably filled in by the type-specific getDevice() routine
       called from PetscDeviceCreate() -- confirm.

234:   Level: beginner

236: .seealso: PetscDevice, PetscDeviceCreate(), PetscDeviceConfigure(), PetscDeviceDestroy()
237: @*/
238: PetscErrorCode PetscDeviceGetDeviceId(PetscDevice device, PetscInt *id)
239: {
242:   *id = device->deviceId;
243:   return 0;
244: }

246: static std::array<bool,PETSC_DEVICE_MAX>        initializedDevice = {}; /* per PetscDeviceType: has the default device been set up? */
247: static std::array<PetscDevice,PETSC_DEVICE_MAX> defaultDevices    = {}; /* per PetscDeviceType: the default device (null until created) */
248: static_assert(initializedDevice.size() == defaultDevices.size(),""); /* both tables are indexed by PetscDeviceType */

250: /*@C
251:   PetscDeviceInitialize - Initialize PetscDevice

253:   Not Collective, Possibly Synchronous

255:   Input Parameter:
256: . type - The PetscDeviceType to initialize

258:   Notes:
259:   Eagerly initializes the corresponding PetscDeviceType if needed.

       The default device for type is created with PETSC_DECIDE as its device id. If the type is
       already initialized this routine does nothing.

261:   Level: beginner

263: .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialized(), PetscDeviceCreate(), PetscDeviceDestroy()
264: @*/
265: PetscErrorCode PetscDeviceInitialize(PetscDeviceType type)
266: {
268:   PetscDeviceInitializeDefaultDevice_Internal(type,PETSC_DECIDE); /* returns early if type is already initialized */
269:   return 0;
270: }

272: /*@C
273:   PetscDeviceInitialized - Determines whether PetscDevice is initialized for a particular
274:   PetscDeviceType

276:   Not Collective, Asynchronous

278:   Input Parameter:
279: . type - The PetscDeviceType to check

281:   Output Parameter:
282: . [return value] - PETSC_TRUE if type is initialized, PETSC_FALSE otherwise

284:   Notes:
285:   If one has not configured PETSc for a particular PetscDeviceType then this routine will
286:   return PETSC_FALSE for that PetscDeviceType.

       Unlike most routines in this file, this one returns a PetscBool rather than a
       PetscErrorCode.

288:   Level: beginner

290: .seealso: PetscDevice, PetscDeviceInitType, PetscDeviceInitialize(), PetscDeviceCreate(), PetscDeviceDestroy()
291: @*/
292: PetscBool PetscDeviceInitialized(PetscDeviceType type)
293: {
       /* the configured-for test comes first, so the table is only read for configured types */
294:   return static_cast<PetscBool>(PetscDeviceConfiguredFor_Internal(type) && initializedDevice[type]);
295: }

297: /*
298:   Actual initialization function; any functions claiming to initialize PetscDevice or
299:   PetscDeviceContext will have to run through this one

       Creates and configures the default PetscDevice for type using defaultDeviceId, stores it in
       defaultDevices[type] and marks the type as initialized. Does nothing if the type is
       already initialized.
300: */
301: PetscErrorCode PetscDeviceInitializeDefaultDevice_Internal(PetscDeviceType type, PetscInt defaultDeviceId)
302: {
304:   if (PetscLikely(PetscDeviceInitialized(type))) return 0; /* expected common case: already set up */
       /* not initialized, so no default device should exist yet for this type */
305:   PetscAssert(!defaultDevices[type],PETSC_COMM_SELF,PETSC_ERR_MEM,"Trying to overwrite existing default device of type %s",PetscDeviceTypes[type]);
306:   PetscDeviceCreate(type,defaultDeviceId,&defaultDevices[type]);
307:   PetscDeviceConfigure(defaultDevices[type]);
308:   initializedDevice[type] = true; /* set last, after create and configure */
309:   return 0;
310: }

312: #if PetscDefined(USE_LOG)
313: PETSC_INTERN PetscErrorCode PetscLogInitialize(void); /* defined elsewhere; only declared when logging is compiled in */
314: #else
     /* without logging, calls to PetscLogInitialize() expand to the constant 0 */
315: #define PetscLogInitialize() 0
316: #endif

318: static PetscErrorCode PetscDeviceInitializeTypeFromOptions_Private(MPI_Comm comm, PetscDeviceType type, PetscInt defaultDeviceId, PetscBool defaultView, PetscDeviceInitType *defaultInitType)
319: {
       /*
         Runs the initialize() routine of a single device type and, if that type ends up with
         eager initialization, creates its default device right away.

         comm            - communicator forwarded to the type's initialize() and used for viewing
         type            - the device type to process
         defaultDeviceId - requested device id; passed by address to initialize(), so the local
                           copy may be changed before it is used below
         defaultView     - whether to view the default device after eager initialization
         defaultInitType - in: the global init type, out: what this type decided to use
       */
320:   if (!PetscDeviceConfiguredFor_Internal(type)) {
         /* not an error: just make sure there is no default device for this type */
321:     PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s not supported\n",PetscDeviceTypes[type]);
322:     defaultDevices[type] = PETSC_NULLPTR;
323:     return 0;
324:   }
325:   PetscInfo(PETSC_NULLPTR,"PetscDeviceType %s supported, initializing\n",PetscDeviceTypes[type]);
326:   /* ugly switch needed to pick the right global variable... could maybe do this as a union? */
327:   switch (type) {
328:     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(CUDA,initialize,comm,&defaultDeviceId,defaultInitType);
329:     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(HIP,initialize,comm,&defaultDeviceId,defaultInitType);
330:     PETSC_DEVICE_CASE_IF_PETSC_DEFINED(SYCL,initialize,comm,&defaultDeviceId,defaultInitType);
331:   default:
332:     SETERRQ(comm,PETSC_ERR_PLIB,"PETSc was seemingly configured for PetscDeviceType %s but we've fallen through all cases in a switch",PetscDeviceTypes[type]);
333:   }
334:   /*
335:     defaultInitType and defaultDeviceId now represent what the individual TYPES have decided to
336:     initialize as
337:   */
338:   if (*defaultInitType == PETSC_DEVICE_INIT_EAGER) {
339:     PetscInfo(PETSC_NULLPTR,"Eagerly initializing %s PetscDevice\n",PetscDeviceTypes[type]);
340:     PetscDeviceInitializeDefaultDevice_Internal(type,defaultDeviceId);
341:     if (defaultView) {
342:       PetscViewer vwr;

             /* NOTE(review): logging is initialized before viewing, presumably because the viewer path relies on it -- confirm */
344:       PetscLogInitialize();
345:       PetscViewerASCIIGetStdout(comm,&vwr);
346:       PetscDeviceView(defaultDevices[type],vwr);
347:     }
348:   }
349:   return 0;
350: }

352: /* called from PetscFinalize() do not call yourself! */
     /* Drops this file's reference to every default device and resets the initialized flags. */
353: static PetscErrorCode PetscDeviceFinalize_Private(void)
354: {
355:   if (PetscDefined(USE_DEBUG)) {
356:     const auto PetscDeviceCheckAllDestroyedAfterFinalize = []{
           /* NOTE(review): only the return is visible in this listing; the check described below appears to have been elided -- confirm against the real source */
358:       return 0;
359:     };
360:     /*
361:       you might be thinking, why on earth are you registering yet another finalizer in a
362:       function already called during PetscRegisterFinalizeAll()? If this seems stupid it's
363:       because it is.

365:       The crux of the problem is that the initializer (and therefore the ~finalizer~) of
366:       PetscDeviceContext is guaranteed to run after PetscDevice's. So if the global context had
367:       a default PetscDevice attached, that PetscDevice will have a reference count >0 and hence
368:       won't be destroyed yet. So we need to repeat the check that all devices have been
369:       destroyed again ~after~ the global context is destroyed. In summary:

371:       1. This finalizer runs and destroys all devices, except it may not because the global
372:          context may still hold a reference!
373:       2. The global context finalizer runs and does the final reference count decrement
374:          required, which actually destroys the held device.
375:       3. Our newly added finalizer runs and checks that all is well.
376:     */
377:     PetscRegisterFinalize(PetscDeviceCheckAllDestroyedAfterFinalize);
378:   }
       /* PetscDeviceDestroy() ignores entries that are null, so unused slots are harmless */
379:   for (auto &&device : defaultDevices) PetscDeviceDestroy(&device);
380:   initializedDevice.fill(false); /* every type must be initialized again before its next use */
381:   return 0;
382: }

384: /*
385:   Begins the init proceedings for the entire PetscDevice stack. There are several stages of
386:   initialization types (NOTE(review): this text originally said 3 but only lists 2):

388:   1. defaultInitType - how does PetscDevice as a whole expect to initialize?
389:   2. subTypeDefaultInitType - how does each PetscDevice implementation expect to initialize?
390:      e.g. you may want to blanket disable PetscDevice init (and disable say Kokkos init), but
391:      have all CUDA devices still initialize.

393:   All told the following happens:

395:   0. defaultInitType -> LAZY
396:   1. Check for -log_view/-log_summary, if yes defaultInitType -> EAGER
       1.1 -device_enable may override defaultInitType. If the result is NONE, viewing is turned
           off; otherwise, if -device_view was given and is true, defaultInitType -> EAGER
397:   2. PetscDevice initializes each sub type with defaultInitType.
398:   2.1 Each enabled PetscDevice sub-type then does the above disable or view check in addition
399:       to checking for specific device init. if view or specific device init
400:       subTypeDefaultInitType -> EAGER. disabled once again overrides all.
       3. If any configured sub-type ended up EAGER, the root PetscDeviceContext is also set up
          eagerly, using the last such device type.
401: */
402: PetscErrorCode PetscDeviceInitializeFromOptions_Internal(MPI_Comm comm)
403: {
404:   PetscBool           flg,defaultView = PETSC_FALSE,initializeDeviceContextEagerly = PETSC_FALSE;
405:   PetscInt            defaultDevice   = PETSC_DECIDE;
406:   PetscDeviceType     deviceContextInitDevice = PETSC_DEVICE_DEFAULT;
407:   PetscDeviceInitType defaultInitType;
408:   PetscErrorCode      ierr; /* NOTE(review): not referenced in the visible lines; presumably used by the PetscOptionsBegin()/PetscOptionsEnd() macros -- confirm */

410:   if (PetscDefined(USE_DEBUG)) {
411:     int result;

413:     MPI_Comm_compare(comm,PETSC_COMM_WORLD,&result);
414:     /* in order to accurately assign ranks to gpus we need to get the MPI_Comm_rank of the
415:      * global space */
416:     if (PetscUnlikely(result != MPI_IDENT)) {
417:       char name[MPI_MAX_OBJECT_NAME] = {};
418:       int  len; /* unused */

420:       MPI_Comm_get_name(comm,name,&len);
421:       SETERRQ(comm,PETSC_ERR_MPI,"Default devices being initialized on MPI_Comm '%s' not PETSC_COMM_WORLD",name);
422:     }
423:   }
424:   comm = PETSC_COMM_WORLD; /* from this point on we assume we're on PETSC_COMM_WORLD */
425:   PetscRegisterFinalize(PetscDeviceFinalize_Private);
       /* either logging option switches the starting init type from lazy to eager */
426:   PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_view",&flg);
427:   if (!flg) PetscOptionsHasName(PETSC_NULLPTR,PETSC_NULLPTR,"-log_summary",&flg);
428:   {
         /* kept as a PetscInt because PetscOptionsEList() writes the chosen list index through &initIdx */
429:     PetscInt initIdx = flg ? PETSC_DEVICE_INIT_EAGER : PETSC_DEVICE_INIT_LAZY;

431:     PetscOptionsBegin(comm,PETSC_NULLPTR,"PetscDevice Options","Sys");
         /* the 3 is the number of selectable entries at the front of PetscDeviceInitTypes (none, lazy, eager) */
432:     PetscOptionsEList("-device_enable","How (or whether) to initialize PetscDevices","PetscDeviceInitializeFromOptions_Internal()",PetscDeviceInitTypes,3,PetscDeviceInitTypes[initIdx],&initIdx,PETSC_NULLPTR);
433:     PetscOptionsRangeInt("-device_select","Which device to use. Pass " PetscStringize(PETSC_DECIDE) " to have PETSc decide or (given they exist) [0-NUM_DEVICE) for a specific device","PetscDeviceCreate()",defaultDevice,&defaultDevice,PETSC_NULLPTR,PETSC_DECIDE,std::numeric_limits<int>::max());
         /* flg is reused here; it presumably now reports whether -device_view was given at all */
434:     PetscOptionsBool("-device_view","Display device information and assignments (forces eager initialization)",PETSC_NULLPTR,defaultView,&defaultView,&flg);
435:     PetscOptionsEnd();
436:     if (initIdx == PETSC_DEVICE_INIT_NONE) {
437:       /* disabled all device initialization if devices are globally disabled */
439:       defaultView = PETSC_FALSE;
440:     } else {
           /* viewing requires the option to be both present and true, and it forces eager init */
441:       defaultView = static_cast<decltype(defaultView)>(defaultView && flg);
442:       if (defaultView) initIdx = PETSC_DEVICE_INIT_EAGER;
443:     }
444:     defaultInitType = static_cast<decltype(defaultInitType)>(initIdx);
445:   }
446:   static_assert((PETSC_DEVICE_INVALID == 0) && (PETSC_DEVICE_MAX < std::numeric_limits<int>::max()),"");
       /* starts at 1 to skip PETSC_DEVICE_INVALID (== 0, see the static_assert above) */
447:   for (int i = 1; i < PETSC_DEVICE_MAX; ++i) {
448:     const auto deviceType = static_cast<PetscDeviceType>(i);
449:     auto initType         = defaultInitType; /* fresh copy per type: each type may change its own init type */

451:     PetscDeviceInitializeTypeFromOptions_Private(comm,deviceType,defaultDevice,defaultView,&initType);
452:     if (PetscDeviceConfiguredFor_Internal(deviceType) && (initType == PETSC_DEVICE_INIT_EAGER)) {
           /* overwritten on every match, so the last eagerly initialized type wins */
453:       initializeDeviceContextEagerly = PETSC_TRUE;
454:       deviceContextInitDevice        = deviceType;
455:     }
456:   }
457:   if (initializeDeviceContextEagerly) {
458:     PetscDeviceContext dctx;

460:     /*
461:       somewhat inefficient here as the device context is potentially fully set up twice (once
462:       when retrieved then the second time if setfromoptions makes changes)
463:     */
464:     PetscInfo(PETSC_NULLPTR,"Eagerly initializing PetscDeviceContext with %s device\n",PetscDeviceTypes[deviceContextInitDevice]);
465:     PetscDeviceContextSetRootDeviceType_Internal(deviceContextInitDevice);
466:     PetscDeviceContextGetCurrentContext(&dctx);
467:     PetscDeviceContextSetFromOptions(comm,"root_",dctx);
468:     PetscDeviceContextSetUp(dctx);
469:   }
470:   return 0;
471: }

473: /* Get the default PetscDevice for a particular type and constructs them if lazily initialized. */
     /* The returned handle is the entry stored in defaultDevices; no reference count is changed in the visible code. */
474: PetscErrorCode PetscDeviceGetDefaultForType_Internal(PetscDeviceType type, PetscDevice *device)
475: {
477:   PetscDeviceInitialize(type); /* does nothing if type is already initialized */
478:   *device = defaultDevices[type];
479:   return 0;
480: }