Actual source code: vecviennacl.cxx

  1: /*
  2:    Implements the sequential ViennaCL vectors.
  3: */

  5: #include <petscconf.h>
  6: #include <petsc/private/vecimpl.h>
  7: #include <../src/vec/vec/impls/dvecimpl.h>
  8: #include <../src/vec/vec/impls/seq/seqviennacl/viennaclvecimpl.h>

 10: #include "viennacl/linalg/inner_prod.hpp"
 11: #include "viennacl/linalg/norm_1.hpp"
 12: #include "viennacl/linalg/norm_2.hpp"
 13: #include "viennacl/linalg/norm_inf.hpp"

 15: #ifdef VIENNACL_WITH_OPENCL
 16:   #include "viennacl/ocl/backend.hpp"
 17: #endif

 19: PETSC_EXTERN PetscErrorCode VecViennaCLGetArray(Vec v, ViennaCLVector **a)
 20: {
 21:   PetscFunctionBegin;
 22:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 23:   *a = 0;
 24:   PetscCall(VecViennaCLCopyToGPU(v));
 25:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 26:   ViennaCLWaitForGPU();
 27:   PetscFunctionReturn(PETSC_SUCCESS);
 28: }

 30: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArray(Vec v, ViennaCLVector **a)
 31: {
 32:   PetscFunctionBegin;
 33:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 34:   v->offloadmask = PETSC_OFFLOAD_GPU;

 36:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 37:   PetscFunctionReturn(PETSC_SUCCESS);
 38: }

 40: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayRead(Vec v, const ViennaCLVector **a)
 41: {
 42:   PetscFunctionBegin;
 43:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 44:   *a = 0;
 45:   PetscCall(VecViennaCLCopyToGPU(v));
 46:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 47:   ViennaCLWaitForGPU();
 48:   PetscFunctionReturn(PETSC_SUCCESS);
 49: }

 51: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayRead(Vec v, const ViennaCLVector **a)
 52: {
 53:   PetscFunctionBegin;
 54:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 55:   PetscFunctionReturn(PETSC_SUCCESS);
 56: }

 58: PETSC_EXTERN PetscErrorCode VecViennaCLGetArrayWrite(Vec v, ViennaCLVector **a)
 59: {
 60:   PetscFunctionBegin;
 61:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 62:   *a = 0;
 63:   PetscCall(VecViennaCLAllocateCheck(v));
 64:   *a = ((Vec_ViennaCL *)v->spptr)->GPUarray;
 65:   ViennaCLWaitForGPU();
 66:   PetscFunctionReturn(PETSC_SUCCESS);
 67: }

 69: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreArrayWrite(Vec v, ViennaCLVector **a)
 70: {
 71:   PetscFunctionBegin;
 72:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
 73:   v->offloadmask = PETSC_OFFLOAD_GPU;

 75:   PetscCall(PetscObjectStateIncrease((PetscObject)v));
 76:   PetscFunctionReturn(PETSC_SUCCESS);
 77: }

 79: PETSC_EXTERN PetscErrorCode PetscViennaCLInit()
 80: {
 81:   char      string[20];
 82:   PetscBool flg, flg_cuda, flg_opencl, flg_openmp;

 84:   PetscFunctionBegin;
 85:   /* ViennaCL backend selection: CUDA, OpenCL, or OpenMP */
 86:   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_backend", string, sizeof(string), &flg));
 87:   if (flg) {
 88:     try {
 89:       PetscCall(PetscStrcasecmp(string, "cuda", &flg_cuda));
 90:       PetscCall(PetscStrcasecmp(string, "opencl", &flg_opencl));
 91:       PetscCall(PetscStrcasecmp(string, "openmp", &flg_openmp));

 93:       /* A default (sequential) CPU backend is always available - even if OpenMP is not enabled. */
 94:       if (flg_openmp) viennacl::backend::default_memory_type(viennacl::MAIN_MEMORY);
 95: #if defined(PETSC_HAVE_CUDA)
 96:       else if (flg_cuda) viennacl::backend::default_memory_type(viennacl::CUDA_MEMORY);
 97: #endif
 98: #if defined(PETSC_HAVE_OPENCL)
 99:       else if (flg_opencl) viennacl::backend::default_memory_type(viennacl::OPENCL_MEMORY);
100: #endif
101:       else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: Backend not recognized or available: %s.\n Pass -viennacl_view to see available backends for ViennaCL.", string);
102:     } catch (std::exception const &ex) {
103:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
104:     }
105:   }

107: #if defined(PETSC_HAVE_OPENCL)
108:   /* ViennaCL OpenCL device type configuration */
109:   PetscCall(PetscOptionsGetString(NULL, NULL, "-viennacl_opencl_device_type", string, sizeof(string), &flg));
110:   if (flg) {
111:     try {
112:       PetscCall(PetscStrcasecmp(string, "cpu", &flg));
113:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_CPU);

115:       PetscCall(PetscStrcasecmp(string, "gpu", &flg));
116:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_GPU);

118:       PetscCall(PetscStrcasecmp(string, "accelerator", &flg));
119:       if (flg) viennacl::ocl::set_context_device_type(0, CL_DEVICE_TYPE_ACCELERATOR);
120:     } catch (std::exception const &ex) {
121:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
122:     }
123:   }
124: #endif

126:   /* Print available backends */
127:   PetscCall(PetscOptionsHasName(NULL, NULL, "-viennacl_view", &flg));
128:   if (flg) {
129:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backends available: "));
130: #if defined(PETSC_HAVE_CUDA)
131:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA, "));
132: #endif
133: #if defined(PETSC_HAVE_OPENCL)
134:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL, "));
135: #endif
136: #if defined(PETSC_HAVE_OPENMP)
137:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
138: #else
139:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (1 thread) "));
140: #endif
141:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));

143:     /* Print selected backends */
144:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "ViennaCL backend  selected: "));
145: #if defined(PETSC_HAVE_CUDA)
146:     if (viennacl::backend::default_memory_type() == viennacl::CUDA_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "CUDA "));
147: #endif
148: #if defined(PETSC_HAVE_OPENCL)
149:     if (viennacl::backend::default_memory_type() == viennacl::OPENCL_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenCL "));
150: #endif
151: #if defined(PETSC_HAVE_OPENMP)
152:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP "));
153: #else
154:     if (viennacl::backend::default_memory_type() == viennacl::MAIN_MEMORY) PetscCall(PetscPrintf(PETSC_COMM_WORLD, "OpenMP (sequential - consider reconfiguration: --with-openmp=1) "));
155: #endif
156:     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "\n"));
157:   }
158:   PetscFunctionReturn(PETSC_SUCCESS);
159: }

161: /*
162:     Allocates space for the vector array on the Host if it does not exist.
163:     Does NOT change the PetscViennaCLFlag for the vector
164:     Does NOT zero the ViennaCL array
165:  */
166: PETSC_EXTERN PetscErrorCode VecViennaCLAllocateCheckHost(Vec v)
167: {
168:   PetscScalar *array;
169:   Vec_Seq     *s;
170:   PetscInt     n = v->map->n;

172:   PetscFunctionBegin;
173:   s = (Vec_Seq *)v->data;
174:   PetscCall(VecViennaCLAllocateCheck(v));
175:   if (s->array == 0) {
176:     PetscCall(PetscMalloc1(n, &array));
177:     s->array           = array;
178:     s->array_allocated = array;
179:   }
180:   PetscFunctionReturn(PETSC_SUCCESS);
181: }

183: /*
184:     Allocates space for the vector array on the GPU if it does not exist.
185:     Does NOT change the PetscViennaCLFlag for the vector
186:     Does NOT zero the ViennaCL array

188:  */
189: PetscErrorCode VecViennaCLAllocateCheck(Vec v)
190: {
191:   PetscFunctionBegin;
192:   if (!v->spptr) {
193:     try {
194:       v->spptr                                       = new Vec_ViennaCL;
195:       ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated = new ViennaCLVector((PetscBLASInt)v->map->n);
196:       ((Vec_ViennaCL *)v->spptr)->GPUarray           = ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;

198:     } catch (std::exception const &ex) {
199:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
200:     }
201:   }
202:   PetscFunctionReturn(PETSC_SUCCESS);
203: }

205: /* Copies a vector from the CPU to the GPU unless we already have an up-to-date copy on the GPU */
206: PetscErrorCode VecViennaCLCopyToGPU(Vec v)
207: {
208:   PetscFunctionBegin;
209:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
210:   PetscCall(VecViennaCLAllocateCheck(v));
211:   if (v->map->n > 0) {
212:     if (v->offloadmask == PETSC_OFFLOAD_CPU) {
213:       PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
214:       try {
215:         ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
216:         viennacl::fast_copy(*(PetscScalar **)v->data, *(PetscScalar **)v->data + v->map->n, vec->begin());
217:         ViennaCLWaitForGPU();
218:       } catch (std::exception const &ex) {
219:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
220:       }
221:       PetscCall(PetscLogCpuToGpu(v->map->n * sizeof(PetscScalar)));
222:       PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyToGPU, v, 0, 0, 0));
223:       v->offloadmask = PETSC_OFFLOAD_BOTH;
224:     }
225:   }
226:   PetscFunctionReturn(PETSC_SUCCESS);
227: }

229: /*
230:      VecViennaCLCopyFromGPU - Copies a vector from the GPU to the CPU unless we already have an up-to-date copy on the CPU
231: */
232: PetscErrorCode VecViennaCLCopyFromGPU(Vec v)
233: {
234:   PetscFunctionBegin;
235:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
236:   PetscCall(VecViennaCLAllocateCheckHost(v));
237:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
238:     PetscCall(PetscLogEventBegin(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
239:     try {
240:       ViennaCLVector *vec = ((Vec_ViennaCL *)v->spptr)->GPUarray;
241:       viennacl::fast_copy(vec->begin(), vec->end(), *(PetscScalar **)v->data);
242:       ViennaCLWaitForGPU();
243:     } catch (std::exception const &ex) {
244:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
245:     }
246:     PetscCall(PetscLogGpuToCpu(v->map->n * sizeof(PetscScalar)));
247:     PetscCall(PetscLogEventEnd(VEC_ViennaCLCopyFromGPU, v, 0, 0, 0));
248:     v->offloadmask = PETSC_OFFLOAD_BOTH;
249:   }
250:   PetscFunctionReturn(PETSC_SUCCESS);
251: }

253: /* Copy on CPU */
254: static PetscErrorCode VecCopy_SeqViennaCL_Private(Vec xin, Vec yin)
255: {
256:   PetscScalar       *ya;
257:   const PetscScalar *xa;

259:   PetscFunctionBegin;
260:   PetscCall(VecViennaCLAllocateCheckHost(xin));
261:   PetscCall(VecViennaCLAllocateCheckHost(yin));
262:   if (xin != yin) {
263:     PetscCall(VecGetArrayRead(xin, &xa));
264:     PetscCall(VecGetArray(yin, &ya));
265:     PetscCall(PetscArraycpy(ya, xa, xin->map->n));
266:     PetscCall(VecRestoreArrayRead(xin, &xa));
267:     PetscCall(VecRestoreArray(yin, &ya));
268:   }
269:   PetscFunctionReturn(PETSC_SUCCESS);
270: }

272: static PetscErrorCode VecSetRandom_SeqViennaCL_Private(Vec xin, PetscRandom r)
273: {
274:   PetscInt     n = xin->map->n, i;
275:   PetscScalar *xx;

277:   PetscFunctionBegin;
278:   PetscCall(VecGetArray(xin, &xx));
279:   for (i = 0; i < n; i++) PetscCall(PetscRandomGetValue(r, &xx[i]));
280:   PetscCall(VecRestoreArray(xin, &xx));
281:   PetscFunctionReturn(PETSC_SUCCESS);
282: }

284: static PetscErrorCode VecDestroy_SeqViennaCL_Private(Vec v)
285: {
286:   Vec_Seq *vs = (Vec_Seq *)v->data;

288:   PetscFunctionBegin;
289:   PetscCall(PetscObjectSAWsViewOff(v));
290:   PetscCall(PetscLogObjectState((PetscObject)v, "Length=%" PetscInt_FMT, v->map->n));
291:   PetscCall(PetscFree(vs->array_allocated));
292:   PetscCall(PetscFree(vs));
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }

296: static PetscErrorCode VecResetArray_SeqViennaCL_Private(Vec vin)
297: {
298:   Vec_Seq *v = (Vec_Seq *)vin->data;

300:   PetscFunctionBegin;
301:   v->array         = v->unplacedarray;
302:   v->unplacedarray = 0;
303:   PetscFunctionReturn(PETSC_SUCCESS);
304: }

306: /*MC
307:    VECSEQVIENNACL - VECSEQVIENNACL = "seqviennacl" - The basic sequential vector, modified to use ViennaCL

309:    Options Database Keys:
310: . -vec_type seqviennacl - sets the vector type to VECSEQVIENNACL during a call to VecSetFromOptions()

312:   Level: beginner

314: .seealso: `VecCreate()`, `VecSetType()`, `VecSetFromOptions()`, `VecCreateSeqWithArray()`, `VECMPI`, `VecType`, `VecCreateMPI()`, `VecCreateSeq()`
315: M*/

317: PetscErrorCode VecAYPX_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
318: {
319:   const ViennaCLVector *xgpu;
320:   ViennaCLVector       *ygpu;

322:   PetscFunctionBegin;
323:   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
324:   PetscCall(VecViennaCLGetArray(yin, &ygpu));
325:   PetscCall(PetscLogGpuTimeBegin());
326:   try {
327:     if (alpha != 0.0 && xin->map->n > 0) {
328:       *ygpu = *xgpu + alpha * *ygpu;
329:       PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
330:     } else {
331:       *ygpu = *xgpu;
332:     }
333:     ViennaCLWaitForGPU();
334:   } catch (std::exception const &ex) {
335:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
336:   }
337:   PetscCall(PetscLogGpuTimeEnd());
338:   PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
339:   PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
340:   PetscFunctionReturn(PETSC_SUCCESS);
341: }

343: PetscErrorCode VecAXPY_SeqViennaCL(Vec yin, PetscScalar alpha, Vec xin)
344: {
345:   const ViennaCLVector *xgpu;
346:   ViennaCLVector       *ygpu;

348:   PetscFunctionBegin;
349:   if (alpha != 0.0 && xin->map->n > 0) {
350:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
351:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
352:     PetscCall(PetscLogGpuTimeBegin());
353:     try {
354:       *ygpu += alpha * *xgpu;
355:       ViennaCLWaitForGPU();
356:     } catch (std::exception const &ex) {
357:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
358:     }
359:     PetscCall(PetscLogGpuTimeEnd());
360:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
361:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
362:     PetscCall(PetscLogGpuFlops(2.0 * yin->map->n));
363:   }
364:   PetscFunctionReturn(PETSC_SUCCESS);
365: }

367: PetscErrorCode VecPointwiseDivide_SeqViennaCL(Vec win, Vec xin, Vec yin)
368: {
369:   const ViennaCLVector *xgpu, *ygpu;
370:   ViennaCLVector       *wgpu;

372:   PetscFunctionBegin;
373:   if (xin->map->n > 0) {
374:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
375:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
376:     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
377:     PetscCall(PetscLogGpuTimeBegin());
378:     try {
379:       *wgpu = viennacl::linalg::element_div(*xgpu, *ygpu);
380:       ViennaCLWaitForGPU();
381:     } catch (std::exception const &ex) {
382:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
383:     }
384:     PetscCall(PetscLogGpuTimeEnd());
385:     PetscCall(PetscLogGpuFlops(win->map->n));
386:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
387:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
388:     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
389:   }
390:   PetscFunctionReturn(PETSC_SUCCESS);
391: }

393: PetscErrorCode VecWAXPY_SeqViennaCL(Vec win, PetscScalar alpha, Vec xin, Vec yin)
394: {
395:   const ViennaCLVector *xgpu, *ygpu;
396:   ViennaCLVector       *wgpu;

398:   PetscFunctionBegin;
399:   if (alpha == 0.0 && xin->map->n > 0) PetscCall(VecCopy_SeqViennaCL(yin, win));
400:   else {
401:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
402:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
403:     PetscCall(VecViennaCLGetArrayWrite(win, &wgpu));
404:     PetscCall(PetscLogGpuTimeBegin());
405:     if (alpha == 1.0) {
406:       try {
407:         *wgpu = *ygpu + *xgpu;
408:       } catch (std::exception const &ex) {
409:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
410:       }
411:       PetscCall(PetscLogGpuFlops(win->map->n));
412:     } else if (alpha == -1.0) {
413:       try {
414:         *wgpu = *ygpu - *xgpu;
415:       } catch (std::exception const &ex) {
416:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
417:       }
418:       PetscCall(PetscLogGpuFlops(win->map->n));
419:     } else {
420:       try {
421:         *wgpu = *ygpu + alpha * *xgpu;
422:       } catch (std::exception const &ex) {
423:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
424:       }
425:       PetscCall(PetscLogGpuFlops(2 * win->map->n));
426:     }
427:     ViennaCLWaitForGPU();
428:     PetscCall(PetscLogGpuTimeEnd());
429:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
430:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
431:     PetscCall(VecViennaCLRestoreArrayWrite(win, &wgpu));
432:   }
433:   PetscFunctionReturn(PETSC_SUCCESS);
434: }

436: /*
437:  * Operation x = x + sum_i alpha_i * y_i for vectors x, y_i and scalars alpha_i
438:  *
439:  * ViennaCL supports a fast evaluation of x += alpha * y and x += alpha * y + beta * z,
440:  * hence there is an iterated application of these until the final result is obtained
441:  */
442: PetscErrorCode VecMAXPY_SeqViennaCL(Vec xin, PetscInt nv, const PetscScalar *alpha, Vec *y)
443: {
444:   PetscInt j;

446:   PetscFunctionBegin;
447:   for (j = 0; j < nv; ++j) {
448:     if (j + 1 < nv) {
449:       PetscCall(VecAXPBYPCZ_SeqViennaCL(xin, alpha[j], alpha[j + 1], 1.0, y[j], y[j + 1]));
450:       ++j;
451:     } else {
452:       PetscCall(VecAXPY_SeqViennaCL(xin, alpha[j], y[j]));
453:     }
454:   }
455:   ViennaCLWaitForGPU();
456:   PetscFunctionReturn(PETSC_SUCCESS);
457: }

459: PetscErrorCode VecDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
460: {
461:   const ViennaCLVector *xgpu, *ygpu;

463:   PetscFunctionBegin;
464:   if (xin->map->n > 0) {
465:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
466:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
467:     PetscCall(PetscLogGpuTimeBegin());
468:     try {
469:       *z = viennacl::linalg::inner_prod(*xgpu, *ygpu);
470:     } catch (std::exception const &ex) {
471:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
472:     }
473:     ViennaCLWaitForGPU();
474:     PetscCall(PetscLogGpuTimeEnd());
475:     if (xin->map->n > 0) PetscCall(PetscLogGpuFlops(2.0 * xin->map->n - 1));
476:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
477:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
478:   } else *z = 0.0;
479:   PetscFunctionReturn(PETSC_SUCCESS);
480: }

482: /*
483:  * Operation z[j] = dot(x, y[j])
484:  *
485:  * We use an iterated application of dot() for each j. For small ranges of j this is still faster than an allocation of extra memory in order to use gemv().
486:  */
487: PetscErrorCode VecMDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
488: {
489:   PetscInt                                                n = xin->map->n, i;
490:   const ViennaCLVector                                   *xgpu, *ygpu;
491:   Vec                                                    *yyin = (Vec *)yin;
492:   std::vector<viennacl::vector_base<PetscScalar> const *> ygpu_array(nv);

494:   PetscFunctionBegin;
495:   if (xin->map->n > 0) {
496:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
497:     for (i = 0; i < nv; i++) {
498:       PetscCall(VecViennaCLGetArrayRead(yyin[i], &ygpu));
499:       ygpu_array[i] = ygpu;
500:     }
501:     PetscCall(PetscLogGpuTimeBegin());
502:     viennacl::vector_tuple<PetscScalar> y_tuple(ygpu_array);
503:     ViennaCLVector                      result = viennacl::linalg::inner_prod(*xgpu, y_tuple);
504:     viennacl::copy(result.begin(), result.end(), z);
505:     for (i = 0; i < nv; i++) PetscCall(VecViennaCLRestoreArrayRead(yyin[i], &ygpu));
506:     ViennaCLWaitForGPU();
507:     PetscCall(PetscLogGpuTimeEnd());
508:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
509:     PetscCall(PetscLogGpuFlops(PetscMax(nv * (2.0 * n - 1), 0.0)));
510:   } else {
511:     for (i = 0; i < nv; i++) z[i] = 0.0;
512:   }
513:   PetscFunctionReturn(PETSC_SUCCESS);
514: }

516: static PetscErrorCode VecMTDot_SeqViennaCL(Vec xin, PetscInt nv, const Vec yin[], PetscScalar *z)
517: {
518:   PetscFunctionBegin;
519:   /* Since complex case is not supported at the moment, this is the same as VecMDot_SeqViennaCL */
520:   PetscCall(VecMDot_SeqViennaCL(xin, nv, yin, z));
521:   ViennaCLWaitForGPU();
522:   PetscFunctionReturn(PETSC_SUCCESS);
523: }

525: PetscErrorCode VecSet_SeqViennaCL(Vec xin, PetscScalar alpha)
526: {
527:   ViennaCLVector *xgpu;

529:   PetscFunctionBegin;
530:   if (xin->map->n > 0) {
531:     PetscCall(VecViennaCLGetArrayWrite(xin, &xgpu));
532:     PetscCall(PetscLogGpuTimeBegin());
533:     try {
534:       *xgpu = viennacl::scalar_vector<PetscScalar>(xgpu->size(), alpha);
535:       ViennaCLWaitForGPU();
536:     } catch (std::exception const &ex) {
537:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
538:     }
539:     PetscCall(PetscLogGpuTimeEnd());
540:     PetscCall(VecViennaCLRestoreArrayWrite(xin, &xgpu));
541:   }
542:   PetscFunctionReturn(PETSC_SUCCESS);
543: }

545: PetscErrorCode VecScale_SeqViennaCL(Vec xin, PetscScalar alpha)
546: {
547:   ViennaCLVector *xgpu;

549:   PetscFunctionBegin;
550:   if (alpha == 0.0 && xin->map->n > 0) {
551:     PetscCall(VecSet_SeqViennaCL(xin, alpha));
552:     PetscCall(PetscLogGpuFlops(xin->map->n));
553:   } else if (alpha != 1.0 && xin->map->n > 0) {
554:     PetscCall(VecViennaCLGetArray(xin, &xgpu));
555:     PetscCall(PetscLogGpuTimeBegin());
556:     try {
557:       *xgpu *= alpha;
558:       ViennaCLWaitForGPU();
559:     } catch (std::exception const &ex) {
560:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
561:     }
562:     PetscCall(PetscLogGpuTimeEnd());
563:     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
564:     PetscCall(PetscLogGpuFlops(xin->map->n));
565:   }
566:   PetscFunctionReturn(PETSC_SUCCESS);
567: }

569: PetscErrorCode VecTDot_SeqViennaCL(Vec xin, Vec yin, PetscScalar *z)
570: {
571:   PetscFunctionBegin;
572:   /* Since complex case is not supported at the moment, this is the same as VecDot_SeqViennaCL */
573:   PetscCall(VecDot_SeqViennaCL(xin, yin, z));
574:   ViennaCLWaitForGPU();
575:   PetscFunctionReturn(PETSC_SUCCESS);
576: }

578: PetscErrorCode VecCopy_SeqViennaCL(Vec xin, Vec yin)
579: {
580:   const ViennaCLVector *xgpu;
581:   ViennaCLVector       *ygpu;

583:   PetscFunctionBegin;
584:   if (xin != yin && xin->map->n > 0) {
585:     if (xin->offloadmask == PETSC_OFFLOAD_GPU) {
586:       PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
587:       PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
588:       PetscCall(PetscLogGpuTimeBegin());
589:       try {
590:         *ygpu = *xgpu;
591:         ViennaCLWaitForGPU();
592:       } catch (std::exception const &ex) {
593:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
594:       }
595:       PetscCall(PetscLogGpuTimeEnd());
596:       PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
597:       PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));

599:     } else if (xin->offloadmask == PETSC_OFFLOAD_CPU) {
600:       /* copy in CPU if we are on the CPU*/
601:       PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
602:       ViennaCLWaitForGPU();
603:     } else if (xin->offloadmask == PETSC_OFFLOAD_BOTH) {
604:       /* if xin is valid in both places, see where yin is and copy there (because it's probably where we'll want to next use it) */
605:       if (yin->offloadmask == PETSC_OFFLOAD_CPU) {
606:         /* copy in CPU */
607:         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
608:         ViennaCLWaitForGPU();
609:       } else if (yin->offloadmask == PETSC_OFFLOAD_GPU) {
610:         /* copy in GPU */
611:         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
612:         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
613:         PetscCall(PetscLogGpuTimeBegin());
614:         try {
615:           *ygpu = *xgpu;
616:           ViennaCLWaitForGPU();
617:         } catch (std::exception const &ex) {
618:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
619:         }
620:         PetscCall(PetscLogGpuTimeEnd());
621:         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
622:         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
623:       } else if (yin->offloadmask == PETSC_OFFLOAD_BOTH) {
624:         /* xin and yin are both valid in both places (or yin was unallocated before the earlier call to allocatecheck
625:            default to copy in GPU (this is an arbitrary choice) */
626:         PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
627:         PetscCall(VecViennaCLGetArrayWrite(yin, &ygpu));
628:         PetscCall(PetscLogGpuTimeBegin());
629:         try {
630:           *ygpu = *xgpu;
631:           ViennaCLWaitForGPU();
632:         } catch (std::exception const &ex) {
633:           SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
634:         }
635:         PetscCall(PetscLogGpuTimeEnd());
636:         PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
637:         PetscCall(VecViennaCLRestoreArrayWrite(yin, &ygpu));
638:       } else {
639:         PetscCall(VecCopy_SeqViennaCL_Private(xin, yin));
640:         ViennaCLWaitForGPU();
641:       }
642:     }
643:   }
644:   PetscFunctionReturn(PETSC_SUCCESS);
645: }

647: PetscErrorCode VecSwap_SeqViennaCL(Vec xin, Vec yin)
648: {
649:   ViennaCLVector *xgpu, *ygpu;

651:   PetscFunctionBegin;
652:   if (xin != yin && xin->map->n > 0) {
653:     PetscCall(VecViennaCLGetArray(xin, &xgpu));
654:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
655:     PetscCall(PetscLogGpuTimeBegin());
656:     try {
657:       viennacl::swap(*xgpu, *ygpu);
658:       ViennaCLWaitForGPU();
659:     } catch (std::exception const &ex) {
660:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
661:     }
662:     PetscCall(PetscLogGpuTimeEnd());
663:     PetscCall(VecViennaCLRestoreArray(xin, &xgpu));
664:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
665:   }
666:   PetscFunctionReturn(PETSC_SUCCESS);
667: }

669: // y = alpha * x + beta * y
670: PetscErrorCode VecAXPBY_SeqViennaCL(Vec yin, PetscScalar alpha, PetscScalar beta, Vec xin)
671: {
672:   PetscScalar           a = alpha, b = beta;
673:   const ViennaCLVector *xgpu;
674:   ViennaCLVector       *ygpu;

676:   PetscFunctionBegin;
677:   if (a == 0.0 && xin->map->n > 0) PetscCall(VecScale_SeqViennaCL(yin, beta));
678:   else if (b == 1.0 && xin->map->n > 0) PetscCall(VecAXPY_SeqViennaCL(yin, alpha, xin));
679:   else if (a == 1.0 && xin->map->n > 0) PetscCall(VecAYPX_SeqViennaCL(yin, beta, xin));
680:   else if (b == 0.0 && xin->map->n > 0) {
681:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
682:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
683:     PetscCall(PetscLogGpuTimeBegin());
684:     try {
685:       *ygpu = *xgpu * alpha;
686:       ViennaCLWaitForGPU();
687:     } catch (std::exception const &ex) {
688:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
689:     }
690:     PetscCall(PetscLogGpuTimeEnd());
691:     PetscCall(PetscLogGpuFlops(xin->map->n));
692:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
693:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
694:   } else if (xin->map->n > 0) {
695:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
696:     PetscCall(VecViennaCLGetArray(yin, &ygpu));
697:     PetscCall(PetscLogGpuTimeBegin());
698:     try {
699:       *ygpu = *xgpu * alpha + *ygpu * beta;
700:       ViennaCLWaitForGPU();
701:     } catch (std::exception const &ex) {
702:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
703:     }
704:     PetscCall(PetscLogGpuTimeEnd());
705:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
706:     PetscCall(VecViennaCLRestoreArray(yin, &ygpu));
707:     PetscCall(PetscLogGpuFlops(3.0 * xin->map->n));
708:   }
709:   PetscFunctionReturn(PETSC_SUCCESS);
710: }

712: /* operation  z = alpha * x + beta *y + gamma *z*/
713: PetscErrorCode VecAXPBYPCZ_SeqViennaCL(Vec zin, PetscScalar alpha, PetscScalar beta, PetscScalar gamma, Vec xin, Vec yin)
714: {
715:   PetscInt              n = zin->map->n;
716:   const ViennaCLVector *xgpu, *ygpu;
717:   ViennaCLVector       *zgpu;

719:   PetscFunctionBegin;
720:   PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
721:   PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
722:   PetscCall(VecViennaCLGetArray(zin, &zgpu));
723:   if (alpha == 0.0 && xin->map->n > 0) {
724:     PetscCall(PetscLogGpuTimeBegin());
725:     try {
726:       if (beta == 0.0) {
727:         *zgpu = gamma * *zgpu;
728:         ViennaCLWaitForGPU();
729:         PetscCall(PetscLogGpuFlops(1.0 * n));
730:       } else if (gamma == 0.0) {
731:         *zgpu = beta * *ygpu;
732:         ViennaCLWaitForGPU();
733:         PetscCall(PetscLogGpuFlops(1.0 * n));
734:       } else {
735:         *zgpu = beta * *ygpu + gamma * *zgpu;
736:         ViennaCLWaitForGPU();
737:         PetscCall(PetscLogGpuFlops(3.0 * n));
738:       }
739:     } catch (std::exception const &ex) {
740:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
741:     }
742:     PetscCall(PetscLogGpuTimeEnd());
743:     PetscCall(PetscLogGpuFlops(3.0 * n));
744:   } else if (beta == 0.0 && xin->map->n > 0) {
745:     PetscCall(PetscLogGpuTimeBegin());
746:     try {
747:       if (gamma == 0.0) {
748:         *zgpu = alpha * *xgpu;
749:         ViennaCLWaitForGPU();
750:         PetscCall(PetscLogGpuFlops(1.0 * n));
751:       } else {
752:         *zgpu = alpha * *xgpu + gamma * *zgpu;
753:         ViennaCLWaitForGPU();
754:         PetscCall(PetscLogGpuFlops(3.0 * n));
755:       }
756:     } catch (std::exception const &ex) {
757:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
758:     }
759:     PetscCall(PetscLogGpuTimeEnd());
760:   } else if (gamma == 0.0 && xin->map->n > 0) {
761:     PetscCall(PetscLogGpuTimeBegin());
762:     try {
763:       *zgpu = alpha * *xgpu + beta * *ygpu;
764:       ViennaCLWaitForGPU();
765:     } catch (std::exception const &ex) {
766:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
767:     }
768:     PetscCall(PetscLogGpuTimeEnd());
769:     PetscCall(PetscLogGpuFlops(3.0 * n));
770:   } else if (xin->map->n > 0) {
771:     PetscCall(PetscLogGpuTimeBegin());
772:     try {
773:       /* Split operation into two steps. This is not completely ideal, but avoids temporaries (which are far worse) */
774:       if (gamma != 1.0) *zgpu *= gamma;
775:       *zgpu += alpha * *xgpu + beta * *ygpu;
776:       ViennaCLWaitForGPU();
777:     } catch (std::exception const &ex) {
778:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
779:     }
780:     PetscCall(PetscLogGpuTimeEnd());
781:     PetscCall(VecViennaCLRestoreArray(zin, &zgpu));
782:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
783:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
784:     PetscCall(PetscLogGpuFlops(5.0 * n));
785:   }
786:   PetscFunctionReturn(PETSC_SUCCESS);
787: }

789: PetscErrorCode VecPointwiseMult_SeqViennaCL(Vec win, Vec xin, Vec yin)
790: {
791:   PetscInt              n = win->map->n;
792:   const ViennaCLVector *xgpu, *ygpu;
793:   ViennaCLVector       *wgpu;

795:   PetscFunctionBegin;
796:   if (xin->map->n > 0) {
797:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
798:     PetscCall(VecViennaCLGetArrayRead(yin, &ygpu));
799:     PetscCall(VecViennaCLGetArray(win, &wgpu));
800:     PetscCall(PetscLogGpuTimeBegin());
801:     try {
802:       *wgpu = viennacl::linalg::element_prod(*xgpu, *ygpu);
803:       ViennaCLWaitForGPU();
804:     } catch (std::exception const &ex) {
805:       SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
806:     }
807:     PetscCall(PetscLogGpuTimeEnd());
808:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
809:     PetscCall(VecViennaCLRestoreArrayRead(yin, &ygpu));
810:     PetscCall(VecViennaCLRestoreArray(win, &wgpu));
811:     PetscCall(PetscLogGpuFlops(n));
812:   }
813:   PetscFunctionReturn(PETSC_SUCCESS);
814: }

816: PetscErrorCode VecNorm_SeqViennaCL(Vec xin, NormType type, PetscReal *z)
817: {
818:   PetscInt              n = xin->map->n;
819:   PetscBLASInt          bn;
820:   const ViennaCLVector *xgpu;

822:   PetscFunctionBegin;
823:   if (xin->map->n > 0) {
824:     PetscCall(PetscBLASIntCast(n, &bn));
825:     PetscCall(VecViennaCLGetArrayRead(xin, &xgpu));
826:     if (type == NORM_2 || type == NORM_FROBENIUS) {
827:       PetscCall(PetscLogGpuTimeBegin());
828:       try {
829:         *z = viennacl::linalg::norm_2(*xgpu);
830:         ViennaCLWaitForGPU();
831:       } catch (std::exception const &ex) {
832:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
833:       }
834:       PetscCall(PetscLogGpuTimeEnd());
835:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
836:     } else if (type == NORM_INFINITY) {
837:       PetscCall(PetscLogGpuTimeBegin());
838:       try {
839:         *z = viennacl::linalg::norm_inf(*xgpu);
840:         ViennaCLWaitForGPU();
841:       } catch (std::exception const &ex) {
842:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
843:       }
844:       PetscCall(PetscLogGpuTimeEnd());
845:     } else if (type == NORM_1) {
846:       PetscCall(PetscLogGpuTimeBegin());
847:       try {
848:         *z = viennacl::linalg::norm_1(*xgpu);
849:         ViennaCLWaitForGPU();
850:       } catch (std::exception const &ex) {
851:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
852:       }
853:       PetscCall(PetscLogGpuTimeEnd());
854:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
855:     } else if (type == NORM_1_AND_2) {
856:       PetscCall(PetscLogGpuTimeBegin());
857:       try {
858:         *z       = viennacl::linalg::norm_1(*xgpu);
859:         *(z + 1) = viennacl::linalg::norm_2(*xgpu);
860:         ViennaCLWaitForGPU();
861:       } catch (std::exception const &ex) {
862:         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
863:       }
864:       PetscCall(PetscLogGpuTimeEnd());
865:       PetscCall(PetscLogGpuFlops(PetscMax(2.0 * n - 1, 0.0)));
866:       PetscCall(PetscLogGpuFlops(PetscMax(n - 1.0, 0.0)));
867:     }
868:     PetscCall(VecViennaCLRestoreArrayRead(xin, &xgpu));
869:   } else if (type == NORM_1_AND_2) {
870:     *z       = 0.0;
871:     *(z + 1) = 0.0;
872:   } else *z = 0.0;
873:   PetscFunctionReturn(PETSC_SUCCESS);
874: }

876: PetscErrorCode VecSetRandom_SeqViennaCL(Vec xin, PetscRandom r)
877: {
878:   PetscFunctionBegin;
879:   PetscCall(VecSetRandom_SeqViennaCL_Private(xin, r));
880:   xin->offloadmask = PETSC_OFFLOAD_CPU;
881:   PetscFunctionReturn(PETSC_SUCCESS);
882: }

884: PetscErrorCode VecResetArray_SeqViennaCL(Vec vin)
885: {
886:   PetscFunctionBegin;
887:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
888:   PetscCall(VecViennaCLCopyFromGPU(vin));
889:   PetscCall(VecResetArray_SeqViennaCL_Private(vin));
890:   vin->offloadmask = PETSC_OFFLOAD_CPU;
891:   PetscFunctionReturn(PETSC_SUCCESS);
892: }

894: PetscErrorCode VecPlaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
895: {
896:   PetscFunctionBegin;
897:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
898:   PetscCall(VecViennaCLCopyFromGPU(vin));
899:   PetscCall(VecPlaceArray_Seq(vin, a));
900:   vin->offloadmask = PETSC_OFFLOAD_CPU;
901:   PetscFunctionReturn(PETSC_SUCCESS);
902: }

904: PetscErrorCode VecReplaceArray_SeqViennaCL(Vec vin, const PetscScalar *a)
905: {
906:   PetscFunctionBegin;
907:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
908:   PetscCall(VecViennaCLCopyFromGPU(vin));
909:   PetscCall(VecReplaceArray_Seq(vin, a));
910:   vin->offloadmask = PETSC_OFFLOAD_CPU;
911:   PetscFunctionReturn(PETSC_SUCCESS);
912: }

914: /*@C
915:   VecCreateSeqViennaCL - Creates a standard, sequential array-style vector.

917:   Collective

919:   Input Parameters:
920: + comm - the communicator, should be PETSC_COMM_SELF
921: - n    - the vector length

923:   Output Parameter:
924: . v - the vector

926:   Notes:
927:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
928:   same type as an existing vector.

930:   Level: intermediate

932: .seealso: `VecCreateMPI()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`, `VecCreateGhost()`
933: @*/
934: PetscErrorCode VecCreateSeqViennaCL(MPI_Comm comm, PetscInt n, Vec *v)
935: {
936:   PetscFunctionBegin;
937:   PetscCall(VecCreate(comm, v));
938:   PetscCall(VecSetSizes(*v, n, n));
939:   PetscCall(VecSetType(*v, VECSEQVIENNACL));
940:   PetscFunctionReturn(PETSC_SUCCESS);
941: }

943: /*@C
944:   VecCreateSeqViennaCLWithArray - Creates a viennacl sequential array-style vector,
945:   where the user provides the array space to store the vector values.

947:   Collective

949:   Input Parameters:
950: + comm  - the communicator, should be PETSC_COMM_SELF
951: . bs    - the block size
952: . n     - the vector length
953: - array - viennacl array where the vector elements are to be stored.

955:   Output Parameter:
956: . V - the vector

958:   Notes:
959:   Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
960:   same type as an existing vector.

962:   If the user-provided array is NULL, then VecViennaCLPlaceArray() can be used
963:   at a later stage to SET the array for storing the vector values.

965:   PETSc does NOT free the array when the vector is destroyed via VecDestroy().
966:   The user should not free the array until the vector is destroyed.

968:   Level: intermediate

970: .seealso: `VecCreateMPIViennaCLWithArray()`, `VecCreate()`, `VecDuplicate()`, `VecDuplicateVecs()`,
971:           `VecCreateGhost()`, `VecCreateSeq()`, `VecCUDAPlaceArray()`, `VecCreateSeqWithArray()`,
972:           `VecCreateMPIWithArray()`
973: @*/
974: PETSC_EXTERN PetscErrorCode VecCreateSeqViennaCLWithArray(MPI_Comm comm, PetscInt bs, PetscInt n, const ViennaCLVector *array, Vec *V) PeNS
975: {
976:   PetscMPIInt size;

978:   PetscFunctionBegin;
979:   PetscCall(VecCreate(comm, V));
980:   PetscCall(VecSetSizes(*V, n, n));
981:   PetscCall(VecSetBlockSize(*V, bs));
982:   PetscCallMPI(MPI_Comm_size(comm, &size));
983:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");
984:   PetscCall(VecCreate_SeqViennaCL_Private(*V, array));
985:   PetscFunctionReturn(PETSC_SUCCESS);
986: }

988: /*@C
989:   VecCreateSeqViennaCLWithArrays - Creates a ViennaCL sequential vector, where
990:   the user provides the array space to store the vector values.

992:   Collective

994:   Input Parameters:
995: + comm        - the communicator, should be PETSC_COMM_SELF
996: . bs          - the block size
997: . n           - the vector length
998: . cpuarray    - CPU memory where the vector elements are to be stored.
999: - viennaclvec - ViennaCL vector where the Vec entries are to be stored on the device.

1001:   Output Parameter:
1002: . V - the vector

1004:   Notes:
1005:   If both cpuarray and viennaclvec are provided, the caller must ensure that
1006:   the provided arrays have identical values.

1008:   PETSc does NOT free the provided arrays when the vector is destroyed via
1009:   VecDestroy(). The user should not free the array until the vector is
1010:   destroyed.

1012:   Level: intermediate

1014: .seealso: `VecCreateMPIViennaCLWithArrays()`, `VecCreate()`, `VecCreateSeqWithArray()`,
1015:           `VecViennaCLPlaceArray()`, `VecPlaceArray()`, `VecCreateSeqCUDAWithArrays()`,
1016:           `VecViennaCLAllocateCheckHost()`
1017: @*/
1018: PetscErrorCode VecCreateSeqViennaCLWithArrays(MPI_Comm comm, PetscInt bs, PetscInt n, const PetscScalar cpuarray[], const ViennaCLVector *viennaclvec, Vec *V) PeNS
1019: {
1020:   PetscMPIInt size;

1022:   PetscFunctionBegin;
1023:   PetscCallMPI(MPI_Comm_size(comm, &size));
1024:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQ on more than one process");

1026:   // set V's viennaclvec to be viennaclvec, do not allocate memory on host yet.
1027:   PetscCall(VecCreateSeqViennaCLWithArray(comm, bs, n, viennaclvec, V));

1029:   if (cpuarray && viennaclvec) {
1030:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1031:     s->array          = (PetscScalar *)cpuarray;
1032:     (*V)->offloadmask = PETSC_OFFLOAD_BOTH;
1033:   } else if (cpuarray) {
1034:     Vec_Seq *s        = (Vec_Seq *)((*V)->data);
1035:     s->array          = (PetscScalar *)cpuarray;
1036:     (*V)->offloadmask = PETSC_OFFLOAD_CPU;
1037:   } else if (viennaclvec) {
1038:     (*V)->offloadmask = PETSC_OFFLOAD_GPU;
1039:   } else {
1040:     (*V)->offloadmask = PETSC_OFFLOAD_UNALLOCATED;
1041:   }
1042:   PetscFunctionReturn(PETSC_SUCCESS);
1043: }

1045: /*@C
1046:   VecViennaCLPlaceArray - Replace the viennacl vector in a Vec with
1047:   the one provided by the user. This is useful to avoid a copy.

1049:   Not Collective

1051:   Input Parameters:
1052: + vin - the vector
1053: - a   - the ViennaCL vector

1055:   Notes:
1056:   You can return to the original viennacl vector with a call to `VecViennaCLResetArray()`.
1057:   It is not possible to use `VecViennaCLPlaceArray()` and `VecPlaceArray()` at the same time on
1058:   the same vector.

1060:   Level: intermediate

1062: .seealso: `VecPlaceArray()`, `VecSetValues()`, `VecViennaCLResetArray()`,
1063:           `VecCUDAPlaceArray()`,

1065: @*/
1066: PETSC_EXTERN PetscErrorCode VecViennaCLPlaceArray(Vec vin, const ViennaCLVector *a)
1067: {
1068:   PetscFunctionBegin;
1069:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1070:   PetscCall(VecViennaCLCopyToGPU(vin));
1071:   PetscCheck(!((Vec_Seq *)vin->data)->unplacedarray, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "VecViennaCLPlaceArray()/VecPlaceArray() was already called on this vector, without a call to VecViennaCLResetArray()/VecResetArray()");
1072:   ((Vec_Seq *)vin->data)->unplacedarray  = (PetscScalar *)((Vec_ViennaCL *)vin->spptr)->GPUarray; /* save previous GPU array so reset can bring it back */
1073:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)a;
1074:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1075:   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1076:   PetscFunctionReturn(PETSC_SUCCESS);
1077: }

1079: /*@C
1080:   VecViennaCLResetArray - Resets a vector to use its default memory. Call this
1081:   after the use of VecViennaCLPlaceArray().

1083:   Not Collective

1085:   Input Parameters:
1086: . vin - the vector

1088:   Level: developer

1090: .seealso: `VecViennaCLPlaceArray()`, `VecResetArray()`, `VecCUDAResetArray()`, `VecPlaceArray()`
1091: @*/
1092: PETSC_EXTERN PetscErrorCode VecViennaCLResetArray(Vec vin)
1093: {
1094:   PetscFunctionBegin;
1095:   PetscCheckTypeNames(vin, VECSEQVIENNACL, VECMPIVIENNACL);
1096:   PetscCall(VecViennaCLCopyToGPU(vin));
1097:   ((Vec_ViennaCL *)vin->spptr)->GPUarray = (ViennaCLVector *)((Vec_Seq *)vin->data)->unplacedarray;
1098:   ((Vec_Seq *)vin->data)->unplacedarray  = 0;
1099:   vin->offloadmask                       = PETSC_OFFLOAD_GPU;
1100:   PetscCall(PetscObjectStateIncrease((PetscObject)vin));
1101:   PetscFunctionReturn(PETSC_SUCCESS);
1102: }

1104: /*  VecDotNorm2 - computes the inner product of two vectors and the 2-norm squared of the second vector
1105:  *
1106:  *  Simply reuses VecDot() and VecNorm(). Performance improvement through custom kernel (kernel generator) possible.
1107:  */
1108: PetscErrorCode VecDotNorm2_SeqViennaCL(Vec s, Vec t, PetscScalar *dp, PetscScalar *nm)
1109: {
1110:   PetscFunctionBegin;
1111:   PetscCall(VecDot_SeqViennaCL(s, t, dp));
1112:   PetscCall(VecNorm_SeqViennaCL(t, NORM_2, nm));
1113:   *nm *= *nm; //squared norm required
1114:   PetscFunctionReturn(PETSC_SUCCESS);
1115: }

1117: PetscErrorCode VecDuplicate_SeqViennaCL(Vec win, Vec *V)
1118: {
1119:   PetscFunctionBegin;
1120:   PetscCall(VecCreateSeqViennaCL(PetscObjectComm((PetscObject)win), win->map->n, V));
1121:   PetscCall(PetscLayoutReference(win->map, &(*V)->map));
1122:   PetscCall(PetscObjectListDuplicate(((PetscObject)win)->olist, &((PetscObject)*V)->olist));
1123:   PetscCall(PetscFunctionListDuplicate(((PetscObject)win)->qlist, &((PetscObject)*V)->qlist));
1124:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
1125:   PetscFunctionReturn(PETSC_SUCCESS);
1126: }

1128: PetscErrorCode VecDestroy_SeqViennaCL(Vec v)
1129: {
1130:   PetscFunctionBegin;
1131:   try {
1132:     if (v->spptr) {
1133:       delete ((Vec_ViennaCL *)v->spptr)->GPUarray_allocated;
1134:       delete (Vec_ViennaCL *)v->spptr;
1135:     }
1136:   } catch (char *ex) {
1137:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex);
1138:   }
1139:   PetscCall(VecDestroy_SeqViennaCL_Private(v));
1140:   PetscFunctionReturn(PETSC_SUCCESS);
1141: }

1143: PetscErrorCode VecGetArray_SeqViennaCL(Vec v, PetscScalar **a)
1144: {
1145:   PetscFunctionBegin;
1146:   if (v->offloadmask == PETSC_OFFLOAD_GPU) {
1147:     PetscCall(VecViennaCLCopyFromGPU(v));
1148:   } else {
1149:     PetscCall(VecViennaCLAllocateCheckHost(v));
1150:   }
1151:   *a = *((PetscScalar **)v->data);
1152:   PetscFunctionReturn(PETSC_SUCCESS);
1153: }

1155: PetscErrorCode VecRestoreArray_SeqViennaCL(Vec v, PetscScalar **a)
1156: {
1157:   PetscFunctionBegin;
1158:   v->offloadmask = PETSC_OFFLOAD_CPU;
1159:   PetscFunctionReturn(PETSC_SUCCESS);
1160: }

1162: PetscErrorCode VecGetArrayWrite_SeqViennaCL(Vec v, PetscScalar **a)
1163: {
1164:   PetscFunctionBegin;
1165:   PetscCall(VecViennaCLAllocateCheckHost(v));
1166:   *a = *((PetscScalar **)v->data);
1167:   PetscFunctionReturn(PETSC_SUCCESS);
1168: }

1170: static PetscErrorCode VecBindToCPU_SeqAIJViennaCL(Vec V, PetscBool flg)
1171: {
1172:   PetscFunctionBegin;
1173:   V->boundtocpu = flg;
1174:   if (flg) {
1175:     PetscCall(VecViennaCLCopyFromGPU(V));
1176:     V->offloadmask          = PETSC_OFFLOAD_CPU; /* since the CPU code will likely change values in the vector */
1177:     V->ops->dot             = VecDot_Seq;
1178:     V->ops->norm            = VecNorm_Seq;
1179:     V->ops->tdot            = VecTDot_Seq;
1180:     V->ops->scale           = VecScale_Seq;
1181:     V->ops->copy            = VecCopy_Seq;
1182:     V->ops->set             = VecSet_Seq;
1183:     V->ops->swap            = VecSwap_Seq;
1184:     V->ops->axpy            = VecAXPY_Seq;
1185:     V->ops->axpby           = VecAXPBY_Seq;
1186:     V->ops->axpbypcz        = VecAXPBYPCZ_Seq;
1187:     V->ops->pointwisemult   = VecPointwiseMult_Seq;
1188:     V->ops->pointwisedivide = VecPointwiseDivide_Seq;
1189:     V->ops->setrandom       = VecSetRandom_Seq;
1190:     V->ops->dot_local       = VecDot_Seq;
1191:     V->ops->tdot_local      = VecTDot_Seq;
1192:     V->ops->norm_local      = VecNorm_Seq;
1193:     V->ops->mdot_local      = VecMDot_Seq;
1194:     V->ops->mtdot_local     = VecMTDot_Seq;
1195:     V->ops->maxpy           = VecMAXPY_Seq;
1196:     V->ops->mdot            = VecMDot_Seq;
1197:     V->ops->mtdot           = VecMTDot_Seq;
1198:     V->ops->aypx            = VecAYPX_Seq;
1199:     V->ops->waxpy           = VecWAXPY_Seq;
1200:     V->ops->dotnorm2        = NULL;
1201:     V->ops->placearray      = VecPlaceArray_Seq;
1202:     V->ops->replacearray    = VecReplaceArray_Seq;
1203:     V->ops->resetarray      = VecResetArray_Seq;
1204:     V->ops->duplicate       = VecDuplicate_Seq;
1205:   } else {
1206:     V->ops->dot             = VecDot_SeqViennaCL;
1207:     V->ops->norm            = VecNorm_SeqViennaCL;
1208:     V->ops->tdot            = VecTDot_SeqViennaCL;
1209:     V->ops->scale           = VecScale_SeqViennaCL;
1210:     V->ops->copy            = VecCopy_SeqViennaCL;
1211:     V->ops->set             = VecSet_SeqViennaCL;
1212:     V->ops->swap            = VecSwap_SeqViennaCL;
1213:     V->ops->axpy            = VecAXPY_SeqViennaCL;
1214:     V->ops->axpby           = VecAXPBY_SeqViennaCL;
1215:     V->ops->axpbypcz        = VecAXPBYPCZ_SeqViennaCL;
1216:     V->ops->pointwisemult   = VecPointwiseMult_SeqViennaCL;
1217:     V->ops->pointwisedivide = VecPointwiseDivide_SeqViennaCL;
1218:     V->ops->setrandom       = VecSetRandom_SeqViennaCL;
1219:     V->ops->dot_local       = VecDot_SeqViennaCL;
1220:     V->ops->tdot_local      = VecTDot_SeqViennaCL;
1221:     V->ops->norm_local      = VecNorm_SeqViennaCL;
1222:     V->ops->mdot_local      = VecMDot_SeqViennaCL;
1223:     V->ops->mtdot_local     = VecMTDot_SeqViennaCL;
1224:     V->ops->maxpy           = VecMAXPY_SeqViennaCL;
1225:     V->ops->mdot            = VecMDot_SeqViennaCL;
1226:     V->ops->mtdot           = VecMTDot_SeqViennaCL;
1227:     V->ops->aypx            = VecAYPX_SeqViennaCL;
1228:     V->ops->waxpy           = VecWAXPY_SeqViennaCL;
1229:     V->ops->dotnorm2        = VecDotNorm2_SeqViennaCL;
1230:     V->ops->placearray      = VecPlaceArray_SeqViennaCL;
1231:     V->ops->replacearray    = VecReplaceArray_SeqViennaCL;
1232:     V->ops->resetarray      = VecResetArray_SeqViennaCL;
1233:     V->ops->destroy         = VecDestroy_SeqViennaCL;
1234:     V->ops->duplicate       = VecDuplicate_SeqViennaCL;
1235:     V->ops->getarraywrite   = VecGetArrayWrite_SeqViennaCL;
1236:     V->ops->getarray        = VecGetArray_SeqViennaCL;
1237:     V->ops->restorearray    = VecRestoreArray_SeqViennaCL;
1238:   }
1239:   V->ops->duplicatevecs = VecDuplicateVecs_Default;
1240:   PetscFunctionReturn(PETSC_SUCCESS);
1241: }

1243: PETSC_EXTERN PetscErrorCode VecCreate_SeqViennaCL(Vec V)
1244: {
1245:   PetscMPIInt size;

1247:   PetscFunctionBegin;
1248:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1249:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1250:   PetscCall(VecCreate_Seq_Private(V, 0));
1251:   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));

1253:   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1254:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1256:   PetscCall(VecViennaCLAllocateCheck(V));
1257:   PetscCall(VecSet_SeqViennaCL(V, 0.0));
1258:   PetscFunctionReturn(PETSC_SUCCESS);
1259: }

1261: /*@C
1262:   VecViennaCLGetCLContext - Get the OpenCL context in which the Vec resides.

1264:   Caller should cast (*ctx) to (const cl_context). Caller is responsible for
1265:   invoking clReleaseContext().

1267:   Input Parameter:
1268: . v - the vector

1270:   Output Parameter:
1271: . ctx - pointer to the underlying CL context

1273:   Level: intermediate

1275: .seealso: `VecViennaCLGetCLQueue()`, `VecViennaCLGetCLMemRead()`
1276: @*/
1277: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLContext(Vec v, PETSC_UINTPTR_T *ctx)
1278: {
1279: #if !defined(PETSC_HAVE_OPENCL)
1280:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_context.");
1281: #else

1283:   PetscFunctionBegin;
1284:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1285:   const ViennaCLVector *v_vcl;
1286:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1287:   try {
1288:     viennacl::ocl::context vcl_ctx = v_vcl->handle().opencl_handle().context();
1289:     const cl_context       ocl_ctx = vcl_ctx.handle().get();
1290:     clRetainContext(ocl_ctx);
1291:     *ctx = (PETSC_UINTPTR_T)ocl_ctx;
1292:   } catch (std::exception const &ex) {
1293:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1294:   }
1295:   PetscFunctionReturn(PETSC_SUCCESS);
1296: #endif
1297: }

1299: /*@C
1300:   VecViennaCLGetCLQueue - Get the OpenCL command queue to which all
1301:   operations of the Vec are enqueued.

1303:   Caller should cast (*queue) to (const cl_command_queue). Caller is
1304:   responsible for invoking clReleaseCommandQueue().

1306:   Input Parameter:
1307: . v - the vector

1309:   Output Parameter:
1310: . queue - pointer to the CL command queue

1312:   Level: intermediate

1314: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemRead()`
1315: @*/
1316: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLQueue(Vec v, PETSC_UINTPTR_T *queue)
1317: {
1318: #if !defined(PETSC_HAVE_OPENCL)
1319:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get the associated cl_command_queue.");
1320: #else
1321:   PetscFunctionBegin;
1322:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1323:   const ViennaCLVector *v_vcl;
1324:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1325:   try {
1326:     viennacl::ocl::context              vcl_ctx   = v_vcl->handle().opencl_handle().context();
1327:     const viennacl::ocl::command_queue &vcl_queue = vcl_ctx.current_queue();
1328:     const cl_command_queue              ocl_queue = vcl_queue.handle().get();
1329:     clRetainCommandQueue(ocl_queue);
1330:     *queue = (PETSC_UINTPTR_T)ocl_queue;
1331:   } catch (std::exception const &ex) {
1332:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1333:   }
1334:   PetscFunctionReturn(PETSC_SUCCESS);
1335: #endif
1336: }

1338: /*@C
1339:   VecViennaCLGetCLMemRead - Provides access to the CL buffer inside a Vec.

1341:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1342:   invoking clReleaseMemObject().

1344:   Input Parameter:
1345: . v - the vector

1347:   Output Parameter:
1348: . mem - pointer to the device buffer

1350:   Level: intermediate

1352: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1353: @*/
1354: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemRead(Vec v, PETSC_UINTPTR_T *mem)
1355: {
1356: #if !defined(PETSC_HAVE_OPENCL)
1357:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1358: #else
1359:   PetscFunctionBegin;
1360:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1361:   const ViennaCLVector *v_vcl;
1362:   PetscCall(VecViennaCLGetArrayRead(v, &v_vcl));
1363:   try {
1364:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1365:     clRetainMemObject(ocl_mem);
1366:     *mem = (PETSC_UINTPTR_T)ocl_mem;
1367:   } catch (std::exception const &ex) {
1368:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1369:   }
1370:   PetscFunctionReturn(PETSC_SUCCESS);
1371: #endif
1372: }

1374: /*@C
1375:   VecViennaCLGetCLMemWrite - Provides access to the CL buffer inside a Vec.

1377:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1378:   invoking clReleaseMemObject().

1380:   The device pointer has to be released by calling
1381:   VecViennaCLRestoreCLMemWrite().  Upon restoring the vector data the data on
1382:   the host will be marked as out of date.  A subsequent access of the host data
1383:   will thus incur a data transfer from the device to the host.

1385:   Input Parameter:
1386: . v - the vector

1388:   Output Parameter:
1389: . mem - pointer to the device buffer

1391:   Level: intermediate

1393: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMemWrite()`
1394: @*/
1395: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMemWrite(Vec v, PETSC_UINTPTR_T *mem)
1396: {
1397: #if !defined(PETSC_HAVE_OPENCL)
1398:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1399: #else
1400:   PetscFunctionBegin;
1401:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1402:   ViennaCLVector *v_vcl;
1403:   PetscCall(VecViennaCLGetArrayWrite(v, &v_vcl));
1404:   try {
1405:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1406:     clRetainMemObject(ocl_mem);
1407:     *mem = (PETSC_UINTPTR_T)ocl_mem;
1408:   } catch (std::exception const &ex) {
1409:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1410:   }
1411:   PetscFunctionReturn(PETSC_SUCCESS);
1412: #endif
1413: }

1415: /*@C
1416:   VecViennaCLRestoreCLMemWrite - Restores a CL buffer pointer previously
1417:   acquired with VecViennaCLGetCLMemWrite().

1419:   This marks the host data as out of date.  Subsequent access to the
1420:   vector data on the host side with for instance VecGetArray() incurs a
1421:   data transfer.

1423:   Input Parameter:
1424: . v - the vector

1426:   Level: intermediate

1428: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMemWrite()`
1429: @*/
1430: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMemWrite(Vec v)
1431: {
1432: #if !defined(PETSC_HAVE_OPENCL)
1433:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1434: #else
1435:   PetscFunctionBegin;
1436:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1437:   PetscCall(VecViennaCLRestoreArrayWrite(v, NULL));
1438:   PetscFunctionReturn(PETSC_SUCCESS);
1439: #endif
1440: }

1442: /*@C
1443:   VecViennaCLGetCLMem - Provides access to the CL buffer inside a Vec.

1445:   Caller should cast (*mem) to (const cl_mem). Caller is responsible for
1446:   invoking clReleaseMemObject().

1448:   The device pointer has to be released by calling VecViennaCLRestoreCLMem().
1449:   Upon restoring the vector data the data on the host will be marked as out of
1450:   date.  A subsequent access of the host data will thus incur a data transfer
1451:   from the device to the host.

1453:   Input Parameter:
1454: . v - the vector

1456:   Output Parameter:
1457: . mem - pointer to the device buffer

1459:   Level: intermediate

1461: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLRestoreCLMem()`
1462: @*/
1463: PETSC_EXTERN PetscErrorCode VecViennaCLGetCLMem(Vec v, PETSC_UINTPTR_T *mem)
1464: {
1465: #if !defined(PETSC_HAVE_OPENCL)
1466:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to get a Vec's cl_mem");
1467: #else
1468:   PetscFunctionBegin;
1469:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1470:   ViennaCLVector *v_vcl;
1471:   PetscCall(VecViennaCLGetArray(v, &v_vcl));
1472:   try {
1473:     const cl_mem ocl_mem = v_vcl->handle().opencl_handle().get();
1474:     clRetainMemObject(ocl_mem);
1475:     *mem = (PETSC_UINTPTR_T)ocl_mem;
1476:   } catch (std::exception const &ex) {
1477:     SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "ViennaCL error: %s", ex.what());
1478:   }
1479:   PetscFunctionReturn(PETSC_SUCCESS);
1480: #endif
1481: }

1483: /*@C
1484:   VecViennaCLRestoreCLMem - Restores a CL buffer pointer previously
1485:   acquired with VecViennaCLGetCLMem().

1487:   This marks the host data as out of date. Subsequent access to the vector
1488:   data on the host side with for instance VecGetArray() incurs a data
1489:   transfer.

1491:   Input Parameter:
1492: . v - the vector

1494:   Level: intermediate

1496: .seealso: `VecViennaCLGetCLContext()`, `VecViennaCLGetCLMem()`
1497: @*/
1498: PETSC_EXTERN PetscErrorCode VecViennaCLRestoreCLMem(Vec v)
1499: {
1500: #if !defined(PETSC_HAVE_OPENCL)
1501:   SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "PETSc must be configured with --with-opencl to restore a Vec's cl_mem");
1502: #else
1503:   PetscFunctionBegin;
1504:   PetscCheckTypeNames(v, VECSEQVIENNACL, VECMPIVIENNACL);
1505:   PetscCall(VecViennaCLRestoreArray(v, NULL));
1506:   PetscFunctionReturn(PETSC_SUCCESS);
1507: #endif
1508: }

1510: PetscErrorCode VecCreate_SeqViennaCL_Private(Vec V, const ViennaCLVector *array)
1511: {
1512:   Vec_ViennaCL *vecviennacl;
1513:   PetscMPIInt   size;

1515:   PetscFunctionBegin;
1516:   PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)V), &size));
1517:   PetscCheck(size <= 1, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot create VECSEQVIENNACL on more than one process");
1518:   PetscCall(VecCreate_Seq_Private(V, 0));
1519:   PetscCall(PetscObjectChangeTypeName((PetscObject)V, VECSEQVIENNACL));
1520:   PetscCall(VecBindToCPU_SeqAIJViennaCL(V, PETSC_FALSE));
1521:   V->ops->bindtocpu = VecBindToCPU_SeqAIJViennaCL;

1523:   if (array) {
1524:     if (!V->spptr) V->spptr = new Vec_ViennaCL;
1525:     vecviennacl                     = (Vec_ViennaCL *)V->spptr;
1526:     vecviennacl->GPUarray_allocated = 0;
1527:     vecviennacl->GPUarray           = (ViennaCLVector *)array;
1528:     V->offloadmask                  = PETSC_OFFLOAD_UNALLOCATED;
1529:   }
1530:   PetscFunctionReturn(PETSC_SUCCESS);
1531: }