Actual source code: math2opusutils.cu
1: #include <petsc/private/matimpl.h>
2: #include <petsc/private/vecimpl.h>
3: #include <petscsf.h>
4: #if defined(PETSC_HAVE_CUDA)
5: #include <thrust/for_each.h>
6: #include <thrust/device_vector.h>
7: #include <thrust/execution_policy.h>
8: #endif
10: PETSC_INTERN PetscErrorCode MatDenseGetH2OpusStridedSF(Mat A, PetscSF h2sf, PetscSF *osf)
11: {
12: PetscSF asf;
14: PetscFunctionBegin;
17: PetscAssertPointer(osf, 3);
18: PetscCall(PetscObjectQuery((PetscObject)A, "_math2opus_stridedsf", (PetscObject *)&asf));
19: if (!asf) {
20: PetscInt lda;
22: PetscCall(MatDenseGetLDA(A, &lda));
23: PetscCall(PetscSFCreateStridedSF(h2sf, A->cmap->N, lda, PETSC_DECIDE, &asf));
24: PetscCall(PetscObjectCompose((PetscObject)A, "_math2opus_stridedsf", (PetscObject)asf));
25: PetscCall(PetscObjectDereference((PetscObject)asf));
26: }
27: *osf = asf;
28: PetscFunctionReturn(PETSC_SUCCESS);
29: }
31: #if defined(PETSC_HAVE_CUDA)
32: struct StandardBasis_Functor {
33: PetscScalar *v;
34: PetscInt j;
36: StandardBasis_Functor(PetscScalar *_v, PetscInt _j) : v(_v), j(_j) { }
37: __host__ __device__ void operator()(PetscInt i) { v[i] = (i == j ? 1 : 0); }
38: };
39: #endif
41: PETSC_INTERN PetscErrorCode VecSetDelta(Vec x, PetscInt i)
42: {
43: #if defined(PETSC_HAVE_CUDA)
44: PetscBool iscuda;
45: #endif
46: PetscInt st, en;
48: PetscFunctionBegin;
49: PetscCall(VecGetOwnershipRange(x, &st, &en));
50: #if defined(PETSC_HAVE_CUDA)
51: PetscCall(PetscObjectTypeCompareAny((PetscObject)x, &iscuda, VECSEQCUDA, VECMPICUDA, ""));
52: iscuda = (PetscBool)(iscuda && !x->boundtocpu);
53: if (iscuda) {
54: PetscScalar *ax;
55: PetscCall(VecCUDAGetArrayWrite(x, &ax));
56: StandardBasis_Functor delta(ax, i - st);
57: thrust::for_each(thrust::device, thrust::counting_iterator<PetscInt>(0), thrust::counting_iterator<PetscInt>(en - st), delta);
58: PetscCall(VecCUDARestoreArrayWrite(x, &ax));
59: } else
60: #endif
61: {
62: PetscCall(VecSet(x, 0.));
63: if (st <= i && i < en) PetscCall(VecSetValue(x, i, 1.0, INSERT_VALUES));
64: PetscCall(VecAssemblyBegin(x));
65: PetscCall(VecAssemblyEnd(x));
66: }
67: PetscFunctionReturn(PETSC_SUCCESS);
68: }
70: /* these are approximate norms */
71: /* NORM_2: Estimating the matrix p-norm Nicholas J. Higham
72: NORM_1/NORM_INFINITY: A block algorithm for matrix 1-norm estimation, with an application to 1-norm pseudospectra Higham, Nicholas J. and Tisseur, Francoise */
73: PETSC_INTERN PetscErrorCode MatApproximateNorm_Private(Mat A, NormType normtype, PetscInt normsamples, PetscReal *n)
74: {
75: Vec x, y, w, z;
76: PetscReal normz, adot;
77: PetscScalar dot;
78: PetscInt i, j, N, jold = -1;
79: PetscBool boundtocpu = PETSC_TRUE;
81: PetscFunctionBegin;
82: #if defined(PETSC_HAVE_DEVICE)
83: boundtocpu = A->boundtocpu;
84: #endif
85: switch (normtype) {
86: case NORM_INFINITY:
87: case NORM_1:
88: if (normsamples < 0) normsamples = 10; /* pure guess */
89: if (normtype == NORM_INFINITY) {
90: Mat B;
91: PetscCall(MatCreateTranspose(A, &B));
92: A = B;
93: } else {
94: PetscCall(PetscObjectReference((PetscObject)A));
95: }
96: PetscCall(MatCreateVecs(A, &x, &y));
97: PetscCall(MatCreateVecs(A, &z, &w));
98: PetscCall(VecBindToCPU(x, boundtocpu));
99: PetscCall(VecBindToCPU(y, boundtocpu));
100: PetscCall(VecBindToCPU(z, boundtocpu));
101: PetscCall(VecBindToCPU(w, boundtocpu));
102: PetscCall(VecGetSize(x, &N));
103: PetscCall(VecSet(x, 1. / N));
104: *n = 0.0;
105: for (i = 0; i < normsamples; i++) {
106: PetscCall(MatMult(A, x, y));
107: PetscCall(VecPointwiseSign(w, y, VEC_SIGN_ZERO_TO_SIGNED_UNIT));
108: PetscCall(MatMultTranspose(A, w, z));
109: PetscCall(VecNorm(z, NORM_INFINITY, &normz));
110: PetscCall(VecDot(x, z, &dot));
111: adot = PetscAbsScalar(dot);
112: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> (%g %g)\n", NormTypes[normtype], i, (double)normz, (double)adot));
113: if (normz <= adot && i > 0) {
114: PetscCall(VecNorm(y, NORM_1, n));
115: break;
116: }
117: PetscCall(VecMax(z, &j, &normz));
118: if (j == jold) {
119: PetscCall(VecNorm(y, NORM_1, n));
120: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> breakdown (j==jold)\n", NormTypes[normtype], i));
121: break;
122: }
123: jold = j;
124: PetscCall(VecSetDelta(x, j));
125: }
126: PetscCall(MatDestroy(&A));
127: PetscCall(VecDestroy(&x));
128: PetscCall(VecDestroy(&w));
129: PetscCall(VecDestroy(&y));
130: PetscCall(VecDestroy(&z));
131: break;
132: case NORM_2:
133: if (normsamples < 0) normsamples = 20; /* pure guess */
134: PetscCall(MatCreateVecs(A, &x, &y));
135: PetscCall(MatCreateVecs(A, &z, NULL));
136: PetscCall(VecBindToCPU(x, boundtocpu));
137: PetscCall(VecBindToCPU(y, boundtocpu));
138: PetscCall(VecBindToCPU(z, boundtocpu));
139: PetscCall(VecSetRandom(x, NULL));
140: PetscCall(VecNormalize(x, NULL));
141: *n = 0.0;
142: for (i = 0; i < normsamples; i++) {
143: PetscCall(MatMult(A, x, y));
144: PetscCall(VecNormalize(y, n));
145: PetscCall(MatMultTranspose(A, y, z));
146: PetscCall(VecNorm(z, NORM_2, &normz));
147: PetscCall(VecDot(x, z, &dot));
148: adot = PetscAbsScalar(dot);
149: PetscCall(PetscInfo(A, "%s norm it %" PetscInt_FMT " -> %g (%g %g)\n", NormTypes[normtype], i, (double)*n, (double)normz, (double)adot));
150: if (normz <= adot) break;
151: if (i < normsamples - 1) {
152: Vec t;
154: PetscCall(VecNormalize(z, NULL));
155: t = x;
156: x = z;
157: z = t;
158: }
159: }
160: PetscCall(VecDestroy(&x));
161: PetscCall(VecDestroy(&y));
162: PetscCall(VecDestroy(&z));
163: break;
164: default:
165: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "%s norm not supported", NormTypes[normtype]);
166: }
167: PetscCall(PetscInfo(A, "%s norm %g computed in %" PetscInt_FMT " iterations\n", NormTypes[normtype], (double)*n, i));
168: PetscFunctionReturn(PETSC_SUCCESS);
169: }