/* Actual source code: bddcschurs.c */
1: #include <petsc/private/pcbddcimpl.h>
2: #include <petsc/private/pcbddcprivateimpl.h>
3: #include <../src/mat/impls/dense/seq/dense.h>
4: #include <petscblaslapack.h>
6: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *, PetscInt, PetscBT, PetscInt *, PetscInt *, PetscInt *);
7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat, PetscBool, MatReuse, Mat *);
8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC, Vec, Vec);
9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC, Vec, Vec);
/* Adapt a rhs or solution vector for the "benign subspace trick" used by PCBDDC.
   For each benign subset (ctx->benign_zerodiag_subs[n], an IS of local indices),
   the vector entries are shifted by a subset-dependent constant so that the
   modified saddle-point system stays solvable. When changing the rhs
   (sol == PETSC_FALSE) the original value at the last subset index is stashed in
   ctx->benign_save_vals[n]; it is restored when adapting the solution
   (sol == PETSC_TRUE). When full is PETSC_TRUE, extra interior/Schur coupling
   corrections are applied through ctx->benign_csAIB and ctx->benign_AIIm1ones.
   If v2 is not present, correction is done in-place on v; otherwise v is read
   and the adapted values are written to v2. */
PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
{
  PetscScalar *array;
  PetscScalar *array2;

  PetscFunctionBegin;
  /* nothing to do if no benign subsets are present */
  if (!ctx->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
  if (sol && full) {
    PetscInt n_I, size_schur;

    /* get sizes: v holds interior dofs followed by Schur (interface) dofs */
    PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
    PetscCall(VecGetSize(v, &n_I));
    n_I = n_I - size_schur;
    /* get schur sol from array; the raw pointer is intentionally reused after
       VecRestoreArray through the placed array in benign_dummy_schur_vec — the
       memory stays valid until VecResetArray below */
    PetscCall(VecGetArray(v, &array));
    PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
    PetscCall(VecRestoreArray(v, &array));
    /* apply interior sol correction */
    PetscCall(MatMultTranspose(ctx->benign_csAIB, ctx->benign_dummy_schur_vec, ctx->benign_corr_work));
    PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
    PetscCall(MatMultAdd(ctx->benign_AIIm1ones, ctx->benign_corr_work, v, v));
  }
  if (v2) {
    PetscInt nl;

    /* out-of-place: copy v into v2, then adapt array2 (= v2's data) below */
    PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
    PetscCall(VecGetLocalSize(v2, &nl));
    PetscCall(VecGetArray(v2, &array2));
    PetscCall(PetscArraycpy(array2, array, nl));
  } else {
    /* in-place: read and write through the same array */
    PetscCall(VecGetArray(v, &array));
    array2 = array;
  }
  if (!sol) { /* change rhs */
    PetscInt n;
    for (n = 0; n < ctx->benign_n; n++) {
      PetscScalar     sum = 0.;
      const PetscInt *cols;
      PetscInt        nz, i;

      PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
      PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
      /* sum over the first nz-1 entries, scaled by 1/nz; the last subset index
         plays a special (p0-like) role and is handled separately below */
      for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
#if defined(PETSC_USE_COMPLEX)
      /* complex division done componentwise to avoid complex/integer division */
      sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
#else
      sum = -sum / nz;
#endif
      for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
      /* stash the original value at the last index; restored in the sol branch */
      ctx->benign_save_vals[n] = array2[cols[nz - 1]];
      array2[cols[nz - 1]] = sum;
      PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
    }
  } else { /* adapt solution: same shift, but restore the saved value */
    PetscInt n;
    for (n = 0; n < ctx->benign_n; n++) {
      PetscScalar     sum = 0.;
      const PetscInt *cols;
      PetscInt        nz, i;

      PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[n], &nz));
      PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[n], &cols));
      for (i = 0; i < nz - 1; i++) sum += array[cols[i]];
#if defined(PETSC_USE_COMPLEX)
      sum = -(PetscRealPart(sum) / nz + PETSC_i * (PetscImaginaryPart(sum) / nz));
#else
      sum = -sum / nz;
#endif
      for (i = 0; i < nz - 1; i++) array2[cols[i]] += sum;
      /* restore the value saved while changing the rhs */
      array2[cols[nz - 1]] = ctx->benign_save_vals[n];
      PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[n], &cols));
    }
  }
  if (v2) {
    PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
    PetscCall(VecRestoreArray(v2, &array2));
  } else {
    PetscCall(VecRestoreArray(v, &array));
  }
  if (!sol && full) {
    Vec      usedv;
    PetscInt n_I, size_schur;

    /* get sizes (same interior/Schur split as above) */
    PetscCall(MatGetSize(ctx->benign_csAIB, &size_schur, NULL));
    PetscCall(VecGetSize(v, &n_I));
    n_I = n_I - size_schur;
    /* compute schur rhs correction on the vector that was actually adapted */
    if (v2) {
      usedv = v2;
    } else {
      usedv = v;
    }
    /* apply schur rhs correction; as above, the raw pointer outlives the
       RestoreArrayRead call until VecResetArray */
    PetscCall(MatMultTranspose(ctx->benign_AIIm1ones, usedv, ctx->benign_corr_work));
    PetscCall(VecGetArrayRead(usedv, (const PetscScalar **)&array));
    PetscCall(VecPlaceArray(ctx->benign_dummy_schur_vec, array + n_I));
    PetscCall(VecRestoreArrayRead(usedv, (const PetscScalar **)&array));
    PetscCall(MatMultAdd(ctx->benign_csAIB, ctx->benign_corr_work, ctx->benign_dummy_schur_vec, ctx->benign_dummy_schur_vec));
    PetscCall(VecResetArray(ctx->benign_dummy_schur_vec));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Common driver for the reuse-solvers shell PC applications.
   Solves with the already-factored matrix ctx->F, optionally transposed,
   either for the full (interior + interface) problem (full == PETSC_TRUE)
   or for the interior problem only. Solver-specific controls select the
   mode: MUMPS ICNTL(26) and MKL PARDISO iparm 70 steer how the factored
   solver treats the Schur complement during the solve phase (values follow
   the respective solver conventions — see solver documentation). */
static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
{
  PCBDDCReuseSolvers ctx;
  PetscBool          copy = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  if (full) {
    /* full solve mode */
    PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
#if defined(PETSC_HAVE_MKL_PARDISO)
    PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
#endif
    /* need a copy into the solver workspace only when vertices are present */
    copy = ctx->has_vertices;
  } else { /* interior solver */
    PetscCall(MatMumpsSetIcntl(ctx->F, 26, 0));
#if defined(PETSC_HAVE_MKL_PARDISO)
    PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 1));
#endif
    copy = PETSC_TRUE;
  }
  /* copy rhs into factored matrix workspace */
  if (copy) {
    PetscInt     n;
    PetscScalar *array, *array_solver;

    PetscCall(VecGetLocalSize(rhs, &n));
    PetscCall(VecGetArrayRead(rhs, (const PetscScalar **)&array));
    PetscCall(VecGetArray(ctx->rhs, &array_solver));
    PetscCall(PetscArraycpy(array_solver, array, n));
    PetscCall(VecRestoreArray(ctx->rhs, &array_solver));
    PetscCall(VecRestoreArrayRead(rhs, (const PetscScalar **)&array));

    /* benign trick: adapt rhs before the solve, solution after */
    PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->rhs, NULL, PETSC_FALSE, full));
    if (transpose) {
      PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, ctx->sol));
    } else {
      PetscCall(MatSolve(ctx->F, ctx->rhs, ctx->sol));
    }
    PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, ctx->sol, NULL, PETSC_TRUE, full));

    /* get back data to caller worskpace */
    PetscCall(VecGetArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
    PetscCall(VecGetArray(sol, &array));
    PetscCall(PetscArraycpy(array, array_solver, n));
    PetscCall(VecRestoreArray(sol, &array));
    PetscCall(VecRestoreArrayRead(ctx->sol, (const PetscScalar **)&array_solver));
  } else {
    if (ctx->benign_n) {
      /* out-of-place adaptation into ctx->rhs, leaving the caller's rhs intact */
      PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, rhs, ctx->rhs, PETSC_FALSE, full));
      if (transpose) {
        PetscCall(MatSolveTranspose(ctx->F, ctx->rhs, sol));
      } else {
        PetscCall(MatSolve(ctx->F, ctx->rhs, sol));
      }
      PetscCall(PCBDDCReuseSolversBenignAdapt(ctx, sol, NULL, PETSC_TRUE, full));
    } else {
      /* plain solve, no workspace copy and no benign adaptation */
      if (transpose) {
        PetscCall(MatSolveTranspose(ctx->F, rhs, sol));
      } else {
        PetscCall(MatSolve(ctx->F, rhs, sol));
      }
    }
  }
  /* restore defaults */
  PetscCall(MatMumpsSetIcntl(ctx->F, 26, -1));
#if defined(PETSC_HAVE_MKL_PARDISO)
  PetscCall(MatMkl_PardisoSetCntl(ctx->F, 70, 0));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell apply callback: full (interior + interface) correction solve */
static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell apply-transpose callback: full correction solve, transposed system */
static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell apply callback: interior-problem solve only */
static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_FALSE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell apply-transpose callback: interior-problem solve, transposed system */
static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
{
  PetscFunctionBegin;
  PetscCall(PCBDDCReuseSolvers_Solve_Private(pc, rhs, sol, PETSC_TRUE, PETSC_FALSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell view callback: views the factored matrix held by the context.
   For ASCII viewers the INFO format is pushed so MatView prints a summary
   rather than the full matrix. */
static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
{
  PCBDDCReuseSolvers ctx;
  PetscBool          isascii;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  if (isascii) PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_INFO));
  PetscCall(MatView(ctx->F, viewer));
  if (isascii) PetscCall(PetscViewerPopFormat(viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Release all PETSc objects held by a PCBDDCReuseSolvers context.
   The context struct itself is NOT freed here (see PCBDDCReuseSolvers_Destroy).
   All PETSc XxxDestroy routines accept NULL handles, so members that were
   never created are safe to pass. */
static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
{
  PetscInt i;

  PetscFunctionBegin;
  PetscCall(MatDestroy(&reuse->F));
  PetscCall(VecDestroy(&reuse->sol));
  PetscCall(VecDestroy(&reuse->rhs));
  PetscCall(PCDestroy(&reuse->interior_solver));
  PetscCall(PCDestroy(&reuse->correction_solver));
  PetscCall(ISDestroy(&reuse->is_R));
  PetscCall(ISDestroy(&reuse->is_B));
  PetscCall(VecScatterDestroy(&reuse->correction_scatter_B));
  PetscCall(VecDestroy(&reuse->sol_B));
  PetscCall(VecDestroy(&reuse->rhs_B));
  /* benign trick workspace: one IS per benign subset, plus saved values */
  for (i = 0; i < reuse->benign_n; i++) PetscCall(ISDestroy(&reuse->benign_zerodiag_subs[i]));
  PetscCall(PetscFree(reuse->benign_zerodiag_subs));
  PetscCall(PetscFree(reuse->benign_save_vals));
  PetscCall(MatDestroy(&reuse->benign_csAIB));
  PetscCall(MatDestroy(&reuse->benign_AIIm1ones));
  PetscCall(VecDestroy(&reuse->benign_corr_work));
  PetscCall(VecDestroy(&reuse->benign_dummy_schur_vec));
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* PCShell destroy callback: resets and frees the reuse-solvers context,
   then clears the shell context pointer so it cannot be used after free */
static PetscErrorCode PCBDDCReuseSolvers_Destroy(PC pc)
{
  PCBDDCReuseSolvers ctx;

  PetscFunctionBegin;
  PetscCall(PCShellGetContext(pc, &ctx));
  PetscCall(PCBDDCReuseSolversReset(ctx));
  PetscCall(PetscFree(ctx));
  PetscCall(PCShellSetContext(pc, NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}
266: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
267: {
268: Mat B, C, D, Bd, Cd, AinvBd;
269: KSP ksp;
270: PC pc;
271: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
272: PetscReal fill = 2.0;
273: PetscInt n_I;
274: PetscMPIInt size;
276: PetscFunctionBegin;
277: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)M), &size));
278: PetscCheck(size == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not for parallel matrices");
279: if (reuse == MAT_REUSE_MATRIX) {
280: PetscBool Sdense;
282: PetscCall(PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense));
283: PetscCheck(Sdense, PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "S should dense");
284: }
285: PetscCall(MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D));
286: PetscCall(MatSchurComplementGetKSP(M, &ksp));
287: PetscCall(KSPGetPC(ksp, &pc));
288: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCLU, &isLU));
289: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCILU, &isILU));
290: PetscCall(PetscObjectTypeCompare((PetscObject)pc, PCCHOLESKY, &isCHOL));
291: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATSEQDENSE, &Bdense));
292: PetscCall(PetscObjectTypeCompare((PetscObject)C, MATSEQDENSE, &Cdense));
293: PetscCall(MatGetSize(B, &n_I, NULL));
294: if (n_I) {
295: if (!Bdense) {
296: PetscCall(MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd));
297: } else {
298: Bd = B;
299: }
301: if (isLU || isILU || isCHOL) {
302: Mat fact;
303: PetscCall(KSPSetUp(ksp));
304: PetscCall(PCFactorGetMatrix(pc, &fact));
305: PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
306: PetscCall(MatMatSolve(fact, Bd, AinvBd));
307: } else {
308: PetscBool ex = PETSC_TRUE;
310: if (ex) {
311: Mat Ainvd;
313: PetscCall(PCComputeOperator(pc, MATDENSE, &Ainvd));
314: PetscCall(MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd));
315: PetscCall(MatDestroy(&Ainvd));
316: } else {
317: Vec sol, rhs;
318: PetscScalar *arrayrhs, *arraysol;
319: PetscInt i, nrhs, n;
321: PetscCall(MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd));
322: PetscCall(MatGetSize(Bd, &n, &nrhs));
323: PetscCall(MatDenseGetArray(Bd, &arrayrhs));
324: PetscCall(MatDenseGetArray(AinvBd, &arraysol));
325: PetscCall(KSPGetSolution(ksp, &sol));
326: PetscCall(KSPGetRhs(ksp, &rhs));
327: for (i = 0; i < nrhs; i++) {
328: PetscCall(VecPlaceArray(rhs, arrayrhs + i * n));
329: PetscCall(VecPlaceArray(sol, arraysol + i * n));
330: PetscCall(KSPSolve(ksp, rhs, sol));
331: PetscCall(VecResetArray(rhs));
332: PetscCall(VecResetArray(sol));
333: }
334: PetscCall(MatDenseRestoreArray(Bd, &arrayrhs));
335: PetscCall(MatDenseRestoreArray(AinvBd, &arrayrhs));
336: }
337: }
338: if (!Bdense & !issym) PetscCall(MatDestroy(&Bd));
340: if (!issym) {
341: if (!Cdense) {
342: PetscCall(MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd));
343: } else {
344: Cd = C;
345: }
346: PetscCall(MatMatMult(Cd, AinvBd, reuse, fill, S));
347: if (!Cdense) PetscCall(MatDestroy(&Cd));
348: } else {
349: PetscCall(MatTransposeMatMult(Bd, AinvBd, reuse, fill, S));
350: if (!Bdense) PetscCall(MatDestroy(&Bd));
351: }
352: PetscCall(MatDestroy(&AinvBd));
353: }
355: if (D) {
356: Mat Dd;
357: PetscBool Ddense;
359: PetscCall(PetscObjectTypeCompare((PetscObject)D, MATSEQDENSE, &Ddense));
360: if (!Ddense) {
361: PetscCall(MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd));
362: } else {
363: Dd = D;
364: }
365: if (n_I) PetscCall(MatAYPX(*S, -1.0, Dd, SAME_NONZERO_PATTERN));
366: else {
367: if (reuse == MAT_INITIAL_MATRIX) {
368: PetscCall(MatDuplicate(Dd, MAT_COPY_VALUES, S));
369: } else {
370: PetscCall(MatCopy(Dd, *S, SAME_NONZERO_PATTERN));
371: }
372: }
373: if (!Ddense) PetscCall(MatDestroy(&Dd));
374: } else {
375: PetscCall(MatScale(*S, -1.0));
376: }
377: PetscFunctionReturn(PETSC_SUCCESS);
378: }
380: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
381: {
382: Mat F, A_II, A_IB, A_BI, A_BB, AE_II;
383: Mat S_all;
384: Vec gstash, lstash;
385: VecScatter sstash;
386: IS is_I, is_I_layer;
387: IS all_subsets, all_subsets_mult, all_subsets_n;
388: PetscScalar *stasharray, *Bwork;
389: PetscInt *all_local_idx_N, *all_local_subid_N = NULL;
390: PetscInt *auxnum1, *auxnum2;
391: PetscInt *local_subs = sub_schurs->graph->local_subs;
392: PetscInt i, subset_size, max_subset_size, n_local_subs = sub_schurs->graph->n_local_subs;
393: PetscInt n_B, extra, local_size, global_size;
394: PetscInt local_stash_size;
395: PetscBLASInt B_N, B_ierr, B_lwork, *pivots;
396: MPI_Comm comm_n;
397: PetscBool deluxe = PETSC_TRUE;
398: PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
399: PetscViewer matl_dbg_viewer = NULL;
400: PetscBool flg, multi_element = sub_schurs->graph->multi_element;
402: PetscFunctionBegin;
403: PetscCall(MatDestroy(&sub_schurs->A));
404: PetscCall(MatDestroy(&sub_schurs->S));
405: if (Ain) {
406: PetscCall(PetscObjectReference((PetscObject)Ain));
407: sub_schurs->A = Ain;
408: }
410: PetscCall(PetscObjectReference((PetscObject)Sin));
411: sub_schurs->S = Sin;
412: if (sub_schurs->schur_explicit) sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
414: /* preliminary checks */
415: PetscCheck(sub_schurs->schur_explicit || !compute_Stilda, PetscObjectComm((PetscObject)sub_schurs->l2gmap), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_PARDISO");
417: if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
419: /* debug (MATLAB) */
420: if (sub_schurs->debug) {
421: PetscMPIInt size, rank;
422: PetscInt nr, *print_schurs_ranks, print_schurs = PETSC_FALSE;
424: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &size));
425: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
426: nr = size;
427: PetscCall(PetscMalloc1(nr, &print_schurs_ranks));
428: PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
429: PetscCall(PetscOptionsIntArray("-sub_schurs_debug_ranks", "Ranks to debug (all if the option is not used)", NULL, print_schurs_ranks, &nr, &flg));
430: if (!flg) print_schurs = PETSC_TRUE;
431: else {
432: print_schurs = PETSC_FALSE;
433: for (i = 0; i < nr; i++)
434: if (print_schurs_ranks[i] == rank) {
435: print_schurs = PETSC_TRUE;
436: break;
437: }
438: }
439: PetscOptionsEnd();
440: PetscCall(PetscFree(print_schurs_ranks));
441: if (print_schurs) {
442: char filename[256];
444: PetscCall(PetscSNPrintf(filename, sizeof(filename), "sub_schurs_Schur_r%d.m", PetscGlobalRank));
445: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &matl_dbg_viewer));
446: PetscCall(PetscViewerPushFormat(matl_dbg_viewer, PETSC_VIEWER_ASCII_MATLAB));
447: }
448: }
450: /* DEBUG: turn on/off multi-element code path */
451: PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_multielement_code", &multi_element, NULL));
452: if (n_local_subs == 0) multi_element = PETSC_FALSE;
454: /* restrict work on active processes */
455: if (sub_schurs->restrict_comm) {
456: PetscSubcomm subcomm;
457: PetscMPIInt color, rank;
459: color = 0;
460: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
461: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &rank));
462: PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &subcomm));
463: PetscCall(PetscSubcommSetNumber(subcomm, 2));
464: PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
465: PetscCall(PetscCommDuplicate(PetscSubcommChild(subcomm), &comm_n, NULL));
466: PetscCall(PetscSubcommDestroy(&subcomm));
467: if (!sub_schurs->n_subs) {
468: PetscCall(PetscCommDestroy(&comm_n));
469: PetscFunctionReturn(PETSC_SUCCESS);
470: }
471: } else {
472: PetscCall(PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap), &comm_n, NULL));
473: }
475: /* get Schur complement matrices */
476: if (!sub_schurs->schur_explicit) {
477: Mat tA_IB, tA_BI, tA_BB;
478: PetscBool isseqsbaij;
479: PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, NULL, &tA_IB, &tA_BI, &tA_BB));
480: PetscCall(PetscObjectTypeCompare((PetscObject)tA_BB, MATSEQSBAIJ, &isseqsbaij));
481: if (isseqsbaij) {
482: PetscCall(MatConvert(tA_BB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BB));
483: PetscCall(MatConvert(tA_IB, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_IB));
484: PetscCall(MatConvert(tA_BI, MATSEQAIJ, MAT_INITIAL_MATRIX, &A_BI));
485: } else {
486: PetscCall(PetscObjectReference((PetscObject)tA_BB));
487: A_BB = tA_BB;
488: PetscCall(PetscObjectReference((PetscObject)tA_IB));
489: A_IB = tA_IB;
490: PetscCall(PetscObjectReference((PetscObject)tA_BI));
491: A_BI = tA_BI;
492: }
493: } else {
494: A_II = NULL;
495: A_IB = NULL;
496: A_BI = NULL;
497: A_BB = NULL;
498: }
499: S_all = NULL;
501: /* determine interior problems */
502: PetscCall(ISGetLocalSize(sub_schurs->is_I, &i));
503: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
504: PetscBT touched;
505: const PetscInt *idx_B;
506: PetscInt n_I, n_B, n_local_dofs, n_prev_added, j, layer, *local_numbering;
508: PetscCheck(xadj, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot request layering without adjacency");
509: /* get sizes */
510: PetscCall(ISGetLocalSize(sub_schurs->is_I, &n_I));
511: PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
513: PetscCall(PetscMalloc1(n_I + n_B, &local_numbering));
514: PetscCall(PetscBTCreate(n_I + n_B, &touched));
515: PetscCall(PetscBTMemzero(n_I + n_B, touched));
517: /* all boundary dofs must be skipped when adding layers */
518: PetscCall(ISGetIndices(sub_schurs->is_B, &idx_B));
519: for (j = 0; j < n_B; j++) PetscCall(PetscBTSet(touched, idx_B[j]));
520: PetscCall(PetscArraycpy(local_numbering, idx_B, n_B));
521: PetscCall(ISRestoreIndices(sub_schurs->is_B, &idx_B));
523: /* add prescribed number of layers of dofs */
524: n_local_dofs = n_B;
525: n_prev_added = n_B;
526: for (layer = 0; layer < nlayers; layer++) {
527: PetscInt n_added = 0;
528: if (n_local_dofs == n_I + n_B) break;
529: PetscCheck(n_local_dofs <= n_I + n_B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error querying layer %" PetscInt_FMT ". Out of bound access (%" PetscInt_FMT " > %" PetscInt_FMT ")", layer, n_local_dofs, n_I + n_B);
530: PetscCall(PCBDDCAdjGetNextLayer_Private(local_numbering + n_local_dofs, n_prev_added, touched, xadj, adjncy, &n_added));
531: n_prev_added = n_added;
532: n_local_dofs += n_added;
533: if (!n_added) break;
534: }
535: PetscCall(PetscBTDestroy(&touched));
537: /* IS for I layer dofs in original numbering */
538: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I), n_local_dofs - n_B, local_numbering + n_B, PETSC_COPY_VALUES, &is_I_layer));
539: PetscCall(PetscFree(local_numbering));
540: PetscCall(ISSort(is_I_layer));
541: /* IS for I layer dofs in I numbering */
542: if (!sub_schurs->schur_explicit) {
543: ISLocalToGlobalMapping ItoNmap;
544: PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_I, &ItoNmap));
545: PetscCall(ISGlobalToLocalMappingApplyIS(ItoNmap, IS_GTOLM_DROP, is_I_layer, &is_I));
546: PetscCall(ISLocalToGlobalMappingDestroy(&ItoNmap));
548: /* II block */
549: PetscCall(MatCreateSubMatrix(A_II, is_I, is_I, MAT_INITIAL_MATRIX, &AE_II));
550: }
551: } else {
552: PetscInt n_I;
554: /* IS for I dofs in original numbering */
555: PetscCall(PetscObjectReference((PetscObject)sub_schurs->is_I));
556: is_I_layer = sub_schurs->is_I;
558: /* IS for I dofs in I numbering (strided 1) */
559: if (!sub_schurs->schur_explicit) {
560: PetscCall(ISGetSize(sub_schurs->is_I, &n_I));
561: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I), n_I, 0, 1, &is_I));
563: /* II block is the same */
564: PetscCall(PetscObjectReference((PetscObject)A_II));
565: AE_II = A_II;
566: }
567: }
569: /* Get info on subset sizes and sum of all subsets sizes */
570: max_subset_size = 0;
571: local_size = 0;
572: for (i = 0; i < sub_schurs->n_subs; i++) {
573: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
574: max_subset_size = PetscMax(subset_size, max_subset_size);
575: local_size += subset_size;
576: }
578: /* Work arrays for local indices */
579: extra = 0;
580: PetscCall(ISGetLocalSize(sub_schurs->is_B, &n_B));
581: if (sub_schurs->schur_explicit && is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &extra));
582: PetscCall(PetscMalloc1(n_B + extra, &all_local_idx_N));
583: if (multi_element) PetscCall(PetscMalloc1(n_B + extra, &all_local_subid_N));
584: if (extra) {
585: const PetscInt *idxs;
586: PetscCall(ISGetIndices(is_I_layer, &idxs));
587: PetscCall(PetscArraycpy(all_local_idx_N, idxs, extra));
588: if (multi_element)
589: for (PetscInt j = 0; j < extra; j++) all_local_subid_N[j] = local_subs[idxs[j]];
590: PetscCall(ISRestoreIndices(is_I_layer, &idxs));
591: }
592: PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum1));
593: PetscCall(PetscMalloc1(sub_schurs->n_subs, &auxnum2));
595: /* Get local indices in local numbering */
596: local_size = 0;
597: local_stash_size = 0;
598: for (i = 0; i < sub_schurs->n_subs; i++) {
599: const PetscInt *idxs;
601: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
602: PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
603: /* start (smallest in global ordering) and multiplicity */
604: auxnum1[i] = idxs[0];
605: auxnum2[i] = subset_size * subset_size;
606: /* subset indices in local numbering */
607: PetscCall(PetscArraycpy(all_local_idx_N + local_size + extra, idxs, subset_size));
608: if (multi_element)
609: for (PetscInt j = 0; j < subset_size; j++) all_local_subid_N[j + local_size + extra] = local_subs[idxs[j]];
610: PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
611: local_size += subset_size;
612: local_stash_size += subset_size * subset_size;
613: }
615: /* allocate extra workspace needed only for GETRI or SYTRF when inverting the blocks or the entire Schur complement */
616: use_potr = use_sytr = PETSC_FALSE;
617: if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
618: use_potr = PETSC_TRUE;
619: } else if (sub_schurs->is_symmetric) {
620: use_sytr = PETSC_TRUE;
621: }
622: if (local_size && !use_potr && compute_Stilda) {
623: PetscScalar lwork, dummyscalar = 0.;
624: PetscBLASInt dummyint = 0;
626: B_lwork = -1;
627: PetscCall(PetscBLASIntCast(local_size, &B_N));
628: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
629: if (use_sytr) {
630: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
631: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
632: } else {
633: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, &dummyscalar, &B_N, &dummyint, &lwork, &B_lwork, &B_ierr));
634: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
635: }
636: PetscCall(PetscFPTrapPop());
637: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
638: PetscCall(PetscMalloc2(B_lwork, &Bwork, B_N, &pivots));
639: } else {
640: Bwork = NULL;
641: pivots = NULL;
642: }
644: /* prepare data for summing up properly schurs on subsets */
645: PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum1, PETSC_OWN_POINTER, &all_subsets_n));
646: PetscCall(ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap, all_subsets_n, &all_subsets));
647: PetscCall(ISDestroy(&all_subsets_n));
648: PetscCall(ISCreateGeneral(comm_n, sub_schurs->n_subs, auxnum2, PETSC_OWN_POINTER, &all_subsets_mult));
649: PetscCall(ISRenumber(all_subsets, all_subsets_mult, &global_size, &all_subsets_n));
650: PetscCall(ISDestroy(&all_subsets));
651: PetscCall(ISDestroy(&all_subsets_mult));
652: PetscCall(ISGetLocalSize(all_subsets_n, &i));
653: PetscCheck(i == local_stash_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid size of new subset! %" PetscInt_FMT " != %" PetscInt_FMT, i, local_stash_size);
654: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, local_stash_size, NULL, &lstash));
655: PetscCall(VecCreateMPI(comm_n, PETSC_DECIDE, global_size, &gstash));
656: PetscCall(VecScatterCreate(lstash, NULL, gstash, all_subsets_n, &sstash));
657: PetscCall(ISDestroy(&all_subsets_n));
659: /* subset indices in local boundary numbering */
660: if (!sub_schurs->is_Ej_all) {
661: PetscInt *all_local_idx_B;
663: PetscCall(PetscMalloc1(local_size, &all_local_idx_B));
664: PetscCall(ISGlobalToLocalMappingApply(sub_schurs->BtoNmap, IS_GTOLM_DROP, local_size, all_local_idx_N + extra, &subset_size, all_local_idx_B));
665: PetscCheck(subset_size == local_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in sub_schurs serial (BtoNmap)! %" PetscInt_FMT " != %" PetscInt_FMT, subset_size, local_size);
666: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, local_size, all_local_idx_B, PETSC_OWN_POINTER, &sub_schurs->is_Ej_all));
667: }
669: if (change) {
670: ISLocalToGlobalMapping BtoS;
671: IS change_primal_B;
672: IS change_primal_all;
674: PetscCheck(!sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
675: PetscCheck(!sub_schurs->change, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
676: PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change_primal_sub));
677: for (i = 0; i < sub_schurs->n_subs; i++) {
678: ISLocalToGlobalMapping NtoS;
679: PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i], &NtoS));
680: PetscCall(ISGlobalToLocalMappingApplyIS(NtoS, IS_GTOLM_DROP, change_primal, &sub_schurs->change_primal_sub[i]));
681: PetscCall(ISLocalToGlobalMappingDestroy(&NtoS));
682: }
683: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, change_primal, &change_primal_B));
684: PetscCall(ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all, &BtoS));
685: PetscCall(ISGlobalToLocalMappingApplyIS(BtoS, IS_GTOLM_DROP, change_primal_B, &change_primal_all));
686: PetscCall(ISLocalToGlobalMappingDestroy(&BtoS));
687: PetscCall(ISDestroy(&change_primal_B));
688: PetscCall(PetscMalloc1(sub_schurs->n_subs, &sub_schurs->change));
689: for (i = 0; i < sub_schurs->n_subs; i++) {
690: Mat change_sub;
692: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
693: PetscCall(KSPCreate(PETSC_COMM_SELF, &sub_schurs->change[i]));
694: PetscCall(KSPSetNestLevel(sub_schurs->change[i], 1)); /* do not seem to have direct access to a PC from which to get the level of nests */
695: PetscCall(KSPSetType(sub_schurs->change[i], KSPPREONLY));
696: if (!sub_schurs->change_with_qr) {
697: PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_sub));
698: } else {
699: Mat change_subt;
700: PetscCall(MatCreateSubMatrix(change, sub_schurs->is_subs[i], sub_schurs->is_subs[i], MAT_INITIAL_MATRIX, &change_subt));
701: PetscCall(MatConvert(change_subt, MATSEQDENSE, MAT_INITIAL_MATRIX, &change_sub));
702: PetscCall(MatDestroy(&change_subt));
703: }
704: PetscCall(KSPSetOperators(sub_schurs->change[i], change_sub, change_sub));
705: PetscCall(MatDestroy(&change_sub));
706: PetscCall(KSPSetOptionsPrefix(sub_schurs->change[i], sub_schurs->prefix));
707: PetscCall(KSPAppendOptionsPrefix(sub_schurs->change[i], "sub_schurs_change_"));
708: }
709: PetscCall(ISDestroy(&change_primal_all));
710: }
712: /* Local matrix of all local Schur on subsets (transposed) */
713: if (!sub_schurs->S_Ej_all) {
714: Mat T;
715: PetscScalar *v;
716: PetscInt *ii, *jj;
717: PetscInt cum, i, j, k;
719: /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
720: Allocate properly a representative matrix and duplicate */
721: PetscCall(PetscMalloc3(local_size + 1, &ii, local_stash_size, &jj, local_stash_size, &v));
722: ii[0] = 0;
723: cum = 0;
724: for (i = 0; i < sub_schurs->n_subs; i++) {
725: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
726: for (j = 0; j < subset_size; j++) {
727: const PetscInt row = cum + j;
728: PetscInt col = cum;
730: ii[row + 1] = ii[row] + subset_size;
731: for (k = ii[row]; k < ii[row + 1]; k++) {
732: jj[k] = col;
733: col++;
734: }
735: }
736: cum += subset_size;
737: }
738: PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, local_size, local_size, ii, jj, v, &T));
739: PetscCall(MatDuplicate(T, MAT_DO_NOT_COPY_VALUES, &sub_schurs->S_Ej_all));
740: PetscCall(MatDestroy(&T));
741: PetscCall(PetscFree3(ii, jj, v));
742: }
743: /* matrices for deluxe scaling and adaptive selection */
744: if (compute_Stilda) {
745: if (!sub_schurs->sum_S_Ej_tilda_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_tilda_all));
746: if (!sub_schurs->sum_S_Ej_inv_all && deluxe) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_inv_all));
747: }
749: /* Compute Schur complements explicitly */
750: F = NULL;
751: if (!sub_schurs->schur_explicit) {
752: /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
753: it is not efficient, unless the economic version of the scaling is used */
754: Mat S_Ej_expl;
755: PetscScalar *work;
756: PetscInt j, *dummy_idx;
757: PetscBool Sdense;
759: PetscCall(PetscMalloc2(max_subset_size, &dummy_idx, max_subset_size * max_subset_size, &work));
760: local_size = 0;
761: for (i = 0; i < sub_schurs->n_subs; i++) {
762: IS is_subset_B;
763: Mat AE_EE, AE_IE, AE_EI, S_Ej;
765: /* subsets in original and boundary numbering */
766: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, sub_schurs->is_subs[i], &is_subset_B));
767: /* EE block */
768: PetscCall(MatCreateSubMatrix(A_BB, is_subset_B, is_subset_B, MAT_INITIAL_MATRIX, &AE_EE));
769: /* IE block */
770: PetscCall(MatCreateSubMatrix(A_IB, is_I, is_subset_B, MAT_INITIAL_MATRIX, &AE_IE));
771: /* EI block */
772: if (sub_schurs->is_symmetric) {
773: PetscCall(MatCreateTranspose(AE_IE, &AE_EI));
774: } else if (sub_schurs->is_hermitian) {
775: PetscCall(MatCreateHermitianTranspose(AE_IE, &AE_EI));
776: } else {
777: PetscCall(MatCreateSubMatrix(A_BI, is_subset_B, is_I, MAT_INITIAL_MATRIX, &AE_EI));
778: }
779: PetscCall(ISDestroy(&is_subset_B));
780: PetscCall(MatCreateSchurComplement(AE_II, AE_II, AE_IE, AE_EI, AE_EE, &S_Ej));
781: PetscCall(MatDestroy(&AE_EE));
782: PetscCall(MatDestroy(&AE_IE));
783: PetscCall(MatDestroy(&AE_EI));
784: if (AE_II == A_II) { /* we can reuse the same ksp */
785: KSP ksp;
786: PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &ksp));
787: PetscCall(MatSchurComplementSetKSP(S_Ej, ksp));
788: } else { /* build new ksp object which inherits ksp and pc types from the original one */
789: KSP origksp, schurksp;
790: PC origpc, schurpc;
791: KSPType ksp_type;
792: PetscInt n_internal;
793: PetscBool ispcnone;
795: PetscCall(MatSchurComplementGetKSP(sub_schurs->S, &origksp));
796: PetscCall(MatSchurComplementGetKSP(S_Ej, &schurksp));
797: PetscCall(KSPGetType(origksp, &ksp_type));
798: PetscCall(KSPSetType(schurksp, ksp_type));
799: PetscCall(KSPGetPC(schurksp, &schurpc));
800: PetscCall(KSPGetPC(origksp, &origpc));
801: PetscCall(PetscObjectTypeCompare((PetscObject)origpc, PCNONE, &ispcnone));
802: if (!ispcnone) {
803: PCType pc_type;
804: PetscCall(PCGetType(origpc, &pc_type));
805: PetscCall(PCSetType(schurpc, pc_type));
806: } else {
807: PetscCall(PCSetType(schurpc, PCLU));
808: }
809: PetscCall(ISGetSize(is_I, &n_internal));
810: if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
811: MatSolverType solver = NULL;
812: PetscCall(PCFactorGetMatSolverType(origpc, &solver));
813: if (solver) PetscCall(PCFactorSetMatSolverType(schurpc, solver));
814: }
815: PetscCall(KSPSetUp(schurksp));
816: }
817: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
818: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &S_Ej_expl));
819: PetscCall(PCBDDCComputeExplicitSchur(S_Ej, sub_schurs->is_symmetric, MAT_REUSE_MATRIX, &S_Ej_expl));
820: PetscCall(PetscObjectTypeCompare((PetscObject)S_Ej_expl, MATSEQDENSE, &Sdense));
821: PetscCheck(Sdense, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented for sparse matrices");
822: for (j = 0; j < subset_size; j++) dummy_idx[j] = local_size + j;
823: PetscCall(MatSetValues(sub_schurs->S_Ej_all, subset_size, dummy_idx, subset_size, dummy_idx, work, INSERT_VALUES));
824: PetscCall(MatDestroy(&S_Ej));
825: PetscCall(MatDestroy(&S_Ej_expl));
826: local_size += subset_size;
827: }
828: PetscCall(PetscFree2(dummy_idx, work));
829: /* free */
830: PetscCall(ISDestroy(&is_I));
831: PetscCall(MatDestroy(&AE_II));
832: PetscCall(PetscFree(all_local_idx_N));
833: } else {
834: Mat A, cs_AIB_mat = NULL, benign_AIIm1_ones_mat = NULL;
835: Mat *gdswA;
836: Vec Dall = NULL;
837: IS is_A_all, *is_p_r = NULL, is_schur;
838: MatType Stype;
839: PetscScalar *work, *S_data, *schur_factor, infty = PETSC_MAX_REAL;
840: PetscScalar *SEj_arr = NULL, *SEjinv_arr = NULL;
841: const PetscScalar *rS_data;
842: PetscInt n, n_I, size_schur, size_active_schur, cum, cum2;
843: PetscBool economic, solver_S, S_lower_triangular = PETSC_FALSE;
844: PetscBool schur_has_vertices, factor_workaround;
845: PetscBool use_cholesky;
846: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
847: PetscBool oldpin;
848: #endif
849: /* multi-element */
850: IS *is_sub_all = NULL, *is_sub_schur_all = NULL, *is_sub_schur = NULL;
852: /* get sizes */
853: n_I = 0;
854: if (is_I_layer) PetscCall(ISGetLocalSize(is_I_layer, &n_I));
855: economic = PETSC_FALSE;
856: PetscCall(ISGetLocalSize(sub_schurs->is_I, &cum));
857: if (cum != n_I) economic = PETSC_TRUE;
858: PetscCall(MatGetLocalSize(sub_schurs->A, &n, NULL));
859: size_active_schur = local_size;
861: /* import scaling vector (wrong formulation if we have 3D edges) */
862: if (scaling && compute_Stilda) {
863: const PetscScalar *array;
864: PetscScalar *array2;
865: const PetscInt *idxs;
866: PetscInt i;
868: PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs));
869: PetscCall(VecCreateSeq(PETSC_COMM_SELF, size_active_schur, &Dall));
870: PetscCall(VecGetArrayRead(scaling, &array));
871: PetscCall(VecGetArray(Dall, &array2));
872: for (i = 0; i < size_active_schur; i++) array2[i] = array[idxs[i]];
873: PetscCall(VecRestoreArray(Dall, &array2));
874: PetscCall(VecRestoreArrayRead(scaling, &array));
875: PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs));
876: deluxe = PETSC_FALSE;
877: }
879: /* size active schurs does not count any dirichlet or vertex dof on the interface */
880: factor_workaround = PETSC_FALSE;
881: schur_has_vertices = PETSC_FALSE;
882: cum = n_I + size_active_schur;
883: if (sub_schurs->is_dir) {
884: const PetscInt *idxs;
885: PetscInt n_dir;
887: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &n_dir));
888: PetscCall(ISGetIndices(sub_schurs->is_dir, &idxs));
889: PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_dir));
890: if (multi_element)
891: for (PetscInt j = 0; j < n_dir; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]];
892: PetscCall(ISRestoreIndices(sub_schurs->is_dir, &idxs));
893: cum += n_dir;
894: if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
895: }
896: /* include the primal vertices in the Schur complement */
897: if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
898: PetscInt n_v;
900: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
901: if (n_v) {
902: const PetscInt *idxs;
904: PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
905: PetscCall(PetscArraycpy(all_local_idx_N + cum, idxs, n_v));
906: if (multi_element)
907: for (PetscInt j = 0; j < n_v; j++) all_local_subid_N[j + cum] = local_subs[idxs[j]];
908: PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
909: cum += n_v;
910: if (!sub_schurs->gdsw) factor_workaround = PETSC_TRUE;
911: schur_has_vertices = PETSC_TRUE;
912: }
913: }
914: size_schur = cum - n_I;
915: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, all_local_idx_N, PETSC_OWN_POINTER, &is_A_all));
916: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
917: oldpin = sub_schurs->A->boundtocpu;
918: PetscCall(MatBindToCPU(sub_schurs->A, PETSC_TRUE));
919: #endif
920: if (cum == n) {
921: PetscCall(ISSetPermutation(is_A_all));
922: PetscCall(MatPermute(sub_schurs->A, is_A_all, is_A_all, &A));
923: } else {
924: PetscCall(MatCreateSubMatrix(sub_schurs->A, is_A_all, is_A_all, MAT_INITIAL_MATRIX, &A));
925: }
926: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
927: PetscCall(MatBindToCPU(sub_schurs->A, oldpin));
928: #endif
929: PetscCall(MatSetOptionsPrefixFactor(A, sub_schurs->prefix));
930: PetscCall(MatAppendOptionsPrefixFactor(A, "sub_schurs_"));
931: /* subsets ordered last */
932: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is_schur));
934: if (multi_element) {
935: PetscInt *idx_sub;
937: PetscCall(PetscMalloc3(n_local_subs, &is_sub_all, n_local_subs, &is_sub_schur_all, n_local_subs, &is_sub_schur));
938: PetscCall(PetscMalloc1(n + size_schur, &idx_sub));
939: for (PetscInt sub = 0; sub < n_local_subs; sub++) {
940: PetscInt size_sub = 0, size_schur_sub = 0, size_I_sub;
942: for (PetscInt j = 0; j < n_I; j++)
943: if (all_local_subid_N[j] == sub) idx_sub[size_sub++] = j;
944: size_I_sub = size_sub;
945: for (PetscInt j = n_I; j < n_I + size_schur; j++)
946: if (all_local_subid_N[j] == sub) {
947: idx_sub[size_sub++] = j;
948: idx_sub[n + size_schur_sub++] = j - n_I;
949: }
951: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_sub, idx_sub, PETSC_COPY_VALUES, &is_sub_all[sub]));
952: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size_schur_sub, idx_sub + n, PETSC_COPY_VALUES, &is_sub_schur[sub]));
953: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur_sub, size_I_sub, 1, &is_sub_schur_all[sub]));
954: }
955: PetscCall(PetscFree(idx_sub));
956: }
958: /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
959: this is a workaround */
960: if (benign_n) {
961: Vec v, benign_AIIm1_ones;
962: ISLocalToGlobalMapping N_to_reor;
963: IS is_p0, is_p0_p;
964: PetscScalar *cs_AIB, *AIIm1_data;
965: PetscInt sizeA;
967: PetscCall(ISLocalToGlobalMappingCreateIS(is_A_all, &N_to_reor));
968: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, benign_n, benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
969: PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, is_p0, &is_p0_p));
970: PetscCall(ISDestroy(&is_p0));
971: PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
972: PetscCall(VecGetSize(v, &sizeA));
973: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, sizeA, benign_n, NULL, &benign_AIIm1_ones_mat));
974: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, benign_n, NULL, &cs_AIB_mat));
975: PetscCall(MatDenseGetArray(cs_AIB_mat, &cs_AIB));
976: PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
977: PetscCall(PetscMalloc1(benign_n, &is_p_r));
978: /* compute colsum of A_IB restricted to pressures */
979: for (i = 0; i < benign_n; i++) {
980: const PetscScalar *array;
981: const PetscInt *idxs;
982: PetscInt j, nz;
984: PetscCall(ISGlobalToLocalMappingApplyIS(N_to_reor, IS_GTOLM_DROP, benign_zerodiag_subs[i], &is_p_r[i]));
985: PetscCall(ISGetLocalSize(is_p_r[i], &nz));
986: PetscCall(ISGetIndices(is_p_r[i], &idxs));
987: for (j = 0; j < nz; j++) AIIm1_data[idxs[j] + sizeA * i] = 1.;
988: PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
989: PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
990: PetscCall(MatMult(A, benign_AIIm1_ones, v));
991: PetscCall(VecResetArray(benign_AIIm1_ones));
992: PetscCall(VecGetArrayRead(v, &array));
993: for (j = 0; j < size_schur; j++) {
994: #if defined(PETSC_USE_COMPLEX)
995: cs_AIB[i * size_schur + j] = (PetscRealPart(array[j + n_I]) / nz + PETSC_i * (PetscImaginaryPart(array[j + n_I]) / nz));
996: #else
997: cs_AIB[i * size_schur + j] = array[j + n_I] / nz;
998: #endif
999: }
1000: PetscCall(VecRestoreArrayRead(v, &array));
1001: }
1002: PetscCall(MatDenseRestoreArray(cs_AIB_mat, &cs_AIB));
1003: PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1004: PetscCall(VecDestroy(&v));
1005: PetscCall(VecDestroy(&benign_AIIm1_ones));
1006: PetscCall(MatSetOption(A, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
1007: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
1008: PetscCall(MatSetOption(A, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1009: PetscCall(MatZeroRowsColumnsIS(A, is_p0_p, 1.0, NULL, NULL));
1010: PetscCall(ISDestroy(&is_p0_p));
1011: PetscCall(ISLocalToGlobalMappingDestroy(&N_to_reor));
1012: }
1013: PetscCall(MatSetOption(A, MAT_SYMMETRIC, sub_schurs->is_symmetric));
1014: PetscCall(MatSetOption(A, MAT_HERMITIAN, sub_schurs->is_hermitian));
1015: PetscCall(MatSetOption(A, MAT_SPD, sub_schurs->is_posdef));
1017: /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
1018: use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
1020: /* when using the benign subspace trick, the local Schur complements are SPD */
1021: /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
1022: Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
1023: if (benign_trick) {
1024: sub_schurs->is_posdef = PETSC_TRUE;
1025: PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &flg));
1026: if (flg) use_cholesky = PETSC_FALSE;
1027: }
1028: if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = use_cholesky ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU;
1030: if (n_I && !multi_element) {
1031: char stype[64];
1032: PetscBool gpu = PETSC_FALSE;
1034: PetscCall(MatGetFactor(A, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F));
1035: PetscCheck(F, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)A)->type_name);
1036: PetscCall(MatSetErrorIfFailure(A, PETSC_TRUE));
1037: #if defined(PETSC_HAVE_MKL_PARDISO)
1038: if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1039: #endif
1040: PetscCall(MatFactorSetSchurIS(F, is_schur));
1042: /* factorization step */
1043: switch (sub_schurs->mat_factor_type) {
1044: case MAT_FACTOR_CHOLESKY:
1045: PetscCall(MatCholeskyFactorSymbolic(F, A, NULL, NULL));
1046: /* be sure that icntl 19 is not set by command line */
1047: PetscCall(MatMumpsSetIcntl(F, 19, 2));
1048: PetscCall(MatCholeskyFactorNumeric(F, A, NULL));
1049: S_lower_triangular = PETSC_TRUE;
1050: break;
1051: case MAT_FACTOR_LU:
1052: PetscCall(MatLUFactorSymbolic(F, A, NULL, NULL, NULL));
1053: /* be sure that icntl 19 is not set by command line */
1054: PetscCall(MatMumpsSetIcntl(F, 19, 3));
1055: PetscCall(MatLUFactorNumeric(F, A, NULL));
1056: break;
1057: default:
1058: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1059: }
1060: PetscCall(MatViewFromOptions(F, (PetscObject)A, "-mat_factor_view"));
1062: if (matl_dbg_viewer) {
1063: Mat S;
1064: IS is;
1066: PetscCall(PetscObjectSetName((PetscObject)A, "A"));
1067: PetscCall(MatView(A, matl_dbg_viewer));
1068: PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1069: PetscCall(PetscObjectSetName((PetscObject)S, "S"));
1070: PetscCall(MatView(S, matl_dbg_viewer));
1071: PetscCall(MatDestroy(&S));
1072: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_I, 0, 1, &is));
1073: PetscCall(PetscObjectSetName((PetscObject)is, "I"));
1074: PetscCall(ISView(is, matl_dbg_viewer));
1075: PetscCall(ISDestroy(&is));
1076: PetscCall(ISCreateStride(PETSC_COMM_SELF, size_schur, n_I, 1, &is));
1077: PetscCall(PetscObjectSetName((PetscObject)is, "B"));
1078: PetscCall(ISView(is, matl_dbg_viewer));
1079: PetscCall(ISDestroy(&is));
1080: PetscCall(PetscObjectSetName((PetscObject)is_A_all, "IA"));
1081: PetscCall(ISView(is_A_all, matl_dbg_viewer));
1082: for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) {
1083: IS is;
1084: char name[16];
1086: PetscCall(PetscSNPrintf(name, sizeof(name), "IE%" PetscInt_FMT, i));
1087: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1088: PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &is));
1089: PetscCall(PetscObjectSetName((PetscObject)is, name));
1090: PetscCall(ISView(is, matl_dbg_viewer));
1091: PetscCall(ISDestroy(&is));
1092: if (sub_schurs->change) {
1093: Mat T;
1095: PetscCall(PetscSNPrintf(name, sizeof(name), "TE%" PetscInt_FMT, i));
1096: PetscCall(KSPGetOperators(sub_schurs->change[i], &T, NULL));
1097: PetscCall(PetscObjectSetName((PetscObject)T, name));
1098: PetscCall(MatView(T, matl_dbg_viewer));
1099: PetscCall(PetscSNPrintf(name, sizeof(name), "ITE%" PetscInt_FMT, i));
1100: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->change_primal_sub[i], name));
1101: PetscCall(ISView(sub_schurs->change_primal_sub[i], matl_dbg_viewer));
1102: }
1103: cum += subset_size;
1104: }
1105: PetscCall(PetscViewerFlush(matl_dbg_viewer));
1106: }
1108: /* get explicit Schur Complement computed during numeric factorization */
1109: PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1110: PetscCall(PetscStrncpy(stype, MATSEQDENSE, sizeof(stype)));
1111: #if defined(PETSC_HAVE_CUDA)
1112: PetscCall(PetscObjectTypeCompareAny((PetscObject)A, &gpu, MATSEQAIJVIENNACL, MATSEQAIJCUSPARSE, ""));
1113: #endif
1114: if (gpu) PetscCall(PetscStrncpy(stype, MATSEQDENSECUDA, sizeof(stype)));
1115: PetscCall(PetscOptionsGetString(NULL, sub_schurs->prefix, "-sub_schurs_schur_mat_type", stype, sizeof(stype), NULL));
1116: PetscCall(MatConvert(S_all, stype, MAT_INPLACE_MATRIX, &S_all));
1117: PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1118: PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1119: PetscCall(MatGetType(S_all, &Stype));
1121: /* we can reuse the solvers if we are not using the economic version */
1122: reuse_solvers = (PetscBool)(reuse_solvers && !economic && !sub_schurs->graph->multi_element);
1123: if (!sub_schurs->gdsw) {
1124: factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1125: if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur) reuse_solvers = factor_workaround = PETSC_FALSE;
1126: }
1127: solver_S = PETSC_TRUE;
1129: /* update the Schur complement with the change of basis on the pressures */
1130: if (benign_n) {
1131: const PetscScalar *cs_AIB;
1132: PetscScalar *S_data, *AIIm1_data;
1133: Mat S2 = NULL, S3 = NULL; /* dbg */
1134: PetscScalar *S2_data, *S3_data; /* dbg */
1135: Vec v, benign_AIIm1_ones;
1136: PetscInt sizeA;
1138: PetscCall(MatDenseGetArray(S_all, &S_data));
1139: PetscCall(MatCreateVecs(A, &v, &benign_AIIm1_ones));
1140: PetscCall(VecGetSize(v, &sizeA));
1141: PetscCall(MatMumpsSetIcntl(F, 26, 0));
1142: #if defined(PETSC_HAVE_MKL_PARDISO)
1143: PetscCall(MatMkl_PardisoSetCntl(F, 70, 1));
1144: #endif
1145: PetscCall(MatDenseGetArrayRead(cs_AIB_mat, &cs_AIB));
1146: PetscCall(MatDenseGetArray(benign_AIIm1_ones_mat, &AIIm1_data));
1147: if (matl_dbg_viewer) {
1148: PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S2));
1149: PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S3));
1150: PetscCall(MatDenseGetArray(S2, &S2_data));
1151: PetscCall(MatDenseGetArray(S3, &S3_data));
1152: }
1153: for (i = 0; i < benign_n; i++) {
1154: PetscScalar *array, sum = 0., one = 1., *sums;
1155: const PetscInt *idxs;
1156: PetscInt k, j, nz;
1157: PetscBLASInt B_k, B_n;
1159: PetscCall(PetscCalloc1(benign_n, &sums));
1160: PetscCall(VecPlaceArray(benign_AIIm1_ones, AIIm1_data + sizeA * i));
1161: PetscCall(VecCopy(benign_AIIm1_ones, v));
1162: PetscCall(MatSolve(F, v, benign_AIIm1_ones));
1163: PetscCall(MatMult(A, benign_AIIm1_ones, v));
1164: PetscCall(VecResetArray(benign_AIIm1_ones));
1165: /* p0 dofs (eliminated) are excluded from the sums */
1166: for (k = 0; k < benign_n; k++) {
1167: PetscCall(ISGetLocalSize(is_p_r[k], &nz));
1168: PetscCall(ISGetIndices(is_p_r[k], &idxs));
1169: for (j = 0; j < nz - 1; j++) sums[k] -= AIIm1_data[idxs[j] + sizeA * i];
1170: PetscCall(ISRestoreIndices(is_p_r[k], &idxs));
1171: }
1172: PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
1173: if (matl_dbg_viewer) {
1174: Vec vv;
1175: char name[16];
1177: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array + n_I, &vv));
1178: PetscCall(PetscSNPrintf(name, sizeof(name), "Pvs%" PetscInt_FMT, i));
1179: PetscCall(PetscObjectSetName((PetscObject)vv, name));
1180: PetscCall(VecView(vv, matl_dbg_viewer));
1181: }
1182: /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1183: /* cs_AIB already scaled by 1./nz */
1184: B_k = 1;
1185: PetscCall(PetscBLASIntCast(size_schur, &B_n));
1186: for (k = 0; k < benign_n; k++) {
1187: sum = sums[k];
1189: if (PetscAbsScalar(sum) == 0.0) continue;
1190: if (k == i) {
1191: if (B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1192: if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyrk", BLASsyrk_("L", "N", &B_n, &B_k, &sum, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1193: } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1194: sum /= 2.0;
1195: if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1196: if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, cs_AIB + k * size_schur, &B_n, cs_AIB + i * size_schur, &B_n, &one, S3_data, &B_n));
1197: }
1198: }
1199: sum = 1.;
1200: if (B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S_data, &B_n));
1201: if (matl_dbg_viewer && B_n) PetscCallBLAS("BLASsyr2k", BLASsyr2k_("L", "N", &B_n, &B_k, &sum, array + n_I, &B_n, cs_AIB + i * size_schur, &B_n, &one, S2_data, &B_n));
1202: PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
1203: /* set p0 entry of AIIm1_ones to zero */
1204: PetscCall(ISGetLocalSize(is_p_r[i], &nz));
1205: PetscCall(ISGetIndices(is_p_r[i], &idxs));
1206: for (j = 0; j < benign_n; j++) AIIm1_data[idxs[nz - 1] + sizeA * j] = 0.;
1207: PetscCall(ISRestoreIndices(is_p_r[i], &idxs));
1208: PetscCall(PetscFree(sums));
1209: }
1210: PetscCall(VecDestroy(&benign_AIIm1_ones));
1211: if (matl_dbg_viewer) {
1212: PetscCall(MatDenseRestoreArray(S2, &S2_data));
1213: PetscCall(MatDenseRestoreArray(S3, &S3_data));
1214: }
1215: if (!S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1216: PetscInt k, j;
1217: for (k = 0; k < size_schur; k++) {
1218: for (j = k; j < size_schur; j++) S_data[j * size_schur + k] = PetscConj(S_data[k * size_schur + j]);
1219: }
1220: }
1222: /* restore defaults */
1223: PetscCall(MatMumpsSetIcntl(F, 26, -1));
1224: #if defined(PETSC_HAVE_MKL_PARDISO)
1225: PetscCall(MatMkl_PardisoSetCntl(F, 70, 0));
1226: #endif
1227: PetscCall(MatDenseRestoreArrayRead(cs_AIB_mat, &cs_AIB));
1228: PetscCall(MatDenseRestoreArray(benign_AIIm1_ones_mat, &AIIm1_data));
1229: PetscCall(VecDestroy(&v));
1230: PetscCall(MatDenseRestoreArray(S_all, &S_data));
1231: if (matl_dbg_viewer) {
1232: Mat S;
1234: PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1235: PetscCall(MatFactorCreateSchurComplement(F, &S, NULL));
1236: PetscCall(PetscObjectSetName((PetscObject)S, "Sb"));
1237: PetscCall(MatView(S, matl_dbg_viewer));
1238: PetscCall(MatDestroy(&S));
1239: PetscCall(PetscObjectSetName((PetscObject)S2, "S2P"));
1240: PetscCall(MatView(S2, matl_dbg_viewer));
1241: PetscCall(PetscObjectSetName((PetscObject)S3, "S3P"));
1242: PetscCall(MatView(S3, matl_dbg_viewer));
1243: PetscCall(PetscObjectSetName((PetscObject)cs_AIB_mat, "cs"));
1244: PetscCall(MatView(cs_AIB_mat, matl_dbg_viewer));
1245: PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1246: }
1247: PetscCall(MatDestroy(&S2));
1248: PetscCall(MatDestroy(&S3));
1249: }
1250: if (!reuse_solvers) {
1251: for (i = 0; i < benign_n; i++) PetscCall(ISDestroy(&is_p_r[i]));
1252: PetscCall(PetscFree(is_p_r));
1253: PetscCall(MatDestroy(&cs_AIB_mat));
1254: PetscCall(MatDestroy(&benign_AIIm1_ones_mat));
1255: }
1256: } else if (multi_element) { /* MUMPS does not support sparse Schur complements. Loop over local subs */
1257: PetscInt *nnz;
1258: const PetscInt *idxs;
1259: PetscInt size_schur_sub;
1261: PetscCall(PetscCalloc1(size_schur, &nnz));
1262: for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1263: PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1264: PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1265: for (PetscInt j = 0; j < size_schur_sub; j++) nnz[idxs[j]] = size_schur_sub;
1266: PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1267: }
1268: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, size_schur, size_schur, 0, nnz, &S_all));
1269: PetscCall(MatSetOption(S_all, MAT_ROW_ORIENTED, sub_schurs->is_hermitian));
1270: PetscCall(PetscFree(nnz));
1272: for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1273: Mat Asub, Ssub;
1274: const PetscScalar *vals;
1275: PetscInt size_all_sub;
1277: F = NULL;
1278: PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1279: PetscCall(ISGetLocalSize(is_sub_all[sub], &size_all_sub));
1280: PetscCall(MatCreateSubMatrix(A, is_sub_all[sub], is_sub_all[sub], MAT_INITIAL_MATRIX, &Asub));
1281: if (size_schur_sub == size_all_sub) {
1282: /* we can't use MatFactor when size_schur == size_of_the_problem */
1283: PetscCall(MatConvert(Asub, MATDENSE, MAT_INITIAL_MATRIX, &Ssub));
1284: } else {
1285: PetscCall(MatGetFactor(Asub, sub_schurs->mat_solver_type, sub_schurs->mat_factor_type, &F));
1286: PetscCheck(F, PetscObjectComm((PetscObject)Asub), PETSC_ERR_SUP, "MatGetFactor not supported by matrix instance of type %s. Rerun with \"-info :mat | grep MatGetFactor_\" for additional information", ((PetscObject)Asub)->type_name);
1287: PetscCall(MatSetErrorIfFailure(Asub, PETSC_TRUE));
1288: #if defined(PETSC_HAVE_MKL_PARDISO)
1289: if (benign_trick) PetscCall(MatMkl_PardisoSetCntl(F, 10, 10));
1290: #endif
1291: /* subsets ordered last */
1292: PetscCall(MatFactorSetSchurIS(F, is_sub_schur_all[sub]));
1294: /* factorization step */
1295: switch (sub_schurs->mat_factor_type) {
1296: case MAT_FACTOR_CHOLESKY:
1297: PetscCall(MatCholeskyFactorSymbolic(F, Asub, NULL, NULL));
1298: /* be sure that icntl 19 is not set by command line */
1299: PetscCall(MatMumpsSetIcntl(F, 19, 2));
1300: PetscCall(MatCholeskyFactorNumeric(F, Asub, NULL));
1301: S_lower_triangular = PETSC_TRUE;
1302: break;
1303: case MAT_FACTOR_LU:
1304: PetscCall(MatLUFactorSymbolic(F, Asub, NULL, NULL, NULL));
1305: /* be sure that icntl 19 is not set by command line */
1306: PetscCall(MatMumpsSetIcntl(F, 19, 3));
1307: PetscCall(MatLUFactorNumeric(F, Asub, NULL));
1308: break;
1309: default:
1310: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1311: }
1312: PetscCall(MatFactorCreateSchurComplement(F, &Ssub, NULL));
1313: }
1314: PetscCall(MatDestroy(&Asub));
1315: PetscCall(MatDenseGetArrayRead(Ssub, &vals));
1316: PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1317: PetscCall(MatSetValues(S_all, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES));
1318: PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1319: PetscCall(MatDenseRestoreArrayRead(Ssub, &vals));
1320: PetscCall(MatDestroy(&Ssub));
1321: PetscCall(MatDestroy(&F));
1322: }
1323: PetscCall(MatAssemblyBegin(S_all, MAT_FINAL_ASSEMBLY));
1324: PetscCall(MatAssemblyEnd(S_all, MAT_FINAL_ASSEMBLY));
1325: PetscCall(MatSetOption(S_all, MAT_SPD, sub_schurs->is_posdef));
1326: PetscCall(MatSetOption(S_all, MAT_HERMITIAN, sub_schurs->is_hermitian));
1327: Stype = MATDENSE;
1328: reuse_solvers = PETSC_FALSE;
1329: factor_workaround = PETSC_FALSE;
1330: solver_S = PETSC_FALSE;
1331: } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1332: PetscCall(MatConvert(A, MATSEQDENSE, MAT_INITIAL_MATRIX, &S_all));
1333: PetscCall(MatGetType(S_all, &Stype));
1334: reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1335: factor_workaround = PETSC_FALSE;
1336: solver_S = PETSC_FALSE;
1337: }
1339: if (reuse_solvers) {
1340: Mat A_II, pA_II, Afake;
1341: Vec vec1_B;
1342: PCBDDCReuseSolvers msolv_ctx;
1343: PetscInt n_R;
1345: if (sub_schurs->reuse_solver) {
1346: PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1347: } else {
1348: PetscCall(PetscNew(&sub_schurs->reuse_solver));
1349: }
1350: msolv_ctx = sub_schurs->reuse_solver;
1351: PetscCall(MatSchurComplementGetSubMatrices(sub_schurs->S, &A_II, &pA_II, NULL, NULL, NULL));
1352: PetscCall(PetscObjectReference((PetscObject)F));
1353: msolv_ctx->F = F;
1354: PetscCall(MatCreateVecs(F, &msolv_ctx->sol, NULL));
1355: /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1356: {
1357: PetscScalar *array;
1358: PetscInt n;
1360: PetscCall(VecGetLocalSize(msolv_ctx->sol, &n));
1361: PetscCall(VecGetArray(msolv_ctx->sol, &array));
1362: PetscCall(VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol), 1, n, array, &msolv_ctx->rhs));
1363: PetscCall(VecRestoreArray(msolv_ctx->sol, &array));
1364: }
1365: msolv_ctx->has_vertices = schur_has_vertices;
1367: /* interior solver */
1368: PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->interior_solver));
1369: PetscCall(PCSetOperators(msolv_ctx->interior_solver, A_II, pA_II));
1370: PetscCall(PCSetType(msolv_ctx->interior_solver, PCSHELL));
1371: PetscCall(PCShellSetName(msolv_ctx->interior_solver, "Interior solver (w/o Schur factorization)"));
1372: PetscCall(PCShellSetContext(msolv_ctx->interior_solver, msolv_ctx));
1373: PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1374: PetscCall(PCShellSetApply(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Interior));
1375: PetscCall(PCShellSetApplyTranspose(msolv_ctx->interior_solver, PCBDDCReuseSolvers_InteriorTranspose));
1376: if (sub_schurs->gdsw) PetscCall(PCShellSetDestroy(msolv_ctx->interior_solver, PCBDDCReuseSolvers_Destroy));
1378: /* correction solver */
1379: if (!sub_schurs->gdsw) {
1380: PetscCall(PCCreate(PetscObjectComm((PetscObject)A_II), &msolv_ctx->correction_solver));
1381: PetscCall(PCSetType(msolv_ctx->correction_solver, PCSHELL));
1382: PetscCall(PCShellSetName(msolv_ctx->correction_solver, "Correction solver (with Schur factorization)"));
1383: PetscCall(PCShellSetContext(msolv_ctx->correction_solver, msolv_ctx));
1384: PetscCall(PCShellSetView(msolv_ctx->interior_solver, PCBDDCReuseSolvers_View));
1385: PetscCall(PCShellSetApply(msolv_ctx->correction_solver, PCBDDCReuseSolvers_Correction));
1386: PetscCall(PCShellSetApplyTranspose(msolv_ctx->correction_solver, PCBDDCReuseSolvers_CorrectionTranspose));
1388: /* scatter and vecs for Schur complement solver */
1389: PetscCall(MatCreateVecs(S_all, &msolv_ctx->sol_B, &msolv_ctx->rhs_B));
1390: PetscCall(MatCreateVecs(sub_schurs->S, &vec1_B, NULL));
1391: if (!schur_has_vertices) {
1392: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_A_all, &msolv_ctx->is_B));
1393: PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, NULL, &msolv_ctx->correction_scatter_B));
1394: PetscCall(PetscObjectReference((PetscObject)is_A_all));
1395: msolv_ctx->is_R = is_A_all;
1396: } else {
1397: IS is_B_all;
1398: const PetscInt *idxs;
1399: PetscInt dual, n_v, n;
1401: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_v));
1402: dual = size_schur - n_v;
1403: PetscCall(ISGetLocalSize(is_A_all, &n));
1404: PetscCall(ISGetIndices(is_A_all, &idxs));
1405: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), dual, idxs + n_I, PETSC_COPY_VALUES, &is_B_all));
1406: PetscCall(ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap, IS_GTOLM_DROP, is_B_all, &msolv_ctx->is_B));
1407: PetscCall(ISDestroy(&is_B_all));
1408: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)is_A_all), dual, 0, 1, &is_B_all));
1409: PetscCall(VecScatterCreate(vec1_B, msolv_ctx->is_B, msolv_ctx->sol_B, is_B_all, &msolv_ctx->correction_scatter_B));
1410: PetscCall(ISDestroy(&is_B_all));
1411: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all), n - n_v, idxs, PETSC_COPY_VALUES, &msolv_ctx->is_R));
1412: PetscCall(ISRestoreIndices(is_A_all, &idxs));
1413: }
1414: PetscCall(ISGetLocalSize(msolv_ctx->is_R, &n_R));
1415: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_R, n_R, 0, NULL, &Afake));
1416: PetscCall(MatAssemblyBegin(Afake, MAT_FINAL_ASSEMBLY));
1417: PetscCall(MatAssemblyEnd(Afake, MAT_FINAL_ASSEMBLY));
1418: PetscCall(PCSetOperators(msolv_ctx->correction_solver, Afake, Afake));
1419: PetscCall(MatDestroy(&Afake));
1420: PetscCall(VecDestroy(&vec1_B));
1421: }
1422: /* communicate benign info to solver context */
1423: if (benign_n) {
1424: PetscScalar *array;
1426: msolv_ctx->benign_n = benign_n;
1427: msolv_ctx->benign_zerodiag_subs = is_p_r;
1428: PetscCall(PetscMalloc1(benign_n, &msolv_ctx->benign_save_vals));
1429: msolv_ctx->benign_csAIB = cs_AIB_mat;
1430: PetscCall(MatCreateVecs(cs_AIB_mat, &msolv_ctx->benign_corr_work, NULL));
1431: PetscCall(VecGetArray(msolv_ctx->benign_corr_work, &array));
1432: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, size_schur, array, &msolv_ctx->benign_dummy_schur_vec));
1433: PetscCall(VecRestoreArray(msolv_ctx->benign_corr_work, &array));
1434: msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1435: }
1436: } else {
1437: if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
1438: PetscCall(PetscFree(sub_schurs->reuse_solver));
1439: }
1440: PetscCall(MatDestroy(&A));
1441: PetscCall(ISDestroy(&is_A_all));
1443: /* Work arrays */
1444: PetscCall(PetscMalloc1(max_subset_size * max_subset_size, &work));
1446: /* S_Ej_all */
1447: PetscInt *idx_work = NULL;
1448: cum = cum2 = 0;
1449: if (!multi_element) PetscCall(MatDenseGetArrayRead(S_all, &rS_data));
1450: else PetscCall(PetscMalloc1(max_subset_size, &idx_work));
1451: PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &SEj_arr));
1452: if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1453: if (sub_schurs->gdsw) PetscCall(MatCreateSubMatrices(sub_schurs->A, sub_schurs->n_subs, sub_schurs->is_subs, sub_schurs->is_subs, MAT_INITIAL_MATRIX, &gdswA));
1454: for (i = 0; i < sub_schurs->n_subs; i++) {
1455: /* get S_E (or K^i_EE for GDSW) */
1456: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1457: if (sub_schurs->gdsw) {
1458: Mat T;
1460: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &T));
1461: PetscCall(MatConvert(gdswA[i], MATDENSE, MAT_REUSE_MATRIX, &T));
1462: PetscCall(MatDestroy(&T));
1463: } else {
1464: if (multi_element) { /* transpose copy to workspace */
1465: // XXX CSR directly?
1466: for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j;
1467: PetscCall(MatGetValues(S_all, subset_size, idx_work, subset_size, idx_work, work));
1468: if (!sub_schurs->is_hermitian) {
1469: for (PetscInt k = 0; k < subset_size; k++) {
1470: for (PetscInt j = k; j < subset_size; j++) {
1471: PetscScalar t = work[k * subset_size + j];
1472: work[k * subset_size + j] = work[j * subset_size + k];
1473: work[j * subset_size + k] = t;
1474: }
1475: }
1476: }
1477: } else if (S_lower_triangular) { /* I need to expand the upper triangular data (column-oriented) */
1478: for (PetscInt k = 0; k < subset_size; k++) {
1479: for (PetscInt j = k; j < subset_size; j++) {
1480: work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1481: work[j * subset_size + k] = PetscConj(rS_data[cum2 + k * size_schur + j]);
1482: }
1483: }
1484: } else { /* just copy to workspace */
1485: for (PetscInt k = 0; k < subset_size; k++) {
1486: for (PetscInt j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1487: }
1488: }
1489: }
1490: /* insert S_E values */
1491: if (sub_schurs->change) {
1492: Mat change_sub, SEj, T;
1494: /* change basis */
1495: PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1496: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1497: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1498: Mat T2;
1499: PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1500: PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1501: PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1502: PetscCall(MatDestroy(&T2));
1503: } else {
1504: PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1505: }
1506: PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1507: PetscCall(MatDestroy(&T));
1508: PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], 1.0, NULL, NULL));
1509: PetscCall(MatDestroy(&SEj));
1510: }
1511: PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1512: if (compute_Stilda) {
1513: if (deluxe) { /* if adaptivity is requested, invert S_E blocks */
1514: Mat M;
1515: const PetscScalar *vals;
1516: PetscBool isdense, isdensecuda;
1518: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &M));
1519: PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1520: PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1521: if (!PetscBTLookup(sub_schurs->is_edge, i)) PetscCall(MatSetType(M, Stype));
1522: PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSE, &isdense));
1523: PetscCall(PetscObjectTypeCompare((PetscObject)M, MATSEQDENSECUDA, &isdensecuda));
1524: switch (sub_schurs->mat_factor_type) {
1525: case MAT_FACTOR_CHOLESKY:
1526: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1527: break;
1528: case MAT_FACTOR_LU:
1529: PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1530: break;
1531: default:
1532: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1533: }
1534: if (isdense) {
1535: PetscCall(MatSeqDenseInvertFactors_Private(M));
1536: #if defined(PETSC_HAVE_CUDA)
1537: } else if (isdensecuda) {
1538: PetscCall(MatSeqDenseCUDAInvertFactors_Internal(M));
1539: #endif
1540: } else SETERRQ(PetscObjectComm((PetscObject)M), PETSC_ERR_SUP, "Not implemented for type %s", Stype);
1541: PetscCall(MatDenseGetArrayRead(M, &vals));
1542: PetscCall(PetscArraycpy(SEjinv_arr, vals, subset_size * subset_size));
1543: PetscCall(MatDenseRestoreArrayRead(M, &vals));
1544: PetscCall(MatDestroy(&M));
1545: } else if (scaling) { /* not using deluxe */
1546: Mat SEj;
1547: Vec D;
1548: PetscScalar *array;
1550: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, work, &SEj));
1551: PetscCall(VecGetArray(Dall, &array));
1552: PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, subset_size, array + cum, &D));
1553: PetscCall(VecRestoreArray(Dall, &array));
1554: PetscCall(VecShift(D, -1.));
1555: PetscCall(MatDiagonalScale(SEj, D, D));
1556: PetscCall(MatDestroy(&SEj));
1557: PetscCall(VecDestroy(&D));
1558: PetscCall(PetscArraycpy(SEj_arr, work, subset_size * subset_size));
1559: }
1560: }
1561: cum += subset_size;
1562: cum2 += subset_size * (size_schur + 1);
1563: SEj_arr += subset_size * subset_size;
1564: if (SEjinv_arr) SEjinv_arr += subset_size * subset_size;
1565: }
1566: if (sub_schurs->gdsw) PetscCall(MatDestroySubMatrices(sub_schurs->n_subs, &gdswA));
1567: if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all, &rS_data));
1568: PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &SEj_arr));
1569: if (sub_schurs->sum_S_Ej_inv_all) PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &SEjinv_arr));
1570: if (solver_S) PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1572: /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1573: however, preliminary tests indicate using GPUs is still faster in the solve phase */
1574: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1575: if (reuse_solvers) {
1576: Mat St;
1577: MatFactorSchurStatus st;
1579: flg = PETSC_FALSE;
1580: PetscCall(PetscOptionsGetBool(NULL, sub_schurs->prefix, "-sub_schurs_schur_pin_to_cpu", &flg, NULL));
1581: PetscCall(MatFactorGetSchurComplement(F, &St, &st));
1582: PetscCall(MatBindToCPU(St, flg));
1583: PetscCall(MatFactorRestoreSchurComplement(F, &St, st));
1584: }
1585: #endif
1587: schur_factor = NULL;
1588: if (compute_Stilda && size_active_schur) {
1589: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1590: PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1591: PetscCall(PetscArraycpy(SEjinv_arr, work, size_schur * size_schur));
1592: PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1593: } else {
1594: Mat S_all_inv = NULL;
1596: if (solver_S && !sub_schurs->gdsw) {
1597: /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1598: The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1599: if (factor_workaround) { /* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1600: PetscScalar *data;
1601: PetscInt nd = 0;
1603: PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1604: PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1605: PetscCall(MatDenseGetArray(S_all_inv, &data));
1606: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1607: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1608: }
1610: /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1611: if (schur_has_vertices) {
1612: Mat M;
1613: PetscScalar *tdata;
1614: PetscInt nv = 0, news;
1616: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
1617: news = size_active_schur + nv;
1618: PetscCall(PetscCalloc1(news * news, &tdata));
1619: for (i = 0; i < size_active_schur; i++) {
1620: PetscCall(PetscArraycpy(tdata + i * (news + 1), data + i * (size_schur + 1), size_active_schur - i));
1621: PetscCall(PetscArraycpy(tdata + i * (news + 1) + size_active_schur - i, data + i * size_schur + size_active_schur + nd, nv));
1622: }
1623: for (i = 0; i < nv; i++) {
1624: PetscInt k = i + size_active_schur;
1625: PetscCall(PetscArraycpy(tdata + k * (news + 1), data + (k + nd) * (size_schur + 1), nv - i));
1626: }
1628: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, news, news, tdata, &M));
1629: PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1630: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1631: /* save the factors */
1632: cum = 0;
1633: PetscCall(PetscMalloc1((size_active_schur * (size_active_schur + 1)) / 2 + nd, &schur_factor));
1634: for (i = 0; i < size_active_schur; i++) {
1635: PetscCall(PetscArraycpy(schur_factor + cum, tdata + i * (news + 1), size_active_schur - i));
1636: cum += size_active_schur - i;
1637: }
1638: for (i = 0; i < nd; i++) schur_factor[cum + i] = PetscSqrtReal(PetscRealPart(data[(i + size_active_schur) * (size_schur + 1)]));
1639: S_all_inv->ops->solve = M->ops->solve;
1640: S_all_inv->ops->matsolve = M->ops->matsolve;
1641: S_all_inv->ops->solvetranspose = M->ops->solvetranspose;
1642: S_all_inv->ops->matsolvetranspose = M->ops->matsolvetranspose;
1643: S_all_inv->factortype = MAT_FACTOR_CHOLESKY;
1644: PetscCall(MatSeqDenseInvertFactors_Private(M));
1645: /* move back just the active dofs to the Schur complement */
1646: for (i = 0; i < size_active_schur; i++) PetscCall(PetscArraycpy(data + i * size_schur, tdata + i * news, size_active_schur));
1647: PetscCall(PetscFree(tdata));
1648: PetscCall(MatDestroy(&M));
1649: } else { /* we can factorize and invert just the activedofs */
1650: Mat M;
1651: PetscScalar *aux;
1653: PetscCall(PetscMalloc1(nd, &aux));
1654: for (i = 0; i < nd; i++) aux[i] = 1.0 / data[(i + size_active_schur) * (size_schur + 1)];
1655: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_active_schur, size_active_schur, data, &M));
1656: PetscCall(MatDenseSetLDA(M, size_schur));
1657: PetscCall(MatSetOption(M, MAT_SPD, PETSC_TRUE));
1658: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1659: PetscCall(MatSeqDenseInvertFactors_Private(M));
1660: PetscCall(MatDestroy(&M));
1661: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, size_schur, nd, data + size_active_schur * size_schur, &M));
1662: PetscCall(MatZeroEntries(M));
1663: PetscCall(MatDestroy(&M));
1664: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nd, size_schur, data + size_active_schur, &M));
1665: PetscCall(MatDenseSetLDA(M, size_schur));
1666: PetscCall(MatZeroEntries(M));
1667: PetscCall(MatDestroy(&M));
1668: for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = aux[i];
1669: PetscCall(PetscFree(aux));
1670: }
1671: PetscCall(MatDenseRestoreArray(S_all_inv, &data));
1672: } else { /* use MatFactor calls to invert S */
1673: PetscCall(MatFactorInvertSchurComplement(F));
1674: PetscCall(MatFactorGetSchurComplement(F, &S_all_inv, NULL));
1675: }
1676: } else if (!sub_schurs->gdsw) { /* we need to invert explicitly since we are not using MatFactor for S */
1677: if (multi_element) {
1678: PetscCall(MatDuplicate(S_all, MAT_DO_NOT_COPY_VALUES, &S_all_inv));
1679: PetscCall(MatSetOption(S_all_inv, MAT_ROW_ORIENTED, sub_schurs->is_hermitian));
1680: for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1681: const PetscScalar *vals;
1682: const PetscInt *idxs;
1683: PetscInt size_schur_sub;
1684: Mat M;
1686: PetscCall(MatCreateSubMatrix(S_all, is_sub_schur[sub], is_sub_schur[sub], MAT_INITIAL_MATRIX, &M));
1687: PetscCall(MatConvert(M, MATDENSE, MAT_INPLACE_MATRIX, &M));
1688: PetscCall(MatSetOption(M, MAT_SPD, sub_schurs->is_posdef));
1689: PetscCall(MatSetOption(M, MAT_HERMITIAN, sub_schurs->is_hermitian));
1690: switch (sub_schurs->mat_factor_type) {
1691: case MAT_FACTOR_CHOLESKY:
1692: PetscCall(MatCholeskyFactor(M, NULL, NULL));
1693: break;
1694: case MAT_FACTOR_LU:
1695: PetscCall(MatLUFactor(M, NULL, NULL, NULL));
1696: break;
1697: default:
1698: SETERRQ(PetscObjectComm((PetscObject)F), PETSC_ERR_SUP, "Unsupported factor type %s", MatFactorTypes[sub_schurs->mat_factor_type]);
1699: }
1700: PetscCall(MatSeqDenseInvertFactors_Private(M));
1701: PetscCall(MatDenseGetArrayRead(M, &vals));
1702: PetscCall(ISGetLocalSize(is_sub_schur[sub], &size_schur_sub));
1703: PetscCall(ISGetIndices(is_sub_schur[sub], &idxs));
1704: PetscCall(MatSetValues(S_all_inv, size_schur_sub, idxs, size_schur_sub, idxs, vals, INSERT_VALUES));
1705: PetscCall(ISRestoreIndices(is_sub_schur[sub], &idxs));
1706: PetscCall(MatDenseRestoreArrayRead(M, &vals));
1707: PetscCall(MatDestroy(&M));
1708: }
1709: PetscCall(MatAssemblyBegin(S_all_inv, MAT_FINAL_ASSEMBLY));
1710: PetscCall(MatAssemblyEnd(S_all_inv, MAT_FINAL_ASSEMBLY));
1711: } else {
1712: PetscCall(PetscObjectReference((PetscObject)S_all));
1713: S_all_inv = S_all;
1714: PetscCall(MatDenseGetArray(S_all_inv, &S_data));
1715: PetscCall(PetscBLASIntCast(size_schur, &B_N));
1716: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
1717: if (use_potr) {
1718: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, S_data, &B_N, &B_ierr));
1719: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1720: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, S_data, &B_N, &B_ierr));
1721: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1722: } else if (use_sytr) {
1723: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1724: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1725: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, S_data, &B_N, pivots, Bwork, &B_ierr));
1726: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1727: } else {
1728: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, S_data, &B_N, pivots, &B_ierr));
1729: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
1730: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, S_data, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
1731: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
1732: }
1733: PetscCall(PetscLogFlops(1.0 * size_schur * size_schur * size_schur));
1734: PetscCall(PetscFPTrapPop());
1735: PetscCall(MatDenseRestoreArray(S_all_inv, &S_data));
1736: }
1737: } else if (sub_schurs->gdsw) {
1738: Mat tS, tX, SEj, S_II, S_IE, S_EE;
1739: KSP pS_II;
1740: PC pS_II_pc;
1741: IS EE, II;
1742: PetscInt nS;
1744: PetscCall(MatFactorCreateSchurComplement(F, &tS, NULL));
1745: PetscCall(MatGetSize(tS, &nS, NULL));
1746: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1747: for (i = 0, cum = 0; i < sub_schurs->n_subs; i++) { /* naive implementation */
1748: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1749: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, SEjinv_arr, &SEj));
1751: PetscCall(ISCreateStride(PETSC_COMM_SELF, subset_size, cum, 1, &EE));
1752: PetscCall(ISComplement(EE, 0, nS, &II));
1753: PetscCall(MatCreateSubMatrix(tS, II, II, MAT_INITIAL_MATRIX, &S_II));
1754: PetscCall(MatCreateSubMatrix(tS, II, EE, MAT_INITIAL_MATRIX, &S_IE));
1755: PetscCall(MatCreateSubMatrix(tS, EE, EE, MAT_INITIAL_MATRIX, &S_EE));
1756: PetscCall(ISDestroy(&II));
1757: PetscCall(ISDestroy(&EE));
1759: PetscCall(KSPCreate(PETSC_COMM_SELF, &pS_II));
1760: PetscCall(KSPSetNestLevel(pS_II, 1)); /* do not have direct access to a PC to provide the level of nesting of the KSP */
1761: PetscCall(KSPSetType(pS_II, KSPPREONLY));
1762: PetscCall(KSPGetPC(pS_II, &pS_II_pc));
1763: PetscCall(PCSetType(pS_II_pc, PCSVD));
1764: PetscCall(KSPSetOptionsPrefix(pS_II, sub_schurs->prefix));
1765: PetscCall(KSPAppendOptionsPrefix(pS_II, "pseudo_"));
1766: PetscCall(KSPSetOperators(pS_II, S_II, S_II));
1767: PetscCall(MatDestroy(&S_II));
1768: PetscCall(KSPSetFromOptions(pS_II));
1769: PetscCall(KSPSetUp(pS_II));
1770: PetscCall(MatDuplicate(S_IE, MAT_DO_NOT_COPY_VALUES, &tX));
1771: PetscCall(KSPMatSolve(pS_II, S_IE, tX));
1772: PetscCall(KSPDestroy(&pS_II));
1774: PetscCall(MatTransposeMatMult(S_IE, tX, MAT_REUSE_MATRIX, PETSC_DETERMINE, &SEj));
1775: PetscCall(MatDestroy(&S_IE));
1776: PetscCall(MatDestroy(&tX));
1777: PetscCall(MatAYPX(SEj, -1, S_EE, SAME_NONZERO_PATTERN));
1778: PetscCall(MatDestroy(&S_EE));
1780: PetscCall(MatDestroy(&SEj));
1781: cum += subset_size;
1782: SEjinv_arr += subset_size * subset_size;
1783: }
1784: PetscCall(MatDestroy(&tS));
1785: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1786: }
1787: /* S_Ej_tilda_all */
1788: cum = cum2 = 0;
1789: rS_data = NULL;
1790: if (S_all_inv && !multi_element) PetscCall(MatDenseGetArrayRead(S_all_inv, &rS_data));
1791: PetscCall(MatSeqAIJGetArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1792: for (i = 0; i < sub_schurs->n_subs; i++) {
1793: PetscInt j;
1795: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1796: /* get (St^-1)_E */
1797: /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1798: will be properly accessed later during adaptive selection */
1799: if (multi_element) { /* transpose copy to workspace */
1800: // XXX CSR directly?
1801: for (PetscInt j = 0; j < subset_size; j++) idx_work[j] = cum + j;
1802: PetscCall(MatGetValues(S_all_inv, subset_size, idx_work, subset_size, idx_work, work));
1803: if (!sub_schurs->is_hermitian) {
1804: for (PetscInt k = 0; k < subset_size; k++) {
1805: for (PetscInt j = k; j < subset_size; j++) {
1806: PetscScalar t = work[k * subset_size + j];
1807: work[k * subset_size + j] = work[j * subset_size + k];
1808: work[j * subset_size + k] = t;
1809: }
1810: }
1811: }
1812: } else if (rS_data) {
1813: if (S_lower_triangular) {
1814: PetscInt k;
1815: if (sub_schurs->change) {
1816: for (k = 0; k < subset_size; k++) {
1817: for (j = k; j < subset_size; j++) {
1818: work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1819: work[j * subset_size + k] = work[k * subset_size + j];
1820: }
1821: }
1822: } else {
1823: for (k = 0; k < subset_size; k++) {
1824: for (j = k; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1825: }
1826: }
1827: } else {
1828: PetscInt k;
1829: for (k = 0; k < subset_size; k++) {
1830: for (j = 0; j < subset_size; j++) work[k * subset_size + j] = rS_data[cum2 + k * size_schur + j];
1831: }
1832: }
1833: }
1834: if (sub_schurs->change) {
1835: Mat change_sub, SEj, T;
1836: PetscScalar val = sub_schurs->gdsw ? PETSC_SMALL : 1. / PETSC_SMALL;
1838: /* change basis */
1839: PetscCall(KSPGetOperators(sub_schurs->change[i], &change_sub, NULL));
1840: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, (rS_data || multi_element) ? work : SEjinv_arr, &SEj));
1841: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1842: Mat T2;
1843: PetscCall(MatTransposeMatMult(change_sub, SEj, MAT_INITIAL_MATRIX, 1.0, &T2));
1844: PetscCall(MatMatMult(T2, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1845: PetscCall(MatDestroy(&T2));
1846: PetscCall(MatConvert(T, MATSEQDENSE, MAT_INPLACE_MATRIX, &T));
1847: } else {
1848: PetscCall(MatPtAP(SEj, change_sub, MAT_INITIAL_MATRIX, 1.0, &T));
1849: }
1850: PetscCall(MatCopy(T, SEj, SAME_NONZERO_PATTERN));
1851: PetscCall(MatDestroy(&T));
1852: PetscCall(MatZeroRowsColumnsIS(SEj, sub_schurs->change_primal_sub[i], val, NULL, NULL));
1853: PetscCall(MatDestroy(&SEj));
1854: }
1855: if (rS_data || multi_element) PetscCall(PetscArraycpy(SEjinv_arr, work, subset_size * subset_size));
1856: cum += subset_size;
1857: cum2 += subset_size * (size_schur + 1);
1858: SEjinv_arr += subset_size * subset_size;
1859: }
1860: PetscCall(MatSeqAIJRestoreArrayWrite(sub_schurs->sum_S_Ej_tilda_all, &SEjinv_arr));
1861: if (S_all_inv) {
1862: if (!multi_element) PetscCall(MatDenseRestoreArrayRead(S_all_inv, &rS_data));
1863: if (solver_S) {
1864: if (schur_has_vertices) {
1865: PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_FACTORED));
1866: } else {
1867: PetscCall(MatFactorRestoreSchurComplement(F, &S_all_inv, MAT_FACTOR_SCHUR_INVERTED));
1868: }
1869: }
1870: }
1871: PetscCall(MatDestroy(&S_all_inv));
1872: }
1874: /* move back factors if needed */
1875: if (schur_has_vertices && factor_workaround && !sub_schurs->gdsw) {
1876: Mat S_tmp;
1877: PetscInt nd = 0;
1878: PetscScalar *data;
1880: PetscCheck(use_potr, PETSC_COMM_SELF, PETSC_ERR_SUP, "Factor update not yet implemented for non SPD matrices");
1881: PetscCheck(solver_S, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
1882: PetscCall(MatFactorGetSchurComplement(F, &S_tmp, NULL));
1883: PetscCall(MatDenseGetArray(S_tmp, &data));
1884: PetscCall(PetscArrayzero(data, size_schur * size_schur));
1886: if (S_lower_triangular) {
1887: cum = 0;
1888: for (i = 0; i < size_active_schur; i++) {
1889: PetscCall(PetscArraycpy(data + i * (size_schur + 1), schur_factor + cum, size_active_schur - i));
1890: cum += size_active_schur - i;
1891: }
1892: } else {
1893: PetscCall(PetscArraycpy(data, schur_factor, size_schur * size_schur));
1894: }
1895: if (sub_schurs->is_dir) {
1896: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1897: for (i = 0; i < nd; i++) data[(i + size_active_schur) * (size_schur + 1)] = schur_factor[cum + i];
1898: }
1899: /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1900: set the diagonal entry of the Schur factor to a very large value */
1901: for (i = size_active_schur + nd; i < size_schur; i++) data[i * (size_schur + 1)] = infty;
1902: PetscCall(MatDenseRestoreArray(S_tmp, &data));
1903: PetscCall(MatFactorRestoreSchurComplement(F, &S_tmp, MAT_FACTOR_SCHUR_FACTORED));
1904: }
1905: } else if (factor_workaround && !sub_schurs->gdsw) { /* we need to eliminate any unneeded coupling */
1906: PetscScalar *data;
1907: PetscInt nd = 0;
1909: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1910: PetscCall(ISGetLocalSize(sub_schurs->is_dir, &nd));
1911: }
1912: PetscCall(MatFactorGetSchurComplement(F, &S_all, NULL));
1913: PetscCall(MatDenseGetArray(S_all, &data));
1914: for (i = 0; i < size_active_schur; i++) PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1915: for (i = size_active_schur + nd; i < size_schur; i++) {
1916: PetscCall(PetscArrayzero(data + i * size_schur + size_active_schur, size_schur - size_active_schur));
1917: data[i * (size_schur + 1)] = infty;
1918: }
1919: PetscCall(MatDenseRestoreArray(S_all, &data));
1920: PetscCall(MatFactorRestoreSchurComplement(F, &S_all, MAT_FACTOR_SCHUR_UNFACTORED));
1921: }
1922: PetscCall(PetscFree(idx_work));
1923: PetscCall(PetscFree(work));
1924: PetscCall(PetscFree(schur_factor));
1925: PetscCall(VecDestroy(&Dall));
1926: PetscCall(ISDestroy(&is_schur));
1927: if (multi_element) {
1928: for (PetscInt sub = 0; sub < n_local_subs; sub++) {
1929: PetscCall(ISDestroy(&is_sub_all[sub]));
1930: PetscCall(ISDestroy(&is_sub_schur_all[sub]));
1931: PetscCall(ISDestroy(&is_sub_schur[sub]));
1932: }
1933: PetscCall(PetscFree3(is_sub_all, is_sub_schur_all, is_sub_schur));
1934: }
1935: }
1936: PetscCall(ISDestroy(&is_I_layer));
1937: PetscCall(MatDestroy(&S_all));
1938: PetscCall(MatDestroy(&A_BB));
1939: PetscCall(MatDestroy(&A_IB));
1940: PetscCall(MatDestroy(&A_BI));
1941: PetscCall(MatDestroy(&F));
1943: PetscCall(MatAssemblyBegin(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1944: PetscCall(MatAssemblyEnd(sub_schurs->S_Ej_all, MAT_FINAL_ASSEMBLY));
1945: if (compute_Stilda) {
1946: PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1947: PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all, MAT_FINAL_ASSEMBLY));
1948: if (deluxe) {
1949: PetscCall(MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1950: PetscCall(MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all, MAT_FINAL_ASSEMBLY));
1951: }
1952: }
1954: /* Get local part of (\sum_j S_Ej) */
1955: if (!sub_schurs->sum_S_Ej_all) PetscCall(MatDuplicate(sub_schurs->S_Ej_all, MAT_DO_NOT_COPY_VALUES, &sub_schurs->sum_S_Ej_all));
1956: PetscCall(VecSet(gstash, 0.0));
1957: PetscCall(MatSeqAIJGetArray(sub_schurs->S_Ej_all, &stasharray));
1958: PetscCall(VecPlaceArray(lstash, stasharray));
1959: PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1960: PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1961: PetscCall(MatSeqAIJRestoreArray(sub_schurs->S_Ej_all, &stasharray));
1962: PetscCall(VecResetArray(lstash));
1963: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &stasharray));
1964: PetscCall(VecPlaceArray(lstash, stasharray));
1965: PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1966: PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1967: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all, &stasharray));
1968: PetscCall(VecResetArray(lstash));
1970: /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1971: if (compute_Stilda) {
1972: PetscCall(VecSet(gstash, 0.0));
1973: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1974: PetscCall(VecPlaceArray(lstash, stasharray));
1975: PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1976: PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1977: PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1978: PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1979: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &stasharray));
1980: PetscCall(VecResetArray(lstash));
1981: if (deluxe) {
1982: PetscCall(VecSet(gstash, 0.0));
1983: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1984: PetscCall(VecPlaceArray(lstash, stasharray));
1985: PetscCall(VecScatterBegin(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1986: PetscCall(VecScatterEnd(sstash, lstash, gstash, ADD_VALUES, SCATTER_FORWARD));
1987: PetscCall(VecScatterBegin(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1988: PetscCall(VecScatterEnd(sstash, gstash, lstash, INSERT_VALUES, SCATTER_REVERSE));
1989: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &stasharray));
1990: PetscCall(VecResetArray(lstash));
1991: } else if (!sub_schurs->gdsw) {
1992: PetscScalar *array;
1993: PetscInt cum;
1995: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &array));
1996: cum = 0;
1997: for (i = 0; i < sub_schurs->n_subs; i++) {
1998: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
1999: PetscCall(PetscBLASIntCast(subset_size, &B_N));
2000: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
2001: if (use_potr) {
2002: PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("L", &B_N, array + cum, &B_N, &B_ierr));
2003: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2004: PetscCallBLAS("LAPACKpotri", LAPACKpotri_("L", &B_N, array + cum, &B_N, &B_ierr));
2005: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in POTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2006: } else if (use_sytr) {
2007: PetscCallBLAS("LAPACKsytrf", LAPACKsytrf_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
2008: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2009: PetscCallBLAS("LAPACKsytri", LAPACKsytri_("L", &B_N, array + cum, &B_N, pivots, Bwork, &B_ierr));
2010: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYTRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2011: } else {
2012: PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&B_N, &B_N, array + cum, &B_N, pivots, &B_ierr));
2013: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRF Lapack routine %" PetscBLASInt_FMT, B_ierr);
2014: PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&B_N, array + cum, &B_N, pivots, Bwork, &B_lwork, &B_ierr));
2015: PetscCheck(!B_ierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GETRI Lapack routine %" PetscBLASInt_FMT, B_ierr);
2016: }
2017: PetscCall(PetscLogFlops(1.0 * subset_size * subset_size * subset_size));
2018: PetscCall(PetscFPTrapPop());
2019: cum += subset_size * subset_size;
2020: }
2021: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &array));
2022: PetscCall(PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all));
2023: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
2024: sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
2025: }
2026: }
2027: PetscCall(VecDestroy(&lstash));
2028: PetscCall(VecDestroy(&gstash));
2029: PetscCall(VecScatterDestroy(&sstash));
2031: if (matl_dbg_viewer) {
2032: if (sub_schurs->S_Ej_all) {
2033: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all, "SE"));
2034: PetscCall(MatView(sub_schurs->S_Ej_all, matl_dbg_viewer));
2035: }
2036: if (sub_schurs->sum_S_Ej_all) {
2037: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all, "SSE"));
2038: PetscCall(MatView(sub_schurs->sum_S_Ej_all, matl_dbg_viewer));
2039: }
2040: if (sub_schurs->sum_S_Ej_inv_all) {
2041: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all, "SSEm"));
2042: PetscCall(MatView(sub_schurs->sum_S_Ej_inv_all, matl_dbg_viewer));
2043: }
2044: if (sub_schurs->sum_S_Ej_tilda_all) {
2045: PetscCall(PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all, "SSEt"));
2046: PetscCall(MatView(sub_schurs->sum_S_Ej_tilda_all, matl_dbg_viewer));
2047: }
2048: }
2050: /* when not explicit, we need to set the factor type */
2051: if (sub_schurs->mat_factor_type == MAT_FACTOR_NONE) sub_schurs->mat_factor_type = sub_schurs->is_hermitian ? MAT_FACTOR_CHOLESKY : MAT_FACTOR_LU;
2053: /* free workspace */
2054: if (matl_dbg_viewer) PetscCall(PetscViewerFlush(matl_dbg_viewer));
2055: if (sub_schurs->debug) PetscCallMPI(MPI_Barrier(comm_n));
2056: PetscCall(PetscViewerDestroy(&matl_dbg_viewer));
2057: PetscCall(PetscFree2(Bwork, pivots));
2058: PetscCall(PetscCommDestroy(&comm_n));
2059: PetscCall(PetscFree(all_local_subid_N));
2060: PetscFunctionReturn(PETSC_SUCCESS);
2061: }
2063: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char *prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc, PetscBool gdsw)
2064: {
2065: IS *faces, *edges, *all_cc, vertices;
2066: PetscInt s, i, n_faces, n_edges, n_all_cc;
2067: PetscBool is_sorted, ispardiso, ismumps;
2069: PetscFunctionBegin;
2070: PetscCall(ISSorted(is_I, &is_sorted));
2071: PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_I), PETSC_ERR_PLIB, "IS for I dofs should be shorted");
2072: PetscCall(ISSorted(is_B, &is_sorted));
2073: PetscCheck(is_sorted, PetscObjectComm((PetscObject)is_B), PETSC_ERR_PLIB, "IS for B dofs should be shorted");
2075: /* reset any previous data */
2076: PetscCall(PCBDDCSubSchursReset(sub_schurs));
2078: sub_schurs->gdsw = gdsw;
2079: sub_schurs->graph = graph;
2081: /* get index sets for faces and edges (already sorted by global ordering) */
2082: PetscCall(PCBDDCGraphGetCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
2083: n_all_cc = n_faces + n_edges;
2084: PetscCall(PetscBTCreate(n_all_cc, &sub_schurs->is_edge));
2085: PetscCall(PetscMalloc1(n_all_cc, &all_cc));
2086: n_all_cc = 0;
2087: for (i = 0; i < n_faces; i++) {
2088: PetscCall(ISGetSize(faces[i], &s));
2089: if (!s) continue;
2090: if (copycc) PetscCall(ISDuplicate(faces[i], &all_cc[n_all_cc]));
2091: else {
2092: PetscCall(PetscObjectReference((PetscObject)faces[i]));
2093: all_cc[n_all_cc] = faces[i];
2094: }
2095: n_all_cc++;
2096: }
2097: for (i = 0; i < n_edges; i++) {
2098: PetscCall(ISGetSize(edges[i], &s));
2099: if (!s) continue;
2100: if (copycc) PetscCall(ISDuplicate(edges[i], &all_cc[n_all_cc]));
2101: else {
2102: PetscCall(PetscObjectReference((PetscObject)edges[i]));
2103: all_cc[n_all_cc] = edges[i];
2104: }
2105: PetscCall(PetscBTSet(sub_schurs->is_edge, n_all_cc));
2106: n_all_cc++;
2107: }
2108: PetscCall(PetscObjectReference((PetscObject)vertices));
2109: sub_schurs->is_vertices = vertices;
2110: PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &n_faces, &faces, &n_edges, &edges, &vertices));
2111: sub_schurs->is_dir = NULL;
2112: PetscCall(PCBDDCGraphGetDirichletDofsB(graph, &sub_schurs->is_dir));
2114: /* Determine if MatFactor can be used */
2115: PetscCall(PetscStrallocpy(prefix, &sub_schurs->prefix));
2116: #if defined(PETSC_HAVE_MUMPS)
2117: PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMUMPS, sizeof(sub_schurs->mat_solver_type)));
2118: #elif defined(PETSC_HAVE_MKL_PARDISO)
2119: PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, sizeof(sub_schurs->mat_solver_type)));
2120: #else
2121: PetscCall(PetscStrncpy(sub_schurs->mat_solver_type, MATSOLVERPETSC, sizeof(sub_schurs->mat_solver_type)));
2122: #endif
2123: sub_schurs->mat_factor_type = MAT_FACTOR_NONE;
2124: sub_schurs->is_hermitian = PetscDefined(USE_COMPLEX) ? PETSC_FALSE : PETSC_TRUE; /* Hermitian Cholesky is not supported by PETSc and external packages */
2125: sub_schurs->is_posdef = PETSC_TRUE;
2126: sub_schurs->is_symmetric = PETSC_TRUE;
2127: sub_schurs->debug = PETSC_FALSE;
2128: sub_schurs->restrict_comm = PETSC_FALSE;
2129: PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
2130: PetscCall(PetscOptionsString("-sub_schurs_mat_solver_type", "Specific direct solver to use", NULL, sub_schurs->mat_solver_type, sub_schurs->mat_solver_type, sizeof(sub_schurs->mat_solver_type), NULL));
2131: PetscCall(PetscOptionsEnum("-sub_schurs_mat_factor_type", "Factor type to use. Use MAT_FACTOR_NONE for automatic selection", NULL, MatFactorTypes, (PetscEnum)sub_schurs->mat_factor_type, (PetscEnum *)&sub_schurs->mat_factor_type, NULL));
2132: PetscCall(PetscOptionsBool("-sub_schurs_symmetric", "Symmetric problem", NULL, sub_schurs->is_symmetric, &sub_schurs->is_symmetric, NULL));
2133: PetscCall(PetscOptionsBool("-sub_schurs_hermitian", "Hermitian problem", NULL, sub_schurs->is_hermitian, &sub_schurs->is_hermitian, NULL));
2134: PetscCall(PetscOptionsBool("-sub_schurs_posdef", "Positive definite problem", NULL, sub_schurs->is_posdef, &sub_schurs->is_posdef, NULL));
2135: PetscCall(PetscOptionsBool("-sub_schurs_restrictcomm", "Restrict communicator on active processes only", NULL, sub_schurs->restrict_comm, &sub_schurs->restrict_comm, NULL));
2136: PetscCall(PetscOptionsBool("-sub_schurs_debug", "Debug output", NULL, sub_schurs->debug, &sub_schurs->debug, NULL));
2137: PetscOptionsEnd();
2138: PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMUMPS, &ismumps));
2139: PetscCall(PetscStrcmp(sub_schurs->mat_solver_type, MATSOLVERMKL_PARDISO, &ispardiso));
2140: sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
2142: /* for reals, symmetric and Hermitian are synonyms */
2143: #if !defined(PETSC_USE_COMPLEX)
2144: sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
2145: sub_schurs->is_hermitian = sub_schurs->is_symmetric;
2146: #endif
2148: PetscCall(PetscObjectReference((PetscObject)is_I));
2149: sub_schurs->is_I = is_I;
2150: PetscCall(PetscObjectReference((PetscObject)is_B));
2151: sub_schurs->is_B = is_B;
2152: PetscCall(PetscObjectReference((PetscObject)graph->l2gmap));
2153: sub_schurs->l2gmap = graph->l2gmap;
2154: PetscCall(PetscObjectReference((PetscObject)BtoNmap));
2155: sub_schurs->BtoNmap = BtoNmap;
2156: sub_schurs->n_subs = n_all_cc;
2157: sub_schurs->is_subs = all_cc;
2158: sub_schurs->S_Ej_all = NULL;
2159: sub_schurs->sum_S_Ej_all = NULL;
2160: sub_schurs->sum_S_Ej_inv_all = NULL;
2161: sub_schurs->sum_S_Ej_tilda_all = NULL;
2162: sub_schurs->is_Ej_all = NULL;
2163: PetscFunctionReturn(PETSC_SUCCESS);
2164: }
2166: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
2167: {
2168: PCBDDCSubSchurs schurs_ctx;
2170: PetscFunctionBegin;
2171: PetscCall(PetscNew(&schurs_ctx));
2172: schurs_ctx->n_subs = 0;
2173: *sub_schurs = schurs_ctx;
2174: PetscFunctionReturn(PETSC_SUCCESS);
2175: }
/* Reset a PCBDDCSubSchurs context to its freshly-created state, destroying every
   object and array it owns; the context itself is not freed (see
   PCBDDCSubSchursDestroy). Safe to call with a NULL context. */
PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
{
  PetscFunctionBegin;
  if (!sub_schurs) PetscFunctionReturn(PETSC_SUCCESS);
  /* graph is borrowed (no reference taken in PCBDDCSubSchursInit), so just drop the pointer */
  sub_schurs->graph = NULL;
  PetscCall(PetscFree(sub_schurs->prefix));
  PetscCall(MatDestroy(&sub_schurs->A));
  PetscCall(MatDestroy(&sub_schurs->S));
  PetscCall(ISDestroy(&sub_schurs->is_I));
  PetscCall(ISDestroy(&sub_schurs->is_B));
  PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap));
  PetscCall(ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap));
  PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
  PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
  PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
  PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
  PetscCall(ISDestroy(&sub_schurs->is_Ej_all));
  PetscCall(ISDestroy(&sub_schurs->is_vertices));
  PetscCall(ISDestroy(&sub_schurs->is_dir));
  PetscCall(PetscBTDestroy(&sub_schurs->is_edge));
  /* n_subs is still needed below for the change[] loop; it is cleared last */
  for (PetscInt i = 0; i < sub_schurs->n_subs; i++) PetscCall(ISDestroy(&sub_schurs->is_subs[i]));
  if (sub_schurs->n_subs) PetscCall(PetscFree(sub_schurs->is_subs));
  if (sub_schurs->reuse_solver) PetscCall(PCBDDCReuseSolversReset(sub_schurs->reuse_solver));
  PetscCall(PetscFree(sub_schurs->reuse_solver));
  if (sub_schurs->change) {
    /* NOTE(review): assumes change and change_primal_sub are allocated together
       with n_subs entries each — verify against the setup code */
    for (PetscInt i = 0; i < sub_schurs->n_subs; i++) {
      PetscCall(KSPDestroy(&sub_schurs->change[i]));
      PetscCall(ISDestroy(&sub_schurs->change_primal_sub[i]));
    }
  }
  PetscCall(PetscFree(sub_schurs->change));
  PetscCall(PetscFree(sub_schurs->change_primal_sub));
  sub_schurs->n_subs = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
2213: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs *sub_schurs)
2214: {
2215: PetscFunctionBegin;
2216: PetscCall(PCBDDCSubSchursReset(*sub_schurs));
2217: PetscCall(PetscFree(*sub_schurs));
2218: PetscFunctionReturn(PETSC_SUCCESS);
2219: }
2221: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt *queue_tip, PetscInt n_prev, PetscBT touched, PetscInt *xadj, PetscInt *adjncy, PetscInt *n_added)
2222: {
2223: PetscInt i, j, n;
2225: PetscFunctionBegin;
2226: n = 0;
2227: for (i = -n_prev; i < 0; i++) {
2228: PetscInt start_dof = queue_tip[i];
2229: for (j = xadj[start_dof]; j < xadj[start_dof + 1]; j++) {
2230: PetscInt dof = adjncy[j];
2231: if (!PetscBTLookup(touched, dof)) {
2232: PetscCall(PetscBTSet(touched, dof));
2233: queue_tip[n] = dof;
2234: n++;
2235: }
2236: }
2237: }
2238: *n_added = n;
2239: PetscFunctionReturn(PETSC_SUCCESS);
2240: }