Actual source code: denseqn.c
1: #include <../src/ksp/ksp/utils/lmvm/dense/denseqn.h>
2: #include <../src/ksp/ksp/utils/lmvm/blas_cyclic/blas_cyclic.h>
3: #include <petscblaslapack.h>
4: #include <petscmat.h>
5: #include <petscsys.h>
6: #include <petscsystypes.h>
7: #include <petscis.h>
8: #include <petscoptions.h>
9: #include <petscdevice.h>
10: #include <petsc/private/deviceimpl.h>
12: static PetscErrorCode MatMult_LMVMDQN(Mat, Vec, Vec);
13: static PetscErrorCode MatMult_LMVMDBFGS(Mat, Vec, Vec);
14: static PetscErrorCode MatMult_LMVMDDFP(Mat, Vec, Vec);
15: static PetscErrorCode MatSolve_LMVMDQN(Mat, Vec, Vec);
16: static PetscErrorCode MatSolve_LMVMDBFGS(Mat, Vec, Vec);
17: static PetscErrorCode MatSolve_LMVMDDFP(Mat, Vec, Vec);
19: static inline PetscInt recycle_index(PetscInt m, PetscInt idx)
20: {
21: return idx % m;
22: }
24: static inline PetscInt history_index(PetscInt m, PetscInt num_updates, PetscInt idx)
25: {
26: return (idx - num_updates) + PetscMin(m, num_updates);
27: }
29: static inline PetscInt oldest_update(PetscInt m, PetscInt idx)
30: {
31: return PetscMax(0, idx - m);
32: }
34: static PetscErrorCode MatView_LMVMDQN(Mat B, PetscViewer pv)
35: {
36: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
37: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
39: PetscBool isascii;
41: PetscFunctionBegin;
42: PetscCall(PetscObjectTypeCompare((PetscObject)pv, PETSCVIEWERASCII, &isascii));
43: PetscCall(MatView_LMVM(B, pv));
44: PetscCall(SymBroydenRescaleView(lqn->rescale, pv));
45: if (isascii) PetscCall(PetscViewerASCIIPrintf(pv, "Counts: S x : %" PetscInt_FMT ", S^T x : %" PetscInt_FMT ", Y x : %" PetscInt_FMT ", Y^T x: %" PetscInt_FMT "\n", lqn->S_count, lqn->St_count, lqn->Y_count, lqn->Yt_count));
46: PetscFunctionReturn(PETSC_SUCCESS);
47: }
49: static PetscErrorCode MatLMVMDQNResetDestructive(Mat B)
50: {
51: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
52: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
54: PetscFunctionBegin;
55: PetscCall(MatDestroy(&lqn->HY));
56: PetscCall(MatDestroy(&lqn->BS));
57: PetscCall(MatDestroy(&lqn->StY_triu));
58: PetscCall(MatDestroy(&lqn->YtS_triu));
59: PetscCall(VecDestroy(&lqn->StFprev));
60: PetscCall(VecDestroy(&lqn->Fprev_ref));
61: lqn->Fprev_state = 0;
62: PetscCall(MatDestroy(&lqn->YtS_triu_strict));
63: PetscCall(MatDestroy(&lqn->StY_triu_strict));
64: PetscCall(MatDestroy(&lqn->StBS));
65: PetscCall(MatDestroy(&lqn->YtHY));
66: PetscCall(MatDestroy(&lqn->J));
67: PetscCall(MatDestroy(&lqn->temp_mat));
68: PetscCall(VecDestroy(&lqn->diag_vec));
69: PetscCall(VecDestroy(&lqn->diag_vec_recycle_order));
70: PetscCall(VecDestroy(&lqn->inv_diag_vec));
71: PetscCall(VecDestroy(&lqn->column_work));
72: PetscCall(VecDestroy(&lqn->column_work2));
73: PetscCall(VecDestroy(&lqn->rwork1));
74: PetscCall(VecDestroy(&lqn->rwork2));
75: PetscCall(VecDestroy(&lqn->rwork3));
76: PetscCall(VecDestroy(&lqn->rwork2_local));
77: PetscCall(VecDestroy(&lqn->rwork3_local));
78: PetscCall(VecDestroy(&lqn->cyclic_work_vec));
79: PetscCall(VecDestroyVecs(lmvm->m, &lqn->PQ));
80: PetscCall(PetscFree(lqn->stp));
81: PetscCall(PetscFree(lqn->yts));
82: PetscCall(PetscFree(lqn->ytq));
83: lqn->allocated = PETSC_FALSE;
84: PetscFunctionReturn(PETSC_SUCCESS);
85: }
87: static PetscErrorCode MatReset_LMVMDQN_Internal(Mat B, MatLMVMResetMode mode)
88: {
89: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
90: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
92: PetscFunctionBegin;
93: lqn->watchdog = 0;
94: lqn->needPQ = PETSC_TRUE;
95: lqn->num_updates = 0;
96: lqn->num_mult_updates = 0;
97: if (MatLMVMResetClearsBases(mode)) PetscCall(MatLMVMDQNResetDestructive(B));
98: else {
99: if (lqn->BS) PetscCall(MatZeroEntries(lqn->BS));
100: if (lqn->HY) PetscCall(MatZeroEntries(lqn->HY));
101: if (lqn->StY_triu) { /* Set to identity by default so it is invertible */
102: PetscCall(MatZeroEntries(lqn->StY_triu));
103: PetscCall(MatShift(lqn->StY_triu, 1.0));
104: }
105: if (lqn->YtS_triu) {
106: PetscCall(MatZeroEntries(lqn->YtS_triu));
107: PetscCall(MatShift(lqn->YtS_triu, 1.0));
108: }
109: if (lqn->YtS_triu_strict) PetscCall(MatZeroEntries(lqn->YtS_triu_strict));
110: if (lqn->StY_triu_strict) PetscCall(MatZeroEntries(lqn->StY_triu_strict));
111: if (lqn->StBS) {
112: PetscCall(MatZeroEntries(lqn->StBS));
113: PetscCall(MatShift(lqn->StBS, 1.0));
114: }
115: if (lqn->YtHY) {
116: PetscCall(MatZeroEntries(lqn->YtHY));
117: PetscCall(MatShift(lqn->YtHY, 1.0));
118: }
119: PetscCall(VecDestroy(&lqn->Fprev_ref));
120: lqn->Fprev_state = 0;
121: if (lqn->StFprev) PetscCall(VecZeroEntries(lqn->StFprev));
122: }
123: PetscFunctionReturn(PETSC_SUCCESS);
124: }
126: static PetscErrorCode MatReset_LMVMDQN(Mat B, MatLMVMResetMode mode)
127: {
128: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
129: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
131: PetscFunctionBegin;
132: PetscCall(SymBroydenRescaleReset(B, lqn->rescale, mode));
133: PetscCall(MatReset_LMVMDQN_Internal(B, mode));
134: PetscFunctionReturn(PETSC_SUCCESS);
135: }
137: static PetscErrorCode MatAllocate_LMVMDQN_Internal(Mat B)
138: {
139: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
140: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
142: PetscFunctionBegin;
143: if (!lqn->allocated) {
144: if (lmvm->m > 0) {
145: PetscMPIInt rank;
146: PetscInt n, N, m, M;
147: PetscBool is_dbfgs, is_ddfp, is_dqn;
148: VecType vec_type;
149: MPI_Comm comm = PetscObjectComm((PetscObject)B);
150: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
152: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
153: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
154: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
156: PetscCallMPI(MPI_Comm_rank(comm, &rank));
157: PetscCall(MatGetSize(B, &N, NULL));
158: PetscCall(MatGetLocalSize(B, &n, NULL));
159: M = lmvm->m;
160: m = (rank == 0) ? M : 0;
162: /* For DBFGS: Create data needed for MatSolve() eagerly; data needed for MatMult() will be created on demand
163: * For DDFP : Create data needed for MatMult() eagerly; data needed for MatSolve() will be created on demand
164: * For DQN : Create all data eagerly */
165: PetscCall(VecGetType(lmvm->Xprev, &vec_type));
166: if (is_dqn) {
167: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
168: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
169: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
170: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
171: } else if (is_ddfp) {
172: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->YtS_triu));
173: PetscCall(MatDuplicate(Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->HY));
174: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->diag_vec, &lqn->rwork1));
175: PetscCall(MatCreateVecs(lqn->YtS_triu, &lqn->rwork2, &lqn->rwork3));
176: } else if (is_dbfgs) {
177: PetscCall(MatCreateDenseFromVecType(comm, vec_type, m, m, M, M, -1, NULL, &lqn->StY_triu));
178: PetscCall(MatDuplicate(Sfull, MAT_SHARE_NONZERO_PATTERN, &lqn->BS));
179: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->diag_vec, &lqn->rwork1));
180: PetscCall(MatCreateVecs(lqn->StY_triu, &lqn->rwork2, &lqn->rwork3));
181: } else {
182: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatAllocate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
183: }
184: /* initialize StY_triu and YtS_triu to identity, if they exist, so it is invertible */
185: if (lqn->StY_triu) {
186: PetscCall(MatZeroEntries(lqn->StY_triu));
187: PetscCall(MatShift(lqn->StY_triu, 1.0));
188: }
189: if (lqn->YtS_triu) {
190: PetscCall(MatZeroEntries(lqn->YtS_triu));
191: PetscCall(MatShift(lqn->YtS_triu, 1.0));
192: }
193: if (lqn->use_recursive && (is_dbfgs || is_ddfp)) {
194: PetscCall(VecDuplicateVecs(lmvm->Xprev, lmvm->m, &lqn->PQ));
195: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work2));
196: PetscCall(PetscMalloc1(lmvm->m, &lqn->yts));
197: if (is_dbfgs) PetscCall(PetscMalloc1(lmvm->m, &lqn->stp));
198: else if (is_ddfp) PetscCall(PetscMalloc1(lmvm->m, &lqn->ytq));
199: }
200: PetscCall(VecDuplicate(lqn->rwork2, &lqn->cyclic_work_vec));
201: PetscCall(VecZeroEntries(lqn->rwork1));
202: PetscCall(VecZeroEntries(lqn->rwork2));
203: PetscCall(VecZeroEntries(lqn->rwork3));
204: PetscCall(VecZeroEntries(lqn->diag_vec));
205: }
206: PetscCall(VecDuplicate(lmvm->Xprev, &lqn->column_work));
207: lqn->allocated = PETSC_TRUE;
208: }
209: PetscFunctionReturn(PETSC_SUCCESS);
210: }
212: static PetscErrorCode MatSetUp_LMVMDQN(Mat B)
213: {
214: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
215: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
217: PetscFunctionBegin;
218: PetscCall(MatSetUp_LMVM(B));
219: PetscCall(SymBroydenRescaleInitializeJ0(B, lqn->rescale));
220: PetscCall(MatAllocate_LMVMDQN_Internal(B));
221: PetscFunctionReturn(PETSC_SUCCESS);
222: }
224: static PetscErrorCode MatSetFromOptions_LMVMDQN(Mat B, PetscOptionItems PetscOptionsObject)
225: {
226: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
227: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
228: PetscBool is_dbfgs, is_ddfp, is_dqn;
230: PetscFunctionBegin;
231: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
232: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
233: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
234: PetscCall(MatSetFromOptions_LMVM(B, PetscOptionsObject));
235: PetscOptionsHeadBegin(PetscOptionsObject, "Dense symmetric Broyden method for approximating SPD Jacobian actions");
236: if (is_dqn) {
237: PetscCall(PetscOptionsEnum("-mat_lqn_type", "Implementation options for L-QN", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
238: } else if (is_dbfgs) {
239: PetscCall(PetscOptionsBool("-mat_lbfgs_recursive", "Use recursive formulation for MatMult_LMVMDBFGS, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
240: PetscCall(PetscOptionsEnum("-mat_lbfgs_type", "Implementation options for L-BFGS", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
241: } else if (is_ddfp) {
242: PetscCall(PetscOptionsBool("-mat_ldfp_recursive", "Use recursive formulation for MatSolve_LMVMDDFP, instead of Cholesky", "", lqn->use_recursive, &lqn->use_recursive, NULL));
243: PetscCall(PetscOptionsEnum("-mat_ldfp_type", "Implementation options for L-DFP", "MatLMVMDenseType", MatLMVMDenseTypes, (PetscEnum)lqn->strategy, (PetscEnum *)&lqn->strategy, NULL));
244: } else {
245: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatSetFromOptions_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
246: }
247: PetscCall(SymBroydenRescaleSetFromOptions(B, lqn->rescale, PetscOptionsObject));
248: PetscOptionsHeadEnd();
249: PetscFunctionReturn(PETSC_SUCCESS);
250: }
252: static PetscErrorCode MatDestroy_LMVMDQN(Mat B)
253: {
254: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
255: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
257: PetscFunctionBegin;
258: PetscCall(SymBroydenRescaleDestroy(&lqn->rescale));
259: PetscCall(MatReset_LMVMDQN_Internal(B, MAT_LMVM_RESET_ALL));
260: PetscCall(PetscFree(lqn->workscalar));
261: PetscCall(PetscFree(lmvm->ctx));
262: PetscCall(MatDestroy_LMVM(B));
263: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", NULL));
264: PetscFunctionReturn(PETSC_SUCCESS);
265: }
267: static PetscErrorCode MatUpdate_LMVMDQN(Mat B, Vec X, Vec F)
268: {
269: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
270: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
271: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
272: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
274: PetscBool is_ddfp, is_dbfgs, is_dqn;
275: PetscDeviceContext dctx;
277: PetscFunctionBegin;
278: if (!lmvm->m) PetscFunctionReturn(PETSC_SUCCESS);
279: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
280: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
281: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
282: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
283: if (lmvm->prev_set) {
284: Vec FX[2];
285: PetscScalar dotFX[2];
286: PetscScalar stFprev;
287: PetscScalar curvature, yTy;
288: PetscReal curvtol;
290: /* Compute the new (S = X - Xprev) and (Y = F - Fprev) vectors */
291: PetscCall(VecAYPX(lmvm->Xprev, -1.0, X));
292: /* Test if the updates can be accepted */
293: FX[0] = lmvm->Fprev; /* dotFX[0] = s^T Fprev */
294: FX[1] = F; /* dotFX[1] = s^T F */
295: PetscCall(VecMDot(lmvm->Xprev, 2, FX, dotFX));
296: PetscCall(VecAYPX(lmvm->Fprev, -1.0, F));
297: PetscCall(VecDot(lmvm->Fprev, lmvm->Fprev, &yTy));
298: stFprev = PetscConj(dotFX[0]);
299: curvature = PetscConj(dotFX[1] - dotFX[0]); /* s^T y */
300: if (PetscRealPart(yTy) < lmvm->eps) {
301: curvtol = 0.0;
302: } else {
303: curvtol = lmvm->eps * PetscRealPart(yTy);
304: }
305: if (PetscRealPart(curvature) > curvtol) {
306: PetscInt m = lmvm->m;
307: PetscInt k = lmvm->k;
308: PetscInt h_old = k - oldest_update(m, k);
309: PetscInt h_new = k + 1 - oldest_update(m, k + 1);
310: PetscInt idx = recycle_index(m, k);
312: /* Update is good, accept it */
313: PetscCall(MatUpdateKernel_LMVM(B, lmvm->Xprev, lmvm->Fprev));
314: lqn->num_updates++;
315: lqn->watchdog = 0;
316: lqn->needPQ = PETSC_TRUE;
318: if (h_old == m && lqn->strategy == MAT_LMVM_DENSE_REORDER) {
319: if (is_dqn) {
320: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
321: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
322: } else if (is_dbfgs) {
323: PetscCall(MatMove_LR3(B, lqn->StY_triu, m - 1));
324: } else if (is_ddfp) {
325: PetscCall(MatMove_LR3(B, lqn->YtS_triu, m - 1));
326: } else {
327: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
328: }
329: }
331: if (lqn->use_recursive && (is_dbfgs || is_ddfp)) lqn->yts[idx] = PetscRealPart(curvature);
333: if (is_dqn || is_dbfgs) { /* implement the scheme of Byrd, Nocedal, and Schnabel to save a MatMultTranspose call in the common case the *
334: * H_k is immediately applied to F after begin updated. The S^T y computation can be split up as S^T (F - F_prev) */
335: PetscInt local_n;
336: PetscScalar *StFprev;
337: PetscMemType memtype;
338: PetscInt StYidx;
340: StYidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
341: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
342: PetscCall(VecGetLocalSize(lqn->StFprev, &local_n));
343: PetscCall(VecGetArrayAndMemType(lqn->StFprev, &StFprev, &memtype));
344: if (local_n) {
345: if (PetscMemTypeHost(memtype)) {
346: StFprev[idx] = stFprev;
347: } else {
348: PetscCall(PetscDeviceRegisterMemory(&stFprev, PETSC_MEMTYPE_HOST, 1 * sizeof(stFprev)));
349: PetscCall(PetscDeviceRegisterMemory(StFprev, memtype, local_n * sizeof(*StFprev)));
350: PetscCall(PetscDeviceArrayCopy(dctx, &StFprev[idx], &stFprev, 1));
351: }
352: }
353: PetscCall(VecRestoreArrayAndMemType(lqn->StFprev, &StFprev));
355: {
356: Vec this_sy_col;
357: /* Now StFprev is updated for the new S vector. Write -StFprev into the appropriate row */
358: PetscCall(MatDenseGetColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
359: PetscCall(VecAXPBY(this_sy_col, -1.0, 0.0, lqn->StFprev));
361: /* Now compute the new StFprev */
362: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, lqn->StFprev, 0, h_new));
363: lqn->St_count++;
365: /* Now add StFprev: this_sy_col == S^T (F - Fprev) == S^T y */
366: PetscCall(VecAXPY(this_sy_col, 1.0, lqn->StFprev));
368: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_sy_col, lqn->num_updates, lqn->cyclic_work_vec));
369: PetscCall(MatDenseRestoreColumnVecWrite(lqn->StY_triu, StYidx, &this_sy_col));
370: }
371: }
373: if (is_ddfp || is_dqn) {
374: PetscInt YtSidx;
376: YtSidx = (lqn->strategy == MAT_LMVM_DENSE_REORDER) ? history_index(m, lqn->num_updates, k) : idx;
378: {
379: Vec this_ys_col;
381: PetscCall(MatDenseGetColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
382: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, lmvm->Xprev, this_ys_col, 0, h_new));
383: lqn->Yt_count++;
385: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, this_ys_col, lqn->num_updates, lqn->cyclic_work_vec));
386: PetscCall(MatDenseRestoreColumnVecWrite(lqn->YtS_triu, YtSidx, &this_ys_col));
387: }
388: }
390: if (is_dbfgs || is_dqn) {
391: PetscCall(MatGetDiagonal(lqn->StY_triu, lqn->diag_vec));
392: } else if (is_ddfp) {
393: PetscCall(MatGetDiagonal(lqn->YtS_triu, lqn->diag_vec));
394: } else {
395: SETERRQ(PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_INCOMP, "MatUpdate_LMVMDQN is only available for dense derived types. (DBFGS, DDFP, DQN");
396: }
398: if (lqn->strategy == MAT_LMVM_DENSE_REORDER) {
399: if (!lqn->diag_vec_recycle_order) PetscCall(VecDuplicate(lqn->diag_vec, &lqn->diag_vec_recycle_order));
400: PetscCall(VecCopy(lqn->diag_vec, lqn->diag_vec_recycle_order));
401: PetscCall(VecHistoryOrderToRecycleOrder(B, lqn->diag_vec_recycle_order, lqn->num_updates, lqn->cyclic_work_vec));
402: } else {
403: if (!lqn->diag_vec_recycle_order) {
404: PetscCall(PetscObjectReference((PetscObject)lqn->diag_vec));
405: lqn->diag_vec_recycle_order = lqn->diag_vec;
406: }
407: }
409: PetscCall(SymBroydenRescaleUpdate(B, lqn->rescale));
410: } else {
411: /* Update is bad, skip it */
412: ++lmvm->nrejects;
413: ++lqn->watchdog;
414: PetscInt m = lmvm->m;
415: PetscInt k = lmvm->k;
416: PetscInt h = k - oldest_update(m, k);
418: /* we still have to maintain StFprev */
419: if (!lqn->StFprev) PetscCall(VecDuplicate(lqn->rwork1, &lqn->StFprev));
420: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, lqn->StFprev, 0, h));
421: lqn->St_count++;
422: }
423: }
425: if (lqn->watchdog > lqn->max_seq_rejects) PetscCall(MatLMVMReset(B, PETSC_FALSE));
427: /* Save the solution and function to be used in the next update */
428: PetscCall(VecCopy(X, lmvm->Xprev));
429: PetscCall(VecCopy(F, lmvm->Fprev));
430: PetscCall(PetscObjectReference((PetscObject)F));
431: PetscCall(VecDestroy(&lqn->Fprev_ref));
432: lqn->Fprev_ref = F;
433: PetscCall(PetscObjectStateGet((PetscObject)F, &lqn->Fprev_state));
434: lmvm->prev_set = PETSC_TRUE;
435: PetscFunctionReturn(PETSC_SUCCESS);
436: }
438: static PetscErrorCode MatDestroyThenCopy(Mat src, Mat *dst)
439: {
440: PetscFunctionBegin;
441: PetscCall(MatDestroy(dst));
442: if (src) PetscCall(MatDuplicate(src, MAT_COPY_VALUES, dst));
443: PetscFunctionReturn(PETSC_SUCCESS);
444: }
446: static PetscErrorCode VecDestroyThenCopy(Vec src, Vec *dst)
447: {
448: PetscFunctionBegin;
449: PetscCall(VecDestroy(dst));
450: if (src) {
451: PetscCall(VecDuplicate(src, dst));
452: PetscCall(VecCopy(src, *dst));
453: }
454: PetscFunctionReturn(PETSC_SUCCESS);
455: }
457: static PetscErrorCode MatCopy_LMVMDQN(Mat B, Mat M, MatStructure str)
458: {
459: Mat_LMVM *bdata = (Mat_LMVM *)B->data;
460: Mat_DQN *blqn = (Mat_DQN *)bdata->ctx;
461: Mat_LMVM *mdata = (Mat_LMVM *)M->data;
462: Mat_DQN *mlqn = (Mat_DQN *)mdata->ctx;
463: PetscInt i;
464: PetscBool is_dbfgs, is_ddfp, is_dqn;
466: PetscFunctionBegin;
467: PetscCall(SymBroydenRescaleCopy(blqn->rescale, mlqn->rescale));
468: mlqn->num_updates = blqn->num_updates;
469: mlqn->num_mult_updates = blqn->num_mult_updates;
470: mlqn->dense_type = blqn->dense_type;
471: mlqn->strategy = blqn->strategy;
472: mlqn->S_count = 0;
473: mlqn->St_count = 0;
474: mlqn->Y_count = 0;
475: mlqn->Yt_count = 0;
476: mlqn->watchdog = blqn->watchdog;
477: mlqn->max_seq_rejects = blqn->max_seq_rejects;
478: mlqn->use_recursive = blqn->use_recursive;
479: mlqn->needPQ = blqn->needPQ;
480: if (blqn->allocated) {
481: PetscCall(MatAllocate_LMVMDQN_Internal(M));
482: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDBFGS, &is_dbfgs));
483: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDDFP, &is_ddfp));
484: PetscCall(PetscObjectTypeCompare((PetscObject)B, MATLMVMDQN, &is_dqn));
485: PetscCall(MatDestroyThenCopy(blqn->HY, &mlqn->BS));
486: PetscCall(VecDestroyThenCopy(blqn->StFprev, &mlqn->StFprev));
487: PetscCall(MatDestroyThenCopy(blqn->StY_triu, &mlqn->StY_triu));
488: PetscCall(MatDestroyThenCopy(blqn->StY_triu_strict, &mlqn->StY_triu_strict));
489: PetscCall(MatDestroyThenCopy(blqn->YtS_triu, &mlqn->YtS_triu));
490: PetscCall(MatDestroyThenCopy(blqn->YtS_triu_strict, &mlqn->YtS_triu_strict));
491: PetscCall(MatDestroyThenCopy(blqn->YtHY, &mlqn->YtHY));
492: PetscCall(MatDestroyThenCopy(blqn->StBS, &mlqn->StBS));
493: PetscCall(MatDestroyThenCopy(blqn->J, &mlqn->J));
494: PetscCall(VecDestroyThenCopy(blqn->diag_vec, &mlqn->diag_vec));
495: PetscCall(VecDestroyThenCopy(blqn->diag_vec_recycle_order, &mlqn->diag_vec_recycle_order));
496: PetscCall(VecDestroyThenCopy(blqn->inv_diag_vec, &mlqn->inv_diag_vec));
497: if (blqn->use_recursive && (is_dbfgs || is_ddfp)) {
498: for (i = 0; i < bdata->m; i++) {
499: PetscCall(VecDestroyThenCopy(blqn->PQ[i], &mlqn->PQ[i]));
500: mlqn->yts[i] = blqn->yts[i];
501: if (is_dbfgs) {
502: mlqn->stp[i] = blqn->stp[i];
503: } else if (is_ddfp) {
504: mlqn->ytq[i] = blqn->ytq[i];
505: }
506: }
507: }
508: }
509: PetscCall(PetscObjectReference((PetscObject)blqn->Fprev_ref));
510: PetscCall(VecDestroy(&mlqn->Fprev_ref));
511: mlqn->Fprev_ref = blqn->Fprev_ref;
512: mlqn->Fprev_state = blqn->Fprev_state;
513: PetscFunctionReturn(PETSC_SUCCESS);
514: }
516: static PetscErrorCode MatMult_LMVMDQN(Mat B, Vec X, Vec Z)
517: {
518: PetscFunctionBegin;
519: PetscCall(MatMult_LMVMDDFP(B, X, Z));
520: PetscFunctionReturn(PETSC_SUCCESS);
521: }
523: static PetscErrorCode MatSolve_LMVMDQN(Mat H, Vec F, Vec dX)
524: {
525: PetscFunctionBegin;
526: PetscCall(MatSolve_LMVMDBFGS(H, F, dX));
527: PetscFunctionReturn(PETSC_SUCCESS);
528: }
530: static PetscErrorCode MatLMVMSymBroydenSetDelta_LMVMDQN(Mat B, PetscScalar delta)
531: {
532: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
533: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
535: PetscFunctionBegin;
536: PetscCall(SymBroydenRescaleSetDelta(B, lqn->rescale, PetscAbsReal(PetscRealPart(delta))));
537: PetscFunctionReturn(PETSC_SUCCESS);
538: }
540: /*
541: This dense representation uses Davidon-Fletcher-Powell (DFP) for MatMult,
542: and Broyden-Fletcher-Goldfarb-Shanno (BFGS) for MatSolve. This implementation
543: results in avoiding costly Cholesky factorization, at the cost of duality cap.
544: Please refer to MatLMVMDDFP and MatLMVMDBFGS for more information.
545: */
546: PetscErrorCode MatCreate_LMVMDQN(Mat B)
547: {
548: Mat_LMVM *lmvm;
549: Mat_DQN *lqn;
551: PetscFunctionBegin;
552: PetscCall(MatCreate_LMVM(B));
553: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDQN));
554: PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
555: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
556: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
557: B->ops->view = MatView_LMVMDQN;
558: B->ops->setup = MatSetUp_LMVMDQN;
559: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
560: B->ops->destroy = MatDestroy_LMVMDQN;
562: lmvm = (Mat_LMVM *)B->data;
563: lmvm->ops->reset = MatReset_LMVMDQN;
564: lmvm->ops->update = MatUpdate_LMVMDQN;
565: lmvm->ops->mult = MatMult_LMVMDQN;
566: lmvm->ops->solve = MatSolve_LMVMDQN;
567: lmvm->ops->copy = MatCopy_LMVMDQN;
569: lmvm->ops->multht = lmvm->ops->mult;
570: lmvm->ops->solveht = lmvm->ops->solve;
572: PetscCall(PetscNew(&lqn));
573: lmvm->ctx = (void *)lqn;
574: lqn->allocated = PETSC_FALSE;
575: lqn->use_recursive = PETSC_FALSE;
576: lqn->needPQ = PETSC_FALSE;
577: lqn->watchdog = 0;
578: lqn->max_seq_rejects = lmvm->m / 2;
579: lqn->strategy = MAT_LMVM_DENSE_INPLACE;
581: PetscCall(SymBroydenRescaleCreate(&lqn->rescale));
582: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
583: PetscFunctionReturn(PETSC_SUCCESS);
584: }
586: /*@
587: MatCreateLMVMDQN - Creates a dense representation of the limited-memory
588: Quasi-Newton approximation to a Hessian.
590: Collective
592: Input Parameters:
593: + comm - MPI communicator
594: . n - number of local rows for storage vectors
595: - N - global size of the storage vectors
597: Output Parameter:
598: . B - the matrix
600: Level: advanced
602: Note:
603: It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`
604: paradigm instead of this routine directly.
606: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatCreateLMVMDDFP()`, `MatCreateLMVMDBFGS()`
607: @*/
608: PetscErrorCode MatCreateLMVMDQN(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
609: {
610: PetscFunctionBegin;
611: PetscCall(KSPInitializePackage());
612: PetscCall(MatCreate(comm, B));
613: PetscCall(MatSetSizes(*B, n, n, N, N));
614: PetscCall(MatSetType(*B, MATLMVMDQN));
615: PetscCall(MatSetUp(*B));
616: PetscFunctionReturn(PETSC_SUCCESS);
617: }
619: static PetscErrorCode MatDQNApplyJ0Fwd(Mat B, Vec X, Vec Z)
620: {
621: PetscFunctionBegin;
622: PetscCall(MatLMVMApplyJ0Fwd(B, X, Z));
623: PetscFunctionReturn(PETSC_SUCCESS);
624: }
626: static PetscErrorCode MatDQNApplyJ0Inv(Mat B, Vec F, Vec dX)
627: {
628: PetscFunctionBegin;
629: PetscCall(MatLMVMApplyJ0Inv(B, F, dX));
630: PetscFunctionReturn(PETSC_SUCCESS);
631: }
633: /* This is not Bunch-Kaufman LDLT: here L is strictly lower triangular part of STY */
634: static PetscErrorCode MatGetLDLT(Mat B, Mat result)
635: {
636: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
637: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
638: PetscInt m_local;
640: PetscFunctionBegin;
641: if (!lbfgs->temp_mat) PetscCall(MatDuplicate(lbfgs->YtS_triu_strict, MAT_SHARE_NONZERO_PATTERN, &lbfgs->temp_mat));
642: PetscCall(MatCopy(lbfgs->YtS_triu_strict, lbfgs->temp_mat, SAME_NONZERO_PATTERN));
643: PetscCall(MatDiagonalScale(lbfgs->temp_mat, lbfgs->inv_diag_vec, NULL));
644: PetscCall(MatGetLocalSize(result, &m_local, NULL));
645: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
646: PetscCall(MatConjugate(lbfgs->temp_mat));
647: if (m_local) {
648: Mat temp_local, YtS_local, result_local;
649: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
650: PetscCall(MatDenseGetLocalMatrix(lbfgs->temp_mat, &temp_local));
651: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
652: PetscCall(MatTransposeMatMult(YtS_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
653: }
654: PetscCall(MatConjugate(result));
655: PetscFunctionReturn(PETSC_SUCCESS);
656: }
658: static PetscErrorCode MatLMVMDBFGSUpdateMultData(Mat B)
659: {
660: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
661: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
662: PetscInt m = lmvm->m, m_local;
663: PetscInt k = lmvm->k;
664: PetscInt h = k - oldest_update(m, k);
665: PetscInt j_0;
666: PetscInt prev_oldest;
667: Mat J_local;
668: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
669: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
671: PetscFunctionBegin;
672: if (!lbfgs->YtS_triu_strict) {
673: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->YtS_triu_strict));
674: PetscCall(MatDestroy(&lbfgs->StBS));
675: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->StBS));
676: PetscCall(MatDestroy(&lbfgs->J));
677: PetscCall(MatDuplicate(lbfgs->StY_triu, MAT_SHARE_NONZERO_PATTERN, &lbfgs->J));
678: PetscCall(MatDestroy(&lbfgs->BS));
679: PetscCall(MatDuplicate(Yfull, MAT_SHARE_NONZERO_PATTERN, &lbfgs->BS));
680: PetscCall(MatShift(lbfgs->StBS, 1.0));
681: lbfgs->num_mult_updates = oldest_update(m, k);
682: }
683: if (lbfgs->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
685: /* B_0 may have been updated, we must recompute B_0 S and S^T B_0 S */
686: for (PetscInt j = oldest_update(m, k); j < k; j++) {
687: Vec s_j;
688: Vec Bs_j;
689: Vec StBs_j;
690: PetscInt S_idx = recycle_index(m, j);
691: PetscInt StBS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
693: PetscCall(MatDenseGetColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
694: PetscCall(MatDenseGetColumnVecRead(Sfull, S_idx, &s_j));
695: PetscCall(MatDQNApplyJ0Fwd(B, s_j, Bs_j));
696: PetscCall(MatDenseRestoreColumnVecRead(Sfull, S_idx, &s_j));
697: PetscCall(MatDenseGetColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
698: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, Bs_j, StBs_j, 0, h));
699: lbfgs->St_count++;
700: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, StBs_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
701: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->StBS, StBS_idx, &StBs_j));
702: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->BS, S_idx, &Bs_j));
703: }
704: prev_oldest = oldest_update(m, lbfgs->num_mult_updates);
705: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
706: /* move the YtS entries that have been computed and need to be kept back up */
707: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
709: PetscCall(MatMove_LR3(B, lbfgs->YtS_triu_strict, m_keep));
710: }
711: PetscCall(MatGetLocalSize(lbfgs->YtS_triu_strict, &m_local, NULL));
712: j_0 = PetscMax(lbfgs->num_mult_updates, oldest_update(m, k));
713: for (PetscInt j = j_0; j < k; j++) {
714: PetscInt S_idx = recycle_index(m, j);
715: PetscInt YtS_idx = lbfgs->strategy == MAT_LMVM_DENSE_INPLACE ? S_idx : history_index(m, k, j);
716: Vec s_j, Yts_j;
718: PetscCall(MatDenseGetColumnVecRead(Sfull, S_idx, &s_j));
719: PetscCall(MatDenseGetColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
720: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, s_j, Yts_j, 0, h));
721: lbfgs->Yt_count++;
722: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Yts_j, lbfgs->num_updates, lbfgs->cyclic_work_vec));
723: PetscCall(MatDenseRestoreColumnVecWrite(lbfgs->YtS_triu_strict, YtS_idx, &Yts_j));
724: PetscCall(MatDenseRestoreColumnVecRead(Sfull, S_idx, &s_j));
725: /* zero the corresponding row */
726: if (m_local > 0) {
727: Mat YtS_local, YtS_row;
729: PetscCall(MatDenseGetLocalMatrix(lbfgs->YtS_triu_strict, &YtS_local));
730: PetscCall(MatDenseGetSubMatrix(YtS_local, YtS_idx, YtS_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &YtS_row));
731: PetscCall(MatZeroEntries(YtS_row));
732: PetscCall(MatDenseRestoreSubMatrix(YtS_local, &YtS_row));
733: }
734: }
735: if (!lbfgs->inv_diag_vec) PetscCall(VecDuplicate(lbfgs->diag_vec, &lbfgs->inv_diag_vec));
736: PetscCall(VecCopy(lbfgs->diag_vec, lbfgs->inv_diag_vec));
737: PetscCall(VecReciprocal(lbfgs->inv_diag_vec));
738: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
739: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
740: PetscCall(MatGetLDLT(B, lbfgs->J));
741: PetscCall(MatAXPY(lbfgs->J, 1.0, lbfgs->StBS, SAME_NONZERO_PATTERN));
742: if (m_local) {
743: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
744: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
745: }
746: lbfgs->num_mult_updates = lbfgs->num_updates;
747: PetscFunctionReturn(PETSC_SUCCESS);
748: }
750: /* Solves for
751: * [ I | -S R^{-T} ] [ I | 0 ] [ H_0 | 0 ] [ I | Y ] [ I ]
752: * [-----+---] [-----+---] [---+---] [-------------]
753: * [ Y^T | I ] [ 0 | D ] [ 0 | I ] [ -R^{-1} S^T ] */
755: static PetscErrorCode MatSolve_LMVMDBFGS(Mat H, Vec F, Vec dX)
756: {
757: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
758: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
759: Vec rwork1 = lbfgs->rwork1;
760: PetscInt m = lmvm->m;
761: PetscInt k = lmvm->k;
762: PetscInt h = k - oldest_update(m, k);
763: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
764: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
765: PetscObjectState Fstate;
767: PetscFunctionBegin;
768: VecCheckSameSize(F, 2, dX, 3);
769: VecCheckMatCompatible(H, dX, 3, F, 2);
771: /* Block Version */
772: if (!lbfgs->num_updates) {
773: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
774: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
775: }
777: PetscCall(PetscObjectStateGet((PetscObject)F, &Fstate));
778: if (F == lbfgs->Fprev_ref && Fstate == lbfgs->Fprev_state) {
779: PetscCall(VecCopy(lbfgs->StFprev, rwork1));
780: } else {
781: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, rwork1, 0, h));
782: lbfgs->St_count++;
783: }
785: /* Reordering rwork1, as STY is in history order, while S is in recycled order */
786: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
787: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_FALSE, lbfgs->num_updates, lbfgs->strategy));
788: PetscCall(VecScale(rwork1, -1.0));
789: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
791: PetscCall(VecCopy(F, lbfgs->column_work));
792: PetscCall(MatMultAddColumnRange(Yfull, rwork1, lbfgs->column_work, lbfgs->column_work, 0, h));
793: lbfgs->Y_count++;
795: PetscCall(VecPointwiseMult(rwork1, lbfgs->diag_vec_recycle_order, rwork1));
796: PetscCall(MatDQNApplyJ0Inv(H, lbfgs->column_work, dX));
798: PetscCall(MatMultHermitianTransposeAddColumnRange(Yfull, dX, rwork1, rwork1, 0, h));
799: lbfgs->Yt_count++;
801: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
802: PetscCall(MatUpperTriangularSolveInPlace(H, lbfgs->StY_triu, rwork1, PETSC_TRUE, lbfgs->num_updates, lbfgs->strategy));
803: PetscCall(VecScale(rwork1, -1.0));
804: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(H, rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
806: PetscCall(MatMultAddColumnRange(Sfull, rwork1, dX, dX, 0, h));
807: lbfgs->S_count++;
808: PetscFunctionReturn(PETSC_SUCCESS);
809: }
811: /* Solves for
812: B_0 - [ Y | B_0 S] [ -D | L^T ]^-1 [ Y^T ]
813: [-----+-----------] [---------]
814: [ L | S^T B_0 S ] [ S^T B_0 ]
816: Above is equivalent to
818: B_0 - [ Y | B_0 S] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} L^T ]]^-1 [ Y^T ]
819: [[-----------+---][-----+---][---+-------------]] [---------]
820: [[ -L D^{-1} | I ][ 0 | J ][ 0 | I ]] [ S^T B_0 ]
822: where J = S^T B_0 S + L D^{-1} L^T
824: becomes
826: B_0 - [ Y | B_0 S] [ I | D^{-1} L^T ][ -D^{-1} | 0 ][ I | 0 ] [ Y^T ]
827: [---+------------][----------+--------][----------+---] [---------]
828: [ 0 | I ][ 0 | J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
830: =
832: B_0 + [ Y | B_0 S] [ D^{-1} | 0 ][ I | L^T ][ I | 0 ][ I | 0 ] [ Y^T ]
833: [--------+---][---+-----][---+---------][----------+---] [---------]
834: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ L D^{-1} | I ] [ S^T B_0 ]
836: (Note that YtS_triu_strict is L^T)
837: Byrd, Nocedal, Schnabel 1994
839: Alternative approach: considering the fact that DFP is dual to BFGS, use MatMult of DPF:
840: (See ddfp.c's MatMult_LMVMDDFP)
842: */
843: static PetscErrorCode MatMult_LMVMDBFGS(Mat B, Vec X, Vec Z)
844: {
845: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
846: Mat_DQN *lbfgs = (Mat_DQN *)lmvm->ctx;
847: Mat J_local;
848: PetscInt idx, i, j, m_local, local_n;
849: PetscInt m = lmvm->m;
850: PetscInt k = lmvm->k;
851: PetscInt h = k - oldest_update(m, k);
852: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
853: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
855: PetscFunctionBegin;
856: VecCheckSameSize(X, 2, Z, 3);
857: VecCheckMatCompatible(B, X, 2, Z, 3);
859: /* Cholesky Version */
860: /* Start with the B0 term */
861: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
862: if (!lbfgs->num_updates) PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
864: if (lbfgs->use_recursive) {
865: PetscDeviceContext dctx;
866: PetscMemType memtype;
867: PetscScalar stz, ytx, stp, sjtpi, yjtsi, *workscalar;
868: PetscInt oldest = oldest_update(m, k);
870: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
871: /* Recursive formulation to avoid Cholesky. Not a dense formulation */
872: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, lbfgs->rwork1, 0, h));
873: lbfgs->Yt_count++;
875: PetscCall(VecGetLocalSize(lbfgs->rwork1, &local_n));
877: if (lbfgs->needPQ) {
878: PetscInt oldest = oldest_update(m, k);
879: for (i = oldest; i < k; ++i) {
880: idx = recycle_index(m, i);
881: /* column_work = S[idx] */
882: PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work, idx));
883: PetscCall(MatDQNApplyJ0Fwd(B, lbfgs->column_work, lbfgs->PQ[idx]));
884: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, lbfgs->column_work, lbfgs->rwork3, 0, h));
885: PetscCall(VecGetArrayAndMemType(lbfgs->rwork3, &workscalar, &memtype));
886: for (j = oldest; j < i; ++j) {
887: PetscInt idx_j = recycle_index(m, j);
888: /* Copy yjtsi in device-aware manner */
889: if (local_n) {
890: if (PetscMemTypeHost(memtype)) {
891: yjtsi = workscalar[idx_j];
892: } else {
893: PetscCall(PetscDeviceRegisterMemory(&yjtsi, PETSC_MEMTYPE_HOST, sizeof(yjtsi)));
894: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
895: PetscCall(PetscDeviceArrayCopy(dctx, &yjtsi, &workscalar[idx_j], 1));
896: }
897: }
898: PetscCallMPI(MPI_Bcast(&yjtsi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
899: /* column_work2 = S[j] */
900: PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work2, idx_j));
901: PetscCall(VecDot(lbfgs->PQ[idx], lbfgs->column_work2, &sjtpi));
902: /* column_work2 = Y[j] */
903: PetscCall(MatGetColumnVector(Yfull, lbfgs->column_work2, idx_j));
904: /* Compute the pure BFGS component of the forward product */
905: PetscCall(VecAXPBYPCZ(lbfgs->PQ[idx], -sjtpi / lbfgs->stp[idx_j], yjtsi / lbfgs->yts[idx_j], 1.0, lbfgs->PQ[idx_j], lbfgs->column_work2));
906: }
907: PetscCall(VecDot(lbfgs->PQ[idx], lbfgs->column_work, &stp));
908: lbfgs->stp[idx] = PetscRealPart(stp);
909: }
910: lbfgs->needPQ = PETSC_FALSE;
911: }
913: PetscCall(VecGetArrayAndMemType(lbfgs->rwork1, &workscalar, &memtype));
914: for (i = oldest; i < k; ++i) {
915: idx = recycle_index(m, i);
916: /* Copy stz[i], ytx[i] in device-aware manner */
917: if (local_n) {
918: if (PetscMemTypeHost(memtype)) {
919: ytx = workscalar[idx];
920: } else {
921: PetscCall(PetscDeviceRegisterMemory(&ytx, PETSC_MEMTYPE_HOST, 1 * sizeof(ytx)));
922: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
923: PetscCall(PetscDeviceArrayCopy(dctx, &ytx, &workscalar[idx], 1));
924: }
925: }
926: PetscCallMPI(MPI_Bcast(&ytx, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)B)));
927: /* column_work : S[i], column_work2 : Y[i] */
928: PetscCall(MatGetColumnVector(Sfull, lbfgs->column_work, idx));
929: PetscCall(MatGetColumnVector(Yfull, lbfgs->column_work2, idx));
930: PetscCall(VecDot(Z, lbfgs->column_work, &stz));
931: PetscCall(VecAXPBYPCZ(Z, -stz / lbfgs->stp[idx], ytx / lbfgs->yts[idx], 1.0, lbfgs->PQ[idx], lbfgs->column_work2));
932: }
933: PetscCall(VecRestoreArrayAndMemType(lbfgs->rwork1, &workscalar));
934: } else {
935: PetscCall(MatLMVMDBFGSUpdateMultData(B));
936: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, lbfgs->rwork1, 0, h));
937: lbfgs->Yt_count++;
938: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, Z, lbfgs->rwork2, 0, h));
939: lbfgs->St_count++;
940: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
941: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
942: PetscCall(VecRecycleOrderToHistoryOrder(B, lbfgs->rwork2, lbfgs->num_updates, lbfgs->cyclic_work_vec));
943: }
945: PetscCall(VecPointwiseMult(lbfgs->rwork1, lbfgs->rwork1, lbfgs->inv_diag_vec));
946: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(lbfgs->YtS_triu_strict));
947: PetscCall(MatMultTransposeAdd(lbfgs->YtS_triu_strict, lbfgs->rwork1, lbfgs->rwork2, lbfgs->rwork2));
948: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(lbfgs->YtS_triu_strict));
950: if (!lbfgs->rwork2_local) PetscCall(VecCreateLocalVector(lbfgs->rwork2, &lbfgs->rwork2_local));
951: if (!lbfgs->rwork3_local) PetscCall(VecCreateLocalVector(lbfgs->rwork3, &lbfgs->rwork3_local));
952: PetscCall(VecGetLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
953: PetscCall(VecGetLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
954: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
955: PetscCall(VecGetSize(lbfgs->rwork2_local, &m_local));
956: if (m_local) {
957: PetscCall(MatDenseGetLocalMatrix(lbfgs->J, &J_local));
958: PetscCall(MatSolve(J_local, lbfgs->rwork2_local, lbfgs->rwork3_local));
959: }
960: PetscCall(VecRestoreLocalVector(lbfgs->rwork3, lbfgs->rwork3_local));
961: PetscCall(VecRestoreLocalVectorRead(lbfgs->rwork2, lbfgs->rwork2_local));
962: PetscCall(VecScale(lbfgs->rwork3, -1.0));
964: PetscCall(MatMult(lbfgs->YtS_triu_strict, lbfgs->rwork3, lbfgs->rwork2));
965: PetscCall(VecPointwiseMult(lbfgs->rwork2, lbfgs->rwork2, lbfgs->inv_diag_vec));
966: PetscCall(VecAXPY(lbfgs->rwork1, 1.0, lbfgs->rwork2));
968: if (lbfgs->strategy == MAT_LMVM_DENSE_REORDER) {
969: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork1, lbfgs->num_updates, lbfgs->cyclic_work_vec));
970: PetscCall(VecHistoryOrderToRecycleOrder(B, lbfgs->rwork3, lbfgs->num_updates, lbfgs->cyclic_work_vec));
971: }
973: PetscCall(MatMultAddColumnRange(Yfull, lbfgs->rwork1, Z, Z, 0, h));
974: lbfgs->Y_count++;
975: PetscCall(MatMultAddColumnRange(lbfgs->BS, lbfgs->rwork3, Z, Z, 0, h));
976: lbfgs->S_count++;
977: }
978: PetscFunctionReturn(PETSC_SUCCESS);
979: }
981: /*
982: This dense representation reduces the L-BFGS update to a series of
983: matrix-vector products with dense matrices in lieu of the conventional matrix-free
984: two-loop algorithm.
985: */
986: PetscErrorCode MatCreate_LMVMDBFGS(Mat B)
987: {
988: Mat_LMVM *lmvm;
989: Mat_DQN *lbfgs;
991: PetscFunctionBegin;
992: PetscCall(MatCreate_LMVM(B));
993: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDBFGS));
994: PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
995: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
996: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
997: B->ops->view = MatView_LMVMDQN;
998: B->ops->setup = MatSetUp_LMVMDQN;
999: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1000: B->ops->destroy = MatDestroy_LMVMDQN;
1002: lmvm = (Mat_LMVM *)B->data;
1003: lmvm->ops->reset = MatReset_LMVMDQN;
1004: lmvm->ops->update = MatUpdate_LMVMDQN;
1005: lmvm->ops->mult = MatMult_LMVMDBFGS;
1006: lmvm->ops->solve = MatSolve_LMVMDBFGS;
1007: lmvm->ops->copy = MatCopy_LMVMDQN;
1009: lmvm->ops->multht = lmvm->ops->mult;
1010: lmvm->ops->solveht = lmvm->ops->solve;
1012: PetscCall(PetscNew(&lbfgs));
1013: lmvm->ctx = (void *)lbfgs;
1014: lbfgs->allocated = PETSC_FALSE;
1015: lbfgs->use_recursive = PETSC_TRUE;
1016: lbfgs->needPQ = PETSC_TRUE;
1017: lbfgs->watchdog = 0;
1018: lbfgs->max_seq_rejects = lmvm->m / 2;
1019: lbfgs->strategy = MAT_LMVM_DENSE_INPLACE;
1021: PetscCall(SymBroydenRescaleCreate(&lbfgs->rescale));
1022: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
1023: PetscFunctionReturn(PETSC_SUCCESS);
1024: }
1026: /*@
1027: MatCreateLMVMDBFGS - Creates a dense representation of the limited-memory
1028: Broyden-Fletcher-Goldfarb-Shanno (BFGS) approximation to a Hessian.
1030: Collective
1032: Input Parameters:
1033: + comm - MPI communicator
1034: . n - number of local rows for storage vectors
1035: - N - global size of the storage vectors
1037: Output Parameter:
1038: . B - the matrix
1040: Level: advanced
1042: Note:
1043: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1044: paradigm instead of this routine directly.
1046: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDBFGS`, `MatCreateLMVMBFGS()`
1047: @*/
1048: PetscErrorCode MatCreateLMVMDBFGS(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1049: {
1050: PetscFunctionBegin;
1051: PetscCall(KSPInitializePackage());
1052: PetscCall(MatCreate(comm, B));
1053: PetscCall(MatSetSizes(*B, n, n, N, N));
1054: PetscCall(MatSetType(*B, MATLMVMDBFGS));
1055: PetscCall(MatSetUp(*B));
1056: PetscFunctionReturn(PETSC_SUCCESS);
1057: }
1059: /* here R is strictly upper triangular part of STY */
1060: static PetscErrorCode MatGetRTDR(Mat B, Mat result)
1061: {
1062: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1063: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1064: PetscInt m_local;
1066: PetscFunctionBegin;
1067: if (!ldfp->temp_mat) PetscCall(MatDuplicate(ldfp->StY_triu_strict, MAT_SHARE_NONZERO_PATTERN, &ldfp->temp_mat));
1068: PetscCall(MatCopy(ldfp->StY_triu_strict, ldfp->temp_mat, SAME_NONZERO_PATTERN));
1069: PetscCall(MatDiagonalScale(ldfp->temp_mat, ldfp->inv_diag_vec, NULL));
1070: PetscCall(MatGetLocalSize(result, &m_local, NULL));
1071: // need to conjugate and conjugate again because we have MatTransposeMatMult but not MatHermitianTransposeMatMult()
1072: PetscCall(MatConjugate(ldfp->temp_mat));
1073: if (m_local) {
1074: Mat temp_local, StY_local, result_local;
1075: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1076: PetscCall(MatDenseGetLocalMatrix(ldfp->temp_mat, &temp_local));
1077: PetscCall(MatDenseGetLocalMatrix(result, &result_local));
1078: PetscCall(MatTransposeMatMult(StY_local, temp_local, MAT_REUSE_MATRIX, PETSC_DETERMINE, &result_local));
1079: }
1080: PetscCall(MatConjugate(result));
1081: PetscFunctionReturn(PETSC_SUCCESS);
1082: }
1084: static PetscErrorCode MatLMVMDDFPUpdateSolveData(Mat B)
1085: {
1086: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1087: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1088: PetscInt m = lmvm->m, m_local;
1089: PetscInt k = lmvm->k;
1090: PetscInt h = k - oldest_update(m, k);
1091: PetscInt j_0;
1092: PetscInt prev_oldest;
1093: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
1094: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1095: Mat J_local;
1097: PetscFunctionBegin;
1098: if (!ldfp->StY_triu_strict) {
1099: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->StY_triu_strict));
1100: PetscCall(MatDestroy(&ldfp->YtHY));
1101: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->YtHY));
1102: PetscCall(MatDestroy(&ldfp->J));
1103: PetscCall(MatDuplicate(ldfp->YtS_triu, MAT_SHARE_NONZERO_PATTERN, &ldfp->J));
1104: PetscCall(MatDestroy(&ldfp->HY));
1105: PetscCall(MatDuplicate(Yfull, MAT_SHARE_NONZERO_PATTERN, &ldfp->HY));
1106: PetscCall(MatShift(ldfp->YtHY, 1.0));
1107: ldfp->num_mult_updates = oldest_update(m, k);
1108: }
1109: if (ldfp->num_mult_updates == k) PetscFunctionReturn(PETSC_SUCCESS);
1111: /* H_0 may have been updated, we must recompute H_0 Y and Y^T H_0 Y */
1112: for (PetscInt j = oldest_update(m, k); j < k; j++) {
1113: Vec y_j;
1114: Vec Hy_j;
1115: Vec YtHy_j;
1116: PetscInt Y_idx = recycle_index(m, j);
1117: PetscInt YtHY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1119: PetscCall(MatDenseGetColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1120: PetscCall(MatDenseGetColumnVecRead(Yfull, Y_idx, &y_j));
1121: PetscCall(MatDQNApplyJ0Inv(B, y_j, Hy_j));
1122: PetscCall(MatDenseRestoreColumnVecRead(Yfull, Y_idx, &y_j));
1123: PetscCall(MatDenseGetColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1124: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, Hy_j, YtHy_j, 0, h));
1125: ldfp->Yt_count++;
1126: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, YtHy_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1127: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->YtHY, YtHY_idx, &YtHy_j));
1128: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->HY, Y_idx, &Hy_j));
1129: }
1130: prev_oldest = oldest_update(m, ldfp->num_mult_updates);
1131: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER && prev_oldest < oldest_update(m, k)) {
1132: /* move the YtS entries that have been computed and need to be kept back up */
1133: PetscInt m_keep = m - (oldest_update(m, k) - prev_oldest);
1135: PetscCall(MatMove_LR3(B, ldfp->StY_triu_strict, m_keep));
1136: }
1137: PetscCall(MatGetLocalSize(ldfp->StY_triu_strict, &m_local, NULL));
1138: j_0 = PetscMax(ldfp->num_mult_updates, oldest_update(m, k));
1139: for (PetscInt j = j_0; j < k; j++) {
1140: PetscInt Y_idx = recycle_index(m, j);
1141: PetscInt StY_idx = ldfp->strategy == MAT_LMVM_DENSE_INPLACE ? Y_idx : history_index(m, k, j);
1142: Vec y_j, Sty_j;
1144: PetscCall(MatDenseGetColumnVecRead(Yfull, Y_idx, &y_j));
1145: PetscCall(MatDenseGetColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1146: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, y_j, Sty_j, 0, h));
1147: ldfp->St_count++;
1148: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, Sty_j, ldfp->num_updates, ldfp->cyclic_work_vec));
1149: PetscCall(MatDenseRestoreColumnVecWrite(ldfp->StY_triu_strict, StY_idx, &Sty_j));
1150: PetscCall(MatDenseRestoreColumnVecRead(Yfull, Y_idx, &y_j));
1151: /* zero the corresponding row */
1152: if (m_local > 0) {
1153: Mat StY_local, StY_row;
1155: PetscCall(MatDenseGetLocalMatrix(ldfp->StY_triu_strict, &StY_local));
1156: PetscCall(MatDenseGetSubMatrix(StY_local, StY_idx, StY_idx + 1, PETSC_DECIDE, PETSC_DECIDE, &StY_row));
1157: PetscCall(MatZeroEntries(StY_row));
1158: PetscCall(MatDenseRestoreSubMatrix(StY_local, &StY_row));
1159: }
1160: }
1161: if (!ldfp->inv_diag_vec) PetscCall(VecDuplicate(ldfp->diag_vec, &ldfp->inv_diag_vec));
1162: PetscCall(VecCopy(ldfp->diag_vec, ldfp->inv_diag_vec));
1163: PetscCall(VecReciprocal(ldfp->inv_diag_vec));
1164: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1165: PetscCall(MatSetFactorType(J_local, MAT_FACTOR_NONE));
1166: PetscCall(MatGetRTDR(B, ldfp->J));
1167: PetscCall(MatAXPY(ldfp->J, 1.0, ldfp->YtHY, SAME_NONZERO_PATTERN));
1168: if (m_local) {
1169: PetscCall(MatSetOption(J_local, MAT_SPD, PETSC_TRUE));
1170: PetscCall(MatCholeskyFactor(J_local, NULL, NULL));
1171: }
1172: ldfp->num_mult_updates = ldfp->num_updates;
1173: PetscFunctionReturn(PETSC_SUCCESS);
1174: }
1176: /* Solves for
1178: H_0 - [ S | H_0 Y] [ -D | R.T ]^-1 [ S^T ]
1179: [-----+-----------] [---------]
1180: [ R | Y^T H_0 Y ] [ Y^T H_0 ]
1182: Above is equivalent to
1184: H_0 - [ S | H_0 Y] [[ I | 0 ][ -D | 0 ][ I | -D^{-1} R^T ]]^-1 [ S^T ]
1185: [[-----------+---][----+---][---+-------------]] [---------]
1186: [[ -R D^{-1} | I ][ 0 | J ][ 0 | I ]] [ Y^T H_0 ]
1188: where J = Y^T H_0 Y + R D^{-1} R.T
1190: becomes
1192: H_0 - [ S | H_0 Y] [ I | D^{-1} R^T ][ -D^{-1} | 0 ][ I | 0 ] [ S^T ]
1193: [---+------------][----------+--------][----------+---] [---------]
1194: [ 0 | I ][ 0 | J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1196: =
1198: H_0 + [ S | H_0 Y] [ D^{-1} | 0 ][ I | R^T ][ I | 0 ][ I | 0 ] [ S^T ]
1199: [--------+---][---+-----][---+---------][----------+---] [---------]
1200: [ 0 | I ][ 0 | I ][ 0 | -J^{-1} ][ R D^{-1} | I ] [ Y^T H_0 ]
1202: (Note that StY_triu_strict is R)
1203: Byrd, Nocedal, Schnabel 1994
1205: */
1206: static PetscErrorCode MatSolve_LMVMDDFP(Mat H, Vec F, Vec dX)
1207: {
1208: Mat_LMVM *lmvm = (Mat_LMVM *)H->data;
1209: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1210: PetscInt m = lmvm->m;
1211: PetscInt k = lmvm->k;
1212: PetscInt h = k - oldest_update(m, k);
1213: PetscInt idx, i, j, local_n;
1214: PetscInt m_local;
1215: Mat J_local;
1216: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
1217: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1219: PetscFunctionBegin;
1220: VecCheckSameSize(F, 2, dX, 3);
1221: VecCheckMatCompatible(H, dX, 3, F, 2);
1223: /* Cholesky Version */
1224: /* Start with the B0 term */
1225: PetscCall(MatDQNApplyJ0Inv(H, F, dX));
1226: if (!ldfp->num_updates) PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1228: if (ldfp->use_recursive) {
1229: PetscDeviceContext dctx;
1230: PetscMemType memtype;
1231: PetscScalar stf, ytx, ytq, yjtqi, sjtyi, *workscalar;
1233: PetscCall(PetscDeviceContextGetCurrentContext(&dctx));
1234: /* Recursive formulation to avoid Cholesky. Not a dense formulation */
1235: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, ldfp->rwork1, 0, h));
1236: ldfp->Yt_count++;
1238: PetscCall(VecGetLocalSize(ldfp->rwork1, &local_n));
1240: PetscInt oldest = oldest_update(m, k);
1242: if (ldfp->needPQ) {
1243: PetscInt oldest = oldest_update(m, k);
1244: for (i = oldest; i < k; ++i) {
1245: idx = recycle_index(m, i);
1246: /* column_work = S[idx] */
1247: PetscCall(MatGetColumnVector(Yfull, ldfp->column_work, idx));
1248: PetscCall(MatDQNApplyJ0Inv(H, ldfp->column_work, ldfp->PQ[idx]));
1249: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, ldfp->column_work, ldfp->rwork3, 0, h));
1250: PetscCall(VecGetArrayAndMemType(ldfp->rwork3, &workscalar, &memtype));
1251: for (j = oldest; j < i; ++j) {
1252: PetscInt idx_j = recycle_index(m, j);
1253: /* Copy sjtyi in device-aware manner */
1254: if (local_n) {
1255: if (PetscMemTypeHost(memtype)) {
1256: sjtyi = workscalar[idx_j];
1257: } else {
1258: PetscCall(PetscDeviceRegisterMemory(&sjtyi, PETSC_MEMTYPE_HOST, 1 * sizeof(sjtyi)));
1259: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1260: PetscCall(PetscDeviceArrayCopy(dctx, &sjtyi, &workscalar[idx_j], 1));
1261: }
1262: }
1263: PetscCallMPI(MPI_Bcast(&sjtyi, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1264: /* column_work2 = Y[j] */
1265: PetscCall(MatGetColumnVector(Yfull, ldfp->column_work2, idx_j));
1266: PetscCall(VecDot(ldfp->PQ[idx], ldfp->column_work2, &yjtqi));
1267: /* column_work2 = Y[j] */
1268: PetscCall(MatGetColumnVector(Sfull, ldfp->column_work2, idx_j));
1269: /* Compute the pure BFGS component of the forward product */
1270: PetscCall(VecAXPBYPCZ(ldfp->PQ[idx], -yjtqi / ldfp->ytq[idx_j], sjtyi / ldfp->yts[idx_j], 1.0, ldfp->PQ[idx_j], ldfp->column_work2));
1271: }
1272: PetscCall(VecDot(ldfp->PQ[idx], ldfp->column_work, &ytq));
1273: ldfp->ytq[idx] = PetscRealPart(ytq);
1274: }
1275: ldfp->needPQ = PETSC_FALSE;
1276: }
1278: PetscCall(VecGetArrayAndMemType(ldfp->rwork1, &workscalar, &memtype));
1279: for (i = oldest; i < k; ++i) {
1280: idx = recycle_index(m, i);
1281: /* Copy stz[i], ytx[i] in device-aware manner */
1282: if (local_n) {
1283: if (PetscMemTypeHost(memtype)) {
1284: stf = workscalar[idx];
1285: } else {
1286: PetscCall(PetscDeviceRegisterMemory(&stf, PETSC_MEMTYPE_HOST, sizeof(stf)));
1287: PetscCall(PetscDeviceRegisterMemory(workscalar, memtype, local_n * sizeof(*workscalar)));
1288: PetscCall(PetscDeviceArrayCopy(dctx, &stf, &workscalar[idx], 1));
1289: }
1290: }
1291: PetscCallMPI(MPI_Bcast(&stf, 1, MPIU_SCALAR, 0, PetscObjectComm((PetscObject)H)));
1292: /* column_work : S[i], column_work2 : Y[i] */
1293: PetscCall(MatGetColumnVector(Sfull, ldfp->column_work, idx));
1294: PetscCall(MatGetColumnVector(Yfull, ldfp->column_work2, idx));
1295: PetscCall(VecDot(dX, ldfp->column_work2, &ytx));
1296: PetscCall(VecAXPBYPCZ(dX, -ytx / ldfp->ytq[idx], stf / ldfp->yts[idx], 1.0, ldfp->PQ[idx], ldfp->column_work));
1297: }
1298: PetscCall(VecRestoreArrayAndMemType(ldfp->rwork1, &workscalar));
1299: } else {
1300: PetscCall(MatLMVMDDFPUpdateSolveData(H));
1301: PetscCall(MatMultHermitianTransposeColumnRange(Sfull, F, ldfp->rwork1, 0, h));
1302: ldfp->St_count++;
1303: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, dX, ldfp->rwork2, 0, h));
1304: ldfp->Yt_count++;
1305: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1306: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1307: PetscCall(VecRecycleOrderToHistoryOrder(H, ldfp->rwork2, ldfp->num_updates, ldfp->cyclic_work_vec));
1308: }
1310: PetscCall(VecPointwiseMult(ldfp->rwork3, ldfp->rwork1, ldfp->inv_diag_vec));
1311: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(ldfp->StY_triu_strict));
1312: PetscCall(MatMultTransposeAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork2, ldfp->rwork2));
1313: if (PetscDefined(USE_COMPLEX)) PetscCall(MatConjugate(ldfp->StY_triu_strict));
1315: if (!ldfp->rwork2_local) PetscCall(VecCreateLocalVector(ldfp->rwork2, &ldfp->rwork2_local));
1316: if (!ldfp->rwork3_local) PetscCall(VecCreateLocalVector(ldfp->rwork3, &ldfp->rwork3_local));
1317: PetscCall(VecGetLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1318: PetscCall(VecGetLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1319: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1320: PetscCall(VecGetSize(ldfp->rwork2_local, &m_local));
1321: if (m_local) {
1322: Mat J_local;
1324: PetscCall(MatDenseGetLocalMatrix(ldfp->J, &J_local));
1325: PetscCall(MatSolve(J_local, ldfp->rwork2_local, ldfp->rwork3_local));
1326: }
1327: PetscCall(VecRestoreLocalVector(ldfp->rwork3, ldfp->rwork3_local));
1328: PetscCall(VecRestoreLocalVectorRead(ldfp->rwork2, ldfp->rwork2_local));
1329: PetscCall(VecScale(ldfp->rwork3, -1.0));
1331: PetscCall(MatMultAdd(ldfp->StY_triu_strict, ldfp->rwork3, ldfp->rwork1, ldfp->rwork1));
1332: PetscCall(VecPointwiseMult(ldfp->rwork1, ldfp->rwork1, ldfp->inv_diag_vec));
1334: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) {
1335: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1336: PetscCall(VecHistoryOrderToRecycleOrder(H, ldfp->rwork3, ldfp->num_updates, ldfp->cyclic_work_vec));
1337: }
1339: PetscCall(MatMultAddColumnRange(Sfull, ldfp->rwork1, dX, dX, 0, h));
1340: ldfp->S_count++;
1341: PetscCall(MatMultAddColumnRange(ldfp->HY, ldfp->rwork3, dX, dX, 0, h));
1342: ldfp->Y_count++;
1343: }
1344: PetscFunctionReturn(PETSC_SUCCESS);
1345: }
1347: /* Solves for
1348: (Theorem 1, Erway, Jain, and Marcia, 2013)
1350: B_0 - [ Y | B_0 S] [ -R^{-T} (D + S^T B_0 S) R^{-1} | R^{-T} ] [ Y^T ]
1351: ---------------------------------+--------] [---------]
1352: [ R^{-1} | 0 ] [ S^T B_0 ]
1354: (Note: R above is right triangular part of YTS)
1355: which becomes,
1357: [ I | -Y L^{-T} ] [ I | 0 ] [ B_0 | 0 ] [ I | S ] [ I ]
1358: [-----+---] [-----+---] [---+---] [-------------]
1359: [ S^T | I ] [ 0 | D ] [ 0 | I ] [ -L^{-1} Y^T ]
1361: (Note: L above is right triangular part of STY)
1363: */
1364: static PetscErrorCode MatMult_LMVMDDFP(Mat B, Vec X, Vec Z)
1365: {
1366: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1367: Mat_DQN *ldfp = (Mat_DQN *)lmvm->ctx;
1368: Vec rwork1 = ldfp->rwork1;
1369: PetscInt m = lmvm->m;
1370: PetscInt k = lmvm->k;
1371: PetscInt h = k - oldest_update(m, k);
1372: Mat Sfull = lmvm->basis[LMBASIS_S]->vecs;
1373: Mat Yfull = lmvm->basis[LMBASIS_Y]->vecs;
1374: PetscObjectState Xstate;
1376: PetscFunctionBegin;
1377: VecCheckSameSize(X, 2, Z, 3);
1378: VecCheckMatCompatible(B, X, 2, Z, 3);
1380: /* DFP Version. Erway, Jain, Marcia, 2013, Theorem 1 */
1381: /* Block Version */
1382: if (!ldfp->num_updates) {
1383: PetscCall(MatDQNApplyJ0Fwd(B, X, Z));
1384: PetscFunctionReturn(PETSC_SUCCESS); /* No updates stored yet */
1385: }
1387: PetscCall(PetscObjectStateGet((PetscObject)X, &Xstate));
1388: PetscCall(MatMultHermitianTransposeColumnRange(Yfull, X, rwork1, 0, h));
1390: /* Reordering rwork1, as STY is in history order, while Y is in recycled order */
1391: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1392: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_FALSE, ldfp->num_updates, ldfp->strategy));
1393: PetscCall(VecScale(rwork1, -1.0));
1394: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1396: PetscCall(VecCopy(X, ldfp->column_work));
1397: PetscCall(MatMultAddColumnRange(Sfull, rwork1, ldfp->column_work, ldfp->column_work, 0, h));
1398: ldfp->S_count++;
1400: PetscCall(VecPointwiseMult(rwork1, ldfp->diag_vec_recycle_order, rwork1));
1401: PetscCall(MatDQNApplyJ0Fwd(B, ldfp->column_work, Z));
1403: PetscCall(MatMultHermitianTransposeAddColumnRange(Sfull, Z, rwork1, rwork1, 0, h));
1404: ldfp->St_count++;
1406: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecRecycleOrderToHistoryOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1407: PetscCall(MatUpperTriangularSolveInPlace(B, ldfp->YtS_triu, rwork1, PETSC_TRUE, ldfp->num_updates, ldfp->strategy));
1408: PetscCall(VecScale(rwork1, -1.0));
1409: if (ldfp->strategy == MAT_LMVM_DENSE_REORDER) PetscCall(VecHistoryOrderToRecycleOrder(B, rwork1, ldfp->num_updates, ldfp->cyclic_work_vec));
1411: PetscCall(MatMultAddColumnRange(Yfull, rwork1, Z, Z, 0, h));
1412: ldfp->Y_count++;
1413: PetscFunctionReturn(PETSC_SUCCESS);
1414: }
1416: /*
1417: This dense representation reduces the L-DFP update to a series of
1418: matrix-vector products with dense matrices in lieu of the conventional
1419: matrix-free two-loop algorithm.
1420: */
1421: PetscErrorCode MatCreate_LMVMDDFP(Mat B)
1422: {
1423: Mat_LMVM *lmvm;
1424: Mat_DQN *ldfp;
1426: PetscFunctionBegin;
1427: PetscCall(MatCreate_LMVM(B));
1428: PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATLMVMDDFP));
1429: PetscCall(MatSetOption(B, MAT_HERMITIAN, PETSC_TRUE));
1430: PetscCall(MatSetOption(B, MAT_SPD, PETSC_TRUE));
1431: PetscCall(MatSetOption(B, MAT_SPD_ETERNAL, PETSC_TRUE));
1432: B->ops->view = MatView_LMVMDQN;
1433: B->ops->setup = MatSetUp_LMVMDQN;
1434: B->ops->setfromoptions = MatSetFromOptions_LMVMDQN;
1435: B->ops->destroy = MatDestroy_LMVMDQN;
1437: lmvm = (Mat_LMVM *)B->data;
1438: lmvm->ops->reset = MatReset_LMVMDQN;
1439: lmvm->ops->update = MatUpdate_LMVMDQN;
1440: lmvm->ops->mult = MatMult_LMVMDDFP;
1441: lmvm->ops->solve = MatSolve_LMVMDDFP;
1442: lmvm->ops->copy = MatCopy_LMVMDQN;
1444: lmvm->ops->multht = lmvm->ops->mult;
1445: lmvm->ops->solveht = lmvm->ops->solve;
1447: PetscCall(PetscNew(&ldfp));
1448: lmvm->ctx = (void *)ldfp;
1449: ldfp->allocated = PETSC_FALSE;
1450: ldfp->watchdog = 0;
1451: ldfp->max_seq_rejects = lmvm->m / 2;
1452: ldfp->strategy = MAT_LMVM_DENSE_INPLACE;
1453: ldfp->use_recursive = PETSC_TRUE;
1454: ldfp->needPQ = PETSC_TRUE;
1456: PetscCall(SymBroydenRescaleCreate(&ldfp->rescale));
1457: PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatLMVMSymBroydenSetDelta_C", MatLMVMSymBroydenSetDelta_LMVMDQN));
1458: PetscFunctionReturn(PETSC_SUCCESS);
1459: }
1461: /*@
1462: MatCreateLMVMDDFP - Creates a dense representation of the limited-memory
1463: Davidon-Fletcher-Powell (DFP) approximation to a Hessian.
1465: Collective
1467: Input Parameters:
1468: + comm - MPI communicator
1469: . n - number of local rows for storage vectors
1470: - N - global size of the storage vectors
1472: Output Parameter:
1473: . B - the matrix
1475: Level: advanced
1477: Note:
1478: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions()
1479: paradigm instead of this routine directly.
1481: .seealso: `MatCreate()`, `MATLMVM`, `MATLMVMDDFP`, `MatCreateLMVMDFP()`
1482: @*/
1483: PetscErrorCode MatCreateLMVMDDFP(MPI_Comm comm, PetscInt n, PetscInt N, Mat *B)
1484: {
1485: PetscFunctionBegin;
1486: PetscCall(KSPInitializePackage());
1487: PetscCall(MatCreate(comm, B));
1488: PetscCall(MatSetSizes(*B, n, n, N, N));
1489: PetscCall(MatSetType(*B, MATLMVMDDFP));
1490: PetscCall(MatSetUp(*B));
1491: PetscFunctionReturn(PETSC_SUCCESS);
1492: }
1494: /*@
1495: MatLMVMDenseSetType - Sets the memory storage type for dense `MATLMVM`
1497: Input Parameters:
1498: + B - the `MATLMVM` matrix
1499: - type - scale type, see `MatLMVMDenseSetType`
1501: Options Database Keys:
1502: + -mat_lqn_type (reorder|inplace) - set the strategy
1503: . -mat_lbfgs_type (reorder|inplace) - set the strategy
1504: - -mat_ldfp_type (reorder|inplace) - set the strategy
1506: Level: intermediate
1508: MatLMVMDenseTypes\:
1509: + `MAT_LMVM_DENSE_REORDER` - reorders memory to minimize kernel launch
1510: - `MAT_LMVM_DENSE_INPLACE` - launches kernel inplace to minimize memory movement
1512: .seealso: [](ch_ksp), `MATLMVMDQN`, `MATLMVMDBFGS`, `MATLMVMDDFP`, `MatLMVMDenseType`
1513: @*/
1514: PetscErrorCode MatLMVMDenseSetType(Mat B, MatLMVMDenseType type)
1515: {
1516: Mat_LMVM *lmvm = (Mat_LMVM *)B->data;
1517: Mat_DQN *lqn = (Mat_DQN *)lmvm->ctx;
1519: PetscFunctionBegin;
1521: lqn->strategy = type;
1522: PetscFunctionReturn(PETSC_SUCCESS);
1523: }