Actual source code: sbaijfact.c


  2: #include <../src/mat/impls/baij/seq/baij.h>
  3: #include <../src/mat/impls/sbaij/seq/sbaij.h>
  4: #include <petsc/private/kernels/blockinvert.h>
  5: #include <petscis.h>

  7: PetscErrorCode MatGetInertia_SeqSBAIJ(Mat F,PetscInt *nneg,PetscInt *nzero,PetscInt *npos)
  8: {
  9:   Mat_SeqSBAIJ *fact=(Mat_SeqSBAIJ*)F->data;
 10:   MatScalar    *dd=fact->a;
 11:   PetscInt     mbs=fact->mbs,bs=F->rmap->bs,i,nneg_tmp,npos_tmp,*fi=fact->diag;

 14:   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for bs: %D >1 yet",bs);

 16:   nneg_tmp = 0; npos_tmp = 0;
 17:   for (i=0; i<mbs; i++) {
 18:     if (PetscRealPart(dd[*fi]) > 0.0) npos_tmp++;
 19:     else if (PetscRealPart(dd[*fi]) < 0.0) nneg_tmp++;
 20:     fi++;
 21:   }
 22:   if (nneg)  *nneg  = nneg_tmp;
 23:   if (npos)  *npos  = npos_tmp;
 24:   if (nzero) *nzero = mbs - nneg_tmp - npos_tmp;
 25:   return(0);
 26: }

 28: /*
 29:   Symbolic U^T*D*U factorization for SBAIJ format. Modified from SSF of YSMP.
 30:   Use Modified Sparse Row (MSR) storage for u and ju. See page 85, "Iterative Methods ..." by Saad.
 31: */
 32: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(Mat F,Mat A,IS perm,const MatFactorInfo *info)
 33: {
 34:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b;
 36:   const PetscInt *rip,*ai,*aj;
 37:   PetscInt       i,mbs = a->mbs,*jutmp,bs = A->rmap->bs,bs2=a->bs2;
 38:   PetscInt       m,reallocs = 0,prow;
 39:   PetscInt       *jl,*q,jmin,jmax,juidx,nzk,qm,*iu,*ju,k,j,vj,umax,maxadd;
 40:   PetscReal      f = info->fill;
 41:   PetscBool      perm_identity;

 44:   /* check whether perm is the identity mapping */
 45:   ISIdentity(perm,&perm_identity);
 46:   ISGetIndices(perm,&rip);

 48:   if (perm_identity) { /* without permutation */
 49:     a->permute = PETSC_FALSE;

 51:     ai = a->i; aj = a->j;
 52:   } else {            /* non-trivial permutation */
 53:     a->permute = PETSC_TRUE;

 55:     MatReorderingSeqSBAIJ(A,perm);

 57:     ai = a->inew; aj = a->jnew;
 58:   }

 60:   /* initialization */
 61:   PetscMalloc1(mbs+1,&iu);
 62:   umax  = (PetscInt)(f*ai[mbs] + 1); umax += mbs + 1;
 63:   PetscMalloc1(umax,&ju);
 64:   iu[0] = mbs+1;
 65:   juidx = mbs + 1; /* index for ju */
 66:   /* jl linked list for pivot row -- linked list for col index */
 67:   PetscMalloc2(mbs,&jl,mbs,&q);
 68:   for (i=0; i<mbs; i++) {
 69:     jl[i] = mbs;
 70:     q[i]  = 0;
 71:   }

 73:   /* for each row k */
 74:   for (k=0; k<mbs; k++) {
 75:     for (i=0; i<mbs; i++) q[i] = 0;  /* to be removed! */
 76:     nzk  = 0; /* num. of nz blocks in k-th block row with diagonal block excluded */
 77:     q[k] = mbs;
 78:     /* initialize nonzero structure of k-th row to row rip[k] of A */
 79:     jmin = ai[rip[k]] +1; /* exclude diag[k] */
 80:     jmax = ai[rip[k]+1];
 81:     for (j=jmin; j<jmax; j++) {
 82:       vj = rip[aj[j]]; /* col. value */
 83:       if (vj > k) {
 84:         qm = k;
 85:         do {
 86:           m = qm; qm = q[m];
 87:         } while (qm < vj);
 88:         if (qm == vj) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Duplicate entry in A\n");
 89:         nzk++;
 90:         q[m]  = vj;
 91:         q[vj] = qm;
 92:       } /* if (vj > k) */
 93:     } /* for (j=jmin; j<jmax; j++) */

 95:     /* modify nonzero structure of k-th row by computing fill-in
 96:        for each row i to be merged in */
 97:     prow = k;
 98:     prow = jl[prow]; /* next pivot row (== mbs for symbolic factorization) */

100:     while (prow < k) {
101:       /* merge row prow into k-th row */
102:       jmin = iu[prow] + 1; jmax = iu[prow+1];
103:       qm   = k;
104:       for (j=jmin; j<jmax; j++) {
105:         vj = ju[j];
106:         do {
107:           m = qm; qm = q[m];
108:         } while (qm < vj);
109:         if (qm != vj) {
110:           nzk++; q[m] = vj; q[vj] = qm; qm = vj;
111:         }
112:       }
113:       prow = jl[prow]; /* next pivot row */
114:     }

116:     /* add k to row list for first nonzero element in k-th row */
117:     if (nzk > 0) {
118:       i     = q[k]; /* col value of first nonzero element in U(k, k+1:mbs-1) */
119:       jl[k] = jl[i]; jl[i] = k;
120:     }
121:     iu[k+1] = iu[k] + nzk;

123:     /* allocate more space to ju if needed */
124:     if (iu[k+1] > umax) {
125:       /* estimate how much additional space we will need */
126:       /* use the strategy suggested by David Hysom <hysom@perch-t.icase.edu> */
127:       /* just double the memory each time */
128:       maxadd = umax;
129:       if (maxadd < nzk) maxadd = (mbs-k)*(nzk+1)/2;
130:       umax += maxadd;

132:       /* allocate a longer ju */
133:       PetscMalloc1(umax,&jutmp);
134:       PetscArraycpy(jutmp,ju,iu[k]);
135:       PetscFree(ju);
136:       ju   = jutmp;
137:       reallocs++; /* count how many times we realloc */
138:     }

140:     /* save nonzero structure of k-th row in ju */
141:     i=k;
142:     while (nzk--) {
143:       i           = q[i];
144:       ju[juidx++] = i;
145:     }
146:   }

148: #if defined(PETSC_USE_INFO)
149:   if (ai[mbs] != 0) {
150:     PetscReal af = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
151:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
152:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
153:     PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);
154:     PetscInfo(A,"for best performance.\n");
155:   } else {
156:     PetscInfo(A,"Empty matrix\n");
157:   }
158: #endif

160:   ISRestoreIndices(perm,&rip);
161:   PetscFree2(jl,q);

163:   /* put together the new matrix */
164:   MatSeqSBAIJSetPreallocation(F,bs,MAT_SKIP_ALLOCATION,NULL);

166:   /* PetscLogObjectParent((PetscObject)B,(PetscObject)iperm); */
167:   b                = (Mat_SeqSBAIJ*)(F)->data;
168:   b->singlemalloc  = PETSC_FALSE;
169:   b->free_a        = PETSC_TRUE;
170:   b->free_ij       = PETSC_TRUE;

172:   PetscMalloc1((iu[mbs]+1)*bs2,&b->a);
173:   b->j    = ju;
174:   b->i    = iu;
175:   b->diag = NULL;
176:   b->ilen = NULL;
177:   b->imax = NULL;
178:   b->row  = perm;

180:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

182:   PetscObjectReference((PetscObject)perm);

184:   b->icol = perm;
185:   PetscObjectReference((PetscObject)perm);
186:   PetscMalloc1(bs*mbs+bs,&b->solve_work);
187:   /* In b structure:  Free imax, ilen, old a, old j.
188:      Allocate idnew, solve_work, new a, new j */
189:   PetscLogObjectMemory((PetscObject)F,(iu[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
190:   b->maxnz = b->nz = iu[mbs];

192:   (F)->info.factor_mallocs   = reallocs;
193:   (F)->info.fill_ratio_given = f;
194:   if (ai[mbs] != 0) {
195:     (F)->info.fill_ratio_needed = ((PetscReal)iu[mbs])/((PetscReal)ai[mbs]);
196:   } else {
197:     (F)->info.fill_ratio_needed = 0.0;
198:   }
199:   MatSeqSBAIJSetNumericFactorization_inplace(F,perm_identity);
200:   return(0);
201: }
202: /*
203:     Symbolic U^T*D*U factorization for SBAIJ format.
204:     See MatICCFactorSymbolic_SeqAIJ() for description of its data structure.
205: */
206: #include <petscbt.h>
207: #include <../src/mat/utils/freespace.h>
208: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
209: {
210:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
211:   Mat_SeqSBAIJ       *b;
212:   PetscErrorCode     ierr;
213:   PetscBool          perm_identity,missing;
214:   PetscReal          fill = info->fill;
215:   const PetscInt     *rip,*ai=a->i,*aj=a->j;
216:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow;
217:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
218:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr,*udiag;
219:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
220:   PetscBT            lnkbt;

223:   if (A->rmap->n != A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Must be square matrix, rows %D columns %D",A->rmap->n,A->cmap->n);
224:   MatMissingDiagonal(A,&missing,&i);
225:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
226:   if (bs > 1) {
227:     MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(fact,A,perm,info);
228:     return(0);
229:   }

231:   /* check whether perm is the identity mapping */
232:   ISIdentity(perm,&perm_identity);
233:   if (!perm_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
234:   a->permute = PETSC_FALSE;
235:   ISGetIndices(perm,&rip);

237:   /* initialization */
238:   PetscMalloc1(mbs+1,&ui);
239:   PetscMalloc1(mbs+1,&udiag);
240:   ui[0] = 0;

242:   /* jl: linked list for storing indices of the pivot rows
243:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
244:   PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);
245:   for (i=0; i<mbs; i++) {
246:     jl[i] = mbs; il[i] = 0;
247:   }

249:   /* create and initialize a linked list for storing column indices of the active row k */
250:   nlnk = mbs + 1;
251:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

253:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
254:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space);
255:   current_space = free_space;

257:   for (k=0; k<mbs; k++) {  /* for each active row k */
258:     /* initialize lnk by the column indices of row rip[k] of A */
259:     nzk   = 0;
260:     ncols = ai[k+1] - ai[k];
261:     if (!ncols) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_MAT_CH_ZRPVT,"Empty row %D in matrix ",k);
262:     for (j=0; j<ncols; j++) {
263:       i       = *(aj + ai[k] + j);
264:       cols[j] = i;
265:     }
266:     PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
267:     nzk += nlnk;

269:     /* update lnk by computing fill-in for each pivot row to be merged in */
270:     prow = jl[k]; /* 1st pivot row */

272:     while (prow < k) {
273:       nextprow = jl[prow];
274:       /* merge prow into k-th row */
275:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
276:       jmax   = ui[prow+1];
277:       ncols  = jmax-jmin;
278:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
279:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
280:       nzk   += nlnk;

282:       /* update il and jl for prow */
283:       if (jmin < jmax) {
284:         il[prow] = jmin;
285:         j        = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
286:       }
287:       prow = nextprow;
288:     }

290:     /* if free space is not available, make more free space */
291:     if (current_space->local_remaining<nzk) {
292:       i    = mbs - k + 1; /* num of unfactored rows */
293:       i    = PetscIntMultTruncate(i,PetscMin(nzk, i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
294:       PetscFreeSpaceGet(i,&current_space);
295:       reallocs++;
296:     }

298:     /* copy data into free space, then initialize lnk */
299:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

301:     /* add the k-th row into il and jl */
302:     if (nzk > 1) {
303:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
304:       jl[k] = jl[i]; jl[i] = k;
305:       il[k] = ui[k] + 1;
306:     }
307:     ui_ptr[k] = current_space->array;

309:     current_space->array           += nzk;
310:     current_space->local_used      += nzk;
311:     current_space->local_remaining -= nzk;

313:     ui[k+1] = ui[k] + nzk;
314:   }

316:   ISRestoreIndices(perm,&rip);
317:   PetscFree4(ui_ptr,il,jl,cols);

319:   /* destroy list of free space and other temporary array(s) */
320:   PetscMalloc1(ui[mbs]+1,&uj);
321:   PetscFreeSpaceContiguous_Cholesky(&free_space,uj,mbs,ui,udiag); /* store matrix factor */
322:   PetscLLDestroy(lnk,lnkbt);

324:   /* put together the new matrix in MATSEQSBAIJ format */
325:   MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);

327:   b               = (Mat_SeqSBAIJ*)fact->data;
328:   b->singlemalloc = PETSC_FALSE;
329:   b->free_a       = PETSC_TRUE;
330:   b->free_ij      = PETSC_TRUE;

332:   PetscMalloc1(ui[mbs]+1,&b->a);

334:   b->j         = uj;
335:   b->i         = ui;
336:   b->diag      = udiag;
337:   b->free_diag = PETSC_TRUE;
338:   b->ilen      = NULL;
339:   b->imax      = NULL;
340:   b->row       = perm;
341:   b->icol      = perm;

343:   PetscObjectReference((PetscObject)perm);
344:   PetscObjectReference((PetscObject)perm);

346:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

348:   PetscMalloc1(mbs+1,&b->solve_work);
349:   PetscLogObjectMemory((PetscObject)fact,ui[mbs]*(sizeof(PetscInt)+sizeof(MatScalar)));

351:   b->maxnz = b->nz = ui[mbs];

353:   fact->info.factor_mallocs   = reallocs;
354:   fact->info.fill_ratio_given = fill;
355:   if (ai[mbs] != 0) {
356:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
357:   } else {
358:     fact->info.fill_ratio_needed = 0.0;
359:   }
360: #if defined(PETSC_USE_INFO)
361:   if (ai[mbs] != 0) {
362:     PetscReal af = fact->info.fill_ratio_needed;
363:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
364:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
365:     PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
366:   } else {
367:     PetscInfo(A,"Empty matrix\n");
368:   }
369: #endif
370:   fact->ops->choleskyfactornumeric = MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering;
371:   return(0);
372: }

374: PetscErrorCode MatCholeskyFactorSymbolic_SeqSBAIJ_inplace(Mat fact,Mat A,IS perm,const MatFactorInfo *info)
375: {
376:   Mat_SeqSBAIJ       *a = (Mat_SeqSBAIJ*)A->data;
377:   Mat_SeqSBAIJ       *b;
378:   PetscErrorCode     ierr;
379:   PetscBool          perm_identity,missing;
380:   PetscReal          fill = info->fill;
381:   const PetscInt     *rip,*ai,*aj;
382:   PetscInt           i,mbs=a->mbs,bs=A->rmap->bs,reallocs=0,prow,d;
383:   PetscInt           *jl,jmin,jmax,nzk,*ui,k,j,*il,nextprow;
384:   PetscInt           nlnk,*lnk,ncols,*cols,*uj,**ui_ptr,*uj_ptr;
385:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
386:   PetscBT            lnkbt;

389:   MatMissingDiagonal(A,&missing,&d);
390:   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",d);

392:   /*
393:    This code originally uses Modified Sparse Row (MSR) storage
394:    (see page 85, "Iterative Methods ..." by Saad) for the output matrix B - bad choise!
395:    Then it is rewritten so the factor B takes seqsbaij format. However the associated
396:    MatCholeskyFactorNumeric_() have not been modified for the cases of bs>1 or !perm_identity,
397:    thus the original code in MSR format is still used for these cases.
398:    The code below should replace MatCholeskyFactorSymbolic_SeqSBAIJ_MSR() whenever
399:    MatCholeskyFactorNumeric_() is modified for using sbaij symbolic factor.
400:   */
401:   if (bs > 1) {
402:     MatCholeskyFactorSymbolic_SeqSBAIJ_MSR(fact,A,perm,info);
403:     return(0);
404:   }

406:   /* check whether perm is the identity mapping */
407:   ISIdentity(perm,&perm_identity);

409:   if (perm_identity) {
410:     a->permute = PETSC_FALSE;

412:     ai = a->i; aj = a->j;
413:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix reordering is not supported for sbaij matrix. Use aij format");
414:   ISGetIndices(perm,&rip);

416:   /* initialization */
417:   PetscMalloc1(mbs+1,&ui);
418:   ui[0] = 0;

420:   /* jl: linked list for storing indices of the pivot rows
421:      il: il[i] points to the 1st nonzero entry of U(i,k:mbs-1) */
422:   PetscMalloc4(mbs,&ui_ptr,mbs,&il,mbs,&jl,mbs,&cols);
423:   for (i=0; i<mbs; i++) {
424:     jl[i] = mbs; il[i] = 0;
425:   }

427:   /* create and initialize a linked list for storing column indices of the active row k */
428:   nlnk = mbs + 1;
429:   PetscLLCreate(mbs,mbs,nlnk,lnk,lnkbt);

431:   /* initial FreeSpace size is fill*(ai[mbs]+1) */
432:   PetscFreeSpaceGet(PetscRealIntMultTruncate(fill,ai[mbs]+1),&free_space);
433:   current_space = free_space;

435:   for (k=0; k<mbs; k++) {  /* for each active row k */
436:     /* initialize lnk by the column indices of row rip[k] of A */
437:     nzk   = 0;
438:     ncols = ai[rip[k]+1] - ai[rip[k]];
439:     for (j=0; j<ncols; j++) {
440:       i       = *(aj + ai[rip[k]] + j);
441:       cols[j] = rip[i];
442:     }
443:     PetscLLAdd(ncols,cols,mbs,nlnk,lnk,lnkbt);
444:     nzk += nlnk;

446:     /* update lnk by computing fill-in for each pivot row to be merged in */
447:     prow = jl[k]; /* 1st pivot row */

449:     while (prow < k) {
450:       nextprow = jl[prow];
451:       /* merge prow into k-th row */
452:       jmin   = il[prow] + 1; /* index of the 2nd nzero entry in U(prow,k:mbs-1) */
453:       jmax   = ui[prow+1];
454:       ncols  = jmax-jmin;
455:       uj_ptr = ui_ptr[prow] + jmin - ui[prow]; /* points to the 2nd nzero entry in U(prow,k:mbs-1) */
456:       PetscLLAddSorted(ncols,uj_ptr,mbs,nlnk,lnk,lnkbt);
457:       nzk   += nlnk;

459:       /* update il and jl for prow */
460:       if (jmin < jmax) {
461:         il[prow] = jmin;

463:         j = *uj_ptr; jl[prow] = jl[j]; jl[j] = prow;
464:       }
465:       prow = nextprow;
466:     }

468:     /* if free space is not available, make more free space */
469:     if (current_space->local_remaining<nzk) {
470:       i    = mbs - k + 1; /* num of unfactored rows */
471:       i    = PetscMin(PetscIntMultTruncate(i,nzk), PetscIntMultTruncate(i,i-1)); /* i*nzk, i*(i-1): estimated and max additional space needed */
472:       PetscFreeSpaceGet(i,&current_space);
473:       reallocs++;
474:     }

476:     /* copy data into free space, then initialize lnk */
477:     PetscLLClean(mbs,mbs,nzk,lnk,current_space->array,lnkbt);

479:     /* add the k-th row into il and jl */
480:     if (nzk-1 > 0) {
481:       i     = current_space->array[1]; /* col value of the first nonzero element in U(k, k+1:mbs-1) */
482:       jl[k] = jl[i]; jl[i] = k;
483:       il[k] = ui[k] + 1;
484:     }
485:     ui_ptr[k] = current_space->array;

487:     current_space->array           += nzk;
488:     current_space->local_used      += nzk;
489:     current_space->local_remaining -= nzk;

491:     ui[k+1] = ui[k] + nzk;
492:   }

494:   ISRestoreIndices(perm,&rip);
495:   PetscFree4(ui_ptr,il,jl,cols);

497:   /* destroy list of free space and other temporary array(s) */
498:   PetscMalloc1(ui[mbs]+1,&uj);
499:   PetscFreeSpaceContiguous(&free_space,uj);
500:   PetscLLDestroy(lnk,lnkbt);

502:   /* put together the new matrix in MATSEQSBAIJ format */
503:   MatSeqSBAIJSetPreallocation(fact,bs,MAT_SKIP_ALLOCATION,NULL);

505:   b               = (Mat_SeqSBAIJ*)fact->data;
506:   b->singlemalloc = PETSC_FALSE;
507:   b->free_a       = PETSC_TRUE;
508:   b->free_ij      = PETSC_TRUE;

510:   PetscMalloc1(ui[mbs]+1,&b->a);

512:   b->j    = uj;
513:   b->i    = ui;
514:   b->diag = NULL;
515:   b->ilen = NULL;
516:   b->imax = NULL;
517:   b->row  = perm;

519:   b->pivotinblocks = PETSC_FALSE; /* need to get from MatFactorInfo */

521:   PetscObjectReference((PetscObject)perm);
522:   b->icol  = perm;
523:   PetscObjectReference((PetscObject)perm);
524:   PetscMalloc1(mbs+1,&b->solve_work);
525:   PetscLogObjectMemory((PetscObject)fact,(ui[mbs]-mbs)*(sizeof(PetscInt)+sizeof(MatScalar)));
526:   b->maxnz = b->nz = ui[mbs];

528:   fact->info.factor_mallocs   = reallocs;
529:   fact->info.fill_ratio_given = fill;
530:   if (ai[mbs] != 0) {
531:     fact->info.fill_ratio_needed = ((PetscReal)ui[mbs])/ai[mbs];
532:   } else {
533:     fact->info.fill_ratio_needed = 0.0;
534:   }
535: #if defined(PETSC_USE_INFO)
536:   if (ai[mbs] != 0) {
537:     PetscReal af = fact->info.fill_ratio_needed;
538:     PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)fill,(double)af);
539:     PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);
540:     PetscInfo1(A,"PCFactorSetFill(pc,%g) for best performance.\n",(double)af);
541:   } else {
542:     PetscInfo(A,"Empty matrix\n");
543:   }
544: #endif
545:   MatSeqSBAIJSetNumericFactorization_inplace(fact,perm_identity);
546:   return(0);
547: }

549: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N(Mat C,Mat A,const MatFactorInfo *info)
550: {
551:   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
552:   IS             perm = b->row;
554:   const PetscInt *ai,*aj,*perm_ptr,mbs=a->mbs,*bi=b->i,*bj=b->j;
555:   PetscInt       i,j;
556:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
557:   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
558:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
559:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
560:   MatScalar      *work;
561:   PetscInt       *pivots;
562:   PetscBool      allowzeropivot,zeropivotdetected;

565:   /* initialization */
566:   PetscCalloc1(bs2*mbs,&rtmp);
567:   PetscMalloc2(mbs,&il,mbs,&jl);
568:   allowzeropivot = PetscNot(A->erroriffailure);

570:   il[0] = 0;
571:   for (i=0; i<mbs; i++) jl[i] = mbs;

573:   PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work);
574:   PetscMalloc1(bs,&pivots);

576:   ISGetIndices(perm,&perm_ptr);

578:   /* check permutation */
579:   if (!a->permute) {
580:     ai = a->i; aj = a->j; aa = a->a;
581:   } else {
582:     ai   = a->inew; aj = a->jnew;
583:     PetscMalloc1(bs2*ai[mbs],&aa);
584:     PetscArraycpy(aa,a->a,bs2*ai[mbs]);
585:     PetscMalloc1(ai[mbs],&a2anew);
586:     PetscArraycpy(a2anew,a->a2anew,ai[mbs]);

588:     for (i=0; i<mbs; i++) {
589:       jmin = ai[i]; jmax = ai[i+1];
590:       for (j=jmin; j<jmax; j++) {
591:         while (a2anew[j] != j) {
592:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
593:           for (k1=0; k1<bs2; k1++) {
594:             dk[k1]       = aa[k*bs2+k1];
595:             aa[k*bs2+k1] = aa[j*bs2+k1];
596:             aa[j*bs2+k1] = dk[k1];
597:           }
598:         }
599:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
600:         if (i > aj[j]) {
601:           ap = aa + j*bs2;                     /* ptr to the beginning of j-th block of aa */
602:           for (k=0; k<bs2; k++) dk[k] = ap[k]; /* dk <- j-th block of aa */
603:           for (k=0; k<bs; k++) {               /* j-th block of aa <- dk^T */
604:             for (k1=0; k1<bs; k1++) *ap++ = dk[k + bs*k1];
605:           }
606:         }
607:       }
608:     }
609:     PetscFree(a2anew);
610:   }

612:   /* for each row k */
613:   for (k = 0; k<mbs; k++) {

615:     /*initialize k-th row with elements nonzero in row perm(k) of A */
616:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];

618:     ap = aa + jmin*bs2;
619:     for (j = jmin; j < jmax; j++) {
620:       vj       = perm_ptr[aj[j]];   /* block col. index */
621:       rtmp_ptr = rtmp + vj*bs2;
622:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
623:     }

625:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
626:     PetscArraycpy(dk,rtmp+k*bs2,bs2);
627:     i    = jl[k]; /* first row to be added to k_th row  */

629:     while (i < k) {
630:       nexti = jl[i]; /* next row to be added to k_th row */

632:       /* compute multiplier */
633:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

635:       /* uik = -inv(Di)*U_bar(i,k) */
636:       diag = ba + i*bs2;
637:       u    = ba + ili*bs2;
638:       PetscArrayzero(uik,bs2);
639:       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);

641:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
642:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
643:       PetscLogFlops(4.0*bs*bs2);

645:       /* update -U(i,k) */
646:       PetscArraycpy(ba+ili*bs2,uik,bs2);

648:       /* add multiple of row i to k-th row ... */
649:       jmin = ili + 1; jmax = bi[i+1];
650:       if (jmin < jmax) {
651:         for (j=jmin; j<jmax; j++) {
652:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
653:           rtmp_ptr = rtmp + bj[j]*bs2;
654:           u        = ba + j*bs2;
655:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
656:         }
657:         PetscLogFlops(2.0*bs*bs2*(jmax-jmin));

659:         /* ... add i to row list for next nonzero entry */
660:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
661:         j     = bj[jmin];
662:         jl[i] = jl[j]; jl[j] = i; /* update jl */
663:       }
664:       i = nexti;
665:     }

667:     /* save nonzero entries in k-th row of U ... */

669:     /* invert diagonal block */
670:     diag = ba+k*bs2;
671:     PetscArraycpy(diag,dk,bs2);

673:     PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected);
674:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

676:     jmin = bi[k]; jmax = bi[k+1];
677:     if (jmin < jmax) {
678:       for (j=jmin; j<jmax; j++) {
679:         vj       = bj[j];      /* block col. index of U */
680:         u        = ba + j*bs2;
681:         rtmp_ptr = rtmp + vj*bs2;
682:         for (k1=0; k1<bs2; k1++) {
683:           *u++        = *rtmp_ptr;
684:           *rtmp_ptr++ = 0.0;
685:         }
686:       }

688:       /* ... add k to row list for first nonzero entry in k-th row */
689:       il[k] = jmin;
690:       i     = bj[jmin];
691:       jl[k] = jl[i]; jl[i] = k;
692:     }
693:   }

695:   PetscFree(rtmp);
696:   PetscFree2(il,jl);
697:   PetscFree3(dk,uik,work);
698:   PetscFree(pivots);
699:   if (a->permute) {
700:     PetscFree(aa);
701:   }

703:   ISRestoreIndices(perm,&perm_ptr);

705:   C->ops->solve          = MatSolve_SeqSBAIJ_N_inplace;
706:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_inplace;
707:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_inplace;
708:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_inplace;

710:   C->assembled    = PETSC_TRUE;
711:   C->preallocated = PETSC_TRUE;

713:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
714:   return(0);
715: }

717: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_N_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
718: {
719:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
721:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
722:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
723:   PetscInt       bs  =A->rmap->bs,bs2 = a->bs2;
724:   MatScalar      *ba = b->a,*aa,*ap,*dk,*uik;
725:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
726:   MatScalar      *work;
727:   PetscInt       *pivots;
728:   PetscBool      allowzeropivot,zeropivotdetected;

731:   PetscCalloc1(bs2*mbs,&rtmp);
732:   PetscMalloc2(mbs,&il,mbs,&jl);
733:   il[0] = 0;
734:   for (i=0; i<mbs; i++) jl[i] = mbs;

736:   PetscMalloc3(bs2,&dk,bs2,&uik,bs,&work);
737:   PetscMalloc1(bs,&pivots);
738:   allowzeropivot = PetscNot(A->erroriffailure);

740:   ai = a->i; aj = a->j; aa = a->a;

742:   /* for each row k */
743:   for (k = 0; k<mbs; k++) {

745:     /*initialize k-th row with elements nonzero in row k of A */
746:     jmin = ai[k]; jmax = ai[k+1];
747:     ap   = aa + jmin*bs2;
748:     for (j = jmin; j < jmax; j++) {
749:       vj       = aj[j];   /* block col. index */
750:       rtmp_ptr = rtmp + vj*bs2;
751:       for (i=0; i<bs2; i++) *rtmp_ptr++ = *ap++;
752:     }

754:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
755:     PetscArraycpy(dk,rtmp+k*bs2,bs2);
756:     i    = jl[k]; /* first row to be added to k_th row  */

758:     while (i < k) {
759:       nexti = jl[i]; /* next row to be added to k_th row */

761:       /* compute multiplier */
762:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

764:       /* uik = -inv(Di)*U_bar(i,k) */
765:       diag = ba + i*bs2;
766:       u    = ba + ili*bs2;
767:       PetscArrayzero(uik,bs2);
768:       PetscKernel_A_gets_A_minus_B_times_C(bs,uik,diag,u);

770:       /* update D(k) += -U(i,k)^T * U_bar(i,k) */
771:       PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,dk,uik,u);
772:       PetscLogFlops(2.0*bs*bs2);

774:       /* update -U(i,k) */
775:       PetscArraycpy(ba+ili*bs2,uik,bs2);

777:       /* add multiple of row i to k-th row ... */
778:       jmin = ili + 1; jmax = bi[i+1];
779:       if (jmin < jmax) {
780:         for (j=jmin; j<jmax; j++) {
781:           /* rtmp += -U(i,k)^T * U_bar(i,j) */
782:           rtmp_ptr = rtmp + bj[j]*bs2;
783:           u        = ba + j*bs2;
784:           PetscKernel_A_gets_A_plus_Btranspose_times_C(bs,rtmp_ptr,uik,u);
785:         }
786:         PetscLogFlops(2.0*bs*bs2*(jmax-jmin));

788:         /* ... add i to row list for next nonzero entry */
789:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
790:         j     = bj[jmin];
791:         jl[i] = jl[j]; jl[j] = i; /* update jl */
792:       }
793:       i = nexti;
794:     }

796:     /* save nonzero entries in k-th row of U ... */

798:     /* invert diagonal block */
799:     diag = ba+k*bs2;
800:     PetscArraycpy(diag,dk,bs2);

802:     PetscKernel_A_gets_inverse_A(bs,diag,pivots,work,allowzeropivot,&zeropivotdetected);
803:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

805:     jmin = bi[k]; jmax = bi[k+1];
806:     if (jmin < jmax) {
807:       for (j=jmin; j<jmax; j++) {
808:         vj       = bj[j];      /* block col. index of U */
809:         u        = ba + j*bs2;
810:         rtmp_ptr = rtmp + vj*bs2;
811:         for (k1=0; k1<bs2; k1++) {
812:           *u++        = *rtmp_ptr;
813:           *rtmp_ptr++ = 0.0;
814:         }
815:       }

817:       /* ... add k to row list for first nonzero entry in k-th row */
818:       il[k] = jmin;
819:       i     = bj[jmin];
820:       jl[k] = jl[i]; jl[i] = k;
821:     }
822:   }

824:   PetscFree(rtmp);
825:   PetscFree2(il,jl);
826:   PetscFree3(dk,uik,work);
827:   PetscFree(pivots);

829:   C->ops->solve          = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
830:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
831:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
832:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_N_NaturalOrdering_inplace;
833:   C->assembled           = PETSC_TRUE;
834:   C->preallocated        = PETSC_TRUE;

836:   PetscLogFlops(1.3333*bs*bs2*b->mbs); /* from inverting diagonal blocks */
837:   return(0);
838: }

840: /*
841:     Numeric U^T*D*U factorization for SBAIJ format. Modified from SNF of YSMP.
842:     Version for blocks 2 by 2.
843: */
844: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2(Mat C,Mat A,const MatFactorInfo *info)
845: {
846:   Mat_SeqSBAIJ   *a   = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
847:   IS             perm = b->row;
849:   const PetscInt *ai,*aj,*perm_ptr;
850:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
851:   PetscInt       *a2anew,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
852:   MatScalar      *ba = b->a,*aa,*ap;
853:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr,dk[4],uik[4];
854:   PetscReal      shift = info->shiftamount;
855:   PetscBool      allowzeropivot,zeropivotdetected;

858:   allowzeropivot = PetscNot(A->erroriffailure);

860:   /* initialization */
861:   /* il and jl record the first nonzero element in each row of the accessing
862:      window U(0:k, k:mbs-1).
863:      jl:    list of rows to be added to uneliminated rows
864:             i>= k: jl(i) is the first row to be added to row i
865:             i<  k: jl(i) is the row following row i in some list of rows
866:             jl(i) = mbs indicates the end of a list
867:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
868:             row i of U */
869:   PetscCalloc1(4*mbs,&rtmp);
870:   PetscMalloc2(mbs,&il,mbs,&jl);
871:   il[0] = 0;
872:   for (i=0; i<mbs; i++) jl[i] = mbs;

874:   ISGetIndices(perm,&perm_ptr);

876:   /* check permutation */
877:   if (!a->permute) {
878:     ai = a->i; aj = a->j; aa = a->a;
879:   } else {
880:     ai   = a->inew; aj = a->jnew;
881:     PetscMalloc1(4*ai[mbs],&aa);
882:     PetscArraycpy(aa,a->a,4*ai[mbs]);
883:     PetscMalloc1(ai[mbs],&a2anew);
884:     PetscArraycpy(a2anew,a->a2anew,ai[mbs]);

886:     for (i=0; i<mbs; i++) {
887:       jmin = ai[i]; jmax = ai[i+1];
888:       for (j=jmin; j<jmax; j++) {
889:         while (a2anew[j] != j) {
890:           k = a2anew[j]; a2anew[j] = a2anew[k]; a2anew[k] = k;
891:           for (k1=0; k1<4; k1++) {
892:             dk[k1]     = aa[k*4+k1];
893:             aa[k*4+k1] = aa[j*4+k1];
894:             aa[j*4+k1] = dk[k1];
895:           }
896:         }
897:         /* transform columnoriented blocks that lie in the lower triangle to roworiented blocks */
898:         if (i > aj[j]) {
899:           ap    = aa + j*4;  /* ptr to the beginning of the block */
900:           dk[1] = ap[1];     /* swap ap[1] and ap[2] */
901:           ap[1] = ap[2];
902:           ap[2] = dk[1];
903:         }
904:       }
905:     }
906:     PetscFree(a2anew);
907:   }

909:   /* for each row k */
910:   for (k = 0; k<mbs; k++) {

912:     /*initialize k-th row with elements nonzero in row perm(k) of A */
913:     jmin = ai[perm_ptr[k]]; jmax = ai[perm_ptr[k]+1];
914:     ap   = aa + jmin*4;
915:     for (j = jmin; j < jmax; j++) {
916:       vj       = perm_ptr[aj[j]];   /* block col. index */
917:       rtmp_ptr = rtmp + vj*4;
918:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
919:     }

921:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
922:     PetscArraycpy(dk,rtmp+k*4,4);
923:     i    = jl[k]; /* first row to be added to k_th row  */

925:     while (i < k) {
926:       nexti = jl[i]; /* next row to be added to k_th row */

928:       /* compute multiplier */
929:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

931:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
932:       diag   = ba + i*4;
933:       u      = ba + ili*4;
934:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
935:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
936:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
937:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);

939:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
940:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
941:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
942:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
943:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

945:       PetscLogFlops(16.0*2.0);

947:       /* update -U(i,k): ba[ili] = uik */
948:       PetscArraycpy(ba+ili*4,uik,4);

950:       /* add multiple of row i to k-th row ... */
951:       jmin = ili + 1; jmax = bi[i+1];
952:       if (jmin < jmax) {
953:         for (j=jmin; j<jmax; j++) {
954:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
955:           rtmp_ptr     = rtmp + bj[j]*4;
956:           u            = ba + j*4;
957:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
958:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
959:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
960:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
961:         }
962:         PetscLogFlops(16.0*(jmax-jmin));

964:         /* ... add i to row list for next nonzero entry */
965:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
966:         j     = bj[jmin];
967:         jl[i] = jl[j]; jl[j] = i; /* update jl */
968:       }
969:       i = nexti;
970:     }

972:     /* save nonzero entries in k-th row of U ... */

974:     /* invert diagonal block */
975:     diag = ba+k*4;
976:     PetscArraycpy(diag,dk,4);
977:     PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
978:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

980:     jmin = bi[k]; jmax = bi[k+1];
981:     if (jmin < jmax) {
982:       for (j=jmin; j<jmax; j++) {
983:         vj       = bj[j];      /* block col. index of U */
984:         u        = ba + j*4;
985:         rtmp_ptr = rtmp + vj*4;
986:         for (k1=0; k1<4; k1++) {
987:           *u++        = *rtmp_ptr;
988:           *rtmp_ptr++ = 0.0;
989:         }
990:       }

992:       /* ... add k to row list for first nonzero entry in k-th row */
993:       il[k] = jmin;
994:       i     = bj[jmin];
995:       jl[k] = jl[i]; jl[i] = k;
996:     }
997:   }

999:   PetscFree(rtmp);
1000:   PetscFree2(il,jl);
1001:   if (a->permute) {
1002:     PetscFree(aa);
1003:   }
1004:   ISRestoreIndices(perm,&perm_ptr);

1006:   C->ops->solve          = MatSolve_SeqSBAIJ_2_inplace;
1007:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_inplace;
1008:   C->assembled           = PETSC_TRUE;
1009:   C->preallocated        = PETSC_TRUE;

1011:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1012:   return(0);
1013: }

1015: /*
1016:       Version for when blocks are 2 by 2 Using natural ordering
1017: */
1018: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_2_NaturalOrdering(Mat C,Mat A,const MatFactorInfo *info)
1019: {
1020:   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)A->data,*b = (Mat_SeqSBAIJ*)C->data;
1022:   PetscInt       i,j,mbs=a->mbs,*bi=b->i,*bj=b->j;
1023:   PetscInt       *ai,*aj,k,k1,jmin,jmax,*jl,*il,vj,nexti,ili;
1024:   MatScalar      *ba = b->a,*aa,*ap,dk[8],uik[8];
1025:   MatScalar      *u,*diag,*rtmp,*rtmp_ptr;
1026:   PetscReal      shift = info->shiftamount;
1027:   PetscBool      allowzeropivot,zeropivotdetected;

1030:   allowzeropivot = PetscNot(A->erroriffailure);

1032:   /* initialization */
1033:   /* il and jl record the first nonzero element in each row of the accessing
1034:      window U(0:k, k:mbs-1).
1035:      jl:    list of rows to be added to uneliminated rows
1036:             i>= k: jl(i) is the first row to be added to row i
1037:             i<  k: jl(i) is the row following row i in some list of rows
1038:             jl(i) = mbs indicates the end of a list
1039:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1040:             row i of U */
1041:   PetscCalloc1(4*mbs,&rtmp);
1042:   PetscMalloc2(mbs,&il,mbs,&jl);
1043:   il[0] = 0;
1044:   for (i=0; i<mbs; i++) jl[i] = mbs;

1046:   ai = a->i; aj = a->j; aa = a->a;

1048:   /* for each row k */
1049:   for (k = 0; k<mbs; k++) {

1051:     /*initialize k-th row with elements nonzero in row k of A */
1052:     jmin = ai[k]; jmax = ai[k+1];
1053:     ap   = aa + jmin*4;
1054:     for (j = jmin; j < jmax; j++) {
1055:       vj       = aj[j];   /* block col. index */
1056:       rtmp_ptr = rtmp + vj*4;
1057:       for (i=0; i<4; i++) *rtmp_ptr++ = *ap++;
1058:     }

1060:     /* modify k-th row by adding in those rows i with U(i,k) != 0 */
1061:     PetscArraycpy(dk,rtmp+k*4,4);
1062:     i    = jl[k]; /* first row to be added to k_th row  */

1064:     while (i < k) {
1065:       nexti = jl[i]; /* next row to be added to k_th row */

1067:       /* compute multiplier */
1068:       ili = il[i];  /* index of first nonzero element in U(i,k:bms-1) */

1070:       /* uik = -inv(Di)*U_bar(i,k): - ba[ili]*ba[i] */
1071:       diag   = ba + i*4;
1072:       u      = ba + ili*4;
1073:       uik[0] = -(diag[0]*u[0] + diag[2]*u[1]);
1074:       uik[1] = -(diag[1]*u[0] + diag[3]*u[1]);
1075:       uik[2] = -(diag[0]*u[2] + diag[2]*u[3]);
1076:       uik[3] = -(diag[1]*u[2] + diag[3]*u[3]);

1078:       /* update D(k) += -U(i,k)^T * U_bar(i,k): dk += uik*ba[ili] */
1079:       dk[0] += uik[0]*u[0] + uik[1]*u[1];
1080:       dk[1] += uik[2]*u[0] + uik[3]*u[1];
1081:       dk[2] += uik[0]*u[2] + uik[1]*u[3];
1082:       dk[3] += uik[2]*u[2] + uik[3]*u[3];

1084:       PetscLogFlops(16.0*2.0);

1086:       /* update -U(i,k): ba[ili] = uik */
1087:       PetscArraycpy(ba+ili*4,uik,4);

1089:       /* add multiple of row i to k-th row ... */
1090:       jmin = ili + 1; jmax = bi[i+1];
1091:       if (jmin < jmax) {
1092:         for (j=jmin; j<jmax; j++) {
1093:           /* rtmp += -U(i,k)^T * U_bar(i,j): rtmp[bj[j]] += uik*ba[j]; */
1094:           rtmp_ptr     = rtmp + bj[j]*4;
1095:           u            = ba + j*4;
1096:           rtmp_ptr[0] += uik[0]*u[0] + uik[1]*u[1];
1097:           rtmp_ptr[1] += uik[2]*u[0] + uik[3]*u[1];
1098:           rtmp_ptr[2] += uik[0]*u[2] + uik[1]*u[3];
1099:           rtmp_ptr[3] += uik[2]*u[2] + uik[3]*u[3];
1100:         }
1101:         PetscLogFlops(16.0*(jmax-jmin));

1103:         /* ... add i to row list for next nonzero entry */
1104:         il[i] = jmin;             /* update il(i) in column k+1, ... mbs-1 */
1105:         j     = bj[jmin];
1106:         jl[i] = jl[j]; jl[j] = i; /* update jl */
1107:       }
1108:       i = nexti;
1109:     }

1111:     /* save nonzero entries in k-th row of U ... */

1113:     /* invert diagonal block */
1114:     diag = ba+k*4;
1115:     PetscArraycpy(diag,dk,4);
1116:     PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);
1117:     if (zeropivotdetected) C->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;

1119:     jmin = bi[k]; jmax = bi[k+1];
1120:     if (jmin < jmax) {
1121:       for (j=jmin; j<jmax; j++) {
1122:         vj       = bj[j];      /* block col. index of U */
1123:         u        = ba + j*4;
1124:         rtmp_ptr = rtmp + vj*4;
1125:         for (k1=0; k1<4; k1++) {
1126:           *u++        = *rtmp_ptr;
1127:           *rtmp_ptr++ = 0.0;
1128:         }
1129:       }

1131:       /* ... add k to row list for first nonzero entry in k-th row */
1132:       il[k] = jmin;
1133:       i     = bj[jmin];
1134:       jl[k] = jl[i]; jl[i] = k;
1135:     }
1136:   }

1138:   PetscFree(rtmp);
1139:   PetscFree2(il,jl);

1141:   C->ops->solve          = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1142:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1143:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1144:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_2_NaturalOrdering_inplace;
1145:   C->assembled           = PETSC_TRUE;
1146:   C->preallocated        = PETSC_TRUE;

1148:   PetscLogFlops(1.3333*8*b->mbs); /* from inverting diagonal blocks */
1149:   return(0);
1150: }

1152: /*
1153:     Numeric U^T*D*U factorization for SBAIJ format.
1154:     Version for blocks are 1 by 1.
1155: */
1156: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_inplace(Mat C,Mat A,const MatFactorInfo *info)
1157: {
1158:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1159:   IS             ip=b->row;
1161:   const PetscInt *ai,*aj,*rip;
1162:   PetscInt       *a2anew,i,j,mbs=a->mbs,*bi=b->i,*bj=b->j,*bcol;
1163:   PetscInt       k,jmin,jmax,*jl,*il,col,nexti,ili,nz;
1164:   MatScalar      *rtmp,*ba=b->a,*bval,*aa,dk,uikdi;
1165:   PetscReal      rs;
1166:   FactorShiftCtx sctx;

1169:   /* MatPivotSetUp(): initialize shift context sctx */
1170:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1172:   ISGetIndices(ip,&rip);
1173:   if (!a->permute) {
1174:     ai = a->i; aj = a->j; aa = a->a;
1175:   } else {
1176:     ai     = a->inew; aj = a->jnew;
1177:     nz     = ai[mbs];
1178:     PetscMalloc1(nz,&aa);
1179:     a2anew = a->a2anew;
1180:     bval   = a->a;
1181:     for (j=0; j<nz; j++) {
1182:       aa[a2anew[j]] = *(bval++);
1183:     }
1184:   }

1186:   /* initialization */
1187:   /* il and jl record the first nonzero element in each row of the accessing
1188:      window U(0:k, k:mbs-1).
1189:      jl:    list of rows to be added to uneliminated rows
1190:             i>= k: jl(i) is the first row to be added to row i
1191:             i<  k: jl(i) is the row following row i in some list of rows
1192:             jl(i) = mbs indicates the end of a list
1193:      il(i): points to the first nonzero element in columns k,...,mbs-1 of
1194:             row i of U */
1195:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&jl);

1197:   do {
1198:     sctx.newshift = PETSC_FALSE;
1199:     il[0] = 0;
1200:     for (i=0; i<mbs; i++) {
1201:       rtmp[i] = 0.0; jl[i] = mbs;
1202:     }

1204:     for (k = 0; k<mbs; k++) {
1205:       /*initialize k-th row by the perm[k]-th row of A */
1206:       jmin = ai[rip[k]]; jmax = ai[rip[k]+1];
1207:       bval = ba + bi[k];
1208:       for (j = jmin; j < jmax; j++) {
1209:         col       = rip[aj[j]];
1210:         rtmp[col] = aa[j];
1211:         *bval++   = 0.0; /* for in-place factorization */
1212:       }

1214:       /* shift the diagonal of the matrix */
1215:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1217:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1218:       dk = rtmp[k];
1219:       i  = jl[k]; /* first row to be added to k_th row  */

1221:       while (i < k) {
1222:         nexti = jl[i]; /* next row to be added to k_th row */

1224:         /* compute multiplier, update diag(k) and U(i,k) */
1225:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1226:         uikdi   = -ba[ili]*ba[bi[i]]; /* diagonal(k) */
1227:         dk     += uikdi*ba[ili];
1228:         ba[ili] = uikdi; /* -U(i,k) */

1230:         /* add multiple of row i to k-th row */
1231:         jmin = ili + 1; jmax = bi[i+1];
1232:         if (jmin < jmax) {
1233:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1234:           PetscLogFlops(2.0*(jmax-jmin));

1236:           /* update il and jl for row i */
1237:           il[i] = jmin;
1238:           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1239:         }
1240:         i = nexti;
1241:       }

1243:       /* shift the diagonals when zero pivot is detected */
1244:       /* compute rs=sum of abs(off-diagonal) */
1245:       rs   = 0.0;
1246:       jmin = bi[k]+1;
1247:       nz   = bi[k+1] - jmin;
1248:       if (nz) {
1249:         bcol = bj + jmin;
1250:         while (nz--) {
1251:           rs += PetscAbsScalar(rtmp[*bcol]);
1252:           bcol++;
1253:         }
1254:       }

1256:       sctx.rs = rs;
1257:       sctx.pv = dk;
1258:       MatPivotCheck(C,A,info,&sctx,k);
1259:       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1260:       dk = sctx.pv;

1262:       /* copy data into U(k,:) */
1263:       ba[bi[k]] = 1.0/dk; /* U(k,k) */
1264:       jmin      = bi[k]+1; jmax = bi[k+1];
1265:       if (jmin < jmax) {
1266:         for (j=jmin; j<jmax; j++) {
1267:           col = bj[j]; ba[j] = rtmp[col]; rtmp[col] = 0.0;
1268:         }
1269:         /* add the k-th row into il and jl */
1270:         il[k] = jmin;
1271:         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1272:       }
1273:     }
1274:   } while (sctx.newshift);
1275:   PetscFree3(rtmp,il,jl);
1276:   if (a->permute) {PetscFree(aa);}

1278:   ISRestoreIndices(ip,&rip);

1280:   C->ops->solve          = MatSolve_SeqSBAIJ_1_inplace;
1281:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1282:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_inplace;
1283:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_inplace;
1284:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_inplace;
1285:   C->assembled           = PETSC_TRUE;
1286:   C->preallocated        = PETSC_TRUE;

1288:   PetscLogFlops(C->rmap->N);
1289:   if (sctx.nshift) {
1290:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1291:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1292:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1293:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1294:     }
1295:   }
1296:   return(0);
1297: }

1299: /*
1300:   Version for when blocks are 1 by 1 Using natural ordering under new datastructure
1301:   Modified from MatCholeskyFactorNumeric_SeqAIJ()
1302: */
1303: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering(Mat B,Mat A,const MatFactorInfo *info)
1304: {
1305:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data;
1306:   Mat_SeqSBAIJ   *b=(Mat_SeqSBAIJ*)B->data;
1308:   PetscInt       i,j,mbs=A->rmap->n,*bi=b->i,*bj=b->j,*bdiag=b->diag,*bjtmp;
1309:   PetscInt       *ai=a->i,*aj=a->j,*ajtmp;
1310:   PetscInt       k,jmin,jmax,*c2r,*il,col,nexti,ili,nz;
1311:   MatScalar      *rtmp,*ba=b->a,*bval,*aa=a->a,dk,uikdi;
1312:   FactorShiftCtx sctx;
1313:   PetscReal      rs;
1314:   MatScalar      d,*v;

1317:   PetscMalloc3(mbs,&rtmp,mbs,&il,mbs,&c2r);

1319:   /* MatPivotSetUp(): initialize shift context sctx */
1320:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1322:   if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) { /* set sctx.shift_top=max{rs} */
1323:     sctx.shift_top = info->zeropivot;

1325:     PetscArrayzero(rtmp,mbs);

1327:     for (i=0; i<mbs; i++) {
1328:       /* calculate sum(|aij|)-RealPart(aii), amt of shift needed for this row */
1329:       d        = (aa)[a->diag[i]];
1330:       rtmp[i] += -PetscRealPart(d);  /* diagonal entry */
1331:       ajtmp    = aj + ai[i] + 1;     /* exclude diagonal */
1332:       v        = aa + ai[i] + 1;
1333:       nz       = ai[i+1] - ai[i] - 1;
1334:       for (j=0; j<nz; j++) {
1335:         rtmp[i]        += PetscAbsScalar(v[j]);
1336:         rtmp[ajtmp[j]] += PetscAbsScalar(v[j]);
1337:       }
1338:       if (PetscRealPart(rtmp[i]) > sctx.shift_top) sctx.shift_top = PetscRealPart(rtmp[i]);
1339:     }
1340:     sctx.shift_top *= 1.1;
1341:     sctx.nshift_max = 5;
1342:     sctx.shift_lo   = 0.;
1343:     sctx.shift_hi   = 1.;
1344:   }

1346:   /* allocate working arrays
1347:      c2r: linked list, keep track of pivot rows for a given column. c2r[col]: head of the list for a given col
1348:      il:  for active k row, il[i] gives the index of the 1st nonzero entry in U[i,k:n-1] in bj and ba arrays
1349:   */
1350:   do {
1351:     sctx.newshift = PETSC_FALSE;

1353:     for (i=0; i<mbs; i++) c2r[i] = mbs;
1354:     if (mbs) il[0] = 0;

1356:     for (k = 0; k<mbs; k++) {
1357:       /* zero rtmp */
1358:       nz    = bi[k+1] - bi[k];
1359:       bjtmp = bj + bi[k];
1360:       for (j=0; j<nz; j++) rtmp[bjtmp[j]] = 0.0;

1362:       /* load in initial unfactored row */
1363:       bval = ba + bi[k];
1364:       jmin = ai[k]; jmax = ai[k+1];
1365:       for (j = jmin; j < jmax; j++) {
1366:         col       = aj[j];
1367:         rtmp[col] = aa[j];
1368:         *bval++   = 0.0; /* for in-place factorization */
1369:       }
1370:       /* shift the diagonal of the matrix: ZeropivotApply() */
1371:       rtmp[k] += sctx.shift_amount;  /* shift the diagonal of the matrix */

1373:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1374:       dk = rtmp[k];
1375:       i  = c2r[k]; /* first row to be added to k_th row  */

1377:       while (i < k) {
1378:         nexti = c2r[i]; /* next row to be added to k_th row */

1380:         /* compute multiplier, update diag(k) and U(i,k) */
1381:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1382:         uikdi   = -ba[ili]*ba[bdiag[i]]; /* diagonal(k) */
1383:         dk     += uikdi*ba[ili]; /* update diag[k] */
1384:         ba[ili] = uikdi; /* -U(i,k) */

1386:         /* add multiple of row i to k-th row */
1387:         jmin = ili + 1; jmax = bi[i+1];
1388:         if (jmin < jmax) {
1389:           for (j=jmin; j<jmax; j++) rtmp[bj[j]] += uikdi*ba[j];
1390:           /* update il and c2r for row i */
1391:           il[i] = jmin;
1392:           j     = bj[jmin]; c2r[i] = c2r[j]; c2r[j] = i;
1393:         }
1394:         i = nexti;
1395:       }

1397:       /* copy data into U(k,:) */
1398:       rs   = 0.0;
1399:       jmin = bi[k]; jmax = bi[k+1]-1;
1400:       if (jmin < jmax) {
1401:         for (j=jmin; j<jmax; j++) {
1402:           col = bj[j]; ba[j] = rtmp[col]; rs += PetscAbsScalar(ba[j]);
1403:         }
1404:         /* add the k-th row into il and c2r */
1405:         il[k] = jmin;
1406:         i     = bj[jmin]; c2r[k] = c2r[i]; c2r[i] = k;
1407:       }

1409:       sctx.rs = rs;
1410:       sctx.pv = dk;
1411:       MatPivotCheck(B,A,info,&sctx,k);
1412:       if (sctx.newshift) break;
1413:       dk = sctx.pv;

1415:       ba[bdiag[k]] = 1.0/dk; /* U(k,k) */
1416:     }
1417:   } while (sctx.newshift);

1419:   PetscFree3(rtmp,il,c2r);

1421:   B->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1422:   B->ops->solves         = MatSolves_SeqSBAIJ_1;
1423:   B->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering;
1424:   B->ops->matsolve       = MatMatSolve_SeqSBAIJ_1_NaturalOrdering;
1425:   B->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering;
1426:   B->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering;

1428:   B->assembled    = PETSC_TRUE;
1429:   B->preallocated = PETSC_TRUE;

1431:   PetscLogFlops(B->rmap->n);

1433:   /* MatPivotView() */
1434:   if (sctx.nshift) {
1435:     if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1436:       PetscInfo4(A,"number of shift_pd tries %D, shift_amount %g, diagonal shifted up by %e fraction top_value %e\n",sctx.nshift,(double)sctx.shift_amount,(double)sctx.shift_fraction,(double)sctx.shift_top);
1437:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1438:       PetscInfo2(A,"number of shift_nz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1439:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_INBLOCKS) {
1440:       PetscInfo2(A,"number of shift_inblocks applied %D, each shift_amount %g\n",sctx.nshift,(double)info->shiftamount);
1441:     }
1442:   }
1443:   return(0);
1444: }

1446: PetscErrorCode MatCholeskyFactorNumeric_SeqSBAIJ_1_NaturalOrdering_inplace(Mat C,Mat A,const MatFactorInfo *info)
1447: {
1448:   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)A->data,*b=(Mat_SeqSBAIJ*)C->data;
1450:   PetscInt       i,j,mbs = a->mbs;
1451:   PetscInt       *ai=a->i,*aj=a->j,*bi=b->i,*bj=b->j;
1452:   PetscInt       k,jmin,*jl,*il,nexti,ili,*acol,*bcol,nz;
1453:   MatScalar      *rtmp,*ba=b->a,*aa=a->a,dk,uikdi,*aval,*bval;
1454:   PetscReal      rs;
1455:   FactorShiftCtx sctx;

1458:   /* MatPivotSetUp(): initialize shift context sctx */
1459:   PetscMemzero(&sctx,sizeof(FactorShiftCtx));

1461:   /* initialization */
1462:   /* il and jl record the first nonzero element in each row of the accessing
1463:      window U(0:k, k:mbs-1).
1464:      jl:    list of rows to be added to uneliminated rows
1465:             i>= k: jl(i) is the first row to be added to row i
1466:             i<  k: jl(i) is the row following row i in some list of rows
1467:             jl(i) = mbs indicates the end of a list
1468:      il(i): points to the first nonzero element in U(i,k:mbs-1)
1469:   */
1470:   PetscMalloc1(mbs,&rtmp);
1471:   PetscMalloc2(mbs,&il,mbs,&jl);

1473:   do {
1474:     sctx.newshift = PETSC_FALSE;
1475:     il[0] = 0;
1476:     for (i=0; i<mbs; i++) {
1477:       rtmp[i] = 0.0; jl[i] = mbs;
1478:     }

1480:     for (k = 0; k<mbs; k++) {
1481:       /*initialize k-th row with elements nonzero in row perm(k) of A */
1482:       nz   = ai[k+1] - ai[k];
1483:       acol = aj + ai[k];
1484:       aval = aa + ai[k];
1485:       bval = ba + bi[k];
1486:       while (nz--) {
1487:         rtmp[*acol++] = *aval++;
1488:         *bval++       = 0.0; /* for in-place factorization */
1489:       }

1491:       /* shift the diagonal of the matrix */
1492:       if (sctx.nshift) rtmp[k] += sctx.shift_amount;

1494:       /* modify k-th row by adding in those rows i with U(i,k)!=0 */
1495:       dk = rtmp[k];
1496:       i  = jl[k]; /* first row to be added to k_th row  */

1498:       while (i < k) {
1499:         nexti = jl[i]; /* next row to be added to k_th row */
1500:         /* compute multiplier, update D(k) and U(i,k) */
1501:         ili     = il[i]; /* index of first nonzero element in U(i,k:bms-1) */
1502:         uikdi   = -ba[ili]*ba[bi[i]];
1503:         dk     += uikdi*ba[ili];
1504:         ba[ili] = uikdi; /* -U(i,k) */

1506:         /* add multiple of row i to k-th row ... */
1507:         jmin = ili + 1;
1508:         nz   = bi[i+1] - jmin;
1509:         if (nz > 0) {
1510:           bcol = bj + jmin;
1511:           bval = ba + jmin;
1512:           PetscLogFlops(2.0*nz);
1513:           while (nz--) rtmp[*bcol++] += uikdi*(*bval++);

1515:           /* update il and jl for i-th row */
1516:           il[i] = jmin;
1517:           j     = bj[jmin]; jl[i] = jl[j]; jl[j] = i;
1518:         }
1519:         i = nexti;
1520:       }

1522:       /* shift the diagonals when zero pivot is detected */
1523:       /* compute rs=sum of abs(off-diagonal) */
1524:       rs   = 0.0;
1525:       jmin = bi[k]+1;
1526:       nz   = bi[k+1] - jmin;
1527:       if (nz) {
1528:         bcol = bj + jmin;
1529:         while (nz--) {
1530:           rs += PetscAbsScalar(rtmp[*bcol]);
1531:           bcol++;
1532:         }
1533:       }

1535:       sctx.rs = rs;
1536:       sctx.pv = dk;
1537:       MatPivotCheck(C,A,info,&sctx,k);
1538:       if (sctx.newshift) break;    /* sctx.shift_amount is updated */
1539:       dk = sctx.pv;

1541:       /* copy data into U(k,:) */
1542:       ba[bi[k]] = 1.0/dk;
1543:       jmin      = bi[k]+1;
1544:       nz        = bi[k+1] - jmin;
1545:       if (nz) {
1546:         bcol = bj + jmin;
1547:         bval = ba + jmin;
1548:         while (nz--) {
1549:           *bval++       = rtmp[*bcol];
1550:           rtmp[*bcol++] = 0.0;
1551:         }
1552:         /* add k-th row into il and jl */
1553:         il[k] = jmin;
1554:         i     = bj[jmin]; jl[k] = jl[i]; jl[i] = k;
1555:       }
1556:     } /* end of for (k = 0; k<mbs; k++) */
1557:   } while (sctx.newshift);
1558:   PetscFree(rtmp);
1559:   PetscFree2(il,jl);

1561:   C->ops->solve          = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1562:   C->ops->solves         = MatSolves_SeqSBAIJ_1_inplace;
1563:   C->ops->solvetranspose = MatSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1564:   C->ops->forwardsolve   = MatForwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;
1565:   C->ops->backwardsolve  = MatBackwardSolve_SeqSBAIJ_1_NaturalOrdering_inplace;

1567:   C->assembled    = PETSC_TRUE;
1568:   C->preallocated = PETSC_TRUE;

1570:   PetscLogFlops(C->rmap->N);
1571:   if (sctx.nshift) {
1572:     if (info->shifttype == (PetscReal)MAT_SHIFT_NONZERO) {
1573:       PetscInfo2(A,"number of shiftnz tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1574:     } else if (info->shifttype == (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE) {
1575:       PetscInfo2(A,"number of shiftpd tries %D, shift_amount %g\n",sctx.nshift,(double)sctx.shift_amount);
1576:     }
1577:   }
1578:   return(0);
1579: }

1581: PetscErrorCode MatCholeskyFactor_SeqSBAIJ(Mat A,IS perm,const MatFactorInfo *info)
1582: {
1584:   Mat            C;

1587:   MatGetFactor(A,"petsc",MAT_FACTOR_CHOLESKY,&C);
1588:   MatCholeskyFactorSymbolic(C,A,perm,info);
1589:   MatCholeskyFactorNumeric(C,A,info);

1591:   A->ops->solve          = C->ops->solve;
1592:   A->ops->solvetranspose = C->ops->solvetranspose;

1594:   MatHeaderMerge(A,&C);
1595:   return(0);
1596: }