Actual source code: baijsolvnat3.c

  1: #include <../src/mat/impls/baij/seq/baij.h>
  2: #include <petsc/private/kernels/blockinvert.h>

  4: /*
  5:       Special case where the matrix was ILU(0) factored in the natural
  6:    ordering. This eliminates the need for the column and row permutation.
  7: */
  8: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
  9: {
 10:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
 11:   const PetscInt    n  =a->mbs,*ai=a->i,*aj=a->j;
 12:   const PetscInt    *diag = a->diag,*vi;
 13:   const MatScalar   *aa   =a->a,*v;
 14:   PetscScalar       *x,s1,s2,s3,x1,x2,x3;
 15:   const PetscScalar *b;
 16:   PetscInt          jdx,idt,idx,nz,i;

 18:   VecGetArrayRead(bb,&b);
 19:   VecGetArray(xx,&x);

 21:   /* forward solve the lower triangular */
 22:   idx  = 0;
 23:   x[0] = b[0]; x[1] = b[1]; x[2] = b[2];
 24:   for (i=1; i<n; i++) {
 25:     v    =  aa      + 9*ai[i];
 26:     vi   =  aj      + ai[i];
 27:     nz   =  diag[i] - ai[i];
 28:     idx +=  3;
 29:     s1   =  b[idx];s2 = b[1+idx];s3 = b[2+idx];
 30:     while (nz--) {
 31:       jdx = 3*(*vi++);
 32:       x1  = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
 33:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
 34:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
 35:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
 36:       v  += 9;
 37:     }
 38:     x[idx]   = s1;
 39:     x[1+idx] = s2;
 40:     x[2+idx] = s3;
 41:   }
 42:   /* backward solve the upper triangular */
 43:   for (i=n-1; i>=0; i--) {
 44:     v   = aa + 9*diag[i] + 9;
 45:     vi  = aj + diag[i] + 1;
 46:     nz  = ai[i+1] - diag[i] - 1;
 47:     idt = 3*i;
 48:     s1  = x[idt];  s2 = x[1+idt];
 49:     s3  = x[2+idt];
 50:     while (nz--) {
 51:       idx = 3*(*vi++);
 52:       x1  = x[idx];   x2 = x[1+idx];x3    = x[2+idx];
 53:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
 54:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
 55:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
 56:       v  += 9;
 57:     }
 58:     v        = aa +  9*diag[i];
 59:     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
 60:     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
 61:     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
 62:   }

 64:   VecRestoreArrayRead(bb,&b);
 65:   VecRestoreArray(xx,&x);
 66:   PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n);
 67:   return 0;
 68: }

 70: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
 71: {
 72:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
 73:   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
 74:   PetscInt          i,k,nz,idx,jdx,idt;
 75:   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
 76:   const MatScalar   *aa=a->a,*v;
 77:   PetscScalar       *x;
 78:   const PetscScalar *b;
 79:   PetscScalar       s1,s2,s3,x1,x2,x3;

 81:   VecGetArrayRead(bb,&b);
 82:   VecGetArray(xx,&x);
 83:   /* forward solve the lower triangular */
 84:   idx  = 0;
 85:   x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
 86:   for (i=1; i<n; i++) {
 87:     v   = aa + bs2*ai[i];
 88:     vi  = aj + ai[i];
 89:     nz  = ai[i+1] - ai[i];
 90:     idx = bs*i;
 91:     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];
 92:     for (k=0; k<nz; k++) {
 93:       jdx = bs*vi[k];
 94:       x1  = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
 95:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
 96:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
 97:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

 99:       v +=  bs2;
100:     }

102:     x[idx]   = s1;
103:     x[1+idx] = s2;
104:     x[2+idx] = s3;
105:   }

107:   /* backward solve the upper triangular */
108:   for (i=n-1; i>=0; i--) {
109:     v   = aa + bs2*(adiag[i+1]+1);
110:     vi  = aj + adiag[i+1]+1;
111:     nz  = adiag[i] - adiag[i+1]-1;
112:     idt = bs*i;
113:     s1  = x[idt];  s2 = x[1+idt];s3 = x[2+idt];

115:     for (k=0; k<nz; k++) {
116:       idx = bs*vi[k];
117:       x1  = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
118:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
119:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
120:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

122:       v +=  bs2;
123:     }
124:     /* x = inv_diagonal*x */
125:     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
126:     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
127:     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;

129:   }

131:   VecRestoreArrayRead(bb,&b);
132:   VecRestoreArray(xx,&x);
133:   PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
134:   return 0;
135: }

137: PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
138: {
139:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
140:   const PetscInt    n  =a->mbs,*vi,*ai=a->i,*aj=a->j;
141:   PetscInt          i,k,nz,idx,jdx;
142:   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
143:   const MatScalar   *aa=a->a,*v;
144:   PetscScalar       *x;
145:   const PetscScalar *b;
146:   PetscScalar       s1,s2,s3,x1,x2,x3;

148:   VecGetArrayRead(bb,&b);
149:   VecGetArray(xx,&x);
150:   /* forward solve the lower triangular */
151:   idx  = 0;
152:   x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
153:   for (i=1; i<n; i++) {
154:     v   = aa + bs2*ai[i];
155:     vi  = aj + ai[i];
156:     nz  = ai[i+1] - ai[i];
157:     idx = bs*i;
158:     s1  = b[idx];s2 = b[1+idx];s3 = b[2+idx];
159:     for (k=0; k<nz; k++) {
160:       jdx = bs*vi[k];
161:       x1  = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
162:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
163:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
164:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

166:       v +=  bs2;
167:     }

169:     x[idx]   = s1;
170:     x[1+idx] = s2;
171:     x[2+idx] = s3;
172:   }

174:   VecRestoreArrayRead(bb,&b);
175:   VecRestoreArray(xx,&x);
176:   PetscLogFlops(1.0*bs2*(a->nz) - bs*A->cmap->n);
177:   return 0;
178: }

180: PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
181: {
182:   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
183:   const PetscInt    n  =a->mbs,*vi,*aj=a->j,*adiag=a->diag;
184:   PetscInt          i,k,nz,idx,idt;
185:   const PetscInt    bs = A->rmap->bs,bs2 = a->bs2;
186:   const MatScalar   *aa=a->a,*v;
187:   PetscScalar       *x;
188:   const PetscScalar *b;
189:   PetscScalar       s1,s2,s3,x1,x2,x3;

191:   VecGetArrayRead(bb,&b);
192:   VecGetArray(xx,&x);

194:   /* backward solve the upper triangular */
195:   for (i=n-1; i>=0; i--) {
196:     v   = aa + bs2*(adiag[i+1]+1);
197:     vi  = aj + adiag[i+1]+1;
198:     nz  = adiag[i] - adiag[i+1]-1;
199:     idt = bs*i;
200:     s1  = b[idt];  s2 = b[1+idt];s3 = b[2+idt];

202:     for (k=0; k<nz; k++) {
203:       idx = bs*vi[k];
204:       x1  = x[idx];   x2 = x[1+idx]; x3 = x[2+idx];
205:       s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
206:       s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
207:       s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;

209:       v +=  bs2;
210:     }
211:     /* x = inv_diagonal*x */
212:     x[idt]   = v[0]*s1 + v[3]*s2 + v[6]*s3;
213:     x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
214:     x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;

216:   }

218:   VecRestoreArrayRead(bb,&b);
219:   VecRestoreArray(xx,&x);
220:   PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
221:   return 0;
222: }