3. Matrix-free preconditioning#
We combine a matrix-free operator with a matrix-free preconditioner. This allows to solve huge high-order problems with little memory
import numpy as np
import ngsolve.comp
from ngsolve import *
from ngsolve.webgui import Draw
from ngsolve.krylovspace import CGSolver
from time import time
mesh = Mesh(unit_square.GenerateMesh(maxh=0.1))
order = 20
fes = H1(mesh, order=order, dirichlet='left|bottom', wb_withedges=False)
print ("ndof =", fes.ndof)
fes2 = VectorL2(mesh, order=order-1, covariant=True)
fesl2 = L2(mesh, order=1)
u,v = fes.TnT()
from ngsolve.comp import ConvertOperator
opgrad = ConvertOperator(fes, fes2, grad(u), geom_free=True)
opid = ConvertOperator(fes, fesl2, geom_free=True)
feslo = H1(mesh, order=1, dirichlet='left|bottom')
ulo,vlo = feslo.TnT()
alo = BilinearForm(grad(ulo)*grad(vlo)*dx).Assemble()
loinv = alo.mat.Inverse(feslo.FreeDofs())
prelo = fes.loembedding@loinv@fes.loembedding.T
ndof = 46801
Laplace = opgrad.T @ fes2.Mass(1) @ opgrad
Mass = opid.T @fesl2.Mass(1) @ opid
K = Laplace+Mass
pre = IdentityMatrix(fes.ndof)
bfi = (grad(u)*grad(v)*dx)[0].MakeBFI()
def CreatorLocal(ei):
# print (ei)
mat = bfi.CalcElementMatrix(fes.GetFE(ei), mesh.GetTrafo(ei))
dofs = fes.GetDofNrs(ei)
# print (dofs)
local = [i for (i,d) in enumerate(dofs) if fes.CouplingType(d)==COUPLING_TYPE.LOCAL_DOF]
# print (local)
matlocal = Matrix( np.asmatrix(mat)[local,:][:,local]).I
# print (matlocal)
return tuple([matlocal, local, local])
prelocal = ngsolve.comp.MatrixFreeOperator(fes, CreatorLocal)
def CreatorExt(ei):
mat = bfi.CalcElementMatrix(fes.GetFE(ei), mesh.GetTrafo(ei))
dofs = fes.GetDofNrs(ei)
local = [i for (i,d) in enumerate(dofs) if fes.CouplingType(d)==COUPLING_TYPE.LOCAL_DOF]
coupling = [i for (i,d) in enumerate(dofs) if fes.CouplingType(d)!=COUPLING_TYPE.LOCAL_DOF]
matLL = Matrix( np.asmatrix(mat)[local,:][:,local])
matLC = Matrix( np.asmatrix(mat)[local,:][:,coupling])
mat = -matLL.I*matLC
return tuple([mat, coupling, local])
ext = IdentityMatrix(fes.ndof)+ngsolve.comp.MatrixFreeOperator(fes, CreatorExt)
def CreatorInterface(ei):
mat = bfi.CalcElementMatrix(fes.GetFE(ei), mesh.GetTrafo(ei))
dofs = fes.GetDofNrs(ei)
local = [i for (i,d) in enumerate(dofs) if fes.CouplingType(d)==COUPLING_TYPE.LOCAL_DOF]
interface = [i for (i,d) in enumerate(dofs) if fes.CouplingType(d)==COUPLING_TYPE.INTERFACE_DOF]
matIF = Matrix( np.asmatrix(mat)[interface,:][:,interface]).I
return tuple([matIF, interface, interface])
preinterface = ngsolve.comp.MatrixFreeOperator(fes, CreatorInterface)
coupling = fes.FreeDofs(True)
proj = Projector(coupling, True)
# pre = prelocal + proj
pre = prelocal + ext@(proj@preinterface@proj + prelo)@ext.T
# pre = prelocal + proj@preinterface@proj
print (pre.GetOperatorInfo())
SumMatrix, h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
ConstantEBEMatrix (bs = 171x171), h = 46801, w = 46801
ConstantEBEMatrix (bs = 171x171), h = 46801, w = 46801
ProductMatrix, h = 46801, w = 46801
ProductMatrix, h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
Identity, h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
ConstantEBEMatrix (bs = 171x60), h = 46801, w = 46801
ConstantEBEMatrix (bs = 171x60), h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
ProductMatrix, h = 46801, w = 46801
ProductMatrix, h = 46801, w = 46801
N4ngla9ProjectorE, h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
ConstantEBEMatrix (bs = 57x57), h = 46801, w = 46801
ConstantEBEMatrix (bs = 57x57), h = 46801, w = 46801
N4ngla9ProjectorE, h = 46801, w = 46801
EmbeddedTransposeMatrix, h = 46801, w = 46801
EmbeddedMatrix, h = 46801, w = 137
N4ngla14UmfpackInverseIdddEE, h = 137, w = 137
Transpose, h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
Identity, h = 46801, w = 46801
SumMatrix, h = 46801, w = 46801
ConstantEBEMatrix (bs = 171x60), h = 46801, w = 46801
ConstantEBEMatrix (bs = 171x60), h = 46801, w = 46801
f = LinearForm(x*v*dx).Assemble()
gfu = GridFunction(fes)
ts = time()
with TaskManager():
inv = CGSolver(K, pre, printrates=True, maxiter=500)
gfu.vec.data = inv * f.vec
print ("Solved ", fes.ndof, " in ", time()-ts)
CG iteration 1, residual = 0.23862092559273262
CG iteration 2, residual = 0.08880102805909865
CG iteration 3, residual = 0.03381300972822417
CG iteration 4, residual = 0.01442684744689833
CG iteration 5, residual = 0.008722273824359365
CG iteration 6, residual = 0.006263490376480935
CG iteration 7, residual = 0.004017908376184001
CG iteration 8, residual = 0.0026607639885974908
CG iteration 9, residual = 0.001957219525598442
CG iteration 10, residual = 0.0015120507152747429
CG iteration 11, residual = 0.0011996952983300618
CG iteration 12, residual = 0.0009631932705251823
CG iteration 13, residual = 0.0009102482643261315
CG iteration 14, residual = 0.0009226087005467614
CG iteration 15, residual = 0.0009331507468653179
CG iteration 16, residual = 0.0008471194978452839
CG iteration 17, residual = 0.0006865774593190383
CG iteration 18, residual = 0.0005121617360755124
CG iteration 19, residual = 0.00036534505459031197
CG iteration 20, residual = 0.00026491997611589017
CG iteration 21, residual = 0.00019088766523605574
CG iteration 22, residual = 0.00014636525744731184
CG iteration 23, residual = 0.00012211845104255828
CG iteration 24, residual = 0.00010211065964075285
CG iteration 25, residual = 8.961808157019775e-05
CG iteration 26, residual = 7.922388152278657e-05
CG iteration 27, residual = 6.940845228042119e-05
CG iteration 28, residual = 6.060473988794019e-05
CG iteration 29, residual = 5.4900528479769684e-05
CG iteration 30, residual = 5.152303786165254e-05
CG iteration 31, residual = 4.894089643907753e-05
CG iteration 32, residual = 4.626788403965864e-05
CG iteration 33, residual = 4.3895289580086844e-05
CG iteration 34, residual = 4.0892698828499795e-05
CG iteration 35, residual = 3.735962026228574e-05
CG iteration 36, residual = 3.30094264229415e-05
CG iteration 37, residual = 2.8494763541617077e-05
CG iteration 38, residual = 2.398314280536251e-05
CG iteration 39, residual = 2.0379063394642992e-05
CG iteration 40, residual = 1.783737059550748e-05
CG iteration 41, residual = 1.5546580110184333e-05
CG iteration 42, residual = 1.3781903294795854e-05
CG iteration 43, residual = 1.2456766240080638e-05
CG iteration 44, residual = 1.1243107809842627e-05
CG iteration 45, residual = 1.0168609497010008e-05
CG iteration 46, residual = 8.917725012852201e-06
CG iteration 47, residual = 7.599052757876488e-06
CG iteration 48, residual = 6.446899214320503e-06
CG iteration 49, residual = 5.261921279309597e-06
CG iteration 50, residual = 4.243304104141862e-06
CG iteration 51, residual = 3.4882151414348994e-06
CG iteration 52, residual = 2.8969978256341414e-06
CG iteration 53, residual = 2.564937218440891e-06
CG iteration 54, residual = 2.320916492518856e-06
CG iteration 55, residual = 2.0473265791681824e-06
CG iteration 56, residual = 1.8407123070561859e-06
CG iteration 57, residual = 1.6042835442241118e-06
CG iteration 58, residual = 1.3556043251654957e-06
CG iteration 59, residual = 1.1410051051376062e-06
CG iteration 60, residual = 9.590072115605497e-07
CG iteration 61, residual = 7.833369389643368e-07
CG iteration 62, residual = 6.41053274201669e-07
CG iteration 63, residual = 5.291073820616801e-07
CG iteration 64, residual = 4.4184116258823423e-07
CG iteration 65, residual = 3.8269683600394645e-07
CG iteration 66, residual = 3.387395296362217e-07
CG iteration 67, residual = 3.0162396108885343e-07
CG iteration 68, residual = 2.74384646131662e-07
CG iteration 69, residual = 2.4307701689950537e-07
CG iteration 70, residual = 2.1483737704452754e-07
CG iteration 71, residual = 1.8813366514303868e-07
CG iteration 72, residual = 1.6429355486401868e-07
CG iteration 73, residual = 1.468358881694338e-07
CG iteration 74, residual = 1.3379351647818102e-07
CG iteration 75, residual = 1.21711494187964e-07
CG iteration 76, residual = 1.0994407450822265e-07
CG iteration 77, residual = 9.631631603147349e-08
CG iteration 78, residual = 8.565175197682566e-08
CG iteration 79, residual = 7.555433373491492e-08
CG iteration 80, residual = 6.826138005108876e-08
CG iteration 81, residual = 6.214385492539165e-08
CG iteration 82, residual = 5.6280818050844345e-08
CG iteration 83, residual = 5.17978415057295e-08
CG iteration 84, residual = 4.6138669757846424e-08
CG iteration 85, residual = 4.0874258543545286e-08
CG iteration 86, residual = 3.621040528194347e-08
CG iteration 87, residual = 3.1574099208111426e-08
CG iteration 88, residual = 2.722569047506893e-08
CG iteration 89, residual = 2.3181967035656456e-08
CG iteration 90, residual = 1.9246976679853425e-08
CG iteration 91, residual = 1.548998577982342e-08
CG iteration 92, residual = 1.2627703275204279e-08
CG iteration 93, residual = 1.0032278188369058e-08
CG iteration 94, residual = 8.301447024709129e-09
CG iteration 95, residual = 6.7702659571997494e-09
CG iteration 96, residual = 5.768267957680529e-09
CG iteration 97, residual = 5.1096409473358395e-09
CG iteration 98, residual = 4.5089475137169995e-09
CG iteration 99, residual = 4.064235983033147e-09
CG iteration 100, residual = 3.685292395472403e-09
CG iteration 101, residual = 3.279765029262252e-09
CG iteration 102, residual = 2.920902065131502e-09
CG iteration 103, residual = 2.5983734004171065e-09
CG iteration 104, residual = 2.3113942007021456e-09
CG iteration 105, residual = 2.085821190156387e-09
CG iteration 106, residual = 1.8238692167637434e-09
CG iteration 107, residual = 1.6239490403357458e-09
CG iteration 108, residual = 1.4581598524181765e-09
CG iteration 109, residual = 1.2985042720485252e-09
CG iteration 110, residual = 1.1630842702130264e-09
CG iteration 111, residual = 1.0450979457594662e-09
CG iteration 112, residual = 9.634163056136282e-10
CG iteration 113, residual = 8.581314897214475e-10
CG iteration 114, residual = 7.938697614930958e-10
CG iteration 115, residual = 7.14882834002984e-10
CG iteration 116, residual = 6.441576677964192e-10
CG iteration 117, residual = 5.759187876299561e-10
CG iteration 118, residual = 5.041184683101573e-10
CG iteration 119, residual = 4.3746227041689953e-10
CG iteration 120, residual = 3.771856531233403e-10
CG iteration 121, residual = 3.1483947727899907e-10
CG iteration 122, residual = 2.686523591420383e-10
CG iteration 123, residual = 2.2429673730701492e-10
CG iteration 124, residual = 1.9384158811397053e-10
CG iteration 125, residual = 1.646056513505712e-10
CG iteration 126, residual = 1.3504120017205838e-10
CG iteration 127, residual = 1.101140304606656e-10
CG iteration 128, residual = 8.627791233337817e-11
CG iteration 129, residual = 6.732555123557093e-11
CG iteration 130, residual = 5.377112904739389e-11
CG iteration 131, residual = 4.4879032696804154e-11
CG iteration 132, residual = 3.8690616101466405e-11
CG iteration 133, residual = 3.4770157971565467e-11
CG iteration 134, residual = 3.2623751039953786e-11
CG iteration 135, residual = 3.0164230896676187e-11
CG iteration 136, residual = 2.683399922380791e-11
CG iteration 137, residual = 2.3010655407250503e-11
CG iteration 138, residual = 1.9266675825525687e-11
CG iteration 139, residual = 1.566587530748172e-11
CG iteration 140, residual = 1.2986814468501075e-11
CG iteration 141, residual = 1.0889621181318835e-11
CG iteration 142, residual = 9.170156410642548e-12
CG iteration 143, residual = 7.841108869120405e-12
CG iteration 144, residual = 7.028671200154906e-12
CG iteration 145, residual = 6.126023840075073e-12
CG iteration 146, residual = 5.473333920790427e-12
CG iteration 147, residual = 4.9844201185042774e-12
CG iteration 148, residual = 4.344492698638465e-12
CG iteration 149, residual = 3.951242112208828e-12
CG iteration 150, residual = 3.6162652564546397e-12
CG iteration 151, residual = 3.321817984384574e-12
CG iteration 152, residual = 3.195840783826177e-12
CG iteration 153, residual = 2.9807014142655468e-12
CG iteration 154, residual = 2.7208310973254355e-12
CG iteration 155, residual = 2.4647533437728166e-12
CG iteration 156, residual = 2.0877375762106725e-12
CG iteration 157, residual = 1.800562325810574e-12
CG iteration 158, residual = 1.5078928961292372e-12
CG iteration 159, residual = 1.2683005618719666e-12
CG iteration 160, residual = 1.0928302501778526e-12
CG iteration 161, residual = 9.798668395403108e-13
CG iteration 162, residual = 8.687336125801655e-13
CG iteration 163, residual = 7.695714424335506e-13
CG iteration 164, residual = 6.89838028158149e-13
CG iteration 165, residual = 5.833522245124051e-13
CG iteration 166, residual = 4.949292352529398e-13
CG iteration 167, residual = 4.0752584133895906e-13
CG iteration 168, residual = 3.41459695764561e-13
CG iteration 169, residual = 2.6992522860043303e-13
CG iteration 170, residual = 2.1805478953944475e-13
Solved 46801 in 0.29103779792785645
Draw (gfu, order=8);