Linear solve with Conjugate Gradient (CG)#
This tutorial shows how to implement a simple linear solver using Lacuna’s CSR and a common Conjugate Gradient loop. It is fully SciPy-free and constructs the test matrix directly with NumPy.
Problem#
Solve Ax = b for a symmetric positive definite (SPD) sparse matrix A.
Build an SPD test matrix (SciPy-free)#
import numpy as np
n = 4096
rs = np.random.RandomState(0)
density = 2e-4
nnz = max(n, int(n*n*density))
# Accumulate random COO into a dict to coalesce duplicates
coo = {}
ri = rs.randint(0, n, size=nnz, dtype=np.int64)
cj = rs.randint(0, n, size=nnz, dtype=np.int64)
val = rs.standard_normal(nnz)
for r, c, v in zip(ri, cj, val):
coo[(int(r), int(c))] = coo.get((int(r), int(c)), 0.0) + float(v)
# Symmetrize: A = A + A.T (coalesce on the fly)
sym = {}
for (i, j), v in coo.items():
sym[(i, j)] = sym.get((i, j), 0.0) + v
sym[(j, i)] = sym.get((j, i), 0.0) + v
# Make strictly diagonally dominant => SPD
row_sum_abs = np.zeros(n, dtype=np.float64)
for (i, j), v in sym.items():
if i != j:
row_sum_abs[i] += abs(v)
eps = 1e-3
for i in range(n):
sym[(i, i)] = sym.get((i, i), 0.0) + row_sum_abs[i] + eps
# Convert dict to CSR arrays
rows = [[] for _ in range(n)]
vals = [[] for _ in range(n)]
for (i, j), v in sym.items():
rows[i].append(int(j))
vals[i].append(float(v))
for i in range(n):
if rows[i]:
order = np.argsort(rows[i])
rows[i] = list(np.asarray(rows[i])[order])
vals[i] = list(np.asarray(vals[i])[order])
indptr = np.zeros(n+1, dtype=np.int64)
for i in range(n):
indptr[i+1] = indptr[i] + len(rows[i])
indices = np.zeros(indptr[-1], dtype=np.int64)
data = np.zeros(indptr[-1], dtype=np.float64)
pos = 0
for i in range(n):
k = len(rows[i])
if k:
indices[pos:pos+k] = np.asarray(rows[i], dtype=np.int64)
data[pos:pos+k] = np.asarray(vals[i], dtype=np.float64)
pos += k
shape = (n, n)
Wrap in Lacuna CSR#
from lacuna.sparse.csr import CSR
A_lac = CSR(indptr, indices, data, shape, check=False)
Conjugate Gradient (CG)#
def cg(matvec, b, tol=1e-8, maxiter=1000, x0=None):
import numpy as np
x = np.zeros_like(b) if x0 is None else np.array(x0, dtype=np.float64, copy=True)
r = b - matvec(x)
p = r.copy()
rr = float(np.dot(r, r))
nb = float(np.linalg.norm(b)) or 1.0
for k in range(1, maxiter + 1):
Ap = matvec(p)
pAp = float(np.dot(p, Ap))
if pAp == 0.0:
break
alpha = rr / pAp
x += alpha * p
r -= alpha * Ap
rr_new = float(np.dot(r, r))
if (rr_new ** 0.5) / nb <= tol:
rr = rr_new
break
beta = rr_new / rr
p = r + beta * p
rr = rr_new
rel = (rr ** 0.5) / nb
return x, k, rel
Run#
rs = np.random.RandomState(123)
x_true = rs.standard_normal(n)
b = np.asarray(A_lac @ x_true, dtype=np.float64)
# Lacuna matvec
matvec_lac = lambda v: np.asarray(A_lac @ v, dtype=np.float64)
x, iters, rel = cg(matvec_lac, b, tol=1e-8, maxiter=1000)
print(iters, rel)
Notes#
Lacuna uses Rayon threads internally; control with:
import lacuna as la la.set_num_threads(8)
For full benchmark and plotting, see
python/benchmarks/benchmark_linear_solve.py.