# Linear solve with Conjugate Gradient (CG) This tutorial shows how to implement a simple linear solver using Lacuna’s CSR and a common Conjugate Gradient loop. It is fully SciPy-free and constructs the test matrix directly with NumPy. ## Problem Solve Ax = b for a symmetric positive definite (SPD) sparse matrix A. ## Build an SPD test matrix (SciPy-free) ```python import numpy as np n = 4096 rs = np.random.RandomState(0) density = 2e-4 nnz = max(n, int(n*n*density)) # Accumulate random COO into a dict to coalesce duplicates coo = {} ri = rs.randint(0, n, size=nnz, dtype=np.int64) cj = rs.randint(0, n, size=nnz, dtype=np.int64) val = rs.standard_normal(nnz) for r, c, v in zip(ri, cj, val): coo[(int(r), int(c))] = coo.get((int(r), int(c)), 0.0) + float(v) # Symmetrize: A = A + A.T (coalesce on the fly) sym = {} for (i, j), v in coo.items(): sym[(i, j)] = sym.get((i, j), 0.0) + v sym[(j, i)] = sym.get((j, i), 0.0) + v # Make strictly diagonally dominant => SPD row_sum_abs = np.zeros(n, dtype=np.float64) for (i, j), v in sym.items(): if i != j: row_sum_abs[i] += abs(v) eps = 1e-3 for i in range(n): sym[(i, i)] = sym.get((i, i), 0.0) + row_sum_abs[i] + eps # Convert dict to CSR arrays rows = [[] for _ in range(n)] vals = [[] for _ in range(n)] for (i, j), v in sym.items(): rows[i].append(int(j)) vals[i].append(float(v)) for i in range(n): if rows[i]: order = np.argsort(rows[i]) rows[i] = list(np.asarray(rows[i])[order]) vals[i] = list(np.asarray(vals[i])[order]) indptr = np.zeros(n+1, dtype=np.int64) for i in range(n): indptr[i+1] = indptr[i] + len(rows[i]) indices = np.zeros(indptr[-1], dtype=np.int64) data = np.zeros(indptr[-1], dtype=np.float64) pos = 0 for i in range(n): k = len(rows[i]) if k: indices[pos:pos+k] = np.asarray(rows[i], dtype=np.int64) data[pos:pos+k] = np.asarray(vals[i], dtype=np.float64) pos += k shape = (n, n) ``` ## Wrap in Lacuna CSR ```python from lacuna.sparse.csr import CSR A_lac = CSR(indptr, indices, data, shape, check=False) ``` ## Conjugate Gradient (CG) ```python def cg(matvec, b, tol=1e-8, maxiter=1000, x0=None): import numpy as np x = np.zeros_like(b) if x0 is None else np.array(x0, dtype=np.float64, copy=True) r = b - matvec(x) p = r.copy() rr = float(np.dot(r, r)) nb = float(np.linalg.norm(b)) or 1.0 for k in range(1, maxiter + 1): Ap = matvec(p) pAp = float(np.dot(p, Ap)) if pAp == 0.0: break alpha = rr / pAp x += alpha * p r -= alpha * Ap rr_new = float(np.dot(r, r)) if (rr_new ** 0.5) / nb <= tol: rr = rr_new break beta = rr_new / rr p = r + beta * p rr = rr_new rel = (rr ** 0.5) / nb return x, k, rel ``` ## Run ```python rs = np.random.RandomState(123) x_true = rs.standard_normal(n) b = np.asarray(A_lac @ x_true, dtype=np.float64) # Lacuna matvec matvec_lac = lambda v: np.asarray(A_lac @ v, dtype=np.float64) x, iters, rel = cg(matvec_lac, b, tol=1e-8, maxiter=1000) print(iters, rel) ``` ## Notes - Lacuna uses Rayon threads internally; control with: ```python import lacuna as la la.set_num_threads(8) ``` - For full benchmark and plotting, see `python/benchmarks/benchmark_linear_solve.py`.