Source code for pyLOM.MANIFOLD.wrapper

#!/usr/bin/env python
#
# Manifold learning methodologies.
#
# Last revision: 11/02/2025
import numpy as np
from scipy.sparse.csgraph import shortest_path
from scipy.linalg import eigh

from ..utils.gpu import cp
from ..vmmath    import euclidean_d
from ..utils     import cr_nvtx as cr, raiseError, pprint


[docs] @cr('MANIFOLD.isomap') def isomap(X:np.ndarray, dims:int, n_size:int, comp:int = 1 ,verbose:bool = True): """ Computes Isomap embedding using the algorithm of Tenenbaum, de Silva, and Langford (2000). Parameters: X : ndarray NxM Data matrix with N points in the mesh for M simulations dims: int Embedding dimensionality to use n_size : int Neighborhood size (number of neighbors for 'k' method) comp: int Component to embed, if more than 1 (defaults to 1, the largest) verbose: bool Display information (default is True) Returns: Y : ndarray Contains coordinates for d-dimensional embeddings in Y. R : list Residual variances for the embedding in Y. E : ndarray Edge matrix for neighborhood graph. """ # Compute pairwise distances in a condensed form and convert to a square form D = cp.asnumpy(euclidean_d(X)) if type(X) is cp.ndarray else euclidean_d(X) # Step 0: Initialization and Parameters N = D.shape[0] if D.shape[1] != N: raise ValueError("D must be a square matrix") K = n_size if not isinstance(K, int) or K <= 0: raiseError("Number of neighbors for 'k' method must be a positive integer") INF = 1000 * np.max(D) * N # Effectively infinite distance # Step 1: Construct neighborhood graph if verbose: pprint(0,"Constructing neighborhood graph...",flush=True) # Sort distances and keep only K-nearest neighbors sorted_indices = np.argsort(D, axis=1) for i in range(N): D[i, sorted_indices[i, K+1:]] = INF # Only keep the K nearest neighbors D = np.minimum(D, D.T) # Ensure distance matrix is symmetric E = (D != INF).astype(int) # Edge matrix for neighborhood graph # Step 2: Compute shortest paths using Floyd-Warshall algorithm if verbose: pprint(0,"Computing shortest paths...",flush=True) G = shortest_path(D, method='FW', directed=False, unweighted=False) # Step 3: Identify connected components firsts = np.min((G == INF).astype(int) * np.arange(N), axis=1) comps, indices = np.unique(firsts, return_inverse=True) size_comps = np.array([np.sum(indices == i) for i in range(len(comps))]) sorted_indices = np.argsort(size_comps)[::-1] comps = comps[sorted_indices] size_comps = size_comps[sorted_indices] if comp > len(comps): comp = 1 # Default to largest component if specified component is out of range if verbose: pprint(0,f"Number of connected components in graph: {len(comps)}",flush=True) pprint(0,f"Embedding component {comp} with {size_comps[comp-1]} points.",flush=True) # Select the largest connected component index = np.where(firsts == comps[comp-1])[0] G = G[np.ix_(index, index)] N = len(index) # Step 4: Construct low-dimensional embeddings using Classical MDS H = np.eye(N) - np.ones((N, N)) / N B = -0.5 * H @ (G**2) @ H # Centering the matrix eigenvalues, eigenvectors = eigh(B, subset_by_index=[N - dims, N - 1]) # Sorting eigenvalues and corresponding eigenvectors in descending order idx = np.argsort(eigenvalues)[::-1] eigenvalues = eigenvalues[idx] eigenvectors = eigenvectors[:, idx] # Compute embeddings for each specified dimension if dims <= N: coords = eigenvectors[:, :dims] * np.sqrt(eigenvalues[:dims]) Y = coords.T # Compute residual variance L2_D = np.linalg.norm(coords[:, None] - coords[None, :], axis=2).flatten() r2 = 1 - np.corrcoef(L2_D, G.flatten())[0, 1]**2 R = r2 if verbose: pprint(0,f"Isomap on {N} points with dimensionality {dims} --> residual variance = {R}",flush=True) else: raiseError('Selected number of dimensions is higher than the number of samples') return None, None, None return Y, R, E
[docs] @cr('MANIFOLD.mds') def mds(X:np.ndarray, dims:int, verbose:bool = True): """ Computes the MDS embedding using a custom approach with squared distances and eigen-decomposition. Parameters: X : ndarray NxM Data matrix with N points in the mesh for M simulations dims : int Embedding dimensionality to use (p in your MATLAB code) Returns: Y : ndarray Contains coordinates for d-dimensional embeddings in Y. """ # Step 1: Compute the pairwise Euclidean distance matrix and square it D = cp.asnumpy(euclidean_d(X)) if type(X) is cp.ndarray else euclidean_d(X) D = D * D # Step 2: Apply the custom centering formula to get matrix B n = D.shape[0] C = np.eye(n) - np.ones((n,n))/n B = -0.5*np.matmul(np.matmul(C,D),C) # Step 3: Compute eigen-decomposition # eigenvalues, eigenvectors = eigh((B + B.T) / 2) eigenvalues, eigenvectors = eigh(B) # Step 4: Sort eigenvalues and corresponding eigenvectors in descending order idx = np.argsort(eigenvalues)[::-1] eigenvalues = eigenvalues[idx] eigenvectors = eigenvectors[:, idx] eigenvalues = eigenvalues[:dims] eigenvectors = eigenvectors[:,:dims] # Step 5: Keep only positive eigenvalues beyond a roundoff threshold threshold = np.max(np.abs(eigenvalues)) * np.finfo(eigenvalues.dtype).eps**(3 / 4) keep = eigenvalues > threshold if not np.any(keep): Y = np.zeros((n, 1)) else: # Compute the embedding with kept eigenvalues and corresponding eigenvectors Y = eigenvectors[:, keep] * np.sqrt(eigenvalues[keep]) return Y.T