# Source code for evaluation.MMD

import typing

from sklearn.neighbors import NearestNeighbors
from keras import backend as K
import numpy as np
import tensorflow as tf


class MMD:
    """
    Maximum Mean Discrepancy (MMD) between real and generated samples.

    The kernel is a weighted sum of Gaussian kernels evaluated at three
    bandwidths derived from the median nearest-neighbour distance among the
    real cells.

    Attributes
    ----------
    scales : np.ndarray
        Kernel bandwidths ``[med / 2, med, med * 2]`` stored with shape
        (3, 1, 1) so they broadcast against an (n, m) distance matrix.
    weights : np.ndarray
        A (1, 1) array holding one weight shared by every bandwidth. Its
        value is the number of bandwidths (3), so it only rescales the
        score by a constant factor.
    """

    def __init__(self, real_cells: np.ndarray):
        """
        Derive the kernel bandwidths and weight from the real cells.

        The median distance to the nearest neighbours is estimated on 20
        bootstrap resamples of ``real_cells``; the median of those 20
        estimates is the base bandwidth ``med``.

        Parameters
        ----------
        real_cells : np.ndarray
            A NumPy array representing real cell data (cells x features).
            NOTE(review): ``NearestNeighbors`` is asked for 25 neighbours, so
            presumably at least 25 rows are required - confirm against
            scikit-learn.
        """
        n_neighbors = 25
        n_resamples = 20

        # Every slot is filled by the loop below. Starting the loop at 1
        # would leave slot 0 at its placeholder value and bias the median.
        medians = np.empty(n_resamples)
        for ii in range(n_resamples):
            sample = self._bootstrap_rows(real_cells)
            nbrs = NearestNeighbors(n_neighbors=n_neighbors).fit(sample)
            distances, _ = nbrs.kneighbors(sample)
            # nearest neighbor is the point so we need to exclude it
            medians[ii] = np.median(distances[:, 1:n_neighbors])
        med = np.median(medians)

        scales = [med / 2, med, med * 2]
        # K.shape(scales)[0] is the number of scales, so the weight is one
        # scalar (3) rather than one weight per scale.
        weights = K.eval(K.shape(scales)[0])
        weights = K.variable(value=np.asarray(weights))

        # Two trailing singleton axes let these broadcast over (n, m).
        # NOTE(review): np.expand_dims on a backend variable assumes it can
        # be converted to a NumPy array (eager execution) - confirm.
        self.scales = np.expand_dims(np.expand_dims(scales, -1), -1)
        self.weights = np.expand_dims(np.expand_dims(weights, -1), -1)

    @staticmethod
    def _bootstrap_rows(cells: np.ndarray) -> np.ndarray:
        """Draw ``cells.shape[0]`` rows of ``cells`` uniformly with replacement."""
        n_rows = cells.shape[0]
        # randint's upper bound is exclusive: passing n_rows (not n_rows - 1)
        # keeps the last row reachable and lets a single-row input work.
        row_idx = np.random.randint(n_rows, size=n_rows)
        return cells[row_idx, :]

    def squaredDistance(
        self,
        X: typing.Union[np.ndarray, "tf.Tensor"],
        Y: typing.Union[np.ndarray, "tf.Tensor"],
    ) -> "tf.Tensor":
        """
        Compute pairwise squared Euclidean distances between rows of X and Y.

        Parameters
        ----------
        X : np.ndarray or tf.Tensor
            Input array of shape (n, d).
        Y : np.ndarray or tf.Tensor
            Input array of shape (m, d).

        Returns
        -------
        tf.Tensor
            A tensor of shape (n, m) representing squared distances.
        """
        # Broadcasting (n, 1, d) against (m, d) yields an (n, m, d) tensor of
        # coordinate differences, which is squared and summed over d.
        r = K.expand_dims(X, axis=1)
        return K.sum(K.square(r - Y), axis=-1)

    def gaussian_kernel(
        self,
        a: typing.Union[np.ndarray, "tf.Tensor"],
        b: typing.Union[np.ndarray, "tf.Tensor"],
    ) -> np.ndarray:
        """
        Compute the multi-scale Gaussian kernel between two datasets.

        Each entry is ``sum_s w * exp(-||a_i - b_j||^2 / s^2)`` over the
        bandwidths ``s`` in ``self.scales`` with the weight ``self.weights``.

        Parameters
        ----------
        a : np.ndarray or tf.Tensor
            Input array of shape (n, d).
        b : np.ndarray or tf.Tensor
            Input array of shape (m, d).

        Returns
        -------
        np.ndarray
            An array of shape (n, m) representing the Gaussian kernel matrix.
            The result goes through NumPy functions, so it is a NumPy array
            rather than a backend tensor.
        """
        # Leading axis of length 1 broadcasts against the (3, 1, 1) scales.
        sq_dist = np.expand_dims(self.squaredDistance(a, b), 0)
        per_scale = np.exp(-sq_dist / (np.power(self.scales, 2)))
        return np.sum(self.weights * per_scale, 0)

    def compute(
        self,
        a: typing.Union[np.ndarray, "tf.Tensor"],
        b: typing.Union[np.ndarray, "tf.Tensor"],
    ) -> np.floating:
        """
        Compute the Maximum Mean Discrepancy (MMD) between two samples.

        The value is ``mean(k(a, a)) + mean(k(b, b)) - 2 * mean(k(a, b))``.
        The means include the diagonal terms and no square root is taken.

        Parameters
        ----------
        a : np.ndarray or tf.Tensor
            First sample of shape (n, d).
        b : np.ndarray or tf.Tensor
            Second sample of shape (m, d).

        Returns
        -------
        np.floating
            The MMD score between the two distributions, as a NumPy scalar.
        """
        within_a = self.gaussian_kernel(a, a).mean()
        within_b = self.gaussian_kernel(b, b).mean()
        between = self.gaussian_kernel(a, b).mean()
        return within_a + within_b - 2 * between