Source code for modAL.density

"""
Measures for estimating the information density of a given sample.
"""
from typing import Callable, Union

import numpy as np
from scipy.spatial.distance import cosine, euclidean
from sklearn.metrics.pairwise import pairwise_distances

from modAL.utils.data import modALinput


def similarize_distance(distance_measure: Callable) -> Callable:
    """
    Wraps a distance measure so that it behaves as a similarity measure.

    The returned callable forwards all of its positional and keyword
    arguments to ``distance_measure`` and maps the resulting distance ``d``
    to ``1/(1 + d)``, so a distance of zero yields a similarity of one and
    larger distances yield smaller similarities.

    Args:
        distance_measure: The distance measure to be converted into a
            similarity (information density) measure.

    Returns:
        A callable with the same call signature as ``distance_measure``
        that returns ``1/(1 + distance_measure(...))``.
    """
    def sim(*args, **kwargs):
        # Delegate to the wrapped measure first, then squash the result.
        distance = distance_measure(*args, **kwargs)
        return 1/(1 + distance)

    return sim
# Ready-made similarity measures built by wrapping the scipy distance
# functions imported at the top of this module with similarize_distance.
cosine_similarity = similarize_distance(cosine)
euclidean_similarity = similarize_distance(euclidean)
def information_density(X: modALinput, metric: Union[str, Callable] = 'euclidean') -> np.ndarray:
    """
    Calculates the information density of the given data using the given metric.

    All pairwise distances between the samples in ``X`` are computed with
    ``sklearn.metrics.pairwise.pairwise_distances``, each distance ``d`` is
    converted to a similarity ``1/(1 + d)``, and the similarities are averaged
    along axis 1 of the resulting matrix.

    Args:
        X: The data for which the information density is to be calculated.
        metric: The metric to be used; it is passed unchanged to
            ``pairwise_distances``. A callable should take two 1d
            numpy.ndarrays for argument.

    Todo:
        Should work with all possible modALinput.
        Perhaps refactor the module to use some stuff from sklearn.metrics.pairwise

    Returns:
        The information density for each sample.
    """
    # Distance of every sample in X to every sample in X (self-distances included).
    distance_mtx = pairwise_distances(X, X, metric=metric)

    # Same distance -> similarity mapping that similarize_distance applies.
    similarity_mtx = 1/(1 + distance_mtx)

    return similarity_mtx.mean(axis=1)