Source code for er_evaluation.metrics._metrics

from functools import wraps

import numpy as np
import pandas as pd
import sklearn.metrics as sm
from scipy.special import comb

from er_evaluation.data_structures import MembershipVector
from er_evaluation.error_analysis import (error_indicator,
                                          expected_relative_extra_from_table,
                                          expected_relative_missing_from_table,
                                          record_error_table)
from er_evaluation.summary import number_of_links
from er_evaluation.utils import expand_grid


def _f_score(P, R, beta=1.0):
    """
    Compute the weighted F1 score for a given precision and recall.

    Args:
        P (float): Precision.
        R (float): Recall.
        beta (float): Weighting factor.

    Returns:
        float: Weighted F1 score.

    Examples:
        >>> _f_score(0.5, 0.5, beta=1.0)
        0.5
    """
    D = beta**2 * P + R
    if D == 0:
        return 0
    else:
        return (1 + beta**2) * P * R / D


def pairwise_precision(prediction, reference):
    r"""
    Pairwise precision for the inner join of two clusterings.

    Pairwise precision:
        Consider two clusterings of a set of records, referred to as the
        *predicted* and *reference* clusterings. Let :math:`T` be the set of
        record pairs which appear in the same reference cluster, and let
        :math:`P` be the set of record pairs which appear in the same
        predicted cluster. Pairwise precision is then defined as

        .. math::

            P = \frac{\lvert T \cap P \rvert}{\lvert P \rvert}

        This is the proportion of correctly predicted links among all
        predicted links.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Pairwise precision for the inner join of `prediction` and
        `reference`. When the joined prediction contains no links, 1.0 is
        returned.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> pairwise_precision(prediction, reference)
        0.4

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    # Restrict both clusterings to the records they have in common.
    joined = pd.concat(
        {"prediction": predicted_membership, "reference": reference_membership},
        axis=1,
        join="inner",
        copy=False,
    )

    # Records sharing both a predicted and a reference cluster form the
    # correctly linked groups; a group of size k contributes C(k, 2) links.
    overlap_sizes = joined.groupby(["prediction", "reference"]).size().values
    n_correct_links = comb(overlap_sizes, 2).sum()

    n_predicted_links = number_of_links(joined["prediction"])
    return 1.0 if n_predicted_links == 0 else n_correct_links / n_predicted_links
def pairwise_recall(prediction, reference):
    r"""
    Pairwise recall for the inner join of two clusterings.

    Pairwise recall:
        Consider two clusterings of a set of records, referred to as the
        *predicted* and *reference* clusterings. Let :math:`T` be the set of
        record pairs which appear in the same reference cluster, and let
        :math:`P` be the set of record pairs which appear in the same
        predicted cluster. Pairwise recall is then defined as

        .. math::

            R = \frac{\lvert T \cap P \rvert}{\lvert T \rvert}

        This is the proportion of correctly predicted links among all true
        links.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Pairwise recall computed on the inner join of `prediction`
        and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> pairwise_recall(prediction, reference)
        0.4

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    # Recall is precision with the roles of the two clusterings exchanged.
    return pairwise_precision(reference_membership, predicted_membership)
def pairwise_f(prediction, reference, beta=1.0):
    r"""
    Pairwise F score for the inner join of two clusterings.

    Pairwise F score:
        Pairwise F score is defined as the weighted harmonic mean of pairwise
        precision and pairwise recall:

        .. math::

            F_\beta = \frac{(1 + \beta^2)PR}{ \beta^2 P+R}.

        The :math:`\beta` parameter controls the relative weight of precision
        and recall. When :math:`\beta = 1`, the F score is the harmonic mean
        of precision and recall. When :math:`\beta < 1`, the F score is
        weighted towards precision. When :math:`\beta > 1`, the F score is
        weighted towards recall.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.
        beta (float): Weighting factor :math:`\beta` of the F score.

    Returns:
        float: Pairwise F score for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> pairwise_f(prediction, reference)
        0.4000000000000001

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    return _f_score(
        pairwise_precision(predicted_membership, reference_membership),
        pairwise_recall(predicted_membership, reference_membership),
        beta=beta,
    )
def cluster_precision(prediction, reference):
    r"""
    Cluster precision for the inner join of two clusterings.

    Cluster precision:
        Consider two clusterings of a set of records, referred to as the
        *predicted* and *reference* clusterings. Let :math:`C` be the set of
        reference (true) clusters, and let :math:`\hat C` be the set of
        predicted clusters. Cluster precision is then defined as

        .. math::

            P = \frac{\lvert C \cap \hat C \rvert}{\lvert \hat C \rvert}

        This is the proportion of correctly predicted clusters among all
        predicted clusters.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Cluster precision for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,5])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> cluster_precision(prediction, reference)
        0.4

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    # Restrict both clusterings to the records they have in common.
    joined = pd.concat(
        {"prediction": predicted_membership, "reference": reference_membership},
        axis=1,
        join="inner",
        copy=False,
    )

    # NOTE(review): error_indicator is presumably 1 for each erroneous
    # cluster and 0 otherwise, so (1 - indicator) counts the correct ones --
    # confirm against er_evaluation.error_analysis.
    cluster_errors = error_indicator(joined["prediction"], joined["reference"])
    n_correct_clusters = (1 - cluster_errors).sum()
    n_predicted_clusters = joined["prediction"].nunique()
    return n_correct_clusters / n_predicted_clusters
def cluster_recall(prediction, reference):
    r"""
    Cluster recall for the inner join of two clusterings.

    Cluster recall:
        Consider two clusterings of a set of records, referred to as the
        *predicted* and *reference* clusterings. Let :math:`C` be the set of
        reference (true) clusters, and let :math:`\hat C` be the set of
        predicted clusters. Cluster recall is then defined as

        .. math::

            R = \frac{\lvert C \cap \hat C \rvert}{\lvert C \rvert}

        This is the proportion of correctly predicted clusters among all
        reference (true) clusters.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Cluster recall for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,5])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> cluster_recall(prediction, reference)
        0.5

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    # Recall is precision with the roles of the two clusterings exchanged.
    return cluster_precision(reference_membership, predicted_membership)
def cluster_f(prediction, reference, beta=1.0):
    r"""
    Cluster F score for the inner join of two clusterings.

    Cluster F score:
        Cluster F score is defined as the weighted harmonic mean of cluster
        precision and cluster recall:

        .. math::

            F_\beta = \frac{(1 + \beta^2)PR}{ \beta^2 P+R}

        The :math:`\beta` parameter controls the relative weight of precision
        and recall. When :math:`\beta = 1`, the F score is the harmonic mean
        of precision and recall. When :math:`\beta < 1`, the F score is
        weighted towards precision. When :math:`\beta > 1`, the F score is
        weighted towards recall.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.
        beta (float): Weighting factor :math:`\beta` of the F score.

    Returns:
        float: Cluster F score for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,5])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> cluster_f(prediction, reference)
        0.4444444444444445

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    return _f_score(
        cluster_precision(predicted_membership, reference_membership),
        cluster_recall(predicted_membership, reference_membership),
        beta=beta,
    )
def b_cubed_precision(prediction, reference):
    r"""
    B-cubed precision for the inner join of two clusterings, with equal weight placed on each ground truth cluster.

    Mathematically, this is defined as

    .. math::

        P_{B^3} = \frac{1}{\lvert \mathcal{C}\rvert}\sum_{c \in \mathcal{C}} \frac{1}{\lvert c \rvert} \sum_{r \in c} \frac{\lvert c(r) \cap \hat c(r)\rvert }{\lvert \hat c(r) \rvert}

    where

    * :math:`\mathcal{C}` is the set of ground truth clusters,
    * :math:`c` is a ground truth cluster,
    * :math:`r` is a mention in :math:`c`,
    * :math:`c(r)` is the cluster associated with :math:`r` in the ground truth clustering,
    * :math:`\hat c(r)` is the cluster associated with :math:`r` in the predicted clustering.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: B-cubed precision for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> b_cubed_precision(prediction, reference)
        0.6458333333333334

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    # Restrict both clusterings to the records they have in common.
    joined = pd.concat(
        {"prediction": predicted_membership, "reference": reference_membership},
        axis=1,
        join="inner",
        copy=False,
    )

    # Precision is the complement of the average "relative extra" error
    # derived from the record-level error table.
    table = record_error_table(joined["prediction"], joined["reference"])
    relative_extra = expected_relative_extra_from_table(table)
    mean_error = np.mean(relative_extra)
    return 1 - mean_error
def b_cubed_recall(prediction, reference):
    r"""
    B-cubed recall for the inner join of two clusterings, with equal weight placed on each ground truth cluster.

    Mathematically, this is defined as

    .. math::

        R_{B^3} = \frac{1}{\lvert \mathcal{C}\rvert}\sum_{c \in \mathcal{C}} \frac{1}{\lvert c \rvert} \sum_{r \in c} \frac{\lvert c(r) \cap \hat c(r)\rvert }{\lvert c(r) \rvert}

    where

    * :math:`\mathcal{C}` is the set of ground truth clusters,
    * :math:`c` is a ground truth cluster,
    * :math:`r` is a mention in :math:`c`,
    * :math:`c(r)` is the cluster associated with :math:`r` in the ground truth clustering, and
    * :math:`\hat c(r)` is the cluster associated with :math:`r` in the predicted clustering.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: B-cubed recall for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> b_cubed_recall(prediction, reference)
        0.7638888888888888

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    # Restrict both clusterings to the records they have in common.
    joined = pd.concat(
        {"prediction": predicted_membership, "reference": reference_membership},
        axis=1,
        join="inner",
        copy=False,
    )

    # Recall is the complement of the average "relative missing" error
    # derived from the record-level error table.
    table = record_error_table(joined["prediction"], joined["reference"])
    relative_missing = expected_relative_missing_from_table(table)
    mean_error = np.mean(relative_missing)
    return 1 - mean_error
def b_cubed_f(prediction, reference, beta=1.0):
    r"""
    B-cubed F score for the inner join of two clusterings.

    B-cubed F score:
        B-cubed F score is defined as the weighted harmonic mean of B-cubed
        precision :math:`P` and B-cubed recall :math:`R`:

        .. math::

            F_\beta = \frac{(1 + \beta^2)PR}{ \beta^2 P+R}

        The :math:`\beta` parameter controls the relative weight of precision
        and recall. When :math:`\beta = 1`, the F score is the harmonic mean
        of precision and recall. When :math:`\beta < 1`, the F score is
        weighted towards precision. When :math:`\beta > 1`, the F score is
        weighted towards recall.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.
        beta (float): Weighting factor :math:`\beta` of the F score.

    Returns:
        float: B-cubed F score for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> b_cubed_f(prediction, reference)
        0.6999178981937602

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    predicted_membership = MembershipVector(prediction, dropna=True)
    reference_membership = MembershipVector(reference, dropna=True)

    return _f_score(
        b_cubed_precision(predicted_membership, reference_membership),
        b_cubed_recall(predicted_membership, reference_membership),
        beta=beta,
    )
def wrap_sklearn_metric(sklearn_metric):
    """Adapt a scikit-learn clustering metric to membership-vector inputs.

    Args:
        sklearn_metric (function): cluster metric to wrap. It is called with
            the integer-coded reference labels first and the integer-coded
            prediction labels second, followed by any extra keyword arguments.

    Returns:
        function: a callable ``(prediction, reference, **kwargs)`` carrying
        the wrapped metric's metadata (via ``functools.wraps``).

    Notes:
        * The prediction and reference membership vectors are inner joined and NA values are dropped before this metric is computed.
    """

    def _as_int_codes(labels):
        # Map arbitrary cluster labels to int64 category codes.
        return pd.Categorical(labels).codes.astype(np.int64)

    @wraps(sklearn_metric)
    def wrapped(prediction, reference, **kwargs):
        predicted_membership = MembershipVector(prediction, dropna=True)
        reference_membership = MembershipVector(reference, dropna=True)

        # Restrict both clusterings to the records they have in common.
        joined = pd.concat(
            {"prediction": predicted_membership, "reference": reference_membership},
            axis=1,
            join="inner",
            copy=False,
        )

        prediction_codes = _as_int_codes(joined["prediction"])
        reference_codes = _as_int_codes(joined["reference"])

        # The wrapped metric receives the reference labels first.
        return sklearn_metric(reference_codes, prediction_codes, **kwargs)

    return wrapped
def cluster_homogeneity(prediction, reference):
    """Cluster homogeneity score (based on conditional entropy).

    This wraps scikit-learn's `homogeneity score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.homogeneity_score.html>`_.

    Args:
        prediction (Series): membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series): membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: homogeneity score

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    homogeneity = wrap_sklearn_metric(sm.homogeneity_score)
    return homogeneity(prediction, reference)
def cluster_completeness(prediction, reference):
    """Cluster completeness score (based on conditional entropy).

    This wraps scikit-learn's `completeness score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.completeness_score.html>`_.

    Args:
        prediction (Series): membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series): membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: completeness score

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    completeness = wrap_sklearn_metric(sm.completeness_score)
    return completeness(prediction, reference)
def cluster_v_measure(prediction, reference, beta=1.0):
    """Compute the V-measure.

    This wraps scikit-learn's `V-measure function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.v_measure_score.html#sklearn-metrics-v-measure-score>`_.

    Args:
        prediction (Series): membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series): membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.
        beta (float): forwarded unchanged as the ``beta`` keyword argument of scikit-learn's ``v_measure_score``.

    Returns:
        float: V-measure

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    v_measure = wrap_sklearn_metric(sm.v_measure_score)
    return v_measure(prediction, reference, beta=beta)
def rand_score(prediction, reference):
    """Compute the Rand index.

    This wraps scikit-learn's `rand index score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.rand_score.html#sklearn.metrics.rand_score>`_.

    Args:
        prediction (Series): membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series): membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: rand index score

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    rand_index = wrap_sklearn_metric(sm.rand_score)
    return rand_index(prediction, reference)
def adjusted_rand_score(prediction, reference):
    """Compute the adjusted Rand index.

    This wraps scikit-learn's `adjusted rand score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_rand_score.html#sklearn-metrics-adjusted-rand-score>`_.

    Args:
        prediction (Series): membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series): membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: adjusted rand index

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    adjusted_rand_index = wrap_sklearn_metric(sm.adjusted_rand_score)
    return adjusted_rand_index(prediction, reference)
# Default mapping from display name to metric function, used by
# `metrics_table` when no `metrics` argument is supplied. Each F entry points
# at the corresponding `*_f` function, which defaults to beta=1.0 (hence "F1").
DEFAULT_METRICS = {
    "Pairwise Precision": pairwise_precision,
    "Pairwise Recall": pairwise_recall,
    "Pairwise F1": pairwise_f,
    "B-Cubed Precision": b_cubed_precision,
    "B-Cubed Recall": b_cubed_recall,
    "B-Cubed F1": b_cubed_f,
    "Cluster Precision": cluster_precision,
    "Cluster Recall": cluster_recall,
    "Cluster F1": cluster_f,
}
def metrics_table(predictions, references, metrics=DEFAULT_METRICS):
    """
    Apply a set of metrics to all combinations of prediction and reference membership vectors.

    Args:
        predictions (Dict): Dictionary of membership vectors.
        references (Dict): Dictionary of membership vectors.
        metrics (Dict): Dictionary of metrics to apply to the prediction and reference pairs.

    Returns:
        DataFrame: Dataframe with columns "prediction", "reference", "metric", and "value", containing the value of the given metric applied to the corresponding prediction and reference membership vector.

    Examples:
        >>> predictions = {"prediction_1": pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])}
        >>> references = {"reference_1": pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])}
        >>> metrics = {"precision": pairwise_precision, "recall": pairwise_recall}
        >>> metrics_table(predictions, references, metrics) # doctest: +NORMALIZE_WHITESPACE
             prediction    reference     metric  value
        0  prediction_1  reference_1  precision    0.4
        1  prediction_1  reference_1     recall    0.4
    """
    grid = expand_grid(prediction=predictions, reference=references, metric=metrics)

    def evaluate_row(row):
        # Each row holds dictionary keys; resolve them to the actual
        # membership vectors and metric function before evaluating.
        pred_key, ref_key, metric_key = row["prediction"], row["reference"], row["metric"]
        metric_fn = metrics[metric_key]
        return metric_fn(predictions[pred_key], references[ref_key])

    grid["value"] = grid.apply(evaluate_row, axis=1)
    return grid