# Source code for er_evaluation.metrics._metrics

from functools import wraps

import numpy as np
import pandas as pd
import sklearn.metrics as sm
from scipy.special import comb

from er_evaluation.data_structures import MembershipVector
from er_evaluation.error_analysis import (error_indicator,
                                          expected_relative_extra_from_table,
                                          expected_relative_missing_from_table,
                                          record_error_table)
from er_evaluation.summary import number_of_links
from er_evaluation.utils import expand_grid


def _f_score(P, R, beta=1.0):
    """
    Compute the weighted F1 score for a given precision and recall.

    Args:
        P (float): Precision.
        R (float): Recall.
        beta (float): Weighting factor.

    Returns:
        float: Weighted F1 score.

    Examples:
        >>> _f_score(0.5, 0.5, beta=1.0)
        0.5
    """
    D = beta**2 * P + R
    if D == 0:
        return 0
    else:
        return (1 + beta**2) * P * R / D


def pairwise_precision(prediction, reference):
    r"""
    Pairwise precision for the inner join of two clusterings.

    Pairwise precision:
        Consider two clusterings of a set of records, referred to as the *predicted* and *reference* clusterings. Let :math:`T` be the set of record pairs which appear in the same reference cluster, and let :math:`P` be the set of record pairs which appear in the same predicted clusters. Pairwise precision is then defined as

        .. math::

            P = \frac{\lvert T \cap P \rvert}{\lvert P \rvert}

        This is the proportion of correctly predicted links among all predicted links.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Pairwise precision for the inner join of `prediction` and `reference`. Returns 1.0 when the inner join contains no predicted links.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> pairwise_precision(prediction, reference)
        0.4

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    # Keep only the records present in both membership vectors.
    inner = pd.concat(
        {"prediction": prediction, "reference": reference},
        axis=1,
        join="inner",
        copy=False,
    )

    # Records sharing both their predicted and reference cluster are linked in both
    # clusterings; a group of size k contributes k-choose-2 correct links.
    TP_cluster_sizes = inner.groupby(["prediction", "reference"]).size().values
    TP = np.sum(comb(TP_cluster_sizes, 2))
    P = number_of_links(inner["prediction"])

    if P == 0:
        # No predicted links at all: avoid 0/0 and report perfect precision.
        return 1.0
    return TP / P


def pairwise_recall(prediction, reference):
    r"""
    Pairwise recall for the inner join of two clusterings.

    Pairwise recall:
        Consider two clusterings of a set of records, referred to as the *predicted* and *reference* clusterings. Let :math:`T` be the set of record pairs which appear in the same reference cluster, and let :math:`P` be the set of record pairs which appear in the same predicted clusters. Pairwise recall is then defined as

        .. math::

            R = \frac{\lvert T \cap P \rvert}{\lvert T \rvert}

        This is the proportion of correctly predicted links among all true links.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Pairwise recall computed on the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> pairwise_recall(prediction, reference)
        0.4

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    # Recall is precision with the roles of the two clusterings swapped.
    return pairwise_precision(reference, prediction)


def pairwise_f(prediction, reference, beta=1.0):
    r"""
    Pairwise F score for the inner join of two clusterings.

    Pairwise F score:
        Pairwise F score is defined as the weighted harmonic mean of pairwise precision and pairwise recall:

        .. math::

            F_\beta = \frac{(1 + \beta^2)PR}{ \beta^2 P+R}.

        The :math:`\beta` parameter controls the relative weight of precision and recall. When :math:`\beta = 1`, the F score (F1) is the harmonic mean of precision and recall. When :math:`\beta < 1`, the F score is weighted towards precision. When :math:`\beta > 1`, the F score is weighted towards recall.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.
        beta (float): Weight of recall relative to precision in the F score.

    Returns:
        float: Pairwise F score for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> pairwise_f(prediction, reference)
        0.4000000000000001

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    P = pairwise_precision(prediction, reference)
    R = pairwise_recall(prediction, reference)

    return _f_score(P, R, beta=beta)


def cluster_precision(prediction, reference):
    r"""
    Cluster precision for the inner join of two clusterings.

    Cluster precision:
        Consider two clusterings of a set of records, referred to as the *predicted* and *reference* clusterings. Let :math:`C` be the set of reference (true) clusters, and let :math:`\hat C` be the set of predicted clusters. Cluster precision is then defined as

        .. math::

            P = \frac{\lvert C \cap \hat C \rvert}{\lvert \hat C \rvert}

        This is the proportion of correctly predicted clusters among all predicted clusters.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Cluster precision for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,5])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> cluster_precision(prediction, reference)
        0.4

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    # Keep only the records present in both membership vectors.
    inner = pd.concat(
        {"prediction": prediction, "reference": reference},
        axis=1,
        join="inner",
        copy=False,
    )

    # NOTE(review): error_indicator is assumed to return one 0/1 flag per predicted
    # cluster (1 = cluster differs from its reference cluster) -- confirm against
    # er_evaluation.error_analysis.
    errors = error_indicator(inner["prediction"], inner["reference"])
    n_predicted_clusters = inner["prediction"].nunique()

    return (1 - errors).sum() / n_predicted_clusters


def cluster_recall(prediction, reference):
    r"""
    Cluster recall for the inner join of two clusterings.

    Cluster recall:
        Consider two clusterings of a set of records, referred to as the *predicted* and *reference* clusterings. Let :math:`C` be the set of reference (true) clusters, and let :math:`\hat C` be the set of predicted clusters. Cluster recall is then defined as

        .. math::

            R = \frac{\lvert C \cap \hat C \rvert}{\lvert C \rvert}

        This is the proportion of correctly predicted clusters among all reference (true) clusters.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: Cluster recall for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,5])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> cluster_recall(prediction, reference)
        0.5

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    # Recall is precision with the roles of the two clusterings swapped.
    return cluster_precision(reference, prediction)


def cluster_f(prediction, reference, beta=1.0):
    r"""
    Cluster F score for the inner join of two clusterings.

    Cluster F score:
        Cluster F score is defined as the weighted harmonic mean of cluster precision and cluster recall:

        .. math::

            F_\beta = \frac{(1 + \beta^2)PR}{ \beta^2 P+R}

        The :math:`\beta` parameter controls the relative weight of precision and recall. When :math:`\beta = 1`, the F score is the harmonic mean of precision and recall. When :math:`\beta < 1`, the F score is weighted towards precision. When :math:`\beta > 1`, the F score is weighted towards recall.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.
        beta (float): Weight of recall relative to precision in the F score.

    Returns:
        float: Cluster F score for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,5])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> cluster_f(prediction, reference)
        0.4444444444444445

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    P = cluster_precision(prediction, reference)
    R = cluster_recall(prediction, reference)

    return _f_score(P, R, beta=beta)


def b_cubed_precision(prediction, reference):
    r"""
    B-cubed precision for the inner join of two clusterings, with equal weight placed on each ground truth cluster.

    Mathematically, this is defined as

    .. math::

        P_{B^3} = \frac{1}{\lvert \mathcal{C}\rvert}\sum_{c \in \mathcal{C}} \frac{1}{\lvert c \rvert} \sum_{r \in c} \frac{\lvert c(r) \cap \hat c(r)\rvert }{\lvert \hat c(r) \rvert}

    where

    * :math:`\mathcal{C}` is the set of ground truth clusters,
    * :math:`c` is a ground truth cluster,
    * :math:`r` is a mention in :math:`c`,
    * :math:`c(r)` is the cluster associated with :math:`r` in the ground truth clustering, and
    * :math:`\hat c(r)` is the cluster associated with :math:`r` in the predicted clustering.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: B-cubed precision for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> b_cubed_precision(prediction, reference)
        0.6458333333333334

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    # Keep only the records present in both membership vectors.
    inner = pd.concat(
        {"prediction": prediction, "reference": reference},
        axis=1,
        join="inner",
        copy=False,
    )

    # NOTE(review): presumably one "relative extra links" error value per reference
    # cluster, so the mean weights clusters equally -- confirm against
    # er_evaluation.error_analysis.
    error_table = record_error_table(inner["prediction"], inner["reference"])
    errors = expected_relative_extra_from_table(error_table)

    return 1 - np.mean(errors)


def b_cubed_recall(prediction, reference):
    r"""
    B-cubed recall for the inner join of two clusterings, with equal weight placed on each ground truth cluster.

    Mathematically, this is defined as

    .. math::

        R_{B^3} = \frac{1}{\lvert \mathcal{C}\rvert}\sum_{c \in \mathcal{C}} \frac{1}{\lvert c \rvert} \sum_{r \in c} \frac{\lvert c(r) \cap \hat c(r)\rvert }{\lvert c(r) \rvert}

    where

    * :math:`\mathcal{C}` is the set of ground truth clusters,
    * :math:`c` is a ground truth cluster,
    * :math:`r` is a mention in :math:`c`,
    * :math:`c(r)` is the cluster associated with :math:`r` in the ground truth clustering, and
    * :math:`\hat c(r)` is the cluster associated with :math:`r` in the predicted clustering.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.

    Returns:
        float: B-cubed recall for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> b_cubed_recall(prediction, reference)
        0.7638888888888888

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    # Keep only the records present in both membership vectors.
    inner = pd.concat(
        {"prediction": prediction, "reference": reference},
        axis=1,
        join="inner",
        copy=False,
    )

    # NOTE(review): presumably one "relative missing links" error value per reference
    # cluster, so the mean weights clusters equally -- confirm against
    # er_evaluation.error_analysis.
    error_table = record_error_table(inner["prediction"], inner["reference"])
    errors = expected_relative_missing_from_table(error_table)

    return 1 - np.mean(errors)


def b_cubed_f(prediction, reference, beta=1.0):
    r"""
    B-cubed F score for the inner join of two clusterings.

    B-cubed F score:
        B-cubed F score is defined as the weighted harmonic mean of B-cubed precision :math:`P` and B-cubed recall :math:`R`:

        .. math::

            F_\beta = \frac{(1 + \beta^2)PR}{ \beta^2 P+R}

        The :math:`\beta` parameter controls the relative weight of precision and recall. When :math:`\beta = 1`, the F score is the harmonic mean of precision and recall. When :math:`\beta < 1`, the F score is weighted towards precision. When :math:`\beta > 1`, the F score is weighted towards recall.

    Args:
        prediction (Series): Membership vector for the predicted clustering.
        reference (Series): Membership vector for the reference clustering.
        beta (float): Weight of recall relative to precision in the F score.

    Returns:
        float: B-cubed F score for the inner join of `prediction` and `reference`.

    Examples:
        >>> prediction = pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])
        >>> reference = pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])
        >>> b_cubed_f(prediction, reference)
        0.6999178981937602

    Notes:
        NA values are dropped from membership vectors prior to computing the metric.
    """
    prediction = MembershipVector(prediction, dropna=True)
    reference = MembershipVector(reference, dropna=True)

    P = b_cubed_precision(prediction, reference)
    R = b_cubed_recall(prediction, reference)

    return _f_score(P, R, beta=beta)


def wrap_sklearn_metric(sklearn_metric):
    """Adapt a scikit-learn cluster metric so that it accepts membership vectors.

    Args:
        sklearn_metric (function): cluster metric to wrap. It is invoked as ``sklearn_metric(reference_labels, prediction_labels, **kwargs)``.

    Returns:
        function: A function ``(prediction, reference, **kwargs)`` that carries the metadata of `sklearn_metric` and returns whatever `sklearn_metric` returns.

    Notes:
        * The prediction and reference membership vectors are inner joined and NA values are dropped before this metric is computed.
    """

    def _as_integer_labels(column):
        # Replace arbitrary cluster identifiers by int64 category codes.
        return pd.Categorical(column).codes.astype(np.int64)

    @wraps(sklearn_metric)
    def metric_on_membership_vectors(prediction, reference, **kwargs):
        predicted_vector = MembershipVector(prediction, dropna=True)
        reference_vector = MembershipVector(reference, dropna=True)

        # Keep only the records present in both membership vectors.
        joined = pd.concat(
            {"prediction": predicted_vector, "reference": reference_vector},
            axis=1,
            join="inner",
            copy=False,
        )
        predicted_labels = _as_integer_labels(joined.prediction)
        reference_labels = _as_integer_labels(joined.reference)

        # The reference labels are passed first, followed by the predicted labels.
        return sklearn_metric(reference_labels, predicted_labels, **kwargs)

    return metric_on_membership_vectors


def cluster_homogeneity(prediction, reference):
    """Cluster homogeneity score (based on conditional entropy).

    This wraps scikit-learn's `homogeneity score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.homogeneity_score.html>`_.

    Args:
        prediction (Series):  membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series):  membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: homogeneity score

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    homogeneity = wrap_sklearn_metric(sm.homogeneity_score)
    return homogeneity(prediction, reference)


def cluster_completeness(prediction, reference):
    """Cluster completeness score (based on conditional entropy).

    This wraps scikit-learn's `completeness score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.completeness_score.html>`_.

    Args:
        prediction (Series):  membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series):  membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: completeness score

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    completeness = wrap_sklearn_metric(sm.completeness_score)
    return completeness(prediction, reference)


def cluster_v_measure(prediction, reference, beta=1.0):
    """Compute the V-measure.

    This wraps scikit-learn's `V-measure function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.v_measure_score.html#sklearn-metrics-v-measure-score>`_.

    Args:
        prediction (Series):  membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series):  membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.
        beta (float): forwarded unchanged as the ``beta`` argument of scikit-learn's ``v_measure_score``; see the linked documentation for how it weighs homogeneity against completeness.

    Returns:
        float: V-measure

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    v_measure = wrap_sklearn_metric(sm.v_measure_score)
    return v_measure(prediction, reference, beta=beta)


def rand_score(prediction, reference):
    """Compute the Rand index.

    This wraps scikit-learn's `rand index score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.rand_score.html#sklearn.metrics.rand_score>`_.

    Args:
        prediction (Series):  membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series):  membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: rand index score

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    rand_index = wrap_sklearn_metric(sm.rand_score)
    return rand_index(prediction, reference)


def adjusted_rand_score(prediction, reference):
    """Compute the adjusted Rand index.

    This wraps scikit-learn's `adjusted rand score function <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.adjusted_rand_score.html#sklearn-metrics-adjusted-rand-score>`_.

    Args:
        prediction (Series):  membership vector for predicted clusters, i.e. a pandas Series indexed by mention ids and with values representing predicted cluster assignment.
        reference (Series):  membership vector for reference (true) clusters, i.e. a pandas Series indexed by mention ids and with values representing reference cluster assignment.

    Returns:
        float: adjusted rand index

    Notes:
        * The prediction and reference membership vectors are inner joined before this metric is computed.
        * NA values are dropped from membership vectors prior to computing the metric.
    """
    adjusted_rand_index = wrap_sklearn_metric(sm.adjusted_rand_score)
    return adjusted_rand_index(prediction, reference)


# Default mapping from display name to metric function, used by `metrics_table`
# when no `metrics` argument is given. Each function is called as
# ``metric(prediction, reference)``; the F scores therefore use their default
# ``beta=1.0`` (hence the "F1" labels).
DEFAULT_METRICS = {
    "Pairwise Precision": pairwise_precision,
    "Pairwise Recall": pairwise_recall,
    "Pairwise F1": pairwise_f,
    "B-Cubed Precision": b_cubed_precision,
    "B-Cubed Recall": b_cubed_recall,
    "B-Cubed F1": b_cubed_f,
    "Cluster Precision": cluster_precision,
    "Cluster Recall": cluster_recall,
    "Cluster F1": cluster_f,
}


def metrics_table(predictions, references, metrics=DEFAULT_METRICS):
    """
    Apply a set of metrics to all combinations of prediction and reference membership vectors.

    Args:
        predictions (Dict): Dictionary of membership vectors, keyed by prediction name.
        references (Dict): Dictionary of membership vectors, keyed by reference name.
        metrics (Dict): Dictionary of metrics to apply to the prediction and reference pairs, keyed by metric name. Each metric is called as ``metric(prediction, reference)``.

    Returns:
        DataFrame: Dataframe with columns "prediction", "reference", "metric", and "value", containing the value of the given metric applied to the corresponding prediction and reference membership vector.

    Examples:
        >>> predictions = {"prediction_1": pd.Series(index=[1,2,3,4,5,6,7,8], data=[1,1,2,3,2,4,4,4])}
        >>> references = {"reference_1": pd.Series(index=[1,2,3,4,5,6,7,8], data=["c1", "c1", "c1", "c2", "c2", "c3", "c3", "c4"])}
        >>> metrics = {"precision": pairwise_precision, "recall": pairwise_recall}
        >>> metrics_table(predictions, references, metrics) # doctest: +NORMALIZE_WHITESPACE
             prediction    reference     metric  value
        0  prediction_1  reference_1  precision    0.4
        1  prediction_1  reference_1     recall    0.4
    """
    # One row per (prediction key, reference key, metric key) combination.
    params = expand_grid(prediction=predictions, reference=references, metric=metrics)

    # Walk the three key columns in lockstep rather than using a row-wise
    # DataFrame.apply, which builds a Series for every row.
    params["value"] = [
        metrics[metric_key](predictions[prediction_key], references[reference_key])
        for prediction_key, reference_key, metric_key in zip(
            params["prediction"], params["reference"], params["metric"]
        )
    ]

    return params