# Source code for pylimma.auroc

# SPDX-License-Identifier: GPL-3.0-or-later
#
# This module is a Python port of code from R limma. Original R copyrights:
#   auROC.R   Copyright (C) 2003-2020 Gordon Smyth
# Python port: Copyright (C) 2026 John Mulvey
"""
Area under ROC curve for empirical data.

Port of R limma's ``auROC`` (``limma/R/auROC.R``).
"""

from __future__ import annotations

import numpy as np



# [docs]
def au_roc(truth, stat=None) -> float:
    """
    Area under the empirical ROC curve.

    Mirrors R's ``limma::auROC(truth, stat=NULL)``.

    Parameters
    ----------
    truth : array-like
        Test outcomes. Flattened to 1-D and coerced to boolean, so any
        non-zero value counts as a positive.
    stat : array-like, optional
        Ranking statistic, one value per test. Flattened to 1-D and
        converted to float. Cases are ranked by ``stat`` in decreasing
        order; runs of tied values share the average of the sensitivity
        just before and at the end of the run. When ``None``, the order
        of ``truth`` itself is taken as the ranking.

    Returns
    -------
    float
        Mean sensitivity over the negative cases. NaN when ``truth``
        contains any NA / NaN, when ``truth`` is constant (including
        empty), or when ``stat`` contains any NaN.

    Raises
    ------
    ValueError
        If ``stat`` does not have the same number of elements as
        ``truth``.
    """
    # Flatten truth the same way stat is flattened below, so that the
    # index and mask operations further down always act on 1-D arrays.
    truth = np.asarray(truth).ravel()
    if np.any(pd_isna(truth)):
        return float("nan")
    ntests = truth.size
    truth_bool = truth.astype(bool)
    truth_int = truth_bool.astype(int)
    npos = int(truth_int.sum())
    # AUC is undefined unless both classes are present.
    if npos == 0 or npos == ntests:
        return float("nan")

    if stat is None:
        sensitivity = np.cumsum(truth_int) / npos
        return float(np.mean(sensitivity[~truth_bool]))

    stat = np.asarray(stat).ravel()
    if stat.size != ntests:
        raise ValueError("lengths differ")
    # Convert to float once and keep the result: negating the raw array
    # below would raise for boolean dtype and wrap around for unsigned
    # integers (and for the minimum signed integer).
    stat = stat.astype(float, copy=False)
    if np.any(np.isnan(stat)):
        return float("nan")

    # Decreasing-order sort, stable to match R's order(..., decreasing=TRUE).
    o = np.argsort(-stat, kind="stable")
    truth_int = truth_int[o]
    truth_bool = truth_bool[o]
    stat = stat[o]
    sensitivity = np.cumsum(truth_int) / npos

    # Replace sensitivity with averages across tied-stat runs.
    tie_to_prev = stat[:-1] == stat[1:]
    if np.any(tie_to_prev):
        iseq2prev = np.concatenate([[False], tie_to_prev])
        # First and last sorted index of every run of equal stat values
        # (untied values form runs of length one).
        tied_first = np.where(~iseq2prev)[0]
        tied_last = np.concatenate([tied_first[1:] - 1, [ntests - 1]])
        sens_last = sensitivity[tied_last]
        # Sensitivity reached just before each run starts (0 for the first).
        sens_prev = np.concatenate([[0.0], sens_last[:-1]])
        sens_avg = (sens_last + sens_prev) / 2.0
        sensitivity = np.repeat(sens_avg, tied_last - tied_first + 1)

    return float(np.mean(sensitivity[~truth_bool]))



def pd_isna(a):
    """
    Return ``pandas.isna(a)`` for a scalar or array-like ``a``.

    pandas is imported inside the function, so it is only loaded when
    the function is actually called.
    """
    from pandas import isna as _isna

    return _isna(a)