# SPDX-License-Identifier: GPL-3.0-or-later
#
# This module is a Python port of code from R limma. Original R copyrights:
# decidetests.R Copyright (C) 2004-2017 Gordon Smyth
# Python port: Copyright (C) 2026 John Mulvey
"""
Multiple testing decisions for pylimma.
Implements decision procedures for classifying genes as differentially expressed:
- decide_tests(): classify genes as up/down/not significant
- classify_tests_f(): F-test based classification for multiple contrasts
"""
from __future__ import annotations
from typing import TYPE_CHECKING
import numpy as np
from scipy import stats
from .classes import _is_anndata, _resolve_fit_input
from .utils import p_adjust
if TYPE_CHECKING:
pass
[docs]
def classify_tests_f(
fit: dict | np.ndarray,
cor_matrix: np.ndarray | None = None,
df: float | np.ndarray = np.inf,
p_value: float = 0.01,
fstat_only: bool = False,
) -> np.ndarray | tuple[np.ndarray, int, float]:
"""
Use F-tests to classify vectors of t-statistics into outcomes.
This function performs an overall F-test for each gene, and optionally
classifies which contrasts are significant using a step-down procedure.
Parameters
----------
fit : dict
Fit object containing t-statistics and coefficient covariance.
Must have keys: 't', and optionally 'cov_coefficients', 'df_prior',
'df_residual'.
p_value : float, default 0.01
P-value threshold for significance.
fstat_only : bool, default False
If True, return only the F-statistics (with df1, df2 as attributes).
If False, return a classification matrix (-1, 0, 1).
Returns
-------
ndarray or tuple
If fstat_only=True: tuple of (F-statistics, df1, df2)
If fstat_only=False: matrix of test results (-1=down, 0=not sig, 1=up)
Notes
-----
The F-statistic is computed as a quadratic form in the t-statistics,
adjusted for correlation between coefficients. When the coefficients
are uncorrelated, this reduces to the mean of squared t-statistics.
"""
# R's classifyTestsF accepts either an MArrayLM-like list or a
# bare t-statistic matrix. Support both here.
if isinstance(fit, dict):
tstat = np.asarray(fit["t"])
else:
tstat = np.asarray(fit)
fit = None
if tstat.ndim == 1:
tstat = tstat.reshape(-1, 1)
n_genes, n_tests = tstat.shape
# df resolution: explicit kwarg takes precedence, otherwise derive
# from df_prior + df_residual on the fit (matches R decidetests.R:190
# and the df=Inf default). Keep df as a vector when supplied - scipy
# broadcasts it over the gene axis.
if fit is not None and np.isinf(np.asarray(df)).all():
if "df_prior" in fit and "df_residual" in fit:
df = fit["df_prior"] + fit["df_residual"]
# Single coefficient case
if n_tests == 1:
fstat = tstat[:, 0] ** 2
if fstat_only:
return fstat, 1, df
p = 2 * stats.t.sf(np.abs(tstat[:, 0]), df)
result = np.sign(tstat[:, 0]) * (p < p_value)
return result.astype(int).reshape(-1, 1)
# Multiple coefficients: build the correlation matrix from the
# explicit cor_matrix kwarg, otherwise derive it from the fit's
# cov_coefficients (R falls back to diag(n_tests)/sqrt(n_tests)
# when neither is available).
if cor_matrix is not None:
cor_matrix = np.asarray(cor_matrix, dtype=np.float64)
elif fit is not None and fit.get("cov_coefficients") is not None:
cov = np.asarray(fit["cov_coefficients"], dtype=np.float64)
# Strip NaN rows/columns: pylimma's _lm_series_fast fills
# non-estimable (rank-deficient) columns with NaN in the
# (n_coefs, n_coefs) cov matrix, whereas R's cov.coefficients
# has shape (rank, rank) with no NaN padding. Without this mask
# the eigendecomposition below crashes on NaN input.
diag_vals = np.diag(cov)
nan_mask = np.isnan(diag_vals)
if nan_mask.any():
keep = ~nan_mask
cov = cov[np.ix_(keep, keep)]
# Also drop corresponding tstat columns so shapes align.
tstat = tstat[:, keep]
n_tests = tstat.shape[1]
diag_vals = np.diag(cov)
if n_tests == 0:
# Every test was non-estimable - return all-zeros (no gene
# classified significant) rather than crashing.
if fstat_only:
return np.zeros(n_genes), 1, df
return np.zeros((n_genes, 1), dtype=int)
if n_tests == 1:
# After stripping, reduce to the single-coefficient path.
fstat = tstat[:, 0] ** 2
if fstat_only:
return fstat, 1, df
p = 2 * stats.t.sf(np.abs(tstat[:, 0]), df)
result = np.sign(tstat[:, 0]) * (p < p_value)
return result.astype(int).reshape(-1, 1)
if np.min(diag_vals) == 0:
cov = cov.copy()
zero_mask = diag_vals == 0
cov[np.diag_indices_from(cov)] = np.where(zero_mask, 1.0, diag_vals)
std = np.sqrt(np.diag(cov))
std[std == 0] = 1
cor_matrix = cov / np.outer(std, std)
if cor_matrix is not None:
eigvals, eigvecs = np.linalg.eigh(cor_matrix)
r = int(np.sum(eigvals / eigvals[-1] > 1e-8))
Q = eigvecs[:, -r:] / np.sqrt(eigvals[-r:]) / np.sqrt(r)
else:
r = n_tests
Q = np.eye(r) / np.sqrt(r)
# Compute F-statistic
tQ = tstat @ Q
fstat = np.sum(tQ**2, axis=1)
df2 = df
if fstat_only:
return fstat, r, df2
# Classification using step-down procedure. stats.f.ppf with
# vector df2 returns a vector of per-gene thresholds.
qF = stats.f.ppf(1 - p_value, r, df2)
qF = np.asarray(qF)
if qF.ndim == 0:
qF = np.full(n_genes, float(qF))
result = np.zeros((n_genes, n_tests), dtype=float)
for i in range(n_genes):
x = tstat[i, :]
if np.any(np.isnan(x)):
result[i, :] = np.nan # R sets to NA, not 0
continue
# Check if overall F-test is significant
if (x @ Q @ Q.T @ x) > qF[i]:
# Order by absolute t-statistic
order = np.argsort(np.abs(x))[::-1]
result[i, order[0]] = int(np.sign(x[order[0]]))
# Step-down: check if adding each coefficient improves the F
for j in range(1, n_tests):
bigger = order[:j]
x_adj = x.copy()
# Set larger coefficients to same magnitude as current
x_adj[bigger] = np.sign(x[bigger]) * np.abs(x[order[j]])
if (x_adj @ Q @ Q.T @ x_adj) > qF[i]:
result[i, order[j]] = int(np.sign(x[order[j]]))
else:
break
return result
[docs]
def decide_tests(
data,
method: str = "separate",
adjust_method: str = "BH",
p_value: float = 0.05,
lfc: float = 0.0,
coefficients: np.ndarray | None = None,
cor_matrix: np.ndarray | None = None,
tstat: np.ndarray | None = None,
df: float | np.ndarray = np.inf,
genewise_p_value: np.ndarray | None = None,
*,
key: str = "pylimma",
) -> np.ndarray:
r"""
Classify genes as differentially expressed.
Applies multiple testing correction and classifies each gene-coefficient
combination as up-regulated (1), down-regulated (-1), or not significant (0).
Parameters
----------
data : AnnData, dict, or ndarray
Fit object from e_bayes(), or a matrix of p-values.
If AnnData, reads from adata.uns[key].
method : str, default "separate"
Method for multiple testing correction:
- "separate": adjust p-values for each coefficient separately
- "global": adjust all p-values together
- "hierarchical": first test overall significance (F-test), then adjust within significant genes
- "nestedF": use nested F-tests for multiple contrasts
adjust_method : str, default "BH"
P-value adjustment method: "BH", "bonferroni", "holm", "BY", "none".
p_value : float, default 0.05
Significance threshold for adjusted p-values.
lfc : float, default 0.0
Log fold-change threshold. Genes with \|logFC\| < lfc are set to 0.
key : str, default "pylimma"
Key for fit results in adata.uns (AnnData input only).
Returns
-------
ndarray
Matrix of test results with values -1 (down), 0 (not significant), 1 (up).
Shape is (n_genes, n_coefficients).
Notes
-----
The "separate" method is the default and most commonly used. It adjusts
p-values within each coefficient independently.
The "hierarchical" and "nestedF" methods first perform an overall F-test
for each gene, then test individual contrasts only for genes that pass
the F-test. This can increase power when there are many contrasts.
Examples
--------
>>> fit = e_bayes(lm_fit(expr, design))
>>> results = decide_tests(fit, p_value=0.05, lfc=1)
>>> np.sum(results == 1, axis=0) # count up-regulated per coefficient
"""
# Dispatch: AnnData / dict / fallback ndarray of p-values.
# When a bare ndarray is passed, R's decideTests.default also
# accepts coefficients / cor.matrix / tstat / df / genewise.p.value
# as keyword args so callers can decide without a fit dict.
if _is_anndata(data) or isinstance(data, dict):
fit, _adata, _adata_key = _resolve_fit_input(data, key)
else:
p = np.asarray(data)
if p.ndim == 1:
p = p.reshape(-1, 1)
return _decide_tests_p(
p,
method=method,
adjust_method=adjust_method,
p_value=p_value,
coefficients=coefficients if coefficients is not None else tstat,
lfc=lfc,
genewise_p_value=genewise_p_value,
)
# Auto-run e_bayes if not already run (R parity). When the input
# was AnnData, persist the moderated fit back to adata.uns[key] so
# subsequent top_table / treat / contrasts_fit calls on the same
# adata see the eBayes-augmented slots. Without the write-back,
# top_table(adata) would raise "Need to run e_bayes() first" even
# though decide_tests implicitly ran it.
if "p_value" not in fit:
from .ebayes import e_bayes
fit = e_bayes(fit)
if _adata is not None:
# Plain dict for h5ad compatibility; see lm_fit.
_adata.uns[_adata_key] = dict(fit)
p = fit["p_value"]
# Explicit kwargs override fit-derived values, matching R's
# decideTests.default where any of coefficients/tstat/cor.matrix/
# df/genewise.p.value takes precedence over the object slots.
if coefficients is None:
coefficients = fit.get("coefficients")
if tstat is None:
tstat = fit.get("t")
if method == "nestedF":
return _decide_tests_nested_f(
fit,
adjust_method=adjust_method,
p_value=p_value,
lfc=lfc,
cor_matrix=cor_matrix,
df=df,
)
elif method == "hierarchical":
return _decide_tests_hierarchical(
fit,
adjust_method=adjust_method,
p_value=p_value,
lfc=lfc,
genewise_p_value=genewise_p_value,
)
else:
# MArrayLM dispatch uses strict '<' for p (decidetests.R:123,129)
# and '* (abs(coef) > lfc)' for the lfc threshold (decidetests.R:165).
# The bare-ndarray entry above uses '<=' / '<' (decidetests.R:89/101).
return _decide_tests_p(
p,
method=method,
adjust_method=adjust_method,
p_value=p_value,
coefficients=coefficients,
lfc=lfc,
genewise_p_value=genewise_p_value,
from_marraylm=True,
)
def _decide_tests_p(
p: np.ndarray,
method: str,
adjust_method: str,
p_value: float,
coefficients: np.ndarray | None,
lfc: float,
genewise_p_value: np.ndarray | None = None,
from_marraylm: bool = False,
) -> np.ndarray:
"""Decide tests from a matrix of p-values.
R has two different boundary conventions depending on the dispatch
path. `from_marraylm=True` (MArrayLM dispatch, decidetests.R:123,165)
uses strict ``<`` on p-value and drops genes with ``abs(coef) <= lfc``.
The default ``from_marraylm=False`` (bare-ndarray dispatch,
decidetests.R:89,101) uses inclusive ``<=`` on p-value and drops genes
with ``abs(coef) < lfc``. pylimma follows whichever entry point the
caller used.
"""
p = np.asarray(p)
if p.ndim == 1:
p = p.reshape(-1, 1)
n_genes, n_coefs = p.shape
# Validate p-values
if np.any((p > 1) | (p < 0)):
raise ValueError("p-values must be between 0 and 1")
# Adjust p-values
if method == "separate":
p_adj = np.zeros_like(p)
for j in range(n_coefs):
valid = ~np.isnan(p[:, j])
p_adj[valid, j] = p_adjust(p[valid, j], method=adjust_method)
p_adj[~valid, j] = np.nan
elif method == "global":
valid = ~np.isnan(p)
p_adj = np.full_like(p, np.nan)
p_adj[valid] = p_adjust(p[valid], method=adjust_method)
else:
raise ValueError(f"Unknown method: {method}")
# Classification - boundary differs between MArrayLM and default R paths.
if from_marraylm:
is_de = (p_adj < p_value).astype(int)
else:
is_de = (p_adj <= p_value).astype(int)
if coefficients is not None:
coefficients = np.asarray(coefficients)
if coefficients.shape != p.shape:
raise ValueError("coefficients and p-values have different shapes")
# Apply sign
is_de = is_de * np.sign(coefficients).astype(int)
# Apply lfc threshold - boundary differs between dispatch paths.
if lfc > 0:
if from_marraylm:
is_de[np.abs(coefficients) <= lfc] = 0
else:
is_de[np.abs(coefficients) < lfc] = 0
return is_de
def _decide_tests_hierarchical(
fit: dict,
adjust_method: str,
p_value: float,
lfc: float,
genewise_p_value: np.ndarray | None = None,
) -> np.ndarray:
"""Hierarchical testing: F-test first, then individual tests."""
p = fit["p_value"]
coefficients = fit.get("coefficients")
# Explicit genewise_p_value override takes precedence over the
# fit's F.p.value (matches R decideTests.default lines 57-67).
if genewise_p_value is None:
f_p_value = fit.get("F_p_value")
else:
f_p_value = np.asarray(genewise_p_value, dtype=np.float64)
if f_p_value is None:
raise ValueError("F-test p-values not found. Run e_bayes() with multiple coefficients.")
if np.any(np.isnan(f_p_value)):
raise ValueError("Cannot handle NA F p-values in hierarchical method")
# Adjust F-test p-values
f_adj = p_adjust(f_p_value, method=adjust_method)
de_gene = f_adj < p_value
# Count DE genes for adjusting threshold
n_de = np.sum(de_gene)
n_total = np.sum(~np.isnan(f_p_value))
# Adjust p-value cutoff based on number of DE genes
if adjust_method.lower() in ("bh", "fdr"):
a = n_de / n_total if n_total > 0 else 1
elif adjust_method.lower() == "bonferroni":
a = 1 / n_total if n_total > 0 else 1
elif adjust_method.lower() == "holm":
a = 1 / (n_total - n_de + 1) if n_total > n_de else 1
elif adjust_method.lower() == "by":
a = n_de / n_total / np.sum(1 / np.arange(1, n_total + 1)) if n_total > 0 else 1
else:
a = 1
p_cutoff = a * p_value
# Initialize result
result = np.zeros_like(p, dtype=int)
# For DE genes, adjust p-values row-wise
de_idx = np.where(de_gene)[0]
for i in de_idx:
p_row = p_adjust(p[i, :], method=adjust_method)
sig = p_row < p_cutoff
if coefficients is not None:
result[i, sig] = np.sign(coefficients[i, sig]).astype(int)
else:
result[i, sig] = 1
# Apply lfc threshold - MArrayLM convention (decidetests.R:165 uses
# `* (abs(coef) > lfc)` which drops genes on the boundary).
if lfc > 0 and coefficients is not None:
result[np.abs(coefficients) <= lfc] = 0
return result
def _decide_tests_nested_f(
fit: dict,
adjust_method: str,
p_value: float,
lfc: float,
cor_matrix: np.ndarray | None = None,
df: float | np.ndarray = np.inf,
) -> np.ndarray:
"""Nested F-test method for multiple contrasts."""
f_p_value = fit.get("F_p_value")
coefficients = fit.get("coefficients")
if f_p_value is None:
raise ValueError("F-test p-values not found. Run e_bayes() with multiple coefficients.")
if np.any(np.isnan(f_p_value)):
raise ValueError("nestedF method cannot handle NA p-values")
n_genes = len(f_p_value)
# Adjust F-test p-values
f_adj = p_adjust(f_p_value, method=adjust_method)
de_gene = f_adj < p_value
n_de = np.sum(de_gene)
n_total = np.sum(~np.isnan(f_p_value))
# Adjust p-value cutoff
if adjust_method.lower() in ("bh", "fdr"):
a = n_de / n_total if n_total > 0 else 1
elif adjust_method.lower() == "bonferroni":
a = 1 / n_total if n_total > 0 else 1
elif adjust_method.lower() == "holm":
a = 1 / (n_total - n_de + 1) if n_total > n_de else 1
elif adjust_method.lower() == "by":
a = n_de / n_total / np.sum(1 / np.arange(1, n_total + 1)) if n_total > 0 else 1
else:
a = 1
p_cutoff = a * p_value
# Initialize result
n_coefs = fit["t"].shape[1]
result = np.zeros((n_genes, n_coefs), dtype=int)
# For DE genes, use classify_tests_f
if np.any(de_gene):
# Create subset fit for DE genes
de_idx = np.where(de_gene)[0]
fit_subset = {
"t": fit["t"][de_idx, :],
"cov_coefficients": fit.get("cov_coefficients"),
"df_prior": fit.get("df_prior"),
"df_residual": (
fit["df_residual"][de_idx]
if isinstance(fit["df_residual"], np.ndarray)
else fit["df_residual"]
),
}
# Forward explicit cor_matrix / df overrides so nestedF honours
# R's decideTests(..., cor.matrix=..., df=...) call pattern.
result[de_idx, :] = classify_tests_f(
fit_subset,
cor_matrix=cor_matrix,
df=df,
p_value=p_cutoff,
)
# Apply lfc threshold - MArrayLM convention (decidetests.R:165).
if lfc > 0 and coefficients is not None:
result[np.abs(coefficients) <= lfc] = 0
return result
def summarize_test_results(
results: np.ndarray,
coef_names: list[str] | None = None,
) -> dict:
"""
Summarize test results by counting up/down/not significant genes.
Provides similar functionality to R's summary.TestResults method.
Parameters
----------
results : ndarray
Test results matrix from decide_tests(), with values -1, 0, 1.
Shape (n_genes, n_coefficients).
coef_names : list of str, optional
Names for the coefficients (columns).
Returns
-------
dict
down : ndarray - count of genes with result -1 per coefficient
not_sig : ndarray - count of genes with result 0 per coefficient
up : ndarray - count of genes with result 1 per coefficient
coef_names : list - coefficient names
total : int - total number of genes
Examples
--------
>>> results = decide_tests(fit)
>>> summary = summarize_test_results(results)
>>> print(f"Up: {summary['up']}, Down: {summary['down']}")
"""
results = np.asarray(results)
if results.ndim == 1:
results = results.reshape(-1, 1)
n_genes, n_coefs = results.shape
if coef_names is None:
coef_names = [f"coef_{i}" for i in range(n_coefs)]
# Count each category, handling NaN
down = np.sum(results == -1, axis=0)
not_sig = np.sum(results == 0, axis=0)
up = np.sum(results == 1, axis=0)
return {
"down": down,
"not_sig": not_sig,
"up": up,
"coef_names": coef_names,
"total": n_genes,
}