# SPDX-License-Identifier: GPL-3.0-or-later
#
# This module is a Python port of code from R limma. Original R copyrights:
# ebayes.R Copyright (C) 2003-2025 Gordon Smyth
# Python port: Copyright (C) 2026 John Mulvey
"""
Empirical Bayes moderation for pylimma.
Implements the core eBayes statistics from limma:
- e_bayes(): compute moderated t-statistics, p-values, B-statistics
"""
from __future__ import annotations
import warnings
from typing import TYPE_CHECKING
import numpy as np
from scipy import stats
from scipy.special import gammaln
from .classes import MArrayLM, _resolve_fit_input
from .lmfit import is_fullrank
from .squeeze_var import squeeze_var
if TYPE_CHECKING:
pass
def _t_isf_log(log_p: np.ndarray, df: float) -> np.ndarray:
"""
Inverse survival function for t-distribution with log-scale input.
R uses qt(log_p, df, lower.tail=FALSE, log.p=TRUE) which handles
extreme p-values on the log scale without precision loss. Python's
scipy.stats.t.isf requires the p-value itself, causing underflow for
very small p-values.
This function handles underflow by using an asymptotic approximation
for very extreme values.
Parameters
----------
log_p : ndarray
Log of tail probability (i.e., logsf values, all <= 0)
df : float
Degrees of freedom (scalar, common to all values)
Returns
-------
ndarray
Quantiles (positive values since we're dealing with upper tail)
"""
result = np.empty_like(log_p)
# Try standard method: isf(exp(log_p), df)
p = np.exp(log_p)
# Check for underflow: p == 0 but log_p is finite (not -inf)
underflow = (p == 0) & np.isfinite(log_p)
standard = ~underflow
if np.any(standard):
result[standard] = stats.t.isf(p[standard], df)
if np.any(underflow):
# Asymptotic approximation for extreme tail
# For large t: sf(t, df) ~ c(df) * t^(-df)
# where c(df) = Gamma((df+1)/2) / (sqrt(pi*df) * Gamma(df/2))
# So: log_p ~ log(c(df)) - df * log(t)
# => log(t) ~ (log(c(df)) - log_p) / df
log_c = gammaln((df + 1) / 2) - 0.5 * np.log(np.pi * df) - gammaln(df / 2)
log_t = (log_c - log_p[underflow]) / df
result[underflow] = np.exp(log_t)
return result
def _classify_tests_f(
fit: dict,
fstat_only: bool = True,
) -> np.ndarray:
"""
Compute F-statistics from t-statistics.
Internal wrapper that calls classify_tests_f from decide_tests module.
"""
from .decide_tests import classify_tests_f
return classify_tests_f(fit, p_value=0.01, fstat_only=fstat_only)
def _tmixture_vector(
tstat: np.ndarray,
stdev_unscaled: np.ndarray,
df: np.ndarray | float,
proportion: float,
v0_lim: tuple[float, float] | None = None,
) -> float:
"""
Estimate scale factor in mixture of two t-distributions.
tstat is assumed to follow sqrt(1+v0/v1)*t(df) with probability `proportion`
and t(df) otherwise. v1 is stdev_unscaled^2 and v0 is to be estimated.
Parameters
----------
tstat : ndarray
t-statistics
stdev_unscaled : ndarray
Unscaled standard errors
df : ndarray or float
Degrees of freedom
proportion : float
Expected proportion of differentially expressed genes
v0_lim : tuple, optional
Limits for v0
Returns
-------
float
Estimated prior variance
"""
# Remove missing values
valid = ~np.isnan(tstat)
if not np.all(valid):
tstat = tstat[valid]
stdev_unscaled = stdev_unscaled[valid]
if isinstance(df, np.ndarray):
df = df[valid]
n_genes = len(tstat)
n_target = int(np.ceil(proportion / 2 * n_genes))
if n_target < 1:
return np.nan
# Ensure p at least matches selected proportion
p = max(n_target / n_genes, proportion)
# Work with absolute t-statistics
tstat = np.abs(tstat)
# Method requires equal df - adjust t-statistics to max df.
# Copy to avoid mutating caller's df array (line below writes into df).
df = np.atleast_1d(df).astype(np.float64, copy=True)
if len(df) == 1:
df = np.full(n_genes, df[0])
max_df = np.max(df)
needs_adjustment = df < max_df
if np.any(needs_adjustment):
# Convert via p-value on log scale (R parity: uses log.p=TRUE throughout)
tail_p = stats.t.logsf(tstat[needs_adjustment], df[needs_adjustment])
tstat[needs_adjustment] = _t_isf_log(tail_p, max_df)
df[needs_adjustment] = max_df
# Select top statistics. Use stable descending sort to match R's
# order(..., decreasing=TRUE) (ebayes.R:143); default quicksort +
# [::-1] has a reversed tie order which diverges on ties.
order = np.argsort(-tstat, kind="stable")[:n_target]
tstat = tstat[order]
v1 = stdev_unscaled[order] ** 2
# Compare to order statistics
r = np.arange(1, n_target + 1)
p0 = 2 * stats.t.sf(tstat, max_df)
p_target = ((r - 0.5) / n_genes - (1 - p) * p0) / p
v0 = np.zeros(n_target)
pos = p_target > p0
if np.any(pos):
q_target = stats.t.isf(p_target[pos] / 2, max_df)
v0[pos] = v1[pos] * ((tstat[pos] / q_target) ** 2 - 1)
if v0_lim is not None:
v0 = np.clip(v0, v0_lim[0], v0_lim[1])
return float(np.mean(v0))
def _tmixture_matrix(
tstat: np.ndarray,
stdev_unscaled: np.ndarray,
df: np.ndarray | float,
proportion: float,
v0_lim: tuple[float, float] | None = None,
) -> np.ndarray:
"""
Estimate prior variance for each coefficient.
Parameters
----------
tstat : ndarray
t-statistics (n_genes, n_coefs)
stdev_unscaled : ndarray
Unscaled standard errors (n_genes, n_coefs)
df : ndarray or float
Degrees of freedom
proportion : float
Expected proportion of DE genes
v0_lim : tuple, optional
Limits for v0
Returns
-------
ndarray
Prior variance for each coefficient
"""
n_coefs = tstat.shape[1]
v0 = np.zeros(n_coefs)
for j in range(n_coefs):
v0[j] = _tmixture_vector(tstat[:, j], stdev_unscaled[:, j], df, proportion, v0_lim)
return v0
[docs]
def e_bayes(
data,
proportion: float = 0.01,
stdev_coef_lim: tuple[float, float] = (0.1, 4.0),
trend: bool | np.ndarray = False,
span: float | None = None,
robust: bool = False,
winsor_tail_p: tuple[float, float] = (0.05, 0.1),
legacy: bool | None = None,
key: str = "pylimma",
) -> dict | None:
"""
Empirical Bayes moderation of t-statistics.
Computes moderated t-statistics, p-values, and B-statistics (log-odds
of differential expression) by empirical Bayes shrinkage of the
gene-wise sample variances towards a common prior.
Parameters
----------
data : AnnData or dict
Either an AnnData object with fit results in adata.uns[key],
or a dict returned by lm_fit() or contrasts_fit().
proportion : float, default 0.01
Expected proportion of differentially expressed genes.
stdev_coef_lim : tuple, default (0.1, 4.0)
Limits for the prior standard deviation of coefficients.
trend : bool or array_like, default False
If True, allow prior variance to depend on mean expression
(Amean from the fit). If a numeric array, use that as the
covariate for the mean-variance trend.
span : float, optional
Span for lowess smoothing when fitting the mean-variance trend.
Only used when trend is True. If None, an appropriate span is
chosen automatically.
robust : bool, default False
If True, use robust estimation of hyperparameters. Outlier
variances are Winsorized and the prior df is estimated robustly.
winsor_tail_p : tuple, default (0.05, 0.1)
Winsorization proportions for robust estimation. The first value
is for the lower tail, the second for the upper tail.
legacy : bool, optional
If True, use the original limma hyperparameter estimation method.
If False, use the newer method which handles unequal residual df
better. If None (default), auto-detect based on whether all
residual df are equal.
key : str, default "pylimma"
Key for fit results in adata.uns (AnnData input only).
Returns
-------
dict or None
If input is dict, returns updated dict with moderated statistics.
If input is AnnData, updates adata.uns[key] in place and returns None.
Notes
-----
The moderated statistics added to the fit are:
- t: moderated t-statistics
- p_value: two-sided p-values
- lods: B-statistics (log-odds of differential expression)
- s2_prior: prior variance
- df_prior: prior degrees of freedom
- s2_post: posterior variance
- df_total: total degrees of freedom
- F: moderated F-statistic (if multiple contrasts)
- F_p_value: F-statistic p-value (if multiple contrasts)
References
----------
Smyth, G. K. (2004). Linear models and empirical Bayes methods for
assessing differential expression in microarray experiments.
Statistical Applications in Genetics and Molecular Biology, 3(1), Article 3.
"""
fit, adata, adata_key = _resolve_fit_input(data, key)
is_anndata = adata is not None
# Shallow-copy so e_bayes(fit) does not mutate the caller's
# lm_fit/contrasts_fit output. Matches R's copy-on-modify:
# `fit <- eBayes(fit)` returns a new list whether or not the
# caller rebinds the original name. Slot arrays remain shared
# references (none of the code below writes into them in place).
fit = MArrayLM(fit)
# Validate fit object
coefficients = fit.get("coefficients")
stdev_unscaled = fit.get("stdev_unscaled")
sigma = fit.get("sigma")
df_residual = fit.get("df_residual")
if any(x is None for x in [coefficients, stdev_unscaled, sigma, df_residual]):
raise ValueError("fit must contain coefficients, stdev_unscaled, sigma, df_residual")
if np.max(df_residual) == 0:
raise ValueError("No residual degrees of freedom in linear model fits")
if not np.any(np.isfinite(sigma)):
raise ValueError("No finite residual standard deviations")
# Get covariate for trend estimation
if isinstance(trend, bool):
if trend:
covariate = fit.get("Amean")
if covariate is None:
raise ValueError("Need Amean component in fit to estimate trend")
else:
covariate = None
else:
# trend is a numeric array
trend = np.asarray(trend, dtype=np.float64)
if len(trend) != len(sigma):
raise ValueError(
"If trend is numeric then it should have length equal to the number of genes"
)
covariate = trend
# Squeeze variances
sv = squeeze_var(
sigma**2,
df_residual,
covariate=covariate,
span=span,
robust=robust,
winsor_tail_p=winsor_tail_p,
legacy=legacy,
)
s2_prior = sv["var_prior"]
s2_post = sv["var_post"]
df_prior = sv["df_prior"]
# Moderated t-statistics
t_stat = coefficients / stdev_unscaled / np.sqrt(s2_post)[:, np.newaxis]
# Total degrees of freedom (capped at pooled df)
df_total = df_residual + df_prior
df_pooled = np.nansum(df_residual)
df_total = np.minimum(df_total, df_pooled)
# P-values
p_value = 2 * stats.t.sf(np.abs(t_stat), df_total[:, np.newaxis])
# B-statistic (log-odds of DE)
var_prior_lim = (
stdev_coef_lim[0] ** 2 / np.median(s2_prior),
stdev_coef_lim[1] ** 2 / np.median(s2_prior),
)
var_prior = _tmixture_matrix(t_stat, stdev_unscaled, df_total, proportion, var_prior_lim)
if np.any(np.isnan(var_prior)):
nan_mask = np.isnan(var_prior)
s2_flat = np.atleast_1d(s2_prior).astype(np.float64).ravel()
var_prior[nan_mask] = np.resize(1.0 / s2_flat, int(nan_mask.sum()))
warnings.warn("Estimation of var_prior failed - set to default value")
# Compute log-odds
r = stdev_unscaled**2 + var_prior
r = r / stdev_unscaled**2
t2 = t_stat**2
# Handle infinite df_prior (can be scalar or array in robust eBayes)
inf_df = np.atleast_1d(df_prior > 1e6)
if np.any(inf_df):
# Some or all genes have infinite prior df - use limiting formula
kernel = t2 * (1 - 1 / r) / 2
if np.any(~inf_df):
# Some genes have finite prior df - apply standard formula to those
finite_mask = ~inf_df
t2_f = t2[finite_mask]
r_f = r[finite_mask]
df_total_f = df_total[finite_mask]
kernel[finite_mask] = (
(1 + df_total_f[:, np.newaxis])
/ 2
* np.log(
(t2_f + df_total_f[:, np.newaxis]) / (t2_f / r_f + df_total_f[:, np.newaxis])
)
)
else:
# All genes have finite prior df - use standard formula
kernel = (
(1 + df_total[:, np.newaxis])
/ 2
* np.log((t2 + df_total[:, np.newaxis]) / (t2 / r + df_total[:, np.newaxis]))
)
lods = np.log(proportion / (1 - proportion)) - np.log(r) / 2 + kernel
# Update fit
fit["df_prior"] = df_prior
fit["s2_prior"] = s2_prior
fit["var_prior"] = var_prior
# R's eBayes records the user-supplied DE proportion (ebayes.R:15);
# stored for reproducibility/inspection - not read by downstream
# pylimma code but part of the MArrayLM slot contract.
fit["proportion"] = proportion
fit["s2_post"] = s2_post
fit["t"] = t_stat
fit["df_total"] = df_total
fit["p_value"] = p_value
fit["lods"] = lods
# F-statistic (if design is full rank)
# For single coefficient, F = t^2 with df1 = 1 (handled by classify_tests_f)
design = fit.get("design")
if design is not None:
if is_fullrank(design):
f_stat, df1, df2 = _classify_tests_f(fit)
fit["F"] = f_stat
# df2 is a per-gene vector when df_residual varies
# (NAs, ndups>1). Broadcast pf() over genes and fall back
# to the chi-squared limit element-wise where df2 is Inf.
df2_arr = np.asarray(df2, dtype=np.float64)
if df2_arr.ndim == 0:
if np.isinf(df2_arr):
fit["F_p_value"] = stats.chi2.sf(f_stat * df1, df1)
else:
fit["F_p_value"] = stats.f.sf(f_stat, df1, df2_arr)
else:
mask_inf = np.isinf(df2_arr)
df2_safe = np.where(mask_inf, 1.0, df2_arr)
fp = stats.f.sf(f_stat, df1, df2_safe)
if mask_inf.any():
fp = np.where(mask_inf, stats.chi2.sf(f_stat * df1, df1), fp)
fit["F_p_value"] = fp
if not isinstance(fit, MArrayLM):
fit = MArrayLM(fit)
if is_anndata:
# Plain dict for h5ad compatibility; see lm_fit.
data.uns[key] = dict(fit)
return None
return fit
[docs]
def treat(
data,
fc: float = 1.2,
lfc: float | None = None,
trend: bool = False,
robust: bool = False,
winsor_tail_p: tuple[float, float] = (0.05, 0.1),
legacy: bool | None = None,
upshot: bool = False,
*,
span: float | None = None,
key: str = "pylimma",
) -> dict | None:
r"""
Moderated t-statistics relative to a log fold-change threshold.
TREAT (t-tests relative to a threshold) tests for evidence that the
true log fold-change is greater than a minimum value, rather than
simply testing for non-zero effect.
Parameters
----------
data : AnnData or dict
Either an AnnData object with fit results in adata.uns[key],
or a dict returned by lm_fit() or contrasts_fit().
fc : float, default 1.2
Fold-change threshold. Used if lfc is None.
lfc : float, optional
Log2 fold-change threshold. If provided, overrides fc.
trend : bool, default False
If True, allow prior variance to depend on mean expression.
span : float, optional
Span for lowess smoothing when fitting the mean-variance trend.
Only used when trend is True.
robust : bool, default False
If True, use robust estimation of hyperparameters.
winsor_tail_p : tuple, default (0.05, 0.1)
Winsorization proportions for robust estimation.
legacy : bool, optional
If True, use the original limma hyperparameter estimation method.
If False, use the newer method which handles unequal residual df
better. If None (default), auto-detect.
upshot : bool, default False
If True, use Gaussian quadrature to compute more accurate p-values
when the log fold-change threshold is small. This averages the
p-value over the interval [-lfc, lfc] rather than testing at the
boundary.
key : str, default "pylimma"
Key for fit results in adata.uns (AnnData input only).
Returns
-------
dict or None
If input is dict, returns updated dict with TREAT statistics.
If input is AnnData, updates adata.uns[key] in place and returns None.
Notes
-----
The key difference from e_bayes() is that TREAT computes p-values for
the hypothesis \|logFC\| > lfc, rather than logFC != 0. This is useful
when you want to find genes with biologically meaningful effect sizes.
The returned fit contains:
- t: moderated t-statistics (for display, not for p-values)
- p_value: p-values from TREAT test
- treat_lfc: the log fold-change threshold used
Note that B-statistics (lods) are not computed by TREAT.
References
----------
McCarthy, D. J. and Smyth, G. K. (2009). Testing significance relative
to a fold-change threshold is a TREAT. Bioinformatics, 25(6), 765-771.
"""
fit, adata, adata_key = _resolve_fit_input(data, key)
is_anndata = adata is not None
# Shallow-copy so treat(fit) does not mutate the caller's fit.
# Matches R's copy-on-modify; see e_bayes for rationale.
fit = MArrayLM(fit)
# Validate fit object
coefficients = fit.get("coefficients")
stdev_unscaled = fit.get("stdev_unscaled")
sigma = fit.get("sigma")
df_residual = fit.get("df_residual")
if any(x is None for x in [coefficients, stdev_unscaled, sigma, df_residual]):
raise ValueError("fit must contain coefficients, stdev_unscaled, sigma, df_residual")
if np.max(df_residual) == 0:
raise ValueError("No residual degrees of freedom in linear model fits")
if not np.any(np.isfinite(sigma)):
raise ValueError("No finite residual standard deviations")
# Clear any existing lods (B-statistics not computed for TREAT)
fit["lods"] = None
coefficients = np.asarray(coefficients)
stdev_unscaled = np.asarray(stdev_unscaled)
if trend:
covariate = fit.get("Amean")
if covariate is None:
raise ValueError("Need Amean component in fit to estimate trend")
else:
covariate = None
# Squeeze variances
sv = squeeze_var(
sigma**2,
df_residual,
covariate=covariate,
span=span,
robust=robust,
winsor_tail_p=winsor_tail_p,
legacy=legacy,
)
fit["df_prior"] = sv["df_prior"]
fit["s2_prior"] = sv["var_prior"]
fit["s2_post"] = sv["var_post"]
# Total degrees of freedom
df_total = df_residual + sv["df_prior"]
df_pooled = np.nansum(df_residual)
df_total = np.minimum(df_total, df_pooled)
fit["df_total"] = df_total
# Determine log fold-change threshold
if lfc is None:
lfc = np.log2(fc)
lfc = abs(lfc)
fit["treat_lfc"] = lfc
# Standard errors
se = stdev_unscaled * np.sqrt(sv["var_post"])[:, np.newaxis]
# Absolute coefficients
acoef = np.abs(coefficients)
# T-statistics for display (direction only)
t_stat = np.zeros_like(coefficients)
# Compute p-values using TREAT methodology
if upshot and lfc > 0:
# Use Gaussian quadrature for more accurate p-values
# Matching R's gauss.quad.prob(16, dist="uniform", l=-lfc, u=lfc)
from numpy.polynomial.legendre import leggauss
nodes, weights = leggauss(16)
# Transform nodes from [-1, 1] to [-lfc, lfc]
nodes = lfc * nodes
# Convert to probability weights (R divides by 2, not multiplies by interval)
weights = weights / 2 # Now weights sum to 1.0 (probability weights)
p_value = np.zeros_like(coefficients)
# Use only the positive half (nodes 8:16 correspond to [0, lfc])
# Positive half weights sum to 0.5
for i in range(8, 16):
lfci = nodes[i]
tstat_right_i = (acoef - lfci) / se
tstat_left_i = (acoef + lfci) / se
p_value += weights[i] * (
stats.t.sf(tstat_right_i, df_total[:, np.newaxis])
+ stats.t.sf(tstat_left_i, df_total[:, np.newaxis])
)
# Double for symmetry (we only integrated positive half)
p_value = 2 * p_value
# For display t-stat, use half the threshold (matching R's behaviour)
lfc_display = lfc / 2
tstat_right = np.maximum((acoef - lfc_display) / se, 0)
tstat_left = (acoef + lfc_display) / se
# In upshot mode, R uses lfc/2 for direction comparison
lfc_threshold = lfc_display
else:
# Standard TREAT p-values
tstat_right = (acoef - lfc) / se
tstat_left = (acoef + lfc) / se
# P-value is sum of both tails
p_value = stats.t.sf(tstat_right, df_total[:, np.newaxis]) + stats.t.sf(
tstat_left, df_total[:, np.newaxis]
)
# For display t-statistics, use the more conservative (closer to 0)
tstat_right = np.maximum(tstat_right, 0)
# In non-upshot mode, use original lfc for direction threshold
lfc_threshold = lfc
# Handle NaN coefficients
coefficients_clean = coefficients.copy()
coefficients_clean[np.isnan(coefficients_clean)] = 0
# Assign t-statistics based on direction
fc_up = coefficients_clean > lfc_threshold
fc_down = coefficients_clean < -lfc_threshold
t_stat[fc_up] = tstat_right[fc_up]
t_stat[fc_down] = -tstat_right[fc_down]
fit["t"] = t_stat
fit["p_value"] = p_value
if not isinstance(fit, MArrayLM):
fit = MArrayLM(fit)
if is_anndata:
# Plain dict for h5ad compatibility; see lm_fit.
data.uns[key] = dict(fit)
return None
return fit
[docs]
def top_treat(
fit,
coef: int | str = 0,
sort_by: str = "p",
resort_by: str | None = None,
**kwargs,
):
"""
Top-ranked genes after a :func:`treat` fit.
Thin wrapper around :func:`pylimma.top_table` that enforces the
two guardrails R limma's ``topTreat`` enforces: ``coef`` must be a
single coefficient, and ``sort_by="b"`` / ``resort_by="b"`` are
rejected because :func:`treat` does not produce a B-statistic.
Parameters
----------
fit : MArrayLM, AnnData, or dict
Fit object produced by :func:`treat`.
coef : int or str, default 0
Single coefficient index or name. Lists are not allowed; if a
sequence is supplied only the first element is used and a
warning is emitted.
sort_by : str, default "p"
Column to sort by. Passing ``"b"`` raises ``ValueError``.
resort_by : str, optional
Secondary sort column. Passing ``"b"`` raises ``ValueError``.
**kwargs
Forwarded to :func:`pylimma.top_table`.
Returns
-------
pandas.DataFrame
Ranked-gene table, same schema as :func:`top_table` minus the
``b`` column (which :func:`treat` does not compute).
Notes
-----
R limma's own ``topTreat`` source comments that the function "may
become deprecated soon as topTable() takes over"; pylimma keeps
it for direct R-to-pylimma call-site compatibility.
"""
from .toptable import top_table
if np.ndim(coef) > 0 and len(coef) > 1: # type: ignore[arg-type]
warnings.warn(
"treat is for single coefficients: only first value of coef being used",
stacklevel=2,
)
coef = coef[0] # type: ignore[index]
# R treat.R:86-87 raises whenever sort.by/resort.by names B; the
# comparison is on the raw user value, so capital "B" must also
# be rejected.
if isinstance(sort_by, str) and sort_by.lower() == "b":
raise ValueError('Trying to sort_by="b", but treat does not produce a B-statistic')
if isinstance(resort_by, str) and resort_by.lower() == "b":
raise ValueError('Trying to resort_by="b", but treat does not produce a B-statistic')
return top_table(fit, coef=coef, sort_by=sort_by, resort_by=resort_by, **kwargs)
[docs]
def pred_fcm(
fit,
coef: int = 1,
var_indep_of_fc: bool = True,
all_de: bool = True,
prop_true_null_method: str = "lfdr",
*,
key: str = "pylimma",
) -> np.ndarray:
"""
Predictive (empirical-Bayes shrunken) fold changes.
Port of R limma's ``predFCm`` (``predFCm.R``). Uses the eBayes
posterior variance together with an estimated proportion of true
nulls to shrink log-fold-changes toward zero.
Parameters
----------
fit : AnnData, MArrayLM, or dict
Fit from :func:`lm_fit` + :func:`e_bayes`. For AnnData input,
the fit is read from ``adata.uns[key]``.
coef : int, default 1
Zero-based coefficient index (R uses 1-based).
var_indep_of_fc : bool, default True
If True, assume the prior variance is independent of
fold-change magnitude.
all_de : bool, default True
If True, assume all genes are differentially expressed.
prop_true_null_method : {"lfdr", "convest", "mean", "hist"}
Forwarded to :func:`prop_true_null`.
key : str, default "pylimma"
Key for fit results in adata.uns (AnnData input only).
"""
from .utils import fit_gamma_intercept, prop_true_null
# Accept AnnData-stored fits (parity with contrasts_fit / e_bayes /
# treat / top_table / decide_tests).
fit, _adata, _adata_key = _resolve_fit_input(fit, key)
if fit.get("p_value") is None:
fit = e_bayes(fit)
coef = int(coef)
p_col = fit["p_value"][:, coef]
p = 1.0 - prop_true_null(p_col, method=prop_true_null_method)
if p == 0:
p = 1e-8
trend = np.asarray(fit["s2_prior"]).size > 1
robust = np.asarray(fit["df_prior"]).size > 1
fit = e_bayes(fit, proportion=p, trend=trend, robust=robust)
v = float(np.asarray(fit["cov_coefficients"])[coef, coef])
coef_col = np.asarray(fit["coefficients"])[:, coef]
s2_post = np.asarray(fit["s2_post"])
if var_indep_of_fc:
v0 = fit_gamma_intercept(coef_col**2, offset=v * s2_post)
if v0 < 0:
v0 = 1e-8
pfc = coef_col * v0 / (v0 + v * s2_post)
if not all_de:
A = p / (1 - p)
B = np.sqrt(v * s2_post / (v * s2_post + v0))
C = np.exp(coef_col**2 * v0 / (2 * v**2 * s2_post**2 + 2 * v * v0 * s2_post))
lods = np.log(A * B * C)
prob_de = np.exp(lods) / (1 + np.exp(lods))
prob_de[lods > 700] = 1.0
pfc = pfc * prob_de
else:
b2 = coef_col**2 / s2_post
v0 = fit_gamma_intercept(b2, offset=v)
# R uses pmin here (bug-for-bug port).
v0 = np.minimum(v0, 1e-8)
pfc = coef_col * v0 / (v0 + v)
if not all_de:
A = p / (1 - p)
B = np.sqrt(v / (v + v0))
C = np.exp(coef_col**2 * v0 / (2 * v**2 * s2_post + 2 * v * v0 * s2_post))
lods = np.log(A * B * C)
prob_de = np.exp(lods) / (1 + np.exp(lods))
prob_de[lods > 700] = 1.0
pfc = pfc * prob_de
return pfc