Source code for pylimma.ebayes

# SPDX-License-Identifier: GPL-3.0-or-later
#
# This module is a Python port of code from R limma. Original R copyrights:
#   ebayes.R                   Copyright (C) 2003-2025 Gordon Smyth
# Python port: Copyright (C) 2026 John Mulvey
"""
Empirical Bayes moderation for pylimma.

Implements the core eBayes statistics from limma:
- e_bayes(): compute moderated t-statistics, p-values, B-statistics
"""

from __future__ import annotations

import warnings
from typing import TYPE_CHECKING

import numpy as np
from scipy import stats
from scipy.special import gammaln

from .classes import MArrayLM, _resolve_fit_input
from .lmfit import is_fullrank
from .squeeze_var import squeeze_var

if TYPE_CHECKING:
    pass


def _t_isf_log(log_p: np.ndarray, df: float) -> np.ndarray:
    """
    Inverse survival function for t-distribution with log-scale input.

    R uses qt(log_p, df, lower.tail=FALSE, log.p=TRUE) which handles
    extreme p-values on the log scale without precision loss. Python's
    scipy.stats.t.isf requires the p-value itself, causing underflow for
    very small p-values.

    This function handles underflow by using an asymptotic approximation
    for very extreme values.

    Parameters
    ----------
    log_p : ndarray
        Log of tail probability (i.e., logsf values, all <= 0)
    df : float
        Degrees of freedom (scalar, common to all values)

    Returns
    -------
    ndarray
        Quantiles (positive values since we're dealing with upper tail)
    """
    result = np.empty_like(log_p)

    # Try standard method: isf(exp(log_p), df)
    p = np.exp(log_p)

    # Check for underflow: p == 0 but log_p is finite (not -inf)
    underflow = (p == 0) & np.isfinite(log_p)
    standard = ~underflow

    if np.any(standard):
        result[standard] = stats.t.isf(p[standard], df)

    if np.any(underflow):
        # Asymptotic approximation for extreme tail
        # For large t: sf(t, df) ~ c(df) * t^(-df)
        # where c(df) = Gamma((df+1)/2) / (sqrt(pi*df) * Gamma(df/2))
        # So: log_p ~ log(c(df)) - df * log(t)
        # => log(t) ~ (log(c(df)) - log_p) / df
        log_c = gammaln((df + 1) / 2) - 0.5 * np.log(np.pi * df) - gammaln(df / 2)
        log_t = (log_c - log_p[underflow]) / df
        result[underflow] = np.exp(log_t)

    return result


def _classify_tests_f(
    fit: dict,
    fstat_only: bool = True,
) -> np.ndarray:
    """
    Compute F-statistics from t-statistics.

    Internal wrapper that calls classify_tests_f from decide_tests module.
    """
    from .decide_tests import classify_tests_f

    return classify_tests_f(fit, p_value=0.01, fstat_only=fstat_only)


def _tmixture_vector(
    tstat: np.ndarray,
    stdev_unscaled: np.ndarray,
    df: np.ndarray | float,
    proportion: float,
    v0_lim: tuple[float, float] | None = None,
) -> float:
    """
    Estimate scale factor in mixture of two t-distributions.

    tstat is assumed to follow sqrt(1+v0/v1)*t(df) with probability `proportion`
    and t(df) otherwise. v1 is stdev_unscaled^2 and v0 is to be estimated.

    Parameters
    ----------
    tstat : ndarray
        t-statistics
    stdev_unscaled : ndarray
        Unscaled standard errors
    df : ndarray or float
        Degrees of freedom
    proportion : float
        Expected proportion of differentially expressed genes
    v0_lim : tuple, optional
        Limits for v0

    Returns
    -------
    float
        Estimated prior variance
    """
    # Remove missing values
    valid = ~np.isnan(tstat)
    if not np.all(valid):
        tstat = tstat[valid]
        stdev_unscaled = stdev_unscaled[valid]
        if isinstance(df, np.ndarray):
            df = df[valid]

    n_genes = len(tstat)
    n_target = int(np.ceil(proportion / 2 * n_genes))
    if n_target < 1:
        return np.nan

    # Ensure p at least matches selected proportion
    p = max(n_target / n_genes, proportion)

    # Work with absolute t-statistics
    tstat = np.abs(tstat)

    # Method requires equal df - adjust t-statistics to max df.
    # Copy to avoid mutating caller's df array (line below writes into df).
    df = np.atleast_1d(df).astype(np.float64, copy=True)
    if len(df) == 1:
        df = np.full(n_genes, df[0])
    max_df = np.max(df)

    needs_adjustment = df < max_df
    if np.any(needs_adjustment):
        # Convert via p-value on log scale (R parity: uses log.p=TRUE throughout)
        tail_p = stats.t.logsf(tstat[needs_adjustment], df[needs_adjustment])
        tstat[needs_adjustment] = _t_isf_log(tail_p, max_df)
        df[needs_adjustment] = max_df

    # Select top statistics. Use stable descending sort to match R's
    # order(..., decreasing=TRUE) (ebayes.R:143); default quicksort +
    # [::-1] has a reversed tie order which diverges on ties.
    order = np.argsort(-tstat, kind="stable")[:n_target]
    tstat = tstat[order]
    v1 = stdev_unscaled[order] ** 2

    # Compare to order statistics
    r = np.arange(1, n_target + 1)
    p0 = 2 * stats.t.sf(tstat, max_df)
    p_target = ((r - 0.5) / n_genes - (1 - p) * p0) / p

    v0 = np.zeros(n_target)
    pos = p_target > p0
    if np.any(pos):
        q_target = stats.t.isf(p_target[pos] / 2, max_df)
        v0[pos] = v1[pos] * ((tstat[pos] / q_target) ** 2 - 1)

    if v0_lim is not None:
        v0 = np.clip(v0, v0_lim[0], v0_lim[1])

    return float(np.mean(v0))


def _tmixture_matrix(
    tstat: np.ndarray,
    stdev_unscaled: np.ndarray,
    df: np.ndarray | float,
    proportion: float,
    v0_lim: tuple[float, float] | None = None,
) -> np.ndarray:
    """
    Estimate prior variance for each coefficient.

    Parameters
    ----------
    tstat : ndarray
        t-statistics (n_genes, n_coefs)
    stdev_unscaled : ndarray
        Unscaled standard errors (n_genes, n_coefs)
    df : ndarray or float
        Degrees of freedom
    proportion : float
        Expected proportion of DE genes
    v0_lim : tuple, optional
        Limits for v0

    Returns
    -------
    ndarray
        Prior variance for each coefficient
    """
    n_coefs = tstat.shape[1]
    v0 = np.zeros(n_coefs)
    for j in range(n_coefs):
        v0[j] = _tmixture_vector(tstat[:, j], stdev_unscaled[:, j], df, proportion, v0_lim)
    return v0


[docs] def e_bayes( data, proportion: float = 0.01, stdev_coef_lim: tuple[float, float] = (0.1, 4.0), trend: bool | np.ndarray = False, span: float | None = None, robust: bool = False, winsor_tail_p: tuple[float, float] = (0.05, 0.1), legacy: bool | None = None, key: str = "pylimma", ) -> dict | None: """ Empirical Bayes moderation of t-statistics. Computes moderated t-statistics, p-values, and B-statistics (log-odds of differential expression) by empirical Bayes shrinkage of the gene-wise sample variances towards a common prior. Parameters ---------- data : AnnData or dict Either an AnnData object with fit results in adata.uns[key], or a dict returned by lm_fit() or contrasts_fit(). proportion : float, default 0.01 Expected proportion of differentially expressed genes. stdev_coef_lim : tuple, default (0.1, 4.0) Limits for the prior standard deviation of coefficients. trend : bool or array_like, default False If True, allow prior variance to depend on mean expression (Amean from the fit). If a numeric array, use that as the covariate for the mean-variance trend. span : float, optional Span for lowess smoothing when fitting the mean-variance trend. Only used when trend is True. If None, an appropriate span is chosen automatically. robust : bool, default False If True, use robust estimation of hyperparameters. Outlier variances are Winsorized and the prior df is estimated robustly. winsor_tail_p : tuple, default (0.05, 0.1) Winsorization proportions for robust estimation. The first value is for the lower tail, the second for the upper tail. legacy : bool, optional If True, use the original limma hyperparameter estimation method. If False, use the newer method which handles unequal residual df better. If None (default), auto-detect based on whether all residual df are equal. key : str, default "pylimma" Key for fit results in adata.uns (AnnData input only). Returns ------- dict or None If input is dict, returns updated dict with moderated statistics. If input is AnnData, updates adata.uns[key] in place and returns None. Notes ----- The moderated statistics added to the fit are: - t: moderated t-statistics - p_value: two-sided p-values - lods: B-statistics (log-odds of differential expression) - s2_prior: prior variance - df_prior: prior degrees of freedom - s2_post: posterior variance - df_total: total degrees of freedom - F: moderated F-statistic (if multiple contrasts) - F_p_value: F-statistic p-value (if multiple contrasts) References ---------- Smyth, G. K. (2004). Linear models and empirical Bayes methods for assessing differential expression in microarray experiments. Statistical Applications in Genetics and Molecular Biology, 3(1), Article 3. """ fit, adata, adata_key = _resolve_fit_input(data, key) is_anndata = adata is not None # Shallow-copy so e_bayes(fit) does not mutate the caller's # lm_fit/contrasts_fit output. Matches R's copy-on-modify: # `fit <- eBayes(fit)` returns a new list whether or not the # caller rebinds the original name. Slot arrays remain shared # references (none of the code below writes into them in place). fit = MArrayLM(fit) # Validate fit object coefficients = fit.get("coefficients") stdev_unscaled = fit.get("stdev_unscaled") sigma = fit.get("sigma") df_residual = fit.get("df_residual") if any(x is None for x in [coefficients, stdev_unscaled, sigma, df_residual]): raise ValueError("fit must contain coefficients, stdev_unscaled, sigma, df_residual") if np.max(df_residual) == 0: raise ValueError("No residual degrees of freedom in linear model fits") if not np.any(np.isfinite(sigma)): raise ValueError("No finite residual standard deviations") # Get covariate for trend estimation if isinstance(trend, bool): if trend: covariate = fit.get("Amean") if covariate is None: raise ValueError("Need Amean component in fit to estimate trend") else: covariate = None else: # trend is a numeric array trend = np.asarray(trend, dtype=np.float64) if len(trend) != len(sigma): raise ValueError( "If trend is numeric then it should have length equal to the number of genes" ) covariate = trend # Squeeze variances sv = squeeze_var( sigma**2, df_residual, covariate=covariate, span=span, robust=robust, winsor_tail_p=winsor_tail_p, legacy=legacy, ) s2_prior = sv["var_prior"] s2_post = sv["var_post"] df_prior = sv["df_prior"] # Moderated t-statistics t_stat = coefficients / stdev_unscaled / np.sqrt(s2_post)[:, np.newaxis] # Total degrees of freedom (capped at pooled df) df_total = df_residual + df_prior df_pooled = np.nansum(df_residual) df_total = np.minimum(df_total, df_pooled) # P-values p_value = 2 * stats.t.sf(np.abs(t_stat), df_total[:, np.newaxis]) # B-statistic (log-odds of DE) var_prior_lim = ( stdev_coef_lim[0] ** 2 / np.median(s2_prior), stdev_coef_lim[1] ** 2 / np.median(s2_prior), ) var_prior = _tmixture_matrix(t_stat, stdev_unscaled, df_total, proportion, var_prior_lim) if np.any(np.isnan(var_prior)): nan_mask = np.isnan(var_prior) s2_flat = np.atleast_1d(s2_prior).astype(np.float64).ravel() var_prior[nan_mask] = np.resize(1.0 / s2_flat, int(nan_mask.sum())) warnings.warn("Estimation of var_prior failed - set to default value") # Compute log-odds r = stdev_unscaled**2 + var_prior r = r / stdev_unscaled**2 t2 = t_stat**2 # Handle infinite df_prior (can be scalar or array in robust eBayes) inf_df = np.atleast_1d(df_prior > 1e6) if np.any(inf_df): # Some or all genes have infinite prior df - use limiting formula kernel = t2 * (1 - 1 / r) / 2 if np.any(~inf_df): # Some genes have finite prior df - apply standard formula to those finite_mask = ~inf_df t2_f = t2[finite_mask] r_f = r[finite_mask] df_total_f = df_total[finite_mask] kernel[finite_mask] = ( (1 + df_total_f[:, np.newaxis]) / 2 * np.log( (t2_f + df_total_f[:, np.newaxis]) / (t2_f / r_f + df_total_f[:, np.newaxis]) ) ) else: # All genes have finite prior df - use standard formula kernel = ( (1 + df_total[:, np.newaxis]) / 2 * np.log((t2 + df_total[:, np.newaxis]) / (t2 / r + df_total[:, np.newaxis])) ) lods = np.log(proportion / (1 - proportion)) - np.log(r) / 2 + kernel # Update fit fit["df_prior"] = df_prior fit["s2_prior"] = s2_prior fit["var_prior"] = var_prior # R's eBayes records the user-supplied DE proportion (ebayes.R:15); # stored for reproducibility/inspection - not read by downstream # pylimma code but part of the MArrayLM slot contract. fit["proportion"] = proportion fit["s2_post"] = s2_post fit["t"] = t_stat fit["df_total"] = df_total fit["p_value"] = p_value fit["lods"] = lods # F-statistic (if design is full rank) # For single coefficient, F = t^2 with df1 = 1 (handled by classify_tests_f) design = fit.get("design") if design is not None: if is_fullrank(design): f_stat, df1, df2 = _classify_tests_f(fit) fit["F"] = f_stat # df2 is a per-gene vector when df_residual varies # (NAs, ndups>1). Broadcast pf() over genes and fall back # to the chi-squared limit element-wise where df2 is Inf. df2_arr = np.asarray(df2, dtype=np.float64) if df2_arr.ndim == 0: if np.isinf(df2_arr): fit["F_p_value"] = stats.chi2.sf(f_stat * df1, df1) else: fit["F_p_value"] = stats.f.sf(f_stat, df1, df2_arr) else: mask_inf = np.isinf(df2_arr) df2_safe = np.where(mask_inf, 1.0, df2_arr) fp = stats.f.sf(f_stat, df1, df2_safe) if mask_inf.any(): fp = np.where(mask_inf, stats.chi2.sf(f_stat * df1, df1), fp) fit["F_p_value"] = fp if not isinstance(fit, MArrayLM): fit = MArrayLM(fit) if is_anndata: # Plain dict for h5ad compatibility; see lm_fit. data.uns[key] = dict(fit) return None return fit
[docs] def treat( data, fc: float = 1.2, lfc: float | None = None, trend: bool = False, robust: bool = False, winsor_tail_p: tuple[float, float] = (0.05, 0.1), legacy: bool | None = None, upshot: bool = False, *, span: float | None = None, key: str = "pylimma", ) -> dict | None: r""" Moderated t-statistics relative to a log fold-change threshold. TREAT (t-tests relative to a threshold) tests for evidence that the true log fold-change is greater than a minimum value, rather than simply testing for non-zero effect. Parameters ---------- data : AnnData or dict Either an AnnData object with fit results in adata.uns[key], or a dict returned by lm_fit() or contrasts_fit(). fc : float, default 1.2 Fold-change threshold. Used if lfc is None. lfc : float, optional Log2 fold-change threshold. If provided, overrides fc. trend : bool, default False If True, allow prior variance to depend on mean expression. span : float, optional Span for lowess smoothing when fitting the mean-variance trend. Only used when trend is True. robust : bool, default False If True, use robust estimation of hyperparameters. winsor_tail_p : tuple, default (0.05, 0.1) Winsorization proportions for robust estimation. legacy : bool, optional If True, use the original limma hyperparameter estimation method. If False, use the newer method which handles unequal residual df better. If None (default), auto-detect. upshot : bool, default False If True, use Gaussian quadrature to compute more accurate p-values when the log fold-change threshold is small. This averages the p-value over the interval [-lfc, lfc] rather than testing at the boundary. key : str, default "pylimma" Key for fit results in adata.uns (AnnData input only). Returns ------- dict or None If input is dict, returns updated dict with TREAT statistics. If input is AnnData, updates adata.uns[key] in place and returns None. Notes ----- The key difference from e_bayes() is that TREAT computes p-values for the hypothesis \|logFC\| > lfc, rather than logFC != 0. This is useful when you want to find genes with biologically meaningful effect sizes. The returned fit contains: - t: moderated t-statistics (for display, not for p-values) - p_value: p-values from TREAT test - treat_lfc: the log fold-change threshold used Note that B-statistics (lods) are not computed by TREAT. References ---------- McCarthy, D. J. and Smyth, G. K. (2009). Testing significance relative to a fold-change threshold is a TREAT. Bioinformatics, 25(6), 765-771. """ fit, adata, adata_key = _resolve_fit_input(data, key) is_anndata = adata is not None # Shallow-copy so treat(fit) does not mutate the caller's fit. # Matches R's copy-on-modify; see e_bayes for rationale. fit = MArrayLM(fit) # Validate fit object coefficients = fit.get("coefficients") stdev_unscaled = fit.get("stdev_unscaled") sigma = fit.get("sigma") df_residual = fit.get("df_residual") if any(x is None for x in [coefficients, stdev_unscaled, sigma, df_residual]): raise ValueError("fit must contain coefficients, stdev_unscaled, sigma, df_residual") if np.max(df_residual) == 0: raise ValueError("No residual degrees of freedom in linear model fits") if not np.any(np.isfinite(sigma)): raise ValueError("No finite residual standard deviations") # Clear any existing lods (B-statistics not computed for TREAT) fit["lods"] = None coefficients = np.asarray(coefficients) stdev_unscaled = np.asarray(stdev_unscaled) if trend: covariate = fit.get("Amean") if covariate is None: raise ValueError("Need Amean component in fit to estimate trend") else: covariate = None # Squeeze variances sv = squeeze_var( sigma**2, df_residual, covariate=covariate, span=span, robust=robust, winsor_tail_p=winsor_tail_p, legacy=legacy, ) fit["df_prior"] = sv["df_prior"] fit["s2_prior"] = sv["var_prior"] fit["s2_post"] = sv["var_post"] # Total degrees of freedom df_total = df_residual + sv["df_prior"] df_pooled = np.nansum(df_residual) df_total = np.minimum(df_total, df_pooled) fit["df_total"] = df_total # Determine log fold-change threshold if lfc is None: lfc = np.log2(fc) lfc = abs(lfc) fit["treat_lfc"] = lfc # Standard errors se = stdev_unscaled * np.sqrt(sv["var_post"])[:, np.newaxis] # Absolute coefficients acoef = np.abs(coefficients) # T-statistics for display (direction only) t_stat = np.zeros_like(coefficients) # Compute p-values using TREAT methodology if upshot and lfc > 0: # Use Gaussian quadrature for more accurate p-values # Matching R's gauss.quad.prob(16, dist="uniform", l=-lfc, u=lfc) from numpy.polynomial.legendre import leggauss nodes, weights = leggauss(16) # Transform nodes from [-1, 1] to [-lfc, lfc] nodes = lfc * nodes # Convert to probability weights (R divides by 2, not multiplies by interval) weights = weights / 2 # Now weights sum to 1.0 (probability weights) p_value = np.zeros_like(coefficients) # Use only the positive half (nodes 8:16 correspond to [0, lfc]) # Positive half weights sum to 0.5 for i in range(8, 16): lfci = nodes[i] tstat_right_i = (acoef - lfci) / se tstat_left_i = (acoef + lfci) / se p_value += weights[i] * ( stats.t.sf(tstat_right_i, df_total[:, np.newaxis]) + stats.t.sf(tstat_left_i, df_total[:, np.newaxis]) ) # Double for symmetry (we only integrated positive half) p_value = 2 * p_value # For display t-stat, use half the threshold (matching R's behaviour) lfc_display = lfc / 2 tstat_right = np.maximum((acoef - lfc_display) / se, 0) tstat_left = (acoef + lfc_display) / se # In upshot mode, R uses lfc/2 for direction comparison lfc_threshold = lfc_display else: # Standard TREAT p-values tstat_right = (acoef - lfc) / se tstat_left = (acoef + lfc) / se # P-value is sum of both tails p_value = stats.t.sf(tstat_right, df_total[:, np.newaxis]) + stats.t.sf( tstat_left, df_total[:, np.newaxis] ) # For display t-statistics, use the more conservative (closer to 0) tstat_right = np.maximum(tstat_right, 0) # In non-upshot mode, use original lfc for direction threshold lfc_threshold = lfc # Handle NaN coefficients coefficients_clean = coefficients.copy() coefficients_clean[np.isnan(coefficients_clean)] = 0 # Assign t-statistics based on direction fc_up = coefficients_clean > lfc_threshold fc_down = coefficients_clean < -lfc_threshold t_stat[fc_up] = tstat_right[fc_up] t_stat[fc_down] = -tstat_right[fc_down] fit["t"] = t_stat fit["p_value"] = p_value if not isinstance(fit, MArrayLM): fit = MArrayLM(fit) if is_anndata: # Plain dict for h5ad compatibility; see lm_fit. data.uns[key] = dict(fit) return None return fit
[docs] def top_treat( fit, coef: int | str = 0, sort_by: str = "p", resort_by: str | None = None, **kwargs, ): """ Top-ranked genes after a :func:`treat` fit. Thin wrapper around :func:`pylimma.top_table` that enforces the two guardrails R limma's ``topTreat`` enforces: ``coef`` must be a single coefficient, and ``sort_by="b"`` / ``resort_by="b"`` are rejected because :func:`treat` does not produce a B-statistic. Parameters ---------- fit : MArrayLM, AnnData, or dict Fit object produced by :func:`treat`. coef : int or str, default 0 Single coefficient index or name. Lists are not allowed; if a sequence is supplied only the first element is used and a warning is emitted. sort_by : str, default "p" Column to sort by. Passing ``"b"`` raises ``ValueError``. resort_by : str, optional Secondary sort column. Passing ``"b"`` raises ``ValueError``. **kwargs Forwarded to :func:`pylimma.top_table`. Returns ------- pandas.DataFrame Ranked-gene table, same schema as :func:`top_table` minus the ``b`` column (which :func:`treat` does not compute). Notes ----- R limma's own ``topTreat`` source comments that the function "may become deprecated soon as topTable() takes over"; pylimma keeps it for direct R-to-pylimma call-site compatibility. """ from .toptable import top_table if np.ndim(coef) > 0 and len(coef) > 1: # type: ignore[arg-type] warnings.warn( "treat is for single coefficients: only first value of coef being used", stacklevel=2, ) coef = coef[0] # type: ignore[index] # R treat.R:86-87 raises whenever sort.by/resort.by names B; the # comparison is on the raw user value, so capital "B" must also # be rejected. if isinstance(sort_by, str) and sort_by.lower() == "b": raise ValueError('Trying to sort_by="b", but treat does not produce a B-statistic') if isinstance(resort_by, str) and resort_by.lower() == "b": raise ValueError('Trying to resort_by="b", but treat does not produce a B-statistic') return top_table(fit, coef=coef, sort_by=sort_by, resort_by=resort_by, **kwargs)
[docs] def pred_fcm( fit, coef: int = 1, var_indep_of_fc: bool = True, all_de: bool = True, prop_true_null_method: str = "lfdr", *, key: str = "pylimma", ) -> np.ndarray: """ Predictive (empirical-Bayes shrunken) fold changes. Port of R limma's ``predFCm`` (``predFCm.R``). Uses the eBayes posterior variance together with an estimated proportion of true nulls to shrink log-fold-changes toward zero. Parameters ---------- fit : AnnData, MArrayLM, or dict Fit from :func:`lm_fit` + :func:`e_bayes`. For AnnData input, the fit is read from ``adata.uns[key]``. coef : int, default 1 Zero-based coefficient index (R uses 1-based). var_indep_of_fc : bool, default True If True, assume the prior variance is independent of fold-change magnitude. all_de : bool, default True If True, assume all genes are differentially expressed. prop_true_null_method : {"lfdr", "convest", "mean", "hist"} Forwarded to :func:`prop_true_null`. key : str, default "pylimma" Key for fit results in adata.uns (AnnData input only). """ from .utils import fit_gamma_intercept, prop_true_null # Accept AnnData-stored fits (parity with contrasts_fit / e_bayes / # treat / top_table / decide_tests). fit, _adata, _adata_key = _resolve_fit_input(fit, key) if fit.get("p_value") is None: fit = e_bayes(fit) coef = int(coef) p_col = fit["p_value"][:, coef] p = 1.0 - prop_true_null(p_col, method=prop_true_null_method) if p == 0: p = 1e-8 trend = np.asarray(fit["s2_prior"]).size > 1 robust = np.asarray(fit["df_prior"]).size > 1 fit = e_bayes(fit, proportion=p, trend=trend, robust=robust) v = float(np.asarray(fit["cov_coefficients"])[coef, coef]) coef_col = np.asarray(fit["coefficients"])[:, coef] s2_post = np.asarray(fit["s2_post"]) if var_indep_of_fc: v0 = fit_gamma_intercept(coef_col**2, offset=v * s2_post) if v0 < 0: v0 = 1e-8 pfc = coef_col * v0 / (v0 + v * s2_post) if not all_de: A = p / (1 - p) B = np.sqrt(v * s2_post / (v * s2_post + v0)) C = np.exp(coef_col**2 * v0 / (2 * v**2 * s2_post**2 + 2 * v * v0 * s2_post)) lods = np.log(A * B * C) prob_de = np.exp(lods) / (1 + np.exp(lods)) prob_de[lods > 700] = 1.0 pfc = pfc * prob_de else: b2 = coef_col**2 / s2_post v0 = fit_gamma_intercept(b2, offset=v) # R uses pmin here (bug-for-bug port). v0 = np.minimum(v0, 1e-8) pfc = coef_col * v0 / (v0 + v) if not all_de: A = p / (1 - p) B = np.sqrt(v / (v + v0)) C = np.exp(coef_col**2 * v0 / (2 * v**2 * s2_post + 2 * v * v0 * s2_post)) lods = np.log(A * B * C) prob_de = np.exp(lods) / (1 + np.exp(lods)) prob_de[lods > 700] = 1.0 pfc = pfc * prob_de return pfc