Source code for pylimma.fitmixture_mod

# SPDX-License-Identifier: GPL-3.0-or-later
#
# This module is a Python port of code from R limma. Original R copyrights:
#   fitmixture.R   Copyright (C) 2007 Gordon Smyth
# Python port: Copyright (C) 2026 John Mulvey
"""
``fitmixture`` - fit a two-sample mixture model via non-linear least
squares. Port of R limma's ``fitmixture``.
"""

from __future__ import annotations

import numpy as np
from scipy import linalg

from .utils import logcosh


[docs] def fitmixture(log2e, mixprop, niter: int = 4, trace: bool = False) -> dict: """ Fit a mixture model by non-linear least squares. Port of ``limma/R/fitmixture.R``. Parameters ---------- log2e : ndarray log2 expression matrix, shape (nprobes, narrays). mixprop : array-like of float, length narrays Mixture proportion of sample 1 in each array. niter : int, default 4 Non-linear least squares iterations. trace : bool, default False If True, print stdev / beta summaries per iteration. Returns ------- dict with keys ``A``, ``M``, ``stdev``. """ log2e = np.asarray(log2e, dtype=np.float64) if log2e.ndim == 1: log2e = log2e.reshape(-1, 1) mixprop = np.asarray(mixprop, dtype=np.float64) narrays = log2e.shape[1] nprobes = log2e.shape[0] y = 2.0**log2e # Linear pre-fit: y ~ cbind(mixprop, 1 - mixprop) per-probe. X = np.column_stack([mixprop, 1 - mixprop]) start, _, _, _ = linalg.lstsq(X, y.T) start = np.maximum(start, 1.0) # b = log(start[0]) - log(start[1]) per probe. b = np.log(start[0, :]) - np.log(start[1, :]) z = np.log(y) pm = np.ones(nprobes)[:, None] @ (2 * mixprop - 1)[None, :] sold = None for i in range(niter): mub = logcosh(b / 2)[:, None] + np.log(1 + np.tanh(b / 2)[:, None] * pm) a = np.mean(z - mub, axis=1) mu = a[:, None] + mub if trace: s = np.sqrt(narrays / (narrays - 2) * np.mean((z - mu) ** 2, axis=1)) if sold is not None: print("stdev changes", np.percentile(sold - s, [0, 25, 50, 75, 100])) sold = s tb = np.tanh(b / 2)[:, None] dmu = (tb + pm) / (1 + tb * pm) / 2 numer = np.mean(dmu * (z - mu), axis=1) denom = 1e-6 + np.mean((dmu - np.mean(dmu, axis=1, keepdims=True)) ** 2, axis=1) b = b + numer / denom if trace: print("betas", np.percentile(b, [0, 25, 50, 75, 100])) mub = logcosh(b / 2)[:, None] + np.log(1 + np.tanh(b / 2)[:, None] * pm) a = np.mean(z - mub, axis=1) mu = a[:, None] + mub s = np.sqrt(narrays / (narrays - 2) * np.mean((z - mu) ** 2, axis=1)) l2 = np.log(2.0) return {"A": a / l2, "M": b / l2, "stdev": s / l2}