Source code for stdpopsim.dfe

"""
Common infrastructure for specifying DFEs.
"""
# import numpy as np
import textwrap
import attr
import collections.abc
import numpy as np


[docs]@attr.s(kw_only=True)
class MutationType(object):
    """
    Class representing a "type" of mutation.  The design closely (exactly)
    mirrors SLiM's MutationType class.

    The main thing that mutation types carry is a way of drawing a selection
    coefficient for each new mutation. This ``distribution_type`` should be one
    of (see the SLiM manual for more information on these):

    - ``f``: fixed, one parameter (the selection coefficient)
    - ``e``: exponential, one parameter (mean)
    - ``g``: gamma, two parameters (mean, shape)
    - ``n``: normal, two parameters (mean, SD)
    - ``w``: Weibull, two parameters (scale, shape)
    - ``lp``: positive logNormal, two parameters (mean and sd on log scale; see rlnorm)
    - ``ln``: negative logNormal, two parameters (mean and sd on log scale; see rlnorm)

    Type "lp" is always positive, and type "ln" is always negative: both use
    the same log-normal distribution, but "ln" is multiplied by -1.  For
    exponential and gamma, a negative mean can be provided, obtaining always
    negative values.

    :ivar distribution_type: A one-letter abbreviation for the distribution of
        fitness effects that each new mutation of this type draws from (see below).
    :vartype distribution_type: str
    :ivar distribution_args: Arguments for the distribution type.
    :vartype distribution_type: list
    :ivar dominance_coeff: A single dominance coefficient (negative = underdominance,
        0 = recessive, 0.5 = additive, 1.0 = completely dominant, > 1.0 = overdominant)
    :vartype dominance_coeff: float
    :ivar convert_to_substitution: Whether to retain any fixed mutations in the
        simulation: if not, we cannot ask about their frequency once fixed.
        (Either way, they will remain in the tree sequence).
    :vartype convert_to_substitution: bool
    """

    dominance_coeff = attr.ib(default=0.5, type=float)
    distribution_type = attr.ib(default="f", type=str)
    distribution_args = attr.ib(factory=lambda: [0], type=list)
    convert_to_substitution = attr.ib(default=True, type=bool)

    def __attrs_post_init__(self):
        if not isinstance(self.dominance_coeff, (float, int)):
            raise ValueError("dominance_coeff must be a number.")

        if not isinstance(self.distribution_type, str):
            raise ValueError("distribution_type must be str.")

        if not isinstance(self.distribution_args, list):
            raise ValueError("distribution_args must be list.")

        for i in range(len(self.distribution_args)):
            if not isinstance(self.distribution_args[i], (float, int)):
                raise ValueError(f"distribution_args[{i}] is not a number.")
            if not np.isfinite(self.distribution_args[i]):
                raise ValueError(f"distribution_args[{i}] is an invalid parameter.")

        if not isinstance(self.convert_to_substitution, bool):
            raise ValueError("convert_to_substitution must be bool.")

        if not np.isfinite(self.dominance_coeff):
            raise ValueError(f"Invalid dominance coefficient {self.dominance_coeff}.")

        # To add a new distribution type: validate the
        # distribution_args here, and add unit tests.
        if self.distribution_type == "f":
            # Fixed-value selection coefficent.
            if len(self.distribution_args) != 1:
                raise ValueError(
                    "Fixed-value mutation types (distribution_type='f') "
                    "take a single selection-coefficient parameter."
                )
        elif self.distribution_type == "g":
            # Gamma-distributed selection coefficient with (mean, shape)
            # parameterisation. A negative value for the mean is permitted,
            # and indicates a reflection of the horizontal axis.
            # See Eidos documentation for rgamma().
            if len(self.distribution_args) != 2:
                raise ValueError(
                    "Gamma-distributed sel. coefs. (distribution_type='g') "
                    "use a (mean, shape) parameterisation."
                )
            if self.distribution_args[1] <= 0:
                raise ValueError("The shape parameter must be positive.")
        elif self.distribution_type == "e":
            # An exponentially-distributed fitness effect (mean).
            # See Eidos documentation for rexp().
            if len(self.distribution_args) != 1:
                raise ValueError(
                    "Exponentially-distributed sel. coefs. (distribution_type='e') "
                    "use a (mean) parameterisation."
                )
        elif self.distribution_type == "n":
            # An normally-distributed fitness effect (mean, standard deviation).
            # See Eidos documentation for rnorm().
            if len(self.distribution_args) != 2:
                raise ValueError(
                    "Normally-distributed sel. coefs. (distribution_type='n') "
                    "use a (mean, sd) parameterisation."
                )
            if self.distribution_args[1] < 0:
                raise ValueError("The sd parameter must be nonnegative.")
        elif self.distribution_type == "w":
            # A Weibull-distributed fitness effect (scale, shape).
            # See Eidos documentation for rweibull().
            if len(self.distribution_args) != 2:
                raise ValueError(
                    "Weibull-distributed sel. coef. (distribution_type='w') "
                    "use a (scale, shape) parameterisation."
                )
            if self.distribution_args[0] <= 0:
                raise ValueError("The scale parameter must be positive.")
            if self.distribution_args[1] <= 0:
                raise ValueError("The shape parameter must be positive.")
        elif self.distribution_type in ("lp", "ln"):
            # A lognormally-distributed fitness effect (meanlog, sdlog),
            # either positive or negative.
            # See Eidos documentation for rlnorm().
            if len(self.distribution_args) != 2:
                raise ValueError(
                    "Lognormally-distributed sel. coefs. (distribution_type='lp'/'ln') "
                    "use a (meanlog, sdlog) parameterisation, requiring sdlog > 0."
                )
            if self.distribution_args[1] < 0:
                raise ValueError("The sdlog parameter must be nonnegative.")
            # dealing with lognormal distribution
            # (adding instead of multiplying the mean):
            logmean = self.distribution_args[0]
            logsd = self.distribution_args[1]
            sign = "" if self.distribution_type == "lp" else "-1 *"
            self.distribution_args = [
                f"return {sign}rlnorm(1, {logmean} + log(Q), {logsd});"
            ]
            self.distribution_type = "s"
        else:
            raise ValueError(
                f"{self.distribution_type} is not a supported distribution type."
            )

        # The index(s) of the param in the distribution_args list that should be
        # multiplied by Q when using --slim-scaling-factor Q.
        self.Q_scaled_index = {
            "e": [0],  # mean
            "f": [0],  # fixed value
            "g": [0],  # mean
            "n": [0, 1],  # mean and sd
            "w": [0],  # scale
            "s": [],  # script types should just printout arguments
        }[self.distribution_type]

    @property
    def is_neutral(self):
        """
        Tests whether the mutation type is strictly neutral. This is defined here to
        be of type "f" and with fitness effect 0.0, and so excludes other situations
        that also produce only neutral mutations (e.g., exponential with mean 0).
        """
        return self.distribution_type == "f" and self.distribution_args[0] == 0


[docs]@attr.s(kw_only=True)
class DFE:
    """
    Class representing a "Distribution of Fitness Effects", i.e., a DFE.
    The class records the different *mutation types*, and the *proportions*
    with which they occur. The overall rate of mutations will be determined
    by the Contig to which the DFE is applied (see :meth:`.Contig.add_dfe`).

    Instances of this class are constructed by DFE implementors, following the
    :ref:`developer documentation <sec_development_dfe_model>`. To instead
    obtain a pre-specified model as listed in the :ref:`sec_catalog`,
    see :meth:`Species.get_dfe`.

    ``proportions`` and ``mutation_types`` must be lists of the same length,
    and ``proportions`` should be nonnegative numbers summing to 1.

    :ivar ~.mutation_types: A list of :class:`.MutationType` objects associated
        with the DFE. Defaults to an empty list.
    :vartype ~.mutation_types: list
    :ivar ~.proportions: A list of the proportions of new mutations that
        fall in to each of the mutation types (must sum to 1).
    :vartype ~.proportions: list
    :ivar ~.id: The unique identifier for this model. DFE IDs should be
        short and memorable, and conform to the stdpopsim
        :ref:`naming conventions <sec_development_naming_conventions>`
        for DFE models.
    :vartype ~.id: str
    :ivar ~.description: A short description of this model as it would be used in
        written text, e.g., "Lognormal DFE". This should
        describe the DFE itself and not contain author or year information.
    :vartype ~.description: str
    :ivar long_description: A concise, but detailed, summary of the DFE model.
    :vartype long_description: str
    :ivar citations: A list of :class:`Citations <.Citation>`, that describe the primary
        reference(s) for the DFE model.
    :vartype citations: list of :class:`Citation`
    """

    id = attr.ib()
    description = attr.ib()
    long_description = attr.ib()
    mutation_types = attr.ib(default=None)
    proportions = attr.ib(default=None)
    citations = attr.ib(default=None)
    qc_dfe = attr.ib(default=None)

    def __attrs_post_init__(self):
        self.citations = [] if self.citations is None else self.citations
        self.mutation_types = [] if self.mutation_types is None else self.mutation_types
        if self.proportions is None and len(self.mutation_types) == 0:
            self.proportions = []
        elif self.proportions is None:
            # will error below if this doesn't make sense
            self.proportions = [1]

        if not (isinstance(self.proportions, (collections.abc.Sequence, np.ndarray))):
            raise ValueError("proportions must be a list or numpy array.")

        if not (isinstance(self.mutation_types, list)):
            raise ValueError("mutation_types must be a list.")

        if not (len(self.proportions) == len(self.mutation_types)):
            raise ValueError(
                "proportions and mutation_types must be lists of the same length."
            )

        for p in self.proportions:
            if not isinstance(p, (float, int)) or p < 0:
                raise ValueError("proportions must be nonnegative numbers.")

        if len(self.proportions) > 0:
            sum_p = sum(self.proportions)
            if not np.isclose(sum_p, 1):
                raise ValueError("proportions must sum to 1.0.")

        for m in self.mutation_types:
            if not isinstance(m, MutationType):
                raise ValueError(
                    "mutation_types must be a list of MutationType objects."
                )

    @property
    def is_neutral(self):
        return all([m.is_neutral for m in self.mutation_types])

    def __str__(self):
        long_desc_lines = [
            line.strip()
            for line in textwrap.wrap(textwrap.dedent(self.long_description))
        ]
        long_desc = "\n║                     ".join(long_desc_lines)
        s = (
            "DFE:\n"
            f"║  id               = {self.id}\n"
            f"║  description      = {self.description}\n"
            f"║  long_description = {long_desc}\n"
            f"║  citations        = {[cite.doi for cite in self.citations]}\n"
        )
        return s

[docs]    def register_qc(self, qc_dfe):
        """
        Register a QC model implementation for this DFE.
        """
        if not isinstance(qc_dfe, self.__class__):
            raise ValueError(f"Cannot register non-DFE '{qc_dfe}' as QC DFE.")
        if self.qc_dfe is not None:
            raise ValueError(f"QC DFE already registered for {self.id}.")
        self.qc_dfe = qc_dfe


def neutral_dfe(convert_to_substitution=True):
    id = "neutral"
    description = "neutral DFE"
    long_description = "strictly neutral mutations"
    neutral = MutationType(convert_to_substitution=convert_to_substitution)
    return DFE(
        id=id,
        description=description,
        long_description=long_description,
        mutation_types=[neutral],
        proportions=[1.0],
    )