Source code for paftacular.constants

# Table from the specification showing how each internal ion series differs from the default "by" series
import re
from enum import StrEnum


[docs] class InternalSeries(StrEnum): """Enumeration of internal ion series types""" AX = "ax" BX = "bx" CX = "cx" AY = "ay" BY = "by" CY = "cy" AZ = "az" BZ = "bz" CZ = "cz"
# Formula difference attached to each internal ion series; ``None`` marks a
# series that carries no difference.
INTERNAL_SERIES_TO_DIFF: dict[InternalSeries, str | None] = {
    InternalSeries.AX: None,  # Default, no difference
    InternalSeries.BX: "+CO",
    InternalSeries.CX: "+CHNO",
    InternalSeries.AY: "-CO",
    InternalSeries.BY: None,  # Default, no difference
    InternalSeries.CY: "+NH",
    InternalSeries.AZ: "-CHNO",
    InternalSeries.BZ: "-NH",
    InternalSeries.CZ: None,  # No difference
}

# The same table keyed by the two single-letter components of the series
# label, e.g. ("b", "x") -> "+CO". Built from the mapping above so the two
# views share one set of entries, in the same order.
INTERNAL_MASS_DIFFS: dict[tuple[str, str], None | str] = {
    (series.value[0], series.value[1]): diff
    for series, diff in INTERNAL_SERIES_TO_DIFF.items()
}
[docs] class IonSeries(StrEnum): """Enumeration of ion series types""" A = "a" B = "b" C = "c" D = "d" V = "v" W = "w" X = "x" Y = "y" Z = "z" DA = "da" DB = "db" WA = "wa" WB = "wb"
[docs] class BackboneCleavageType(StrEnum): """Types of backbone cleavages for internal fragments""" A = "a" # C-CO bond cleavage B = "b" # CO-NH bond cleavage C = "c" # NH-CH bond cleavage X = "x" # CH-CO bond cleavage Y = "y" # CO-NH bond cleavage Z = "z" # NH-CH bond cleavage
[docs] class AnnotationName(StrEnum): PRECURSOR = "precursor" IMMONIUM = "immonium" REFERENCE = "reference" NAMED_COMPOUND = "named_compound" FORMULA = "formula" SMILES = "smiles" UNANNOTATED = "unannotated" SERIES = "series" INTERNAL = "internal"
[docs] class AminoAcids(StrEnum): """Standard amino acids""" A = "A" C = "C" D = "D" E = "E" F = "F" G = "G" H = "H" I = "I" K = "K" L = "L" M = "M" N = "N" P = "P" Q = "Q" R = "R" S = "S" T = "T" V = "V" W = "W" Y = "Y"
# --- Standalone patterns for individual annotation components ---------------

# Groups: (sign, count, isotope label). The label is an optional mass number
# followed by an optional element symbol, or the literal "A".
ISOTOPE_REGEX_PATTERN = r"([+-]?)(\d*)i((?:\d+)?(?:[A-Z][a-z]*)?|A)?"

# One signed neutral loss. The three alternatives, in order, are:
#   1. a plain number (not immediately followed by "["),
#   2. a formula, optionally containing bracketed isotopes such as "[13C]",
#   3. a bracketed name, optionally carrying a nested bracketed part.
NEUTRAL_LOSS_REGEX_PATTERN = (
    r"[+-](?:"
    r"\d+(?:\.\d+)?(?!\[)"
    r"|\d*(?:(?:\[[0-9]+[A-Z][A-Za-z0-9]*\])|(?:[A-Z][A-Za-z0-9]*))+"
    r"|\d*\[(?:[A-Za-z0-9:\.]+)(?:\[[A-Za-z0-9\.:\-]+\])?\]"
    r")"
)

# Groups: (sign, count, adduct formula).
ADDUCT_REGEX_PATTERN = r"([+-])(\d*)([A-Z][A-Za-z0-9]*)"

# NOTE(review): nothing in this section uses this value; presumably it bounds
# caches defined elsewhere -- confirm against the callers.
MAX_CACHE_SIZE = 10_000

# --- Regex components for better readability --------------------------------

_AUXILIARY = r"(?P<is_auxiliary>&)?"
_ANALYTE_REF = r"(?:(?P<analyte_reference>\d+)@)?"

# Ion type patterns
_PEPTIDE_SERIES = r"(?:(?P<series>(?:da|db|wa|wb)|[axbyczdwv]\.?)(?P<ordinal>\d+)(?:\{(?P<sequence_ordinal>.+)\})?)"
_INTERNAL = r"(?P<series_internal>m(?P<internal_start>\d+):(?P<internal_end>\d+)(?:\{(?P<sequence_internal>.+)\})?)"
_PRECURSOR = r"(?P<precursor>p)"
_IMMONIUM = r"(?:I(?P<immonium>[A-Z])(?:\[(?P<immonium_modification>(?:[^\]]+))\])?)"
_REFERENCE = r"(?P<reference>r(?:(?:\[(?P<reference_label>[^\]]+)\])))"
_FORMULA = r"(?:f\{(?P<formula>[A-Za-z0-9\[\]]+)\})"
_NAMED = r"(?:_\{(?P<named_compound>[^\{\}\s,/]+)\})"
_SMILES = r"(?:s\{(?P<smiles>[^\}]+)\})"
_UNKNOWN = r"(?:(?P<unannotated>\?)(?P<unannotated_label>\d+)?)"

# Combine all ion types. Alternation order is significant and is kept as-is.
_ION_TYPES = (
    "(?:"
    + "|".join(
        (
            _PEPTIDE_SERIES,
            _INTERNAL,
            _PRECURSOR,
            _IMMONIUM,
            _REFERENCE,
            _FORMULA,
            _NAMED,
            _SMILES,
            _UNKNOWN,
        )
    )
    + ")"
)

# Modifiers
#
# Zero or more signed neutral losses, using the same three alternatives as
# NEUTRAL_LOSS_REGEX_PATTERN (minus its "(?!\[)" lookahead).
#
# The pattern is wrapped with implicit string concatenation on purpose. A
# backslash used to continue a line inside a raw string stays in the literal,
# and an earlier version therefore carried a stray escaped-whitespace token
# after the isotope bracket, so losses such as "-[13C]O2" only matched when
# whitespace followed the "]".
_NEUTRAL_LOSSES = (
    r"(?P<neutral_losses>(?:[+-](?:"
    r"\d+(?:\.\d+)?"
    r"|\d*(?:(?:(?:\[[0-9]+[A-Z][A-Za-z0-9]*\])|(?:[A-Z][A-Za-z0-9]*))+)"
    r"|(?:\d*\[(?:(?:[A-Za-z0-9:\.]+)(?:\[(?:[A-Za-z0-9\.:\-]+)\])?)\])"
    r"))+)?"
)
_ISOTOPE = r"(?P<isotope>(?:(?:[+-]\d*)i(?:(?:\d+)?(?:[A-Z][a-z]*)?|A)?)+)?"
_ADDUCTS = r"(?:\[(?P<adducts>M(?:[+-]\d*[A-Z][A-Za-z0-9]*)+)\])?"
_CHARGE = r"(?:\^(?P<charge>[+-]?\d+))?"
_MASS_ERROR = r"(?:/(?P<mass_error>-?\d+(?:\.\d+)?)(?P<mass_error_unit>ppm)?)?"
_CONFIDENCE = r"(?:\*(?P<confidence>\d*(?:\.\d+)?))?"

# Full pattern: anchored at both ends, components in a fixed order. Only the
# ion type is mandatory; every other component is optional.
FULL_PAF_PATTERN = re.compile(
    f"^{_AUXILIARY}{_ANALYTE_REF}{_ION_TYPES}"
    f"{_NEUTRAL_LOSSES}{_ISOTOPE}{_ADDUCTS}{_CHARGE}{_MASS_ERROR}{_CONFIDENCE}$"
)