Source code for paftacular.util
import re
from collections import Counter
[docs]
def parse_formula(formula: str) -> Counter[str]:
    """
    Parse a chemical formula into element counts, supporting isotopes.

    The formula is scanned left to right as a sequence of tokens:

    - a plain element: one ASCII uppercase letter, an optional ASCII
      lowercase letter, and an optional decimal count ("H", "Ca", "C2");
    - a bracketed isotope: "[" mass number, element symbol, optional
      count "]" ("[13C]", "[13C2]").

    Whitespace between tokens is skipped. A missing count means 1, and
    repeated occurrences of the same key are summed ("C2C3" gives C: 5).

    Args:
        formula: Chemical formula string (e.g., "H2O", "CO2", "[13C2]H6")

    Returns:
        Counter mapping element symbols to their counts
        - Regular elements: "H", "O", "Ca"
        - Isotopes: "13C", "2H" (without brackets)

    Raises:
        ValueError: If the formula is empty, contains no elements, has an
            unclosed "[", has malformed bracket content, or contains a
            character that cannot start a token.
    """
    if not formula:
        raise ValueError("Empty formula string")

    element_counts: Counter[str] = Counter()
    length = len(formula)
    i = 0
    while i < length:
        char = formula[i]

        # Whitespace is allowed between tokens and carries no meaning.
        if char.isspace():
            i += 1
            continue

        # Isotope notation: [13C2] or [13C]
        if char == "[":
            close = formula.find("]", i)
            if close == -1:
                raise ValueError(f"Unclosed bracket at position {i}")
            content = formula[i + 1 : close]
            # mass number + element (capital + optional lowercase) + optional count
            match = re.match(r"^(\d+)([A-Z][a-z]?)(\d*)$", content)
            if not match:
                raise ValueError(f"Invalid isotope format: [{content}]")
            isotope_num, element, count_str = match.groups()
            # The key is the isotope notation WITHOUT brackets, e.g. "13C".
            element_counts[f"{isotope_num}{element}"] += int(count_str) if count_str else 1
            i = close + 1
            continue

        # A plain element must start with an ASCII capital, the same symbol
        # shape the isotope pattern enforces ([A-Z][a-z]?). str.isupper()
        # would also let non-ASCII letters such as "É" through as elements.
        if not "A" <= char <= "Z":
            raise ValueError(f"Unexpected character '{char}' at position {i}")

        symbol_start = i
        i += 1
        if i < length and "a" <= formula[i] <= "z":
            i += 1
        element = formula[symbol_start:i]

        # Optional count. isdecimal() accepts the same characters as the
        # regex \d above; isdigit() would additionally accept superscripts
        # like "²", which int() cannot convert.
        count_start = i
        while i < length and formula[i].isdecimal():
            i += 1
        count_str = formula[count_start:i]
        element_counts[element] += int(count_str) if count_str else 1

    # Reached only when the formula consisted solely of whitespace.
    if not element_counts:
        raise ValueError(f"No elements found in formula: '{formula}'")
    return element_counts