"""
# Summary

Subject line normalization utilities for Mail Intake.

This module provides helper functions for normalizing email subject lines
to enable reliable thread-level comparison and grouping.

Normalization is intentionally conservative to avoid altering semantic
meaning while removing common reply and forward prefixes.
"""

import re

_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)
"""
Regular expression matching a single leading reply/forward prefix
(`re`, `fw` or `fwd`, case-insensitive), its colon, and any whitespace
around the colon.
"""


def normalize_subject(subject: str) -> str:
    """
    Normalize an email subject for thread-level comparison.

    Args:
        subject (str):
            Raw subject line from a message header. Any falsy value
            (for example an empty string) yields an empty result.

    Returns:
        str:
            Normalized subject string suitable for thread grouping.

    Notes:
        **Responsibilities:**

        - Strips common prefixes such as `Re:`, `Fwd:`, and `FW:`.
        - Repeats prefix stripping to handle stacked prefixes
          (e.g. `Re: Fwd: Re:`).
        - Collapses every run of whitespace into a single space and
          removes leading/trailing whitespace.
        - Preserves original casing (no lowercasing).

        **Guarantees:**

        - Only prefixes at the very start of the subject are removed;
          the same text appearing later in the subject is left untouched.
        - This function is intentionally conservative and avoids
          aggressive transformations that could alter the semantic
          meaning of the subject.
    """
    if not subject:
        return ""

    remaining = subject.strip()

    # Peel one leading prefix per iteration until none is left. The pattern
    # is anchored at the start, so each match begins at index 0 and
    # `match.end()` marks where the rest of the subject begins.
    match = _PREFIX_RE.match(remaining)
    while match is not None:
        remaining = remaining[match.end():].strip()
        match = _PREFIX_RE.match(remaining)

    # split() without arguments breaks on any whitespace run, so re-joining
    # with single spaces collapses interior whitespace.
    return " ".join(remaining.split())