# mail-intake/mail_intake/parsers/subject.py

import re


# Matches exactly one leading reply/forward marker ("Re:", "FW:", "Fwd:"),
# case-insensitively, allowing whitespace on either side of the colon.
_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)


def normalize_subject(subject: str) -> str:
    """
    Reduce an email subject to a form suitable for comparing threads.

    Steps, in order:
    - Trim surrounding whitespace
    - Drop every leading reply/forward marker (Re:, Fwd:, FW:), however
      many are stacked up
    - Squash each run of whitespace into a single space

    Letter case is left untouched, and markers that do not sit at the
    very start of the subject are kept. A falsy subject yields "".
    """
    if not subject:
        return ""

    remainder = subject.strip()

    # Peel one marker per iteration until the text no longer starts with one
    # (handles chains such as "Re: Fwd: Re: ...").
    marker = _PREFIX_RE.match(remainder)
    while marker is not None:
        remainder = remainder[marker.end():].strip()
        marker = _PREFIX_RE.match(remainder)

    # split() with no arguments discards all whitespace runs, so re-joining
    # with a single space collapses them.
    return " ".join(remainder.split())