"""
Subject line normalization utilities for Mail Intake.

This module provides helper functions for normalizing email subject lines
to enable reliable thread-level comparison and grouping.

Normalization is intentionally conservative to avoid altering semantic
meaning while removing common reply and forward prefixes.
"""

import re


_PREFIX_RE = re.compile(
    r"^(re|fw|fwd)\s*:\s*",
    re.IGNORECASE,
)
"""Case-insensitive pattern for one leading ``re``/``fw``/``fwd`` marker.

Matches only at the very start of the string and also consumes any
whitespace around the colon that follows the marker.
"""


def normalize_subject(subject: str) -> str:
    """
    Reduce an email subject to a canonical form for thread matching.

    Steps applied, in order:
    - Trims leading and trailing whitespace
    - Removes leading reply/forward markers (``Re:``, ``Fwd:``, ``FW:``)
      one at a time until none remain, so stacked prefixes such as
      ``Re: Fwd: Re:`` are fully removed
    - Squeezes every run of whitespace into a single space

    Letter case is left untouched, and markers that do not sit at the
    start of the subject are kept as-is. The function stays deliberately
    conservative so the meaning of the subject is not altered.

    Args:
        subject: Raw subject line from a message header.

    Returns:
        The normalized subject, suitable for thread grouping. An empty
        (falsy) subject yields an empty string.
    """
    text = subject.strip() if subject else ""

    # Peel off one leading prefix per iteration; every match consumes at
    # least the marker and its colon, so the loop always makes progress.
    match = _PREFIX_RE.match(text)
    while match is not None:
        remainder = text[match.end():]
        text = remainder.strip()
        match = _PREFIX_RE.match(text)

    # Splitting on whitespace and re-joining collapses internal runs
    words = text.split()
    return " ".join(words)