"""
# Summary

Subject line normalization utilities for Mail Intake.

This module provides helper functions for normalizing email subject lines
to enable reliable thread-level comparison and grouping.

Normalization is intentionally conservative to avoid altering semantic
meaning while removing common reply and forward prefixes.
"""

import re


_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", flags=re.IGNORECASE)
"""
Case-insensitive pattern for a single leading reply/forward marker
(`Re:`, `Fw:`, or `Fwd:`), including optional whitespace around the colon.
"""


def normalize_subject(subject: str) -> str:
    """
    Reduce an email subject to a canonical form for thread grouping.

    Args:
        subject (str):
            Subject line as taken from a message header.

    Returns:
        str:
            The subject with leading reply/forward markers removed and
            whitespace collapsed. An empty (falsy) input yields `""`.

    Notes:
        **Responsibilities:**

            - Removes leading `Re:`, `Fw:`, and `Fwd:` markers in any casing.
            - Keeps removing until no leading marker remains, so stacked
              markers such as `Re: Fwd: Re:` are fully stripped.
            - Trims surrounding whitespace and collapses interior runs of
              whitespace into single spaces.
            - Leaves letter casing untouched.

        **Guarantees:**

            - Only markers at the very start of the subject are removed;
              the same text appearing later in the subject is kept.
    """
    if not subject:
        return ""

    remainder = subject.strip()

    # Peel off one leading marker per pass until none is left.
    prefix = _PREFIX_RE.match(remainder)
    while prefix is not None:
        remainder = remainder[prefix.end():].strip()
        prefix = _PREFIX_RE.match(remainder)

    # split() without arguments drops empty fields, so rejoining with a
    # single space collapses every whitespace run.
    return " ".join(remainder.split())