Files
mail-intake/mail_intake/parsers/subject.py
2026-01-03 05:21:55 +05:30

34 lines
766 B
Python

import re
# Matches a single leading reply/forward marker ("Re:", "Fw:", "Fwd:", in any
# letter case), allowing optional whitespace on either side of the colon.
_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)


def normalize_subject(subject: str) -> str:
    """
    Return a canonical form of an email subject for comparing threads.

    Steps applied:
    - Remove leading reply/forward markers (Re:, Fw:, Fwd:), however many
      are stacked and regardless of letter case
    - Trim both ends and squeeze each whitespace run to a single space
    - Leave the casing of the remaining text untouched

    A falsy subject (such as the empty string or None) yields "".
    The transformation is deliberately minimal.
    """
    if not subject:
        return ""

    remainder = subject.strip()

    # Peel one marker per pass until the text no longer starts with one
    # (handles chains such as "Re: Fwd: Re:").
    marker = _PREFIX_RE.match(remainder)
    while marker is not None:
        remainder = remainder[marker.end():].strip()
        marker = _PREFIX_RE.match(remainder)

    # split() with no arguments breaks on any whitespace run, so re-joining
    # with one space collapses tabs, newlines and repeated spaces.
    words = remainder.split()
    return " ".join(words)