docs(mail_intake): add comprehensive docstrings across ingestion, adapters, auth, and parsing layers
- docs(mail_intake/__init__.py): document module-based public API and usage patterns
- docs(mail_intake/ingestion/reader.py): document high-level ingestion orchestration
- docs(mail_intake/adapters/base.py): document adapter contract for mail providers
- docs(mail_intake/adapters/gmail.py): document Gmail adapter implementation and constraints
- docs(mail_intake/auth/base.py): document authentication provider contract
- docs(mail_intake/auth/google.py): document Google OAuth authentication provider
- docs(mail_intake/models/message.py): document canonical email message model
- docs(mail_intake/models/thread.py): document canonical email thread model
- docs(mail_intake/parsers/body.py): document message body extraction logic
- docs(mail_intake/parsers/headers.py): document message header normalization utilities
- docs(mail_intake/parsers/subject.py): document subject normalization utilities
- docs(mail_intake/config.py): document global configuration model
- docs(mail_intake/exceptions.py): document library exception hierarchy
This commit is contained in:
@@ -1,7 +1,18 @@
|
||||
"""
|
||||
Subject line normalization utilities for Mail Intake.
|
||||
|
||||
This module provides helper functions for normalizing email subject lines
|
||||
to enable reliable thread-level comparison and grouping.
|
||||
|
||||
Normalization is intentionally conservative to avoid altering semantic
|
||||
meaning while removing common reply and forward prefixes.
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
|
||||
_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)
|
||||
"""Regular expression matching common reply/forward subject prefixes."""
|
||||
|
||||
|
||||
def normalize_subject(subject: str) -> str:
|
||||
@@ -9,11 +20,19 @@ def normalize_subject(subject: str) -> str:
|
||||
Normalize an email subject for thread-level comparison.
|
||||
|
||||
Operations:
|
||||
- Strip common prefixes (Re:, Fwd:, FW:)
|
||||
- Collapse whitespace
|
||||
- Preserve original casing (no lowercasing)
|
||||
- Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``
|
||||
- Repeats prefix stripping to handle stacked prefixes
|
||||
- Collapses excessive whitespace
|
||||
- Preserves original casing (no lowercasing)
|
||||
|
||||
This function is intentionally conservative.
|
||||
This function is intentionally conservative and avoids aggressive
|
||||
transformations that could alter the semantic meaning of the subject.
|
||||
|
||||
Args:
|
||||
subject: Raw subject line from a message header.
|
||||
|
||||
Returns:
|
||||
Normalized subject string suitable for thread grouping.
|
||||
"""
|
||||
if not subject:
|
||||
return ""
|
||||
|
||||
Reference in New Issue
Block a user