Files
Vishesh 'ironeagle' Bangotra f22af90e98 docs(mail_intake): add comprehensive docstrings across ingestion, adapters, auth, and parsing layers
- docs(mail_intake/__init__.py): document module-based public API and usage patterns
- docs(mail_intake/ingestion/reader.py): document high-level ingestion orchestration
- docs(mail_intake/adapters/base.py): document adapter contract for mail providers
- docs(mail_intake/adapters/gmail.py): document Gmail adapter implementation and constraints
- docs(mail_intake/auth/base.py): document authentication provider contract
- docs(mail_intake/auth/google.py): document Google OAuth authentication provider
- docs(mail_intake/models/message.py): document canonical email message model
- docs(mail_intake/models/thread.py): document canonical email thread model
- docs(mail_intake/parsers/body.py): document message body extraction logic
- docs(mail_intake/parsers/headers.py): document message header normalization utilities
- docs(mail_intake/parsers/subject.py): document subject normalization utilities
- docs(mail_intake/config.py): document global configuration model
- docs(mail_intake/exceptions.py): document library exception hierarchy
2026-01-09 17:40:25 +05:30

88 lines
2.6 KiB
Python

"""
Message header parsing utilities for Mail Intake.
This module provides helper functions for normalizing and extracting
useful information from provider-native message headers.
The functions here are intentionally simple and tolerant of malformed
or incomplete header data.
"""
from typing import Dict, List, Tuple, Optional
def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]:
    """
    Flatten a list of name/value header mappings into a single dict.

    Each entry is expected to expose ``name`` and ``value`` keys. Header
    names are lowercased so lookups on the result can use lowercase keys;
    values have surrounding whitespace removed. When the same header name
    occurs more than once, the last occurrence wins.

    Args:
        raw_headers: List of header dictionaries, each containing
            ``name`` and ``value`` keys. A falsy value (``None`` or an
            empty list) yields an empty result.

    Returns:
        Dictionary mapping lowercase header names to stripped values.
        Entries with a missing/empty ``name`` or a ``value`` of ``None``
        are omitted; an empty-string value is kept.

    Example:
        Input:
            [
                {"name": "From", "value": "John Doe <john@example.com>"},
                {"name": "Subject", "value": "Re: Interview Update"},
            ]

        Output:
            {
                "from": "John Doe <john@example.com>",
                "subject": "Re: Interview Update",
            }
    """
    if not raw_headers:
        return {}

    normalized: Dict[str, str] = {}
    for entry in raw_headers:
        key = entry.get("name")
        raw_value = entry.get("value")
        # An empty value is legitimate; only a missing (None) value or a
        # missing/empty name disqualifies the entry.
        if key and raw_value is not None:
            normalized[key.lower()] = raw_value.strip()
    return normalized
def extract_sender(headers: Dict[str, str]) -> Tuple[str, Optional[str]]:
    """
    Extract sender email and optional display name from headers.

    This function reads the ``from`` entry of ``headers`` and attempts to
    extract:
    - Sender email address
    - Optional human-readable display name

    When the value contains both ``<`` and ``>``, the address is the text
    between the first ``<`` and the first ``>`` that follows it; anything
    after that closing bracket (trailing whitespace, comments) is ignored.
    The text before the first ``<`` is the display name, with surrounding
    whitespace and double quotes removed. Otherwise the whole value is
    treated as the address.

    Args:
        headers: Normalized header dictionary as returned by
            :func:`parse_headers` (lowercase keys).

    Returns:
        A tuple ``(email, name)`` where:
        - ``email`` is the sender email address, or ``""`` when the
          ``from`` header is missing or empty
        - ``name`` is the display name, or ``None`` if unavailable

    Examples:
        ``"John Doe <john@example.com>"`` → ``("john@example.com", "John Doe")``
        ``"john@example.com"`` → ``("john@example.com", None)``
        ``"John Doe <john@example.com> (work)"`` → ``("john@example.com", "John Doe")``
    """
    from_header = headers.get("from")
    if not from_header:
        return "", None

    if "<" in from_header and ">" in from_header:
        name_part, _, remainder = from_header.partition("<")
        # Cut at the first closing bracket instead of rstrip(">"), which
        # only worked when ">" was the very last character and otherwise
        # left "> ..." glued to the address.
        email = remainder.partition(">")[0].strip()
        name = name_part.strip().strip('"') or None
        return email, name

    return from_header.strip(), None