Google-style docstrings

This commit is contained in:
2026-03-08 00:29:24 +05:30
parent 9f37af5761
commit 9f9e472ada
21 changed files with 593 additions and 358 deletions

View File

@@ -1,6 +1,10 @@
"""
Message parsing utilities for Mail Intake.
---
## Summary
This package contains **provider-aware but adapter-agnostic parsing helpers**
used to extract and normalize structured information from raw mail payloads.
@@ -16,6 +20,17 @@ This package does not:
Parsing functions are designed to be composable and are orchestrated by the
ingestion layer.
---
## Public API
extract_body
parse_headers
extract_sender
normalize_subject
---
"""
from .body import extract_body

View File

@@ -1,6 +1,10 @@
"""
Message header parsing utilities for Mail Intake.
---
## Summary
This module provides helper functions for normalizing and extracting
useful information from provider-native message headers.
@@ -15,29 +19,34 @@ def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]:
"""
Convert a list of Gmail-style headers into a normalized dict.
Provider payloads (such as Gmail) typically represent headers as a list
of name/value mappings. This function normalizes them into a
case-insensitive dictionary keyed by lowercase header names.
Args:
raw_headers: List of header dictionaries, each containing
``name`` and ``value`` keys.
raw_headers (List[Dict[str, str]]):
List of header dictionaries, each containing ``name`` and ``value`` keys.
Returns:
Dictionary mapping lowercase header names to stripped values.
Dict[str, str]:
Dictionary mapping lowercase header names to stripped values.
Notes:
**Guarantees:**
- Accepts provider payloads (such as Gmail) that represent headers as a list of name/value mappings
- Normalizes them into a dictionary keyed by lowercase header names, so lookups are case-insensitive when lowercase keys are used
Example:
Input:
[
{"name": "From", "value": "John Doe <john@example.com>"},
{"name": "Subject", "value": "Re: Interview Update"},
]
Output:
{
"from": "John Doe <john@example.com>",
"subject": "Re: Interview Update",
}
Typical usage:
Input:
[
{"name": "From", "value": "John Doe <john@example.com>"},
{"name": "Subject", "value": "Re: Interview Update"},
]
Output:
{
"from": "John Doe <john@example.com>",
"subject": "Re: Interview Update",
}
"""
headers: Dict[str, str] = {}
@@ -57,22 +66,24 @@ def extract_sender(headers: Dict[str, str]) -> Tuple[str, Optional[str]]:
"""
Extract sender email and optional display name from headers.
This function parses the ``From`` header and attempts to extract:
- Sender email address
- Optional human-readable display name
Args:
headers: Normalized header dictionary as returned by
:func:`parse_headers`.
headers (Dict[str, str]):
Normalized header dictionary as returned by :func:`parse_headers`.
Returns:
A tuple ``(email, name)`` where:
- ``email`` is the sender email address
- ``name`` is the display name, or ``None`` if unavailable
Tuple[str, Optional[str]]:
A tuple ``(email, name)`` where ``email`` is the sender email address and ``name`` is the display name, or ``None`` if unavailable.
Examples:
``"John Doe <john@example.com>"`` → ``("john@example.com", "John Doe")``
``"john@example.com"`` → ``("john@example.com", None)``
Notes:
**Responsibilities:**
- Parses the ``From`` header and attempts to extract the sender's email address and an optional human-readable display name
Example:
Typical values:
``"John Doe <john@example.com>"`` -> ``("john@example.com", "John Doe")``
``"john@example.com"`` -> ``("john@example.com", None)``
"""
from_header = headers.get("from")
if not from_header:

View File

@@ -1,6 +1,10 @@
"""
Subject line normalization utilities for Mail Intake.
---
## Summary
This module provides helper functions for normalizing email subject lines
to enable reliable thread-level comparison and grouping.
@@ -12,27 +16,34 @@ import re
_PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE)
"""Regular expression matching common reply/forward subject prefixes."""
"""
Regular expression matching common reply/forward subject prefixes.
"""
def normalize_subject(subject: str) -> str:
"""
Normalize an email subject for thread-level comparison.
Operations:
- Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``
- Repeats prefix stripping to handle stacked prefixes
- Collapses excessive whitespace
- Preserves original casing (no lowercasing)
This function is intentionally conservative and avoids aggressive
transformations that could alter the semantic meaning of the subject.
Args:
subject: Raw subject line from a message header.
subject (str):
Raw subject line from a message header.
Returns:
Normalized subject string suitable for thread grouping.
str:
Normalized subject string suitable for thread grouping.
Notes:
**Responsibilities:**
- Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:``
- Repeats prefix stripping to handle stacked prefixes
- Collapses excessive whitespace
- Preserves original casing (no lowercasing)
**Guarantees:**
- This function is intentionally conservative and avoids aggressive transformations that could alter the semantic meaning of the subject
"""
if not subject:
return ""