docs(mail_intake): add comprehensive docstrings across ingestion, adapters, auth, and parsing layers

- docs(mail_intake/__init__.py): document module-based public API and usage patterns
- docs(mail_intake/ingestion/reader.py): document high-level ingestion orchestration
- docs(mail_intake/adapters/base.py): document adapter contract for mail providers
- docs(mail_intake/adapters/gmail.py): document Gmail adapter implementation and constraints
- docs(mail_intake/auth/base.py): document authentication provider contract
- docs(mail_intake/auth/google.py): document Google OAuth authentication provider
- docs(mail_intake/models/message.py): document canonical email message model
- docs(mail_intake/models/thread.py): document canonical email thread model
- docs(mail_intake/parsers/body.py): document message body extraction logic
- docs(mail_intake/parsers/headers.py): document message header normalization utilities
- docs(mail_intake/parsers/subject.py): document subject normalization utilities
- docs(mail_intake/config.py): document global configuration model
- docs(mail_intake/exceptions.py): document library exception hierarchy
This commit is contained in:
2026-01-09 17:40:25 +05:30
parent dbfef295b8
commit f22af90e98
18 changed files with 751 additions and 71 deletions

View File

@@ -0,0 +1,24 @@
"""
Mail ingestion orchestration for Mail Intake.
This package contains **high-level ingestion components** responsible for
coordinating mail retrieval, parsing, normalization, and model construction.
It represents the **top of the ingestion pipeline** and is intended to be the
primary interaction surface for library consumers.
Components in this package:
- Are provider-agnostic
- Depend only on adapter and parser contracts
- Contain no provider-specific API logic
- Expose read-only ingestion workflows
Consumers are expected to construct a mail adapter and pass it to the
ingestion layer to begin processing messages and threads.
"""
from .reader import MailIntakeReader

# Explicit export list: the reader is the only name re-exported by this package.
__all__ = ["MailIntakeReader"]

View File

@@ -1,3 +1,18 @@
"""
High-level mail ingestion orchestration for Mail Intake.
This module provides the primary, provider-agnostic entry point for
reading and processing mail data.
It coordinates:
- Mail adapter access
- Message and thread iteration
- Header and body parsing
- Normalization and model construction
No provider-specific logic or API semantics are permitted in this layer.
"""
from datetime import datetime
from typing import Iterator, Dict, Any
@@ -14,22 +29,43 @@ class MailIntakeReader:
"""
High-level read-only ingestion interface.
This is the primary entry point users should interact with.
It orchestrates:
- adapter calls
- parsing
- normalization
- model construction
This class is the **primary entry point** for consumers of the Mail
Intake library.
No provider-specific logic exists here.
It orchestrates the full ingestion pipeline:
- Querying the adapter for message references
- Fetching raw provider messages
- Parsing and normalizing message data
- Constructing domain models
This class is intentionally:
- Provider-agnostic
- Stateless beyond iteration scope
- Read-only
"""
def __init__(self, adapter: MailIntakeAdapter) -> None:
    """
    Initialize the mail reader.

    The adapter is stored on the instance; it is the object this reader
    calls to list message references and to fetch raw messages.

    Args:
        adapter: Mail adapter implementation used to retrieve raw
            messages and threads from a mail provider.
    """
    # Stored as-is on a private attribute; no validation or copying happens here.
    self._adapter = adapter
def iter_messages(self, query: str) -> Iterator[MailIntakeMessage]:
"""
Iterate over parsed messages matching a provider query.
Args:
query: Provider-specific query string used to filter messages.
Yields:
Fully parsed and normalized `MailIntakeMessage` instances.
Raises:
MailIntakeParsingError: If a message cannot be parsed.
"""
for ref in self._adapter.iter_message_refs(query):
raw = self._adapter.fetch_message(ref["message_id"])
@@ -39,7 +75,17 @@ class MailIntakeReader:
"""
Iterate over threads constructed from messages matching a query.
Messages are grouped by thread_id and yielded as complete threads.
Messages are grouped by `thread_id` and yielded as complete thread
objects containing all associated messages.
Args:
query: Provider-specific query string used to filter messages.
Returns:
An iterator of `MailIntakeThread` instances.
Raises:
MailIntakeParsingError: If a message cannot be parsed.
"""
threads: Dict[str, MailIntakeThread] = {}
@@ -61,7 +107,17 @@ class MailIntakeReader:
def _parse_message(self, raw_message: Dict[str, Any]) -> MailIntakeMessage:
"""
Parse a raw provider message into a MailIntakeMessage.
Parse a raw provider message into a `MailIntakeMessage`.
Args:
raw_message: Provider-native message payload.
Returns:
A fully populated `MailIntakeMessage` instance.
Raises:
MailIntakeParsingError: If the message payload is missing required
fields or cannot be parsed.
"""
try:
message_id = raw_message["id"]