From 9f9e472adada3b9a83ff22073c57c3b10c1d58f4 Mon Sep 17 00:00:00 2001 From: Vishesh 'ironeagle' Bangotra Date: Sun, 8 Mar 2026 00:29:24 +0530 Subject: [PATCH] google styled doc --- mail_intake/__init__.py | 89 ++++++++++++----------------- mail_intake/adapters/__init__.py | 14 ++++- mail_intake/adapters/base.py | 58 ++++++++++++------- mail_intake/adapters/gmail.py | 66 +++++++++++++-------- mail_intake/auth/__init__.py | 13 +++++ mail_intake/auth/base.py | 43 ++++++++------ mail_intake/auth/google.py | 54 ++++++++++------- mail_intake/config.py | 39 ++++++++----- mail_intake/credentials/__init__.py | 14 +++++ mail_intake/credentials/pickle.py | 54 ++++++++++------- mail_intake/credentials/redis.py | 88 ++++++++++++++-------------- mail_intake/credentials/store.py | 76 +++++++++++++----------- mail_intake/exceptions.py | 31 ++++++---- mail_intake/ingestion/__init__.py | 12 ++++ mail_intake/ingestion/reader.py | 64 ++++++++++++--------- mail_intake/models/__init__.py | 13 +++++ mail_intake/models/message.py | 53 ++++++++++++----- mail_intake/models/thread.py | 49 ++++++++++------ mail_intake/parsers/__init__.py | 15 +++++ mail_intake/parsers/headers.py | 71 +++++++++++++---------- mail_intake/parsers/subject.py | 35 ++++++++---- 21 files changed, 593 insertions(+), 358 deletions(-) diff --git a/mail_intake/__init__.py b/mail_intake/__init__.py index ae09028..e2c00d5 100644 --- a/mail_intake/__init__.py +++ b/mail_intake/__init__.py @@ -1,6 +1,10 @@ """ Mail Intake — provider-agnostic, read-only email ingestion framework. +--- + +## Summary + Mail Intake is a **contract-first library** designed to ingest, parse, and normalize email data from external providers (such as Gmail) into clean, provider-agnostic domain models. @@ -20,9 +24,9 @@ as a first-class module at the package root: The package root acts as a **namespace**, not a facade. Consumers are expected to import functionality explicitly from the appropriate module. 
----------------------------------------------------------------------- -Installation ----------------------------------------------------------------------- +--- + +## Installation Install using pip: @@ -35,9 +39,9 @@ Or with Poetry: Mail Intake is pure Python and has no runtime dependencies beyond those required by the selected provider (for example, Google APIs for Gmail). ----------------------------------------------------------------------- -Basic Usage ----------------------------------------------------------------------- +--- + +## Quick start Minimal Gmail ingestion example (local development): @@ -65,27 +69,41 @@ Iterating over threads: for thread in reader.iter_threads("subject:Interview"): print(thread.normalized_subject, len(thread.messages)) ----------------------------------------------------------------------- -Extensibility Model ----------------------------------------------------------------------- +--- + +## Architecture Mail Intake is designed to be extensible via **public contracts** exposed through its modules: -- Users MAY implement their own mail adapters by subclassing - ``adapters.MailIntakeAdapter`` -- Users MAY implement their own authentication providers by subclassing - ``auth.MailIntakeAuthProvider[T]`` -- Users MAY implement their own credential persistence layers by - implementing ``credentials.CredentialStore[T]`` +- Users MAY implement their own mail adapters by subclassing ``adapters.MailIntakeAdapter`` +- Users MAY implement their own authentication providers by subclassing ``auth.MailIntakeAuthProvider[T]`` +- Users MAY implement their own credential persistence layers by implementing ``credentials.CredentialStore[T]`` Users SHOULD NOT subclass built-in adapter implementations. Built-in adapters (such as Gmail) are reference implementations and may change internally without notice. 
----------------------------------------------------------------------- -Public API Surface ----------------------------------------------------------------------- +**Design Guarantees:** +- Read-only access: no mutation of provider state +- Provider-agnostic domain models +- Explicit configuration and dependency injection +- No implicit global state or environment reads +- Deterministic, testable behavior +- Distributed-safe authentication design + +Mail Intake favors correctness, clarity, and explicitness over convenience +shortcuts. + +**Core Philosophy:** +`Mail Intake` is built as a **contract-first ingestion pipeline**: +1. **Layered Decoupling**: Adapters handle transport, Parsers handle format normalization, and Ingestion orchestrates. +2. **Provider Agnosticism**: Domain models and core logic never depend on provider-specific (e.g., Gmail) API internals. +3. **Stateless Workflows**: The library functions as a read-only pipe, ensuring side-effect-free ingestion. + +--- + +## Public API The supported public API consists of the following top-level modules: @@ -101,40 +119,7 @@ The supported public API consists of the following top-level modules: Classes and functions should be imported explicitly from these modules. No individual symbols are re-exported at the package root. ----------------------------------------------------------------------- -Design Guarantees ----------------------------------------------------------------------- - -- Read-only access: no mutation of provider state -- Provider-agnostic domain models -- Explicit configuration and dependency injection -- No implicit global state or environment reads -- Deterministic, testable behavior -- Distributed-safe authentication design - -Mail Intake favors correctness, clarity, and explicitness over convenience -shortcuts. - -## Core Philosophy - -`Mail Intake` is built as a **contract-first ingestion pipeline**: - -1. 
**Layered Decoupling**: Adapters handle transport, Parsers handle format normalization, and Ingestion orchestrates. -2. **Provider Agnosticism**: Domain models and core logic never depend on provider-specific (e.g., Gmail) API internals. -3. **Stateless Workflows**: The library functions as a read-only pipe, ensuring side-effect-free ingestion. - -## Documentation Design - -Follow these "AI-Native" docstring principles across the codebase: - -### For Humans -- **Namespace Clarity**: Always specify which module a class or function belongs to. -- **Contract Explanations**: Use the `adapters` and `auth` base classes to explain extension requirements. - -### For LLMs -- **Dotted Paths**: Use full dotted paths in docstrings to help agents link concepts across modules. -- **Typed Interfaces**: Provide `.pyi` stubs for every public module to ensure perfect context for AI coding tools. -- **Canonical Exceptions**: Always use `: description` pairs in `Raises` blocks to enable structured error analysis. +--- """ diff --git a/mail_intake/adapters/__init__.py b/mail_intake/adapters/__init__.py index 908d78c..b664925 100644 --- a/mail_intake/adapters/__init__.py +++ b/mail_intake/adapters/__init__.py @@ -1,6 +1,10 @@ """ Mail provider adapter implementations for Mail Intake. +--- + +## Summary + This package contains **adapter-layer implementations** responsible for interfacing with external mail providers and exposing a normalized, provider-agnostic contract to the rest of the system. @@ -15,8 +19,14 @@ Provider-specific logic **must not leak** outside of adapter implementations. All parsings, normalizations, and transformations must be handled by downstream components. -Public adapters exported from this package are considered the supported -integration surface for mail providers. 
+--- + +## Public API + + MailIntakeAdapter + MailIntakeGmailAdapter + +--- """ from .base import MailIntakeAdapter diff --git a/mail_intake/adapters/base.py b/mail_intake/adapters/base.py index f956e75..546415d 100644 --- a/mail_intake/adapters/base.py +++ b/mail_intake/adapters/base.py @@ -1,6 +1,10 @@ """ Mail provider adapter contracts for Mail Intake. +--- + +## Summary + This module defines the **provider-agnostic adapter interface** used for read-only mail ingestion. @@ -17,12 +21,16 @@ class MailIntakeAdapter(ABC): """ Base adapter interface for mail providers. - This interface defines the minimal contract required to: - - Discover messages matching a query - - Retrieve full message payloads - - Retrieve full thread payloads + Notes: + **Guarantees:** - Adapters are intentionally read-only and must not mutate provider state. + - discover messages matching a query + - retrieve full message payloads + - retrieve full thread payloads + + **Lifecycle:** + + - adapters are intentionally read-only and must not mutate provider state """ @abstractmethod @@ -30,21 +38,26 @@ class MailIntakeAdapter(ABC): """ Iterate over lightweight message references matching a query. - Implementations must yield dictionaries containing at least: - - ``message_id``: Provider-specific message identifier - - ``thread_id``: Provider-specific thread identifier - Args: - query: Provider-specific query string used to filter messages. + query (str): + Provider-specific query string used to filter messages. Yields: - Dictionaries containing message and thread identifiers. + Dict[str, str]: + Dictionaries containing message and thread identifiers. - Example yield: - { - "message_id": "...", - "thread_id": "..." - } + Notes: + **Guarantees:** + + - Implementations must yield dictionaries containing at least ``message_id`` and ``thread_id`` + + Example: + Typical yield: + + { + "message_id": "...", + "thread_id": "..." 
+ } """ raise NotImplementedError @@ -54,11 +67,12 @@ class MailIntakeAdapter(ABC): Fetch a full raw message by message identifier. Args: - message_id: Provider-specific message identifier. + message_id (str): + Provider-specific message identifier. Returns: - Provider-native message payload - (e.g., Gmail message JSON structure). + Dict[str, Any]: + Provider-native message payload (e.g., Gmail message JSON structure). """ raise NotImplementedError @@ -68,9 +82,11 @@ class MailIntakeAdapter(ABC): Fetch a full raw thread by thread identifier. Args: - thread_id: Provider-specific thread identifier. + thread_id (str): + Provider-specific thread identifier. Returns: - Provider-native thread payload. + Dict[str, Any]: + Provider-native thread payload. """ raise NotImplementedError diff --git a/mail_intake/adapters/gmail.py b/mail_intake/adapters/gmail.py index 969dc86..05ca21e 100644 --- a/mail_intake/adapters/gmail.py +++ b/mail_intake/adapters/gmail.py @@ -1,6 +1,10 @@ """ Gmail adapter implementation for Mail Intake. +--- + +## Summary + This module provides a **Gmail-specific implementation** of the `MailIntakeAdapter` contract. @@ -30,15 +34,18 @@ class MailIntakeGmailAdapter(MailIntakeAdapter): Gmail REST API. It translates the generic mail intake contract into Gmail-specific API calls. 
- This class is the ONLY place where: - googleapiclient is imported - Gmail REST semantics are known - .execute() is called + Notes: + **Responsibilities:** - Design constraints: - - Must remain thin and imperative - - Must not perform parsing or interpretation - - Must not expose Gmail-specific types beyond this class + - This class is the ONLY place where googleapiclient is imported + - This class is the ONLY place where Gmail REST semantics are known + - This class is the ONLY place where .execute() is called + + **Constraints:** + + - Must remain thin and imperative + - Must not perform parsing or interpretation + - Must not expose Gmail-specific types beyond this class """ def __init__( @@ -50,9 +57,11 @@ class MailIntakeGmailAdapter(MailIntakeAdapter): Initialize the Gmail adapter. Args: - auth_provider: Authentication provider capable of supplying - valid Gmail API credentials. - user_id: Gmail user identifier. Defaults to `"me"`. + auth_provider (MailIntakeAuthProvider): + Authentication provider capable of supplying valid Gmail API credentials. + + user_id (str): + Gmail user identifier. Defaults to `"me"`. """ self._auth_provider = auth_provider self._user_id = user_id @@ -64,10 +73,12 @@ class MailIntakeGmailAdapter(MailIntakeAdapter): Lazily initialize and return the Gmail API service client. Returns: - Initialized Gmail API service instance. + Any: + Initialized Gmail API service instance. Raises: - MailIntakeAdapterError: If the Gmail service cannot be initialized. + MailIntakeAdapterError: + If the Gmail service cannot be initialized. """ if self._service is None: try: @@ -84,15 +95,16 @@ class MailIntakeGmailAdapter(MailIntakeAdapter): Iterate over message references matching the query. Args: - query: Gmail search query string. + query (str): + Gmail search query string. Yields: - Dictionaries containing: - - ``message_id``: Gmail message ID - - ``thread_id``: Gmail thread ID + Dict[str, str]: + Dictionaries containing ``message_id`` and ``thread_id``. 
Raises: - MailIntakeAdapterError: If the Gmail API returns an error. + MailIntakeAdapterError: + If the Gmail API returns an error. """ try: request = ( @@ -126,13 +138,16 @@ class MailIntakeGmailAdapter(MailIntakeAdapter): Fetch a full Gmail message by message ID. Args: - message_id: Gmail message identifier. + message_id (str): + Gmail message identifier. Returns: - Provider-native Gmail message payload. + Dict[str, Any]: + Provider-native Gmail message payload. Raises: - MailIntakeAdapterError: If the Gmail API returns an error. + MailIntakeAdapterError: + If the Gmail API returns an error. """ try: return ( @@ -151,13 +166,16 @@ class MailIntakeGmailAdapter(MailIntakeAdapter): Fetch a full Gmail thread by thread ID. Args: - thread_id: Gmail thread identifier. + thread_id (str): + Gmail thread identifier. Returns: - Provider-native Gmail thread payload. + Dict[str, Any]: + Provider-native Gmail thread payload. Raises: - MailIntakeAdapterError: If the Gmail API returns an error. + MailIntakeAdapterError: + If the Gmail API returns an error. """ try: return ( diff --git a/mail_intake/auth/__init__.py b/mail_intake/auth/__init__.py index 342f482..75b2187 100644 --- a/mail_intake/auth/__init__.py +++ b/mail_intake/auth/__init__.py @@ -1,6 +1,10 @@ """ Authentication provider implementations for Mail Intake. +--- + +## Summary + This package defines the **authentication layer** used by mail adapters to obtain provider-specific credentials. @@ -15,6 +19,15 @@ Authentication providers: Consumers should depend on the abstract interface and use concrete implementations only where explicitly required. + +--- + +## Public API + + MailIntakeAuthProvider + MailIntakeGoogleAuth + +--- """ from .base import MailIntakeAuthProvider diff --git a/mail_intake/auth/base.py b/mail_intake/auth/base.py index f48a500..91c3a22 100644 --- a/mail_intake/auth/base.py +++ b/mail_intake/auth/base.py @@ -1,6 +1,10 @@ """ Authentication provider contracts for Mail Intake. 
+--- + +## Summary + This module defines the **authentication abstraction layer** used by mail adapters to obtain provider-specific credentials. @@ -23,15 +27,18 @@ class MailIntakeAuthProvider(ABC, Generic[T]): providers and mail adapters by requiring providers to explicitly declare the type of credentials they return. - Authentication providers encapsulate all logic required to: - - Acquire credentials from an external provider - - Refresh or revalidate credentials as needed - - Handle authentication-specific failure modes - - Coordinate with credential persistence layers where applicable + Notes: + **Responsibilities:** - Mail adapters must treat returned credentials as opaque and - provider-specific, relying only on the declared credential type - expected by the adapter. + - Acquire credentials from an external provider + - Refresh or revalidate credentials as needed + - Handle authentication-specific failure modes + - Coordinate with credential persistence layers where applicable + + **Constraints:** + + - Mail adapters must treat returned credentials as opaque and provider-specific + - Mail adapters rely only on the declared credential type expected by the adapter """ @abstractmethod @@ -39,21 +46,21 @@ class MailIntakeAuthProvider(ABC, Generic[T]): """ Retrieve valid, provider-specific credentials. - This method is synchronous by design and represents the sole - entry point through which adapters obtain authentication - material. - - Implementations must either return credentials of the declared - type ``T`` that are valid at the time of return or raise an - authentication-specific exception. - Returns: - Credentials of type ``T`` suitable for immediate use by the - corresponding mail adapter. + T: + Credentials of type ``T`` suitable for immediate use by the + corresponding mail adapter. Raises: Exception: An authentication-specific exception indicating that credentials could not be obtained or validated. 
+ + Notes: + **Guarantees:** + + - This method is synchronous by design + - Represents the sole entry point through which adapters obtain authentication material + - Implementations must either return credentials of the declared type ``T`` that are valid at the time of return or raise an exception """ raise NotImplementedError diff --git a/mail_intake/auth/google.py b/mail_intake/auth/google.py index ea05aee..f3d03d2 100644 --- a/mail_intake/auth/google.py +++ b/mail_intake/auth/google.py @@ -1,6 +1,10 @@ """ Google authentication provider implementation for Mail Intake. +--- + +## Summary + This module provides a **Google OAuth–based authentication provider** used primarily for Gmail access. @@ -33,13 +37,17 @@ class MailIntakeGoogleAuth(MailIntakeAuthProvider): This provider implements the `MailIntakeAuthProvider` interface using Google's OAuth 2.0 flow and credential management libraries. - Responsibilities: - - Load cached credentials from a credential store when available - - Refresh expired credentials when possible - - Initiate an interactive OAuth flow only when required - - Persist refreshed or newly obtained credentials via the store + Notes: + **Responsibilities:** - This class is synchronous by design and maintains a minimal internal state. + - Load cached credentials from a credential store when available + - Refresh expired credentials when possible + - Initiate an interactive OAuth flow only when required + - Persist refreshed or newly obtained credentials via the store + + **Guarantees:** + + - This class is synchronous by design and maintains a minimal internal state """ def __init__( @@ -52,15 +60,13 @@ class MailIntakeGoogleAuth(MailIntakeAuthProvider): Initialize the Google authentication provider. Args: - credentials_path: - Path to the Google OAuth client secrets file used to - initiate the OAuth 2.0 flow. + credentials_path (str): + Path to the Google OAuth client secrets file used to initiate the OAuth 2.0 flow. 
- store: - Credential store responsible for persisting and - retrieving Google OAuth credentials. + store (CredentialStore[Credentials]): + Credential store responsible for persisting and retrieving Google OAuth credentials. - scopes: + scopes (Sequence[str]): OAuth scopes required for Gmail access. """ self.credentials_path = credentials_path @@ -71,19 +77,23 @@ class MailIntakeGoogleAuth(MailIntakeAuthProvider): """ Retrieve valid Google OAuth credentials. - This method attempts to: - 1. Load cached credentials from the configured credential store - 2. Refresh expired credentials when possible - 3. Perform an interactive OAuth login as a fallback - 4. Persist valid credentials for future use - Returns: - A ``google.oauth2.credentials.Credentials`` instance suitable - for use with Google API clients. + Credentials: + A ``google.oauth2.credentials.Credentials`` instance suitable + for use with Google API clients. Raises: - MailIntakeAuthError: If credentials cannot be loaded, refreshed, + MailIntakeAuthError: + If credentials cannot be loaded, refreshed, or obtained via interactive authentication. + + Notes: + **Lifecycle:** + + 1. Load cached credentials from the configured credential store + 2. Refresh expired credentials when possible + 3. Perform an interactive OAuth login as a fallback + 4. Persist valid credentials for future use """ creds = self.store.load() diff --git a/mail_intake/config.py b/mail_intake/config.py index 6898ceb..ca10d5d 100644 --- a/mail_intake/config.py +++ b/mail_intake/config.py @@ -1,6 +1,10 @@ """ Global configuration models for Mail Intake. +--- + +## Summary + This module defines the **top-level configuration object** used to control mail ingestion behavior across adapters, authentication providers, and ingestion workflows. @@ -18,28 +22,37 @@ class MailIntakeConfig: """ Global configuration for mail-intake. - This configuration is intentionally explicit and immutable. - No implicit environment reads or global state. 
+ Notes: + **Guarantees:** - Design principles: - - Immutable once constructed - - Explicit configuration over implicit defaults - - No direct environment or filesystem access - - This model is safe to pass across layers and suitable for serialization. + - This configuration is intentionally explicit and immutable + - No implicit environment reads or global state + - Explicit configuration over implicit defaults + - No direct environment or filesystem access + - This model is safe to pass across layers and suitable for serialization """ provider: str = "gmail" - """Identifier of the mail provider to use (e.g., ``"gmail"``).""" + """ + Identifier of the mail provider to use (e.g., ``"gmail"``). + """ user_id: str = "me" - """Provider-specific user identifier. Defaults to the authenticated user.""" + """ + Provider-specific user identifier. Defaults to the authenticated user. + """ readonly: bool = True - """Whether ingestion should operate in read-only mode.""" + """ + Whether ingestion should operate in read-only mode. + """ credentials_path: Optional[str] = None - """Optional path to provider credentials configuration.""" + """ + Optional path to provider credentials configuration. + """ token_path: Optional[str] = None - """Optional path to persisted authentication tokens.""" + """ + Optional path to persisted authentication tokens. + """ diff --git a/mail_intake/credentials/__init__.py b/mail_intake/credentials/__init__.py index 934923b..6589a5c 100644 --- a/mail_intake/credentials/__init__.py +++ b/mail_intake/credentials/__init__.py @@ -1,6 +1,10 @@ """ Credential persistence interfaces and implementations for Mail Intake. +--- + +## Summary + This package defines the abstractions and concrete implementations used to persist authentication credentials across Mail Intake components. @@ -16,6 +20,16 @@ The package provides: Credential lifecycle management, interpretation, and security policy decisions remain the responsibility of authentication providers. 
+ +--- + +## Public API + + CredentialStore + PickleCredentialStore + RedisCredentialStore + +--- """ from mail_intake.credentials.store import CredentialStore diff --git a/mail_intake/credentials/pickle.py b/mail_intake/credentials/pickle.py index fb8ed11..faedc64 100644 --- a/mail_intake/credentials/pickle.py +++ b/mail_intake/credentials/pickle.py @@ -1,6 +1,10 @@ """ Local filesystem–based credential persistence for Mail Intake. +--- + +## Summary + This module provides a file-backed implementation of the ``CredentialStore`` abstraction using Python's ``pickle`` module. @@ -29,13 +33,16 @@ class PickleCredentialStore(CredentialStore[T]): filesystem. It is a simple implementation intended primarily for development, testing, and single-process execution contexts. - This implementation: - - Stores credentials on the local filesystem - - Uses pickle for serialization and deserialization - - Does not provide encryption, locking, or concurrency guarantees + Notes: + **Guarantees:** - Credential lifecycle management, validation, and refresh logic are - explicitly out of scope for this class. + - Stores credentials on the local filesystem + - Uses pickle for serialization and deserialization + - Does not provide encryption, locking, or concurrency guarantees + + **Constraints:** + + - Credential lifecycle management, validation, and refresh logic are explicitly out of scope for this class """ def __init__(self, path: str): @@ -43,7 +50,7 @@ class PickleCredentialStore(CredentialStore[T]): Initialize a pickle-backed credential store. Args: - path: + path (str): Filesystem path where credentials will be stored. The file will be created or overwritten as needed. """ @@ -53,15 +60,16 @@ class PickleCredentialStore(CredentialStore[T]): """ Load credentials from the local filesystem. - If the credential file does not exist or cannot be successfully - deserialized, this method returns ``None``. 
- - The store does not attempt to validate or interpret the returned - credentials. - Returns: - An instance of type ``T`` if credentials are present and - successfully deserialized; otherwise ``None``. + Optional[T]: + An instance of type ``T`` if credentials are present and + successfully deserialized; otherwise ``None``. + + Notes: + **Guarantees:** + + - If the credential file does not exist or cannot be successfully deserialized, this method returns ``None`` + - The store does not attempt to validate or interpret the returned credentials """ try: with open(self.path, "rb") as fh: @@ -73,12 +81,14 @@ class PickleCredentialStore(CredentialStore[T]): """ Persist credentials to the local filesystem. - Any previously stored credentials at the configured path are - overwritten. - Args: - credentials: + credentials (T): The credential object to persist. + + Notes: + **Responsibilities:** + + - Any previously stored credentials at the configured path are overwritten """ with open(self.path, "wb") as fh: pickle.dump(credentials, fh) @@ -87,8 +97,10 @@ class PickleCredentialStore(CredentialStore[T]): """ Remove persisted credentials from the local filesystem. - This method deletes the credential file if it exists and should - be treated as an idempotent operation. + Notes: + **Lifecycle:** + + - This method deletes the credential file if it exists and should be treated as an idempotent operation """ import os diff --git a/mail_intake/credentials/redis.py b/mail_intake/credentials/redis.py index 2b112d2..dace35b 100644 --- a/mail_intake/credentials/redis.py +++ b/mail_intake/credentials/redis.py @@ -1,6 +1,10 @@ """ Redis-backed credential persistence for Mail Intake. +--- + +## Summary + This module provides a Redis-based implementation of the ``CredentialStore`` abstraction, enabling credential persistence across distributed and horizontally scaled deployments. 
@@ -37,14 +41,16 @@ class RedisCredentialStore(CredentialStore[T]): distributed and horizontally scaled deployments where credentials must be shared across multiple processes or nodes. - The store is intentionally generic and delegates all serialization - concerns to caller-provided functions. This avoids unsafe mechanisms - such as pickle and allows credential formats to be explicitly - controlled and audited. + Notes: + **Responsibilities:** - This class is responsible only for persistence and retrieval. - It does not interpret, validate, refresh, or otherwise manage - the lifecycle of the credentials being stored. + - This class is responsible only for persistence and retrieval + - It does not interpret, validate, refresh, or otherwise manage the lifecycle of the credentials being stored + + **Guarantees:** + + - The store is intentionally generic and delegates all serialization concerns to caller-provided functions + - This avoids unsafe mechanisms such as pickle and allows credential formats to be explicitly controlled and audited """ def __init__( @@ -59,31 +65,20 @@ class RedisCredentialStore(CredentialStore[T]): Initialize a Redis-backed credential store. Args: - redis_client: - An initialized Redis client instance (for example, - ``redis.Redis`` or a compatible interface) used to - communicate with the Redis server. + redis_client (Any): + An initialized Redis client instance (for example, ``redis.Redis`` or a compatible interface) used to communicate with the Redis server. - key: - The Redis key under which credentials are stored. - Callers are responsible for applying appropriate - namespacing to avoid collisions. + key (str): + The Redis key under which credentials are stored. Callers are responsible for applying appropriate namespacing to avoid collisions. - serialize: - A callable that converts a credential object of type - ``T`` into a ``bytes`` representation suitable for - storage in Redis. 
+ serialize (Callable[[T], bytes]): + A callable that converts a credential object of type ``T`` into a ``bytes`` representation suitable for storage in Redis. - deserialize: - A callable that converts a ``bytes`` payload retrieved - from Redis back into a credential object of type ``T``. + deserialize (Callable[[bytes], T]): + A callable that converts a ``bytes`` payload retrieved from Redis back into a credential object of type ``T``. - ttl_seconds: - Optional time-to-live (TTL) for the stored credentials, - expressed in seconds. When provided, Redis will - automatically expire the stored credentials after the - specified duration. If ``None``, credentials are stored - without an expiration. + ttl_seconds (Optional[int]): + Optional time-to-live (TTL) for the stored credentials, expressed in seconds. When provided, Redis will automatically expire the stored credentials after the specified duration. If ``None``, credentials are stored without an expiration. """ self.redis = redis_client self.key = key @@ -95,16 +90,16 @@ class RedisCredentialStore(CredentialStore[T]): """ Load credentials from Redis. - If no value exists for the configured key, or if the stored - payload cannot be successfully deserialized, this method - returns ``None``. - - The store does not attempt to validate the returned credentials - or determine whether they are expired or otherwise usable. - Returns: - An instance of type ``T`` if credentials are present and - successfully deserialized; otherwise ``None``. + Optional[T]: + An instance of type ``T`` if credentials are present and + successfully deserialized; otherwise ``None``. 
+ + Notes: + **Guarantees:** + + - If no value exists for the configured key, or if the stored payload cannot be successfully deserialized, this method returns ``None`` + - The store does not attempt to validate the returned credentials or determine whether they are expired or otherwise usable """ raw = self.redis.get(self.key) if not raw: @@ -118,13 +113,15 @@ class RedisCredentialStore(CredentialStore[T]): """ Persist credentials to Redis. - Any previously stored credentials under the same key are - overwritten. If a TTL is configured, the credentials will - expire automatically after the specified duration. - Args: - credentials: + credentials (T): The credential object to persist. + + Notes: + **Responsibilities:** + + - Any previously stored credentials under the same key are overwritten + - If a TTL is configured, the credentials will expire automatically after the specified duration """ payload = self.serialize(credentials) if self.ttl_seconds: @@ -136,7 +133,10 @@ class RedisCredentialStore(CredentialStore[T]): """ Remove stored credentials from Redis. - This operation deletes the configured Redis key if it exists. - Implementations should treat this method as idempotent. + Notes: + **Lifecycle:** + + - This operation deletes the configured Redis key if it exists + - Implementations should treat this method as idempotent """ self.redis.delete(self.key) diff --git a/mail_intake/credentials/store.py b/mail_intake/credentials/store.py index 4ba5a83..686b9fc 100644 --- a/mail_intake/credentials/store.py +++ b/mail_intake/credentials/store.py @@ -1,6 +1,10 @@ """ Credential persistence abstractions for Mail Intake. +--- + +## Summary + This module defines the generic persistence contract used to store and retrieve authentication credentials across Mail Intake components. @@ -29,16 +33,18 @@ class CredentialStore(ABC, Generic[T]): Abstract base class defining a generic persistence interface for authentication credentials. 
- This interface separates *credential lifecycle management* from - *credential storage mechanics*. Implementations are responsible - only for persistence concerns, while authentication providers - retain full control over credential creation, validation, refresh, - and revocation logic. + Notes: + **Responsibilities:** - The store is intentionally agnostic to: - - The concrete credential type being stored - - The serialization format used to persist credentials - - The underlying storage backend or durability guarantees + - Separate credential lifecycle management from credential storage mechanics + - Implementations handle persistence only; authentication providers retain full control over credential creation, validation, refresh, and revocation + + **Constraints:** + + - The store is intentionally agnostic to: + - The concrete credential type being stored + - The serialization format used to persist credentials + - The underlying storage backend or durability guarantees """ @abstractmethod @@ -46,16 +52,16 @@ class CredentialStore(ABC, Generic[T]): """ Load previously persisted credentials. - Implementations should return ``None`` when no credentials are - present or when stored credentials cannot be successfully - decoded or deserialized. - - The store must not attempt to validate, refresh, or otherwise - interpret the returned credentials. - Returns: - An instance of type ``T`` if credentials are available and - loadable; otherwise ``None``. + Optional[T]: + An instance of type ``T`` if credentials are available and + loadable; otherwise ``None``. + + Notes: + **Guarantees:** + + - Implementations should return ``None`` when no credentials are present or when stored credentials cannot be successfully decoded or deserialized + - The store must not attempt to validate, refresh, or otherwise interpret the returned credentials """ @abstractmethod @@ -63,18 +69,20 @@ class CredentialStore(ABC, Generic[T]): """ Persist credentials to the underlying storage backend. 
- This method is invoked when credentials are newly obtained or - have been refreshed and are known to be valid at the time of - persistence. - - Implementations are responsible for: - - Ensuring durability appropriate to the deployment context - - Applying encryption or access controls where required - - Overwriting any previously stored credentials - Args: - credentials: + credentials (T): The credential object to persist. + + Notes: + **Lifecycle:** + + - This method is invoked when credentials are newly obtained or have been refreshed and are known to be valid at the time of persistence + + **Responsibilities:** + + - Ensuring durability appropriate to the deployment context + - Applying encryption or access controls where required + - Overwriting any previously stored credentials """ @abstractmethod @@ -82,9 +90,13 @@ class CredentialStore(ABC, Generic[T]): """ Remove any persisted credentials from the store. - This method is called when credentials are known to be invalid, - revoked, corrupted, or otherwise unusable, and must ensure that - no stale authentication material remains accessible. + Notes: + **Lifecycle:** - Implementations should treat this operation as idempotent. + - This method is called when credentials are known to be invalid, revoked, corrupted, or otherwise unusable + - Must ensure that no stale authentication material remains accessible + + **Guarantees:** + + - Implementations should treat this operation as idempotent """ diff --git a/mail_intake/exceptions.py b/mail_intake/exceptions.py index 5bba3fb..6d08b55 100644 --- a/mail_intake/exceptions.py +++ b/mail_intake/exceptions.py @@ -1,6 +1,10 @@ """ Exception hierarchy for Mail Intake. +--- + +## Summary + This module defines the **canonical exception types** used throughout the Mail Intake library. @@ -14,11 +18,12 @@ class MailIntakeError(Exception): """ Base exception for all Mail Intake errors. - This is the root of the Mail Intake exception hierarchy. 
- All errors raised by the library must derive from this class. + Notes: + **Guarantees:** - Consumers should generally catch this type when handling - library-level failures. + - This is the root of the Mail Intake exception hierarchy + - All errors raised by the library must derive from this class + - Consumers should generally catch this type when handling library-level failures """ @@ -26,8 +31,10 @@ class MailIntakeAuthError(MailIntakeError): """ Authentication and credential-related failures. - Raised when authentication providers are unable to acquire, - refresh, or persist valid credentials. + Notes: + **Lifecycle:** + + - Raised when authentication providers are unable to acquire, refresh, or persist valid credentials """ @@ -35,8 +42,10 @@ class MailIntakeAdapterError(MailIntakeError): """ Errors raised by mail provider adapters. - Raised when a provider adapter encounters API errors, - transport failures, or invalid provider responses. + Notes: + **Lifecycle:** + + - Raised when a provider adapter encounters API errors, transport failures, or invalid provider responses """ @@ -44,6 +53,8 @@ class MailIntakeParsingError(MailIntakeError): """ Errors encountered while parsing message content. - Raised when raw provider payloads cannot be interpreted - or normalized into internal domain models. + Notes: + **Lifecycle:** + + - Raised when raw provider payloads cannot be interpreted or normalized into internal domain models """ diff --git a/mail_intake/ingestion/__init__.py b/mail_intake/ingestion/__init__.py index 54b10ac..a3cfddb 100644 --- a/mail_intake/ingestion/__init__.py +++ b/mail_intake/ingestion/__init__.py @@ -1,6 +1,10 @@ """ Mail ingestion orchestration for Mail Intake. +--- + +## Summary + This package contains **high-level ingestion components** responsible for coordinating mail retrieval, parsing, normalization, and model construction. 
@@ -15,6 +19,14 @@ Components in this package: Consumers are expected to construct a mail adapter and pass it to the ingestion layer to begin processing messages and threads. + +--- + +## Public API + + MailIntakeReader + +--- """ from .reader import MailIntakeReader diff --git a/mail_intake/ingestion/reader.py b/mail_intake/ingestion/reader.py index 6bec5b8..5df3c25 100644 --- a/mail_intake/ingestion/reader.py +++ b/mail_intake/ingestion/reader.py @@ -1,6 +1,10 @@ """ High-level mail ingestion orchestration for Mail Intake. +--- + +## Summary + This module provides the primary, provider-agnostic entry point for reading and processing mail data. @@ -29,19 +33,15 @@ class MailIntakeReader: """ High-level read-only ingestion interface. - This class is the **primary entry point** for consumers of the Mail - Intake library. + Notes: + **Responsibilities:** - It orchestrates the full ingestion pipeline: - - Querying the adapter for message references - - Fetching raw provider messages - - Parsing and normalizing message data - - Constructing domain models + - This class is the primary entry point for consumers of the Mail Intake library + - It orchestrates the full ingestion pipeline: querying the adapter for message references, fetching raw provider messages, parsing and normalizing message data, and constructing domain models - This class is intentionally: - - Provider-agnostic - - Stateless beyond iteration scope - - Read-only + **Constraints:** + + - This class is intentionally provider-agnostic, stateless beyond iteration scope, and read-only """ def __init__(self, adapter: MailIntakeAdapter): @@ -49,8 +49,8 @@ class MailIntakeReader: Initialize the mail reader. Args: - adapter: Mail adapter implementation used to retrieve raw - messages and threads from a mail provider. + adapter (MailIntakeAdapter): + Mail adapter implementation used to retrieve raw messages and threads from a mail provider. 
""" self._adapter = adapter @@ -59,13 +59,16 @@ class MailIntakeReader: Iterate over parsed messages matching a provider query. Args: - query: Provider-specific query string used to filter messages. + query (str): + Provider-specific query string used to filter messages. Yields: - Fully parsed and normalized `MailIntakeMessage` instances. + MailIntakeMessage: + Fully parsed and normalized `MailIntakeMessage` instances. Raises: - MailIntakeParsingError: If a message cannot be parsed. + MailIntakeParsingError: + If a message cannot be parsed. """ for ref in self._adapter.iter_message_refs(query): raw = self._adapter.fetch_message(ref["message_id"]) @@ -75,17 +78,22 @@ class MailIntakeReader: """ Iterate over threads constructed from messages matching a query. - Messages are grouped by `thread_id` and yielded as complete thread - objects containing all associated messages. - Args: - query: Provider-specific query string used to filter messages. + query (str): + Provider-specific query string used to filter messages. - Returns: - An iterator of `MailIntakeThread` instances. + Yields: + MailIntakeThread: + An iterator of `MailIntakeThread` instances. Raises: - MailIntakeParsingError: If a message cannot be parsed. + MailIntakeParsingError: + If a message cannot be parsed. + + Notes: + **Guarantees:** + + - Messages are grouped by `thread_id` and yielded as complete thread objects containing all associated messages """ threads: Dict[str, MailIntakeThread] = {} @@ -110,14 +118,16 @@ class MailIntakeReader: Parse a raw provider message into a `MailIntakeMessage`. Args: - raw_message: Provider-native message payload. + raw_message (Dict[str, Any]): + Provider-native message payload. Returns: - A fully populated `MailIntakeMessage` instance. + MailIntakeMessage: + A fully populated `MailIntakeMessage` instance. Raises: - MailIntakeParsingError: If the message payload is missing required - fields or cannot be parsed. 
+ MailIntakeParsingError: + If the message payload is missing required fields or cannot be parsed. """ try: message_id = raw_message["id"] diff --git a/mail_intake/models/__init__.py b/mail_intake/models/__init__.py index 47d17c3..293638c 100644 --- a/mail_intake/models/__init__.py +++ b/mail_intake/models/__init__.py @@ -1,6 +1,10 @@ """ Domain models for Mail Intake. +--- + +## Summary + This package defines the **canonical, provider-agnostic data models** used throughout the Mail Intake ingestion pipeline. @@ -11,6 +15,15 @@ Models in this package: - Serve as stable inputs for downstream processing and analysis These models form the core internal data contract of the library. + +--- + +## Public API + + MailIntakeMessage + MailIntakeThread + +--- """ from .message import MailIntakeMessage diff --git a/mail_intake/models/message.py b/mail_intake/models/message.py index 115b4eb..0784deb 100644 --- a/mail_intake/models/message.py +++ b/mail_intake/models/message.py @@ -1,6 +1,10 @@ """ Message domain models for Mail Intake. +--- + +## Summary + This module defines the **canonical, provider-agnostic representation** of an individual email message as used internally by the Mail Intake ingestion pipeline. @@ -19,37 +23,58 @@ class MailIntakeMessage: """ Canonical internal representation of a single email message. - This model represents a fully parsed and normalized email message. - It is intentionally provider-agnostic and suitable for persistence, - indexing, and downstream processing. + Notes: + **Guarantees:** - No provider-specific identifiers, payloads, or API semantics - should appear in this model. 
+ - This model represents a fully parsed and normalized email message + - It is intentionally provider-agnostic and suitable for persistence, indexing, and downstream processing + + **Constraints:** + + - No provider-specific identifiers, payloads, or API semantics should appear in this model """ message_id: str - """Provider-specific message identifier.""" + """ + Provider-specific message identifier. + """ thread_id: str - """Provider-specific thread identifier to which this message belongs.""" + """ + Provider-specific thread identifier to which this message belongs. + """ timestamp: datetime - """Message timestamp as a timezone-naive UTC datetime.""" + """ + Message timestamp as a timezone-naive UTC datetime. + """ from_email: str - """Sender email address.""" + """ + Sender email address. + """ from_name: Optional[str] - """Optional human-readable sender name.""" + """ + Optional human-readable sender name. + """ subject: str - """Raw subject line of the message.""" + """ + Raw subject line of the message. + """ body_text: str - """Extracted plain-text body content of the message.""" + """ + Extracted plain-text body content of the message. + """ snippet: str - """Short provider-supplied preview snippet of the message.""" + """ + Short provider-supplied preview snippet of the message. + """ raw_headers: Dict[str, str] - """Normalized mapping of message headers (header name → value).""" + """ + Normalized mapping of message headers (header name → value). + """ diff --git a/mail_intake/models/thread.py b/mail_intake/models/thread.py index 6cd85c3..85ae65e 100644 --- a/mail_intake/models/thread.py +++ b/mail_intake/models/thread.py @@ -1,6 +1,10 @@ """ Thread domain models for Mail Intake. +--- + +## Summary + This module defines the **canonical, provider-agnostic representation** of an email thread as used internally by the Mail Intake ingestion pipeline. @@ -20,40 +24,53 @@ class MailIntakeThread: """ Canonical internal representation of an email thread. 
- A thread groups multiple related messages under a single subject - and participant set. It is designed to support reasoning over - conversational context such as job applications, interviews, - follow-ups, and ongoing discussions. + Notes: + **Guarantees:** - This model is provider-agnostic and safe to persist. + - A thread groups multiple related messages under a single subject and participant set + - It is designed to support reasoning over conversational context such as job applications, interviews, follow-ups, and ongoing discussions + - This model is provider-agnostic and safe to persist """ thread_id: str - """Provider-specific thread identifier.""" + """ + Provider-specific thread identifier. + """ normalized_subject: str - """Normalized subject line used to group related messages.""" + """ + Normalized subject line used to group related messages. + """ participants: Set[str] = field(default_factory=set) - """Set of unique participant email addresses observed in the thread.""" + """ + Set of unique participant email addresses observed in the thread. + """ messages: List[MailIntakeMessage] = field(default_factory=list) - """Ordered list of messages belonging to this thread.""" + """ + Ordered list of messages belonging to this thread. + """ last_activity_at: datetime | None = None - """Timestamp of the most recent message in the thread.""" + """ + Timestamp of the most recent message in the thread. + """ def add_message(self, message: MailIntakeMessage) -> None: """ Add a message to the thread and update derived fields. - This method: - - Appends the message to the thread - - Tracks unique participants - - Updates the last activity timestamp - Args: - message: Parsed mail message to add to the thread. + message (MailIntakeMessage): + Parsed mail message to add to the thread. 
+ + Notes: + **Responsibilities:** + + - Appends the message to the thread + - Tracks unique participants + - Updates the last activity timestamp """ self.messages.append(message) diff --git a/mail_intake/parsers/__init__.py b/mail_intake/parsers/__init__.py index f2d4855..d8c10c1 100644 --- a/mail_intake/parsers/__init__.py +++ b/mail_intake/parsers/__init__.py @@ -1,6 +1,10 @@ """ Message parsing utilities for Mail Intake. +--- + +## Summary + This package contains **provider-aware but adapter-agnostic parsing helpers** used to extract and normalize structured information from raw mail payloads. @@ -16,6 +20,17 @@ This package does not: Parsing functions are designed to be composable and are orchestrated by the ingestion layer. + +--- + +## Public API + + extract_body + parse_headers + extract_sender + normalize_subject + +--- """ from .body import extract_body diff --git a/mail_intake/parsers/headers.py b/mail_intake/parsers/headers.py index 74fdc87..66db6ac 100644 --- a/mail_intake/parsers/headers.py +++ b/mail_intake/parsers/headers.py @@ -1,6 +1,10 @@ """ Message header parsing utilities for Mail Intake. +--- + +## Summary + This module provides helper functions for normalizing and extracting useful information from provider-native message headers. @@ -15,29 +19,34 @@ def parse_headers(raw_headers: List[Dict[str, str]]) -> Dict[str, str]: """ Convert a list of Gmail-style headers into a normalized dict. - Provider payloads (such as Gmail) typically represent headers as a list - of name/value mappings. This function normalizes them into a - case-insensitive dictionary keyed by lowercase header names. - Args: - raw_headers: List of header dictionaries, each containing - ``name`` and ``value`` keys. + raw_headers (List[Dict[str, str]]): + List of header dictionaries, each containing ``name`` and ``value`` keys. Returns: - Dictionary mapping lowercase header names to stripped values. 
+ Dict[str, str]: + Dictionary mapping lowercase header names to stripped values. + + Notes: + **Guarantees:** + + - Provider payloads (such as Gmail) typically represent headers as a list of name/value mappings + - This function normalizes them into a case-insensitive dictionary keyed by lowercase header names Example: - Input: - [ - {"name": "From", "value": "John Doe <john@example.com>"}, - {"name": "Subject", "value": "Re: Interview Update"}, - ] - - Output: - { - "from": "John Doe <john@example.com>", - "subject": "Re: Interview Update", - } + Typical usage: + + Input: + [ + {"name": "From", "value": "John Doe <john@example.com>"}, + {"name": "Subject", "value": "Re: Interview Update"}, + ] + + Output: + { + "from": "John Doe <john@example.com>", + "subject": "Re: Interview Update", + } """ headers: Dict[str, str] = {} @@ -57,22 +66,24 @@ def extract_sender(headers: Dict[str, str]) -> Tuple[str, Optional[str]]: """ Extract sender email and optional display name from headers. - This function parses the ``From`` header and attempts to extract: - - Sender email address - - Optional human-readable display name - Args: - headers: Normalized header dictionary as returned by - :func:`parse_headers`. + headers (Dict[str, str]): + Normalized header dictionary as returned by :func:`parse_headers`. Returns: - A tuple ``(email, name)`` where: - - ``email`` is the sender email address - - ``name`` is the display name, or ``None`` if unavailable + Tuple[str, Optional[str]]: + A tuple ``(email, name)`` where ``email`` is the sender email address and ``name`` is the display name, or ``None`` if unavailable. 
- Examples: - ``"John Doe <john@example.com>"`` → ``("john@example.com", "John Doe")`` - ``"john@example.com"`` → ``("john@example.com", None)`` + Notes: + **Responsibilities:** + + - This function parses the ``From`` header and attempts to extract sender email address and optional human-readable display name + + Example: + Typical values: + + ``"John Doe <john@example.com>"`` -> ``("john@example.com", "John Doe")`` + ``"john@example.com"`` -> ``("john@example.com", None)`` """ from_header = headers.get("from") if not from_header: diff --git a/mail_intake/parsers/subject.py b/mail_intake/parsers/subject.py index 960a71f..1fdec7d 100644 --- a/mail_intake/parsers/subject.py +++ b/mail_intake/parsers/subject.py @@ -1,6 +1,10 @@ """ Subject line normalization utilities for Mail Intake. +--- + +## Summary + This module provides helper functions for normalizing email subject lines to enable reliable thread-level comparison and grouping. @@ -12,27 +16,34 @@ import re _PREFIX_RE = re.compile(r"^(re|fw|fwd)\s*:\s*", re.IGNORECASE) -"""Regular expression matching common reply/forward subject prefixes.""" +""" +Regular expression matching common reply/forward subject prefixes. +""" def normalize_subject(subject: str) -> str: """ Normalize an email subject for thread-level comparison. - Operations: - - Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:`` - - Repeats prefix stripping to handle stacked prefixes - - Collapses excessive whitespace - - Preserves original casing (no lowercasing) - - This function is intentionally conservative and avoids aggressive - transformations that could alter the semantic meaning of the subject. - Args: - subject: Raw subject line from a message header. + subject (str): + Raw subject line from a message header. Returns: - Normalized subject string suitable for thread grouping. + str: + Normalized subject string suitable for thread grouping. 
+ + Notes: + **Responsibilities:** + + - Strips common prefixes such as ``Re:``, ``Fwd:``, and ``FW:`` + - Repeats prefix stripping to handle stacked prefixes + - Collapses excessive whitespace + - Preserves original casing (no lowercasing) + + **Guarantees:** + + - This function is intentionally conservative and avoids aggressive transformations that could alter the semantic meaning of the subject """ if not subject: return ""